diff options
author | Alexander Dorokhine <adorokhine@google.com> | 2022-03-22 22:55:15 -0700 |
---|---|---|
committer | Tim Barron <tjbarron@google.com> | 2022-03-23 17:04:14 +0000 |
commit | 9ab600c39d0b5c87fc7dc4d8155d1efb535f1608 (patch) | |
tree | 93ed846d985900e348c166b14818348705d46ea9 | |
parent | 19600c2c36c5add7e7a792b7e4f742d45b3f871f (diff) | |
parent | c4f46ed536752b4c07f7696e65ff79c2d5086f3f (diff) | |
download | icing-9ab600c39d0b5c87fc7dc4d8155d1efb535f1608.tar.gz |
Merge remote-tracking branch 'goog/androidx-platform-dev' into tm-dev
* goog/androidx-platform-dev:
Sync from upstream.
Sync from upstream.
Sync from upstream.
Descriptions:
======================================================================
Add some additional logging that will help diagnose b/218413237
======================================================================
Mark VerbatimTokenizer::ResetToTokenStartingAfter as 'override'.
======================================================================
Support dump function for SchemaStore
======================================================================
Refactor DocumentStore::Initialize to improve readability of document store recovery.
======================================================================
Remove non-NDK API usages of ICU4C in libicing.
======================================================================
Move IcuDataFileHelper to the testing directory since it is a test-only util.
======================================================================
Support dump function for DocumentStore
======================================================================
Switch to use PRead rather than MMap in the proto log.
======================================================================
Support dump function for main/lite index and lexicon
======================================================================
Fix LiteIndex::AppendHits
======================================================================
Enable and fix DocumentStoreTest.LoadScoreCacheAndInitializeSuccessfully
======================================================================
Fix MainIndex::GetStorageInfo.
======================================================================
Fix icing-search-engine_fuzz_test by making IcuLanguageSegmenterIterator::Advance non-recursive.
======================================================================
Allow to return additional information for deleted documents in DeleteByQuery
======================================================================
Using enum class in Token::Type for better type safety.
======================================================================
Normalize Tokens by Token type when retrieving snippets
================
Rename max_window_bytes to max_window_utf32_length,
Delete the max_tokens_per_doc field in IcingSearchEngineOptions.
================
Handle suggestion namespace ownership.
================
Fix OkStatus() is not a valid argument to StatusOr in
Main_index.RetrieveMoreHits.
================
Allow advancing when current indices are negative in CharacterIterator
================
Adds support for verbatim tokenization and indexing in IcingLib
================
Renames TokenizerIterator Reset functions
================
Add term_match_type to SuggestionSpec proto
================
Unify the C++ proto enum style
================
Allow zero property weights in IcingLib
Bug: 152934343
Bug: 158089703
Bug: 185845269
Bug: 203700301
Bug: 204333391
Bug: 205209589
Bug: 206147728
Bug: 209071710
Bug: 209993976
Bug: 218413237
Bug: 218413237
Bug: 223549255
Test: Presubmit
Change-Id: I96665ba718f89e69ca99cd833ad80fa555edf436
90 files changed, 3432 insertions, 1355 deletions
diff --git a/icing/file/file-backed-bitmap.cc b/icing/file/file-backed-bitmap.cc index f1e568c..eec7668 100644 --- a/icing/file/file-backed-bitmap.cc +++ b/icing/file/file-backed-bitmap.cc @@ -50,7 +50,7 @@ FileBackedBitmap::Create(const Filesystem* filesystem, auto bitmap = std::unique_ptr<FileBackedBitmap>( new FileBackedBitmap(filesystem, file_path, mmap_strategy)); - // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR + // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR // that can support error logging. libtextclassifier3::Status status = bitmap->Initialize(); if (!status.ok()) { @@ -122,7 +122,7 @@ libtextclassifier3::Status FileBackedBitmap::FileBackedBitmap::Initialize() { << " of size: " << file_size; } - // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR + // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR // that can support error logging. libtextclassifier3::Status status = mmapper_->Remap(0, file_size); if (!status.ok()) { @@ -198,7 +198,7 @@ int FileBackedBitmap::NumBits() const { libtextclassifier3::Status FileBackedBitmap::Set(int bit_index, bool bit_value) { if (bit_index >= NumBits()) { - // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR + // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR // that can support error logging. libtextclassifier3::Status status = GrowTo(bit_index); if (!status.ok()) { @@ -261,7 +261,7 @@ libtextclassifier3::Status FileBackedBitmap::GrowTo(int new_num_bits) { file_path_.c_str(), new_file_size)); } - // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR + // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR // that can support error logging. 
libtextclassifier3::Status status = mmapper_->Remap(0, new_file_size); if (!status.ok()) { @@ -281,7 +281,7 @@ libtextclassifier3::Status FileBackedBitmap::TruncateTo(int new_num_bits) { } const size_t new_file_size = FileSizeForBits(new_num_bits); - // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR + // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR // that can support error logging. libtextclassifier3::Status status = mmapper_->Remap(0, new_file_size); if (!status.ok()) { diff --git a/icing/file/filesystem.h b/icing/file/filesystem.h index ca8c4a8..dd2c5d1 100644 --- a/icing/file/filesystem.h +++ b/icing/file/filesystem.h @@ -233,6 +233,11 @@ class Filesystem { // Increments to_increment by size if size is valid, or sets to_increment // to kBadFileSize if either size or to_increment is kBadFileSize. static void IncrementByOrSetInvalid(int64_t size, int64_t* to_increment); + + // Return -1 if file_size is invalid. Otherwise, return file_size. + static int64_t SanitizeFileSize(int64_t file_size) { + return (file_size != kBadFileSize) ? file_size : -1; + } }; // LINT.ThenChange(//depot/google3/icing/file/mock-filesystem.h) diff --git a/icing/file/portable-file-backed-proto-log.h b/icing/file/portable-file-backed-proto-log.h index f676dc5..409ab96 100644 --- a/icing/file/portable-file-backed-proto-log.h +++ b/icing/file/portable-file-backed-proto-log.h @@ -124,6 +124,8 @@ class PortableFileBackedProtoLog { public: static constexpr int32_t kMagic = 0xf4c6f67a; + // We should go directly from 0 to 2 the next time we have to change the + // format. static constexpr int32_t kFileFormatVersion = 0; uint32_t CalculateHeaderChecksum() const { @@ -282,7 +284,7 @@ class PortableFileBackedProtoLog { // before updating our checksum. 
bool recalculated_checksum = false; - bool has_data_loss() { + bool has_data_loss() const { return data_loss == DataLoss::PARTIAL || data_loss == DataLoss::COMPLETE; } }; @@ -376,8 +378,7 @@ class PortableFileBackedProtoLog { // } class Iterator { public: - Iterator(const Filesystem& filesystem, const std::string& file_path, - int64_t initial_offset); + Iterator(const Filesystem& filesystem, int fd, int64_t initial_offset); // Advances to the position of next proto whether it has been erased or not. // @@ -393,11 +394,12 @@ class PortableFileBackedProtoLog { private: static constexpr int64_t kInvalidOffset = -1; // Used to read proto metadata - MemoryMappedFile mmapped_file_; // Offset of first proto + const Filesystem* const filesystem_; int64_t initial_offset_; int64_t current_offset_; int64_t file_size_; + int fd_; }; // Returns an iterator of current proto log. The caller needs to keep the @@ -513,7 +515,7 @@ class PortableFileBackedProtoLog { const Filesystem* filesystem, const std::string& file_path, Crc32 initial_crc, int64_t start, int64_t end); - // Reads out the metadata of a proto located at file_offset from the file. + // Reads out the metadata of a proto located at file_offset from the fd. // Metadata will be returned in host byte order endianness. // // Returns: @@ -521,7 +523,8 @@ class PortableFileBackedProtoLog { // OUT_OF_RANGE_ERROR if file_offset exceeds file_size // INTERNAL_ERROR if the metadata is invalid or any IO errors happen static libtextclassifier3::StatusOr<int32_t> ReadProtoMetadata( - MemoryMappedFile* mmapped_file, int64_t file_offset, int64_t file_size); + const Filesystem* const filesystem, int fd, int64_t file_offset, + int64_t file_size); // Writes metadata of a proto to the fd. Takes in a host byte order endianness // metadata and converts it into a portable metadata before writing. 
@@ -937,35 +940,37 @@ template <typename ProtoT> libtextclassifier3::StatusOr<ProtoT> PortableFileBackedProtoLog<ProtoT>::ReadProto(int64_t file_offset) const { int64_t file_size = filesystem_->GetFileSize(fd_.get()); - MemoryMappedFile mmapped_file(*filesystem_, file_path_, - MemoryMappedFile::Strategy::READ_ONLY); - if (file_offset >= file_size) { - // file_size points to the next byte to write at, so subtract one to get - // the inclusive, actual size of file. - return absl_ports::OutOfRangeError( - IcingStringUtil::StringPrintf("Trying to read from a location, %lld, " - "out of range of the file size, %lld", - static_cast<long long>(file_offset), - static_cast<long long>(file_size - 1))); - } - // Read out the metadata + if (file_size == Filesystem::kBadFileSize) { + return absl_ports::OutOfRangeError("Unable to correctly read size."); + } ICING_ASSIGN_OR_RETURN( int32_t metadata, - ReadProtoMetadata(&mmapped_file, file_offset, file_size)); + ReadProtoMetadata(filesystem_, fd_.get(), file_offset, file_size)); // Copy out however many bytes it says the proto is int stored_size = GetProtoSize(metadata); + file_offset += sizeof(metadata); - ICING_RETURN_IF_ERROR( - mmapped_file.Remap(file_offset + sizeof(metadata), stored_size)); + // Read the compressed proto out. 
+ if (file_offset + stored_size > file_size) { + return absl_ports::OutOfRangeError( + IcingStringUtil::StringPrintf("Trying to read from a location, %lld, " + "out of range of the file size, %lld", + static_cast<long long>(file_offset), + static_cast<long long>(file_size - 1))); + } + auto buf = std::make_unique<char[]>(stored_size); + if (!filesystem_->PRead(fd_.get(), buf.get(), stored_size, file_offset)) { + return absl_ports::InternalError(""); + } - if (IsEmptyBuffer(mmapped_file.region(), mmapped_file.region_size())) { + if (IsEmptyBuffer(buf.get(), stored_size)) { return absl_ports::NotFoundError("The proto data has been erased."); } - google::protobuf::io::ArrayInputStream proto_stream( - mmapped_file.mutable_region(), stored_size); + google::protobuf::io::ArrayInputStream proto_stream(buf.get(), + stored_size); // Deserialize proto ProtoT proto; @@ -983,33 +988,29 @@ template <typename ProtoT> libtextclassifier3::Status PortableFileBackedProtoLog<ProtoT>::EraseProto( int64_t file_offset) { int64_t file_size = filesystem_->GetFileSize(fd_.get()); - if (file_offset >= file_size) { - // file_size points to the next byte to write at, so subtract one to get - // the inclusive, actual size of file. 
- return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf( - "Trying to erase data at a location, %lld, " - "out of range of the file size, %lld", - static_cast<long long>(file_offset), - static_cast<long long>(file_size - 1))); + if (file_size == Filesystem::kBadFileSize) { + return absl_ports::OutOfRangeError("Unable to correctly read size."); } - MemoryMappedFile mmapped_file( - *filesystem_, file_path_, - MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC); - - // Read out the metadata ICING_ASSIGN_OR_RETURN( int32_t metadata, - ReadProtoMetadata(&mmapped_file, file_offset, file_size)); - - ICING_RETURN_IF_ERROR(mmapped_file.Remap(file_offset + sizeof(metadata), - GetProtoSize(metadata))); + ReadProtoMetadata(filesystem_, fd_.get(), file_offset, file_size)); + // Copy out however many bytes it says the proto is + int stored_size = GetProtoSize(metadata); + file_offset += sizeof(metadata); + if (file_offset + stored_size > file_size) { + return absl_ports::OutOfRangeError( + IcingStringUtil::StringPrintf("Trying to read from a location, %lld, " + "out of range of the file size, %lld", + static_cast<long long>(file_offset), + static_cast<long long>(file_size - 1))); + } + auto buf = std::make_unique<char[]>(stored_size); // We need to update the crc checksum if the erased area is before the // rewind position. int32_t new_crc; - int64_t erased_proto_offset = file_offset + sizeof(metadata); - if (erased_proto_offset < header_->GetRewindOffset()) { + if (file_offset < header_->GetRewindOffset()) { // Set to "dirty" before we start writing anything. header_->SetDirtyFlag(true); header_->SetHeaderChecksum(header_->CalculateHeaderChecksum()); @@ -1022,24 +1023,30 @@ libtextclassifier3::Status PortableFileBackedProtoLog<ProtoT>::EraseProto( // We need to calculate [original string xor 0s]. // The xored string is the same as the original string because 0 xor 0 = // 0, 1 xor 0 = 1. 
- const std::string_view xored_str(mmapped_file.region(), - mmapped_file.region_size()); + // Read the compressed proto out. + if (!filesystem_->PRead(fd_.get(), buf.get(), stored_size, file_offset)) { + return absl_ports::InternalError(""); + } + const std::string_view xored_str(buf.get(), stored_size); Crc32 crc(header_->GetLogChecksum()); ICING_ASSIGN_OR_RETURN( - new_crc, crc.UpdateWithXor( - xored_str, - /*full_data_size=*/header_->GetRewindOffset() - - kHeaderReservedBytes, - /*position=*/erased_proto_offset - kHeaderReservedBytes)); + new_crc, + crc.UpdateWithXor(xored_str, + /*full_data_size=*/header_->GetRewindOffset() - + kHeaderReservedBytes, + /*position=*/file_offset - kHeaderReservedBytes)); } // Clear the region. - memset(mmapped_file.mutable_region(), '\0', mmapped_file.region_size()); + memset(buf.get(), '\0', stored_size); + if (!filesystem_->PWrite(fd_.get(), file_offset, buf.get(), stored_size)) { + return absl_ports::InternalError(""); + } // If we cleared something in our checksummed area, we should update our // checksum and reset our dirty bit. 
- if (erased_proto_offset < header_->GetRewindOffset()) { + if (file_offset < header_->GetRewindOffset()) { header_->SetDirtyFlag(false); header_->SetLogChecksum(new_crc); header_->SetHeaderChecksum(header_->CalculateHeaderChecksum()); @@ -1077,13 +1084,12 @@ PortableFileBackedProtoLog<ProtoT>::GetElementsFileSize() const { template <typename ProtoT> PortableFileBackedProtoLog<ProtoT>::Iterator::Iterator( - const Filesystem& filesystem, const std::string& file_path, - int64_t initial_offset) - : mmapped_file_(filesystem, file_path, - MemoryMappedFile::Strategy::READ_ONLY), + const Filesystem& filesystem, int fd, int64_t initial_offset) + : filesystem_(&filesystem), initial_offset_(initial_offset), current_offset_(kInvalidOffset), - file_size_(filesystem.GetFileSize(file_path.c_str())) { + fd_(fd) { + file_size_ = filesystem_->GetFileSize(fd_); if (file_size_ == Filesystem::kBadFileSize) { // Fails all Advance() calls file_size_ = 0; @@ -1100,7 +1106,7 @@ PortableFileBackedProtoLog<ProtoT>::Iterator::Advance() { // Jumps to the next proto position ICING_ASSIGN_OR_RETURN( int32_t metadata, - ReadProtoMetadata(&mmapped_file_, current_offset_, file_size_)); + ReadProtoMetadata(filesystem_, fd_, current_offset_, file_size_)); current_offset_ += sizeof(metadata) + GetProtoSize(metadata); } @@ -1122,14 +1128,15 @@ int64_t PortableFileBackedProtoLog<ProtoT>::Iterator::GetOffset() { template <typename ProtoT> typename PortableFileBackedProtoLog<ProtoT>::Iterator PortableFileBackedProtoLog<ProtoT>::GetIterator() { - return Iterator(*filesystem_, file_path_, + return Iterator(*filesystem_, fd_.get(), /*initial_offset=*/kHeaderReservedBytes); } template <typename ProtoT> libtextclassifier3::StatusOr<int32_t> PortableFileBackedProtoLog<ProtoT>::ReadProtoMetadata( - MemoryMappedFile* mmapped_file, int64_t file_offset, int64_t file_size) { + const Filesystem* const filesystem, int fd, int64_t file_offset, + int64_t file_size) { // Checks file_offset if (file_offset >= file_size) 
{ return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf( @@ -1147,9 +1154,9 @@ PortableFileBackedProtoLog<ProtoT>::ReadProtoMetadata( static_cast<long long>(file_size))); } - // Reads metadata - ICING_RETURN_IF_ERROR(mmapped_file->Remap(file_offset, metadata_size)); - memcpy(&portable_metadata, mmapped_file->region(), metadata_size); + if (!filesystem->PRead(fd, &portable_metadata, metadata_size, file_offset)) { + return absl_ports::InternalError(""); + } // Need to switch it back to host order endianness after reading from disk. int32_t host_order_metadata = GNetworkToHostL(portable_metadata); diff --git a/icing/file/portable-file-backed-proto-log_benchmark.cc b/icing/file/portable-file-backed-proto-log_benchmark.cc index f83ccd6..80a8011 100644 --- a/icing/file/portable-file-backed-proto-log_benchmark.cc +++ b/icing/file/portable-file-backed-proto-log_benchmark.cc @@ -55,7 +55,7 @@ namespace lib { namespace { -static void BM_Write(benchmark::State& state) { +void BM_Write(benchmark::State& state) { const Filesystem filesystem; int string_length = state.range(0); const std::string file_path = IcingStringUtil::StringPrintf( @@ -108,7 +108,7 @@ BENCHMARK(BM_Write) // 16MiB, and we need some extra space for the // rest of the document properties -static void BM_Read(benchmark::State& state) { +void BM_Read(benchmark::State& state) { const Filesystem filesystem; int string_length = state.range(0); const std::string file_path = IcingStringUtil::StringPrintf( @@ -164,7 +164,7 @@ BENCHMARK(BM_Read) // 16MiB, and we need some extra space for the // rest of the document properties // -static void BM_Erase(benchmark::State& state) { +void BM_Erase(benchmark::State& state) { const Filesystem filesystem; const std::string file_path = IcingStringUtil::StringPrintf( "%s%s", GetTestTempDir().c_str(), "/proto.log"); @@ -204,7 +204,7 @@ static void BM_Erase(benchmark::State& state) { } BENCHMARK(BM_Erase); -static void BM_ComputeChecksum(benchmark::State& state) { +void 
BM_ComputeChecksum(benchmark::State& state) { const Filesystem filesystem; const std::string file_path = GetTestTempDir() + "/proto.log"; int max_proto_size = (1 << 24) - 1; // 16 MiB @@ -246,7 +246,7 @@ static void BM_ComputeChecksum(benchmark::State& state) { } BENCHMARK(BM_ComputeChecksum)->Range(1024, 1 << 20); -static void BM_ComputeChecksumWithCachedChecksum(benchmark::State& state) { +void BM_ComputeChecksumWithCachedChecksum(benchmark::State& state) { const Filesystem filesystem; const std::string file_path = GetTestTempDir() + "/proto.log"; int max_proto_size = (1 << 24) - 1; // 16 MiB @@ -290,7 +290,7 @@ static void BM_ComputeChecksumWithCachedChecksum(benchmark::State& state) { } BENCHMARK(BM_ComputeChecksumWithCachedChecksum); -static void BM_ComputeChecksumOnlyForTail(benchmark::State& state) { +void BM_ComputeChecksumOnlyForTail(benchmark::State& state) { const Filesystem filesystem; const std::string file_path = GetTestTempDir() + "/proto.log"; int max_proto_size = (1 << 24) - 1; // 16 MiB diff --git a/icing/file/portable-file-backed-proto-log_test.cc b/icing/file/portable-file-backed-proto-log_test.cc index b5fee4b..795271a 100644 --- a/icing/file/portable-file-backed-proto-log_test.cc +++ b/icing/file/portable-file-backed-proto-log_test.cc @@ -851,11 +851,12 @@ TEST_F(PortableFileBackedProtoLogTest, Iterator) { { // Iterator with bad filesystem + ScopedFd sfd(filesystem_.OpenForRead(file_path_.c_str())); MockFilesystem mock_filesystem; - ON_CALL(mock_filesystem, GetFileSize(A<const char*>())) + ON_CALL(mock_filesystem, GetFileSize(A<int>())) .WillByDefault(Return(Filesystem::kBadFileSize)); PortableFileBackedProtoLog<DocumentProto>::Iterator bad_iterator( - mock_filesystem, file_path_, /*initial_offset=*/0); + mock_filesystem, sfd.get(), /*initial_offset=*/0); ASSERT_THAT(bad_iterator.Advance(), StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE)); } diff --git a/icing/icing-search-engine-with-icu-file_test.cc 
b/icing/icing-search-engine-with-icu-file_test.cc index 48e81e5..1012b47 100644 --- a/icing/icing-search-engine-with-icu-file_test.cc +++ b/icing/icing-search-engine-with-icu-file_test.cc @@ -37,13 +37,13 @@ namespace { using ::icing::lib::portable_equals_proto::EqualsProto; using ::testing::Eq; -constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REQUIRED = - PropertyConfigProto_Cardinality_Code_REQUIRED; +constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_REQUIRED = + PropertyConfigProto::Cardinality::REQUIRED; -constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN = - StringIndexingConfig_TokenizerType_Code_PLAIN; +constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_PLAIN = + StringIndexingConfig::TokenizerType::PLAIN; -constexpr TermMatchType_Code MATCH_PREFIX = TermMatchType_Code_PREFIX; +constexpr TermMatchType::Code MATCH_PREFIX = TermMatchType::PREFIX; std::string GetTestBaseDir() { return GetTestTempDir() + "/icing_with_icu_files"; diff --git a/icing/icing-search-engine.cc b/icing/icing-search-engine.cc index 9aa833b..952ba21 100644 --- a/icing/icing-search-engine.cc +++ b/icing/icing-search-engine.cc @@ -18,6 +18,7 @@ #include <memory> #include <string> #include <string_view> +#include <unordered_map> #include <utility> #include <vector> @@ -59,6 +60,7 @@ #include "icing/scoring/scoring-processor.h" #include "icing/store/document-id.h" #include "icing/store/document-store.h" +#include "icing/store/namespace-checker-impl.h" #include "icing/tokenization/language-segmenter-factory.h" #include "icing/tokenization/language-segmenter.h" #include "icing/transform/normalizer-factory.h" @@ -87,17 +89,22 @@ constexpr std::string_view kOptimizeStatusFilename = "optimize_status"; // fresh state. 
constexpr int kMaxUnsuccessfulInitAttempts = 5; -libtextclassifier3::Status ValidateOptions( - const IcingSearchEngineOptions& options) { - // These options are only used in IndexProcessor, which won't be created - // until the first Put call. So they must be checked here, so that any - // errors can be surfaced in Initialize. - if (options.max_tokens_per_doc() <= 0) { - return absl_ports::InvalidArgumentError( - "Options::max_tokens_per_doc must be greater than zero."); +// A pair that holds namespace and type. +struct NamespaceTypePair { + std::string namespace_; + std::string type; + + bool operator==(const NamespaceTypePair& other) const { + return namespace_ == other.namespace_ && type == other.type; } - return libtextclassifier3::Status::OK; -} +}; + +struct NamespaceTypePairHasher { + std::size_t operator()(const NamespaceTypePair& pair) const { + return std::hash<std::string>()(pair.namespace_) ^ + std::hash<std::string>()(pair.type); + } +}; libtextclassifier3::Status ValidateResultSpec( const ResultSpecProto& result_spec) { @@ -142,6 +149,11 @@ libtextclassifier3::Status ValidateSuggestionSpec( return absl_ports::InvalidArgumentError( absl_ports::StrCat("SuggestionSpecProto.prefix is empty!")); } + if (suggestion_spec.scoring_spec().scoring_match_type() == + TermMatchType::UNKNOWN) { + return absl_ports::InvalidArgumentError( + absl_ports::StrCat("SuggestionSpecProto.term_match_type is unknown!")); + } if (suggestion_spec.num_to_return() <= 0) { return absl_ports::InvalidArgumentError(absl_ports::StrCat( "SuggestionSpecProto.num_to_return must be positive.")); @@ -261,6 +273,28 @@ void TransformStatus(const libtextclassifier3::Status& internal_status, status_proto->set_message(internal_status.error_message()); } +libtextclassifier3::Status RetrieveAndAddDocumentInfo( + const DocumentStore* document_store, DeleteByQueryResultProto& result_proto, + std::unordered_map<NamespaceTypePair, + DeleteByQueryResultProto::DocumentGroupInfo*, + 
NamespaceTypePairHasher>& info_map, + DocumentId document_id) { + ICING_ASSIGN_OR_RETURN(DocumentProto document, + document_store->Get(document_id)); + NamespaceTypePair key = {document.namespace_(), document.schema()}; + auto iter = info_map.find(key); + if (iter == info_map.end()) { + auto entry = result_proto.add_deleted_documents(); + entry->set_namespace_(std::move(document.namespace_())); + entry->set_schema(std::move(document.schema())); + entry->add_uris(std::move(document.uri())); + info_map[key] = entry; + } else { + iter->second->add_uris(std::move(document.uri())); + } + return libtextclassifier3::Status::OK; +} + } // namespace IcingSearchEngine::IcingSearchEngine(const IcingSearchEngineOptions& options, @@ -399,7 +433,6 @@ InitializeResultProto IcingSearchEngine::InternalInitialize() { libtextclassifier3::Status IcingSearchEngine::InitializeMembers( InitializeStatsProto* initialize_stats) { ICING_RETURN_ERROR_IF_NULL(initialize_stats); - ICING_RETURN_IF_ERROR(ValidateOptions(options_)); // Make sure the base directory exists if (!filesystem_->CreateDirectoryRecursively(options_.base_dir().c_str())) { @@ -450,8 +483,6 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers( // last tried to set the schema. ICING_RETURN_IF_ERROR(InitializeDocumentStore( /*force_recovery_and_revalidate_documents=*/true, initialize_stats)); - initialize_stats->set_document_store_recovery_cause( - InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC); // We're going to need to build the index from scratch. So just delete its // files now. @@ -941,7 +972,7 @@ DeleteResultProto IcingSearchEngine::Delete(const std::string_view name_space, delete_stats->set_delete_type(DeleteStatsProto::DeleteType::SINGLE); std::unique_ptr<Timer> delete_timer = clock_->GetNewTimer(); - // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR + // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR // that can support error logging. 
libtextclassifier3::Status status = document_store_->Delete(name_space, uri); if (!status.ok()) { @@ -975,7 +1006,7 @@ DeleteByNamespaceResultProto IcingSearchEngine::DeleteByNamespace( delete_stats->set_delete_type(DeleteStatsProto::DeleteType::NAMESPACE); std::unique_ptr<Timer> delete_timer = clock_->GetNewTimer(); - // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR + // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR // that can support error logging. DocumentStore::DeleteByGroupResult doc_store_result = document_store_->DeleteByNamespace(name_space); @@ -1009,7 +1040,7 @@ DeleteBySchemaTypeResultProto IcingSearchEngine::DeleteBySchemaType( delete_stats->set_delete_type(DeleteStatsProto::DeleteType::SCHEMA_TYPE); std::unique_ptr<Timer> delete_timer = clock_->GetNewTimer(); - // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR + // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR // that can support error logging. DocumentStore::DeleteByGroupResult doc_store_result = document_store_->DeleteBySchemaType(schema_type); @@ -1027,7 +1058,7 @@ DeleteBySchemaTypeResultProto IcingSearchEngine::DeleteBySchemaType( } DeleteByQueryResultProto IcingSearchEngine::DeleteByQuery( - const SearchSpecProto& search_spec) { + const SearchSpecProto& search_spec, bool return_deleted_document_info) { ICING_VLOG(1) << "Deleting documents for query " << search_spec.query() << " from doc store"; @@ -1081,12 +1112,27 @@ DeleteByQueryResultProto IcingSearchEngine::DeleteByQuery( ICING_VLOG(2) << "Deleting the docs that matched the query."; int num_deleted = 0; + // A map used to group deleted documents. + // From the (namespace, type) pair to a list of uris. 
+ std::unordered_map<NamespaceTypePair, + DeleteByQueryResultProto::DocumentGroupInfo*, + NamespaceTypePairHasher> + deleted_info_map; component_timer = clock_->GetNewTimer(); while (query_results.root_iterator->Advance().ok()) { ICING_VLOG(3) << "Deleting doc " << query_results.root_iterator->doc_hit_info().document_id(); ++num_deleted; + if (return_deleted_document_info) { + status = RetrieveAndAddDocumentInfo( + document_store_.get(), result_proto, deleted_info_map, + query_results.root_iterator->doc_hit_info().document_id()); + if (!status.ok()) { + TransformStatus(status, result_status); + return result_proto; + } + } status = document_store_->Delete( query_results.root_iterator->doc_hit_info().document_id()); if (!status.ok()) { @@ -1155,12 +1201,8 @@ OptimizeResultProto IcingSearchEngine::Optimize() { std::unique_ptr<Timer> optimize_timer = clock_->GetNewTimer(); OptimizeStatsProto* optimize_stats = result_proto.mutable_optimize_stats(); int64_t before_size = filesystem_->GetDiskUsage(options_.base_dir().c_str()); - if (before_size != Filesystem::kBadFileSize) { - optimize_stats->set_storage_size_before(before_size); - } else { - // Set -1 as a sentinel value when failures occur. - optimize_stats->set_storage_size_before(-1); - } + optimize_stats->set_storage_size_before( + Filesystem::SanitizeFileSize(before_size)); // Flushes data to disk before doing optimization auto status = InternalPersistToDisk(PersistType::FULL); @@ -1237,12 +1279,8 @@ OptimizeResultProto IcingSearchEngine::Optimize() { optimize_status_file.Write(std::move(optimize_status)); int64_t after_size = filesystem_->GetDiskUsage(options_.base_dir().c_str()); - if (after_size != Filesystem::kBadFileSize) { - optimize_stats->set_storage_size_after(after_size); - } else { - // Set -1 as a sentinel value when failures occur. 
- optimize_stats->set_storage_size_after(-1); - } + optimize_stats->set_storage_size_after( + Filesystem::SanitizeFileSize(after_size)); optimize_stats->set_latency_ms(optimize_timer->GetElapsedMilliseconds()); TransformStatus(optimization_status, result_status); @@ -1324,11 +1362,8 @@ StorageInfoResultProto IcingSearchEngine::GetStorageInfo() { } int64_t index_size = filesystem_->GetDiskUsage(options_.base_dir().c_str()); - if (index_size != Filesystem::kBadFileSize) { - result.mutable_storage_info()->set_total_storage_size(index_size); - } else { - result.mutable_storage_info()->set_total_storage_size(-1); - } + result.mutable_storage_info()->set_total_storage_size( + Filesystem::SanitizeFileSize(index_size)); *result.mutable_storage_info()->mutable_document_storage_info() = document_store_->GetStorageInfo(); *result.mutable_storage_info()->mutable_schema_store_storage_info() = @@ -1875,19 +1910,22 @@ SuggestionResponse IcingSearchEngine::SearchSuggestions( std::unique_ptr<SuggestionProcessor> suggestion_processor = std::move(suggestion_processor_or).ValueOrDie(); - std::vector<NamespaceId> namespace_ids; + std::unordered_set<NamespaceId> namespace_ids; namespace_ids.reserve(suggestion_spec.namespace_filters_size()); for (std::string_view name_space : suggestion_spec.namespace_filters()) { auto namespace_id_or = document_store_->GetNamespaceId(name_space); if (!namespace_id_or.ok()) { continue; } - namespace_ids.push_back(namespace_id_or.ValueOrDie()); + namespace_ids.insert(namespace_id_or.ValueOrDie()); } // Run suggestion based on given SuggestionSpec. 
+ NamespaceCheckerImpl namespace_checker_impl(document_store_.get(), + std::move(namespace_ids)); libtextclassifier3::StatusOr<std::vector<TermMetadata>> terms_or = - suggestion_processor->QuerySuggestions(suggestion_spec, namespace_ids); + suggestion_processor->QuerySuggestions(suggestion_spec, + &namespace_checker_impl); if (!terms_or.ok()) { TransformStatus(terms_or.status(), response_status); return response; diff --git a/icing/icing-search-engine.h b/icing/icing-search-engine.h index 0a79714..ff9c7fb 100644 --- a/icing/icing-search-engine.h +++ b/icing/icing-search-engine.h @@ -280,8 +280,9 @@ class IcingSearchEngine { // NOT_FOUND if the query doesn't match any documents // FAILED_PRECONDITION IcingSearchEngine has not been initialized yet // INTERNAL_ERROR on IO error - DeleteByQueryResultProto DeleteByQuery(const SearchSpecProto& search_spec) - ICING_LOCKS_EXCLUDED(mutex_); + DeleteByQueryResultProto DeleteByQuery( + const SearchSpecProto& search_spec, + bool return_deleted_document_info = false) ICING_LOCKS_EXCLUDED(mutex_); // Retrieves, scores, ranks, and returns the results according to the specs. // Results can be empty. 
If there're multiple pages of results, diff --git a/icing/icing-search-engine_fuzz_test.cc b/icing/icing-search-engine_fuzz_test.cc index 2d07e37..bf486da 100644 --- a/icing/icing-search-engine_fuzz_test.cc +++ b/icing/icing-search-engine_fuzz_test.cc @@ -18,12 +18,12 @@ #include "icing/text_classifier/lib3/utils/base/status.h" #include "icing/text_classifier/lib3/utils/base/statusor.h" #include "icing/document-builder.h" -#include "icing/helpers/icu/icu-data-file-helper.h" #include "icing/icing-search-engine.h" #include "icing/proto/document.pb.h" #include "icing/proto/initialize.pb.h" #include "icing/proto/scoring.pb.h" #include "icing/schema-builder.h" +#include "icing/testing/icu-data-file-helper.h" #include "icing/testing/test-data.h" #include "icing/testing/tmp-directory.h" @@ -31,13 +31,13 @@ namespace icing { namespace lib { namespace { -constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REQUIRED = - PropertyConfigProto_Cardinality_Code_REQUIRED; +constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_REQUIRED = + PropertyConfigProto::Cardinality::REQUIRED; -constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN = - StringIndexingConfig_TokenizerType_Code_PLAIN; +constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_PLAIN = + StringIndexingConfig::TokenizerType::PLAIN; -constexpr TermMatchType_Code MATCH_PREFIX = TermMatchType_Code_PREFIX; +constexpr TermMatchType::Code MATCH_PREFIX = TermMatchType::PREFIX; IcingSearchEngineOptions Setup() { IcingSearchEngineOptions icing_options; diff --git a/icing/icing-search-engine_test.cc b/icing/icing-search-engine_test.cc index b5206cd..7ed8885 100644 --- a/icing/icing-search-engine_test.cc +++ b/icing/icing-search-engine_test.cc @@ -27,7 +27,6 @@ #include "icing/document-builder.h" #include "icing/file/filesystem.h" #include "icing/file/mock-filesystem.h" -#include "icing/helpers/icu/icu-data-file-helper.h" #include "icing/legacy/index/icing-mock-filesystem.h" #include 
"icing/portable/endian.h" #include "icing/portable/equals-proto.h" @@ -46,6 +45,7 @@ #include "icing/store/document-log-creator.h" #include "icing/testing/common-matchers.h" #include "icing/testing/fake-clock.h" +#include "icing/testing/icu-data-file-helper.h" #include "icing/testing/jni-test-helpers.h" #include "icing/testing/random-string.h" #include "icing/testing/snippet-helpers.h" @@ -90,24 +90,24 @@ constexpr std::string_view kIpsumText = "vehicula posuere vitae, convallis eu lorem. Donec semper augue eu nibh " "placerat semper."; -constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL = - PropertyConfigProto_Cardinality_Code_OPTIONAL; -constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REQUIRED = - PropertyConfigProto_Cardinality_Code_REQUIRED; -constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REPEATED = - PropertyConfigProto_Cardinality_Code_REPEATED; +constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_OPTIONAL = + PropertyConfigProto::Cardinality::OPTIONAL; +constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_REQUIRED = + PropertyConfigProto::Cardinality::REQUIRED; +constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_REPEATED = + PropertyConfigProto::Cardinality::REPEATED; -constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN = - StringIndexingConfig_TokenizerType_Code_PLAIN; -constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_NONE = - StringIndexingConfig_TokenizerType_Code_NONE; +constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_PLAIN = + StringIndexingConfig::TokenizerType::PLAIN; +constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_NONE = + StringIndexingConfig::TokenizerType::NONE; #ifndef ICING_JNI_TEST -constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY; +constexpr TermMatchType::Code MATCH_EXACT = TermMatchType::EXACT_ONLY; #endif // !ICING_JNI_TEST -constexpr TermMatchType_Code MATCH_PREFIX = TermMatchType_Code_PREFIX; 
-constexpr TermMatchType_Code MATCH_NONE = TermMatchType_Code_UNKNOWN; +constexpr TermMatchType::Code MATCH_PREFIX = TermMatchType::PREFIX; +constexpr TermMatchType::Code MATCH_NONE = TermMatchType::UNKNOWN; PortableFileBackedProtoLog<DocumentWrapper>::Header ReadDocumentLogHeader( Filesystem filesystem, const std::string& file_path) { @@ -362,36 +362,6 @@ TEST_F(IcingSearchEngineTest, GoodIndexMergeSizeReturnsOk) { EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); } -TEST_F(IcingSearchEngineTest, - NegativeMaxTokensPerDocSizeReturnsInvalidArgument) { - IcingSearchEngineOptions options = GetDefaultIcingOptions(); - options.set_max_tokens_per_doc(-1); - IcingSearchEngine icing(options, GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), - ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); -} - -TEST_F(IcingSearchEngineTest, ZeroMaxTokensPerDocSizeReturnsInvalidArgument) { - IcingSearchEngineOptions options = GetDefaultIcingOptions(); - options.set_max_tokens_per_doc(0); - IcingSearchEngine icing(options, GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), - ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); -} - -TEST_F(IcingSearchEngineTest, GoodMaxTokensPerDocSizeReturnsOk) { - IcingSearchEngineOptions options = GetDefaultIcingOptions(); - // INT_MAX is valid - it just means that we shouldn't limit the number of - // tokens per document. It would be pretty inconceivable that anyone would - // produce such a document - the text being indexed alone would take up at - // least ~4.3 GiB! - and the document would be rejected before indexing - // for exceeding max_document_size, but there's no reason to explicitly - // bar it. 
- options.set_max_tokens_per_doc(std::numeric_limits<int32_t>::max()); - IcingSearchEngine icing(options, GetTestJniCache()); - EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); -} - TEST_F(IcingSearchEngineTest, NegativeMaxTokenLenReturnsInvalidArgument) { IcingSearchEngineOptions options = GetDefaultIcingOptions(); options.set_max_token_length(-1); @@ -2198,7 +2168,7 @@ TEST_F(IcingSearchEngineTest, SearchReturnsValidResults) { search_spec.set_query("message"); ResultSpecProto result_spec; - result_spec.mutable_snippet_spec()->set_max_window_bytes(64); + result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64); result_spec.mutable_snippet_spec()->set_num_matches_per_property(1); result_spec.mutable_snippet_spec()->set_num_to_snippet(1); @@ -2616,7 +2586,7 @@ TEST_F(IcingSearchEngineTest, ShouldReturnMultiplePagesWithSnippets) { ResultSpecProto result_spec; result_spec.set_num_per_page(2); - result_spec.mutable_snippet_spec()->set_max_window_bytes(64); + result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64); result_spec.mutable_snippet_spec()->set_num_matches_per_property(1); result_spec.mutable_snippet_spec()->set_num_to_snippet(3); @@ -3523,6 +3493,105 @@ TEST_F(IcingSearchEngineTest, DeleteByQuery) { expected_search_result_proto)); } +TEST_F(IcingSearchEngineTest, DeleteByQueryReturnInfo) { + DocumentProto document1 = + DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("Message") + .AddStringProperty("body", "message body1") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document2 = + DocumentBuilder() + .SetKey("namespace2", "uri2") + .SetSchema("Message") + .AddStringProperty("body", "message body2") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + DocumentProto document3 = + DocumentBuilder() + .SetKey("namespace2", "uri3") + .SetSchema("Message") + .AddStringProperty("body", "message body3") + .SetCreationTimestampMs(kDefaultCreationTimestampMs) + .Build(); + 
+ auto fake_clock = std::make_unique<FakeClock>(); + fake_clock->SetTimerElapsedMilliseconds(7); + TestIcingSearchEngine icing(GetDefaultIcingOptions(), + std::make_unique<Filesystem>(), + std::make_unique<IcingFilesystem>(), + std::move(fake_clock), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); + + GetResultProto expected_get_result_proto; + expected_get_result_proto.mutable_status()->set_code(StatusProto::OK); + *expected_get_result_proto.mutable_document() = document1; + EXPECT_THAT( + icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + + *expected_get_result_proto.mutable_document() = document2; + EXPECT_THAT( + icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + + *expected_get_result_proto.mutable_document() = document3; + EXPECT_THAT( + icing.Get("namespace2", "uri3", GetResultSpecProto::default_instance()), + EqualsProto(expected_get_result_proto)); + + // Delete all docs to test the information is correctly grouped. 
+ SearchSpecProto search_spec; + search_spec.set_query("message"); + search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); + DeleteByQueryResultProto result_proto = + icing.DeleteByQuery(search_spec, true); + EXPECT_THAT(result_proto.status(), ProtoIsOk()); + DeleteByQueryStatsProto exp_stats; + exp_stats.set_latency_ms(7); + exp_stats.set_num_documents_deleted(3); + exp_stats.set_query_length(search_spec.query().length()); + exp_stats.set_num_terms(1); + exp_stats.set_num_namespaces_filtered(0); + exp_stats.set_num_schema_types_filtered(0); + exp_stats.set_parse_query_latency_ms(7); + exp_stats.set_document_removal_latency_ms(7); + EXPECT_THAT(result_proto.delete_by_query_stats(), EqualsProto(exp_stats)); + + // Check that DeleteByQuery can return information for deleted documents. + DeleteByQueryResultProto::DocumentGroupInfo info1, info2; + info1.set_namespace_("namespace1"); + info1.set_schema("Message"); + info1.add_uris("uri1"); + info2.set_namespace_("namespace2"); + info2.set_schema("Message"); + info2.add_uris("uri3"); + info2.add_uris("uri2"); + EXPECT_THAT(result_proto.deleted_documents(), + UnorderedElementsAre(EqualsProto(info1), EqualsProto(info2))); + + EXPECT_THAT( + icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()) + .status() + .code(), + Eq(StatusProto::NOT_FOUND)); + EXPECT_THAT( + icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()) + .status() + .code(), + Eq(StatusProto::NOT_FOUND)); + EXPECT_THAT( + icing.Get("namespace2", "uri3", GetResultSpecProto::default_instance()) + .status() + .code(), + Eq(StatusProto::NOT_FOUND)); +} + TEST_F(IcingSearchEngineTest, DeleteByQueryNotFound) { DocumentProto document1 = DocumentBuilder() @@ -6048,7 +6117,7 @@ TEST_F(IcingSearchEngineTest, SnippetNormalization) { search_spec.set_query("mdi Zürich"); ResultSpecProto result_spec; - result_spec.mutable_snippet_spec()->set_max_window_bytes(64); + 
result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64); result_spec.mutable_snippet_spec()->set_num_matches_per_property(2); result_spec.mutable_snippet_spec()->set_num_to_snippet(2); @@ -6111,7 +6180,7 @@ TEST_F(IcingSearchEngineTest, SnippetNormalizationPrefix) { search_spec.set_query("md Zür"); ResultSpecProto result_spec; - result_spec.mutable_snippet_spec()->set_max_window_bytes(64); + result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64); result_spec.mutable_snippet_spec()->set_num_matches_per_property(2); result_spec.mutable_snippet_spec()->set_num_to_snippet(2); @@ -6166,7 +6235,7 @@ TEST_F(IcingSearchEngineTest, SnippetSectionRestrict) { search_spec.set_query("body:Zür"); ResultSpecProto result_spec; - result_spec.mutable_snippet_spec()->set_max_window_bytes(64); + result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64); result_spec.mutable_snippet_spec()->set_num_matches_per_property(10); result_spec.mutable_snippet_spec()->set_num_to_snippet(10); @@ -7694,7 +7763,7 @@ TEST_F(IcingSearchEngineTest, QueryStatsProtoTest) { ResultSpecProto result_spec; result_spec.set_num_per_page(2); - result_spec.mutable_snippet_spec()->set_max_window_bytes(64); + result_spec.mutable_snippet_spec()->set_max_window_utf32_length(64); result_spec.mutable_snippet_spec()->set_num_matches_per_property(1); result_spec.mutable_snippet_spec()->set_num_to_snippet(3); @@ -7905,7 +7974,7 @@ TEST_F(IcingSearchEngineTest, SnippetErrorTest) { ResultSpecProto result_spec; result_spec.mutable_snippet_spec()->set_num_to_snippet(2); result_spec.mutable_snippet_spec()->set_num_matches_per_property(3); - result_spec.mutable_snippet_spec()->set_max_window_bytes(4); + result_spec.mutable_snippet_spec()->set_max_window_utf32_length(4); SearchResultProto search_results = icing.Search(search_spec, scoring_spec, result_spec); @@ -8110,6 +8179,8 @@ TEST_F(IcingSearchEngineTest, SearchSuggestionsTest) { SuggestionSpecProto suggestion_spec; 
suggestion_spec.set_prefix("t"); suggestion_spec.set_num_to_return(10); + suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( + TermMatchType::PREFIX); // Query all suggestions, and they will be ranked. SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); @@ -8130,6 +8201,316 @@ TEST_F(IcingSearchEngineTest, SearchSuggestionsTest) { ASSERT_THAT(response.suggestions().at(2).query(), "termfour"); } +TEST_F(IcingSearchEngineTest, + SearchSuggestionsTest_ShouldReturnInOneNamespace) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), + ProtoIsOk()); + + DocumentProto document1 = DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("Email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "foo fool") + .Build(); + DocumentProto document2 = DocumentBuilder() + .SetKey("namespace2", "uri2") + .SetSchema("Email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "fool") + .Build(); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + + SuggestionResponse::Suggestion suggestionFoo; + suggestionFoo.set_query("foo"); + SuggestionResponse::Suggestion suggestionFool; + suggestionFool.set_query("fool"); + + // namespace1 has 2 results. 
+ SuggestionSpecProto suggestion_spec; + suggestion_spec.set_prefix("f"); + suggestion_spec.add_namespace_filters("namespace1"); + suggestion_spec.set_num_to_return(10); + suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( + TermMatchType::PREFIX); + + SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); + ASSERT_THAT(response.status(), ProtoIsOk()); + ASSERT_THAT(response.suggestions(), + UnorderedElementsAre(EqualsProto(suggestionFoo), + EqualsProto(suggestionFool))); +} + +TEST_F(IcingSearchEngineTest, + SearchSuggestionsTest_ShouldReturnInMultipleNamespace) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), + ProtoIsOk()); + + DocumentProto document1 = DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("Email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "fo") + .Build(); + DocumentProto document2 = DocumentBuilder() + .SetKey("namespace2", "uri2") + .SetSchema("Email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "foo") + .Build(); + DocumentProto document3 = DocumentBuilder() + .SetKey("namespace3", "uri3") + .SetSchema("Email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "fool") + .Build(); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); + + SuggestionResponse::Suggestion suggestionFoo; + suggestionFoo.set_query("foo"); + SuggestionResponse::Suggestion suggestionFool; + suggestionFool.set_query("fool"); + + // namespace2 and namespace3 has 2 results. 
+ SuggestionSpecProto suggestion_spec; + suggestion_spec.set_prefix("f"); + suggestion_spec.add_namespace_filters("namespace2"); + suggestion_spec.add_namespace_filters("namespace3"); + suggestion_spec.set_num_to_return(10); + suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( + TermMatchType::PREFIX); + + SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); + ASSERT_THAT(response.status(), ProtoIsOk()); + ASSERT_THAT(response.suggestions(), + UnorderedElementsAre(EqualsProto(suggestionFoo), + EqualsProto(suggestionFool))); +} + +TEST_F(IcingSearchEngineTest, + SearchSuggestionsTest_OtherNamespaceDontContributeToHitCount) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), + ProtoIsOk()); + + // Index 4 documents, + // namespace1 has 2 hit2 for term one + // namespace2 has 2 hit2 for term two and 1 hit for term one. 
+ DocumentProto document1 = DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("Email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "termone") + .Build(); + DocumentProto document2 = DocumentBuilder() + .SetKey("namespace1", "uri2") + .SetSchema("Email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "termone") + .Build(); + DocumentProto document3 = DocumentBuilder() + .SetKey("namespace2", "uri2") + .SetSchema("Email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "termone termtwo") + .Build(); + DocumentProto document4 = DocumentBuilder() + .SetKey("namespace2", "uri3") + .SetSchema("Email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "termtwo") + .Build(); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk()); + + SuggestionResponse::Suggestion suggestionTermOne; + suggestionTermOne.set_query("termone"); + SuggestionResponse::Suggestion suggestionTermTwo; + suggestionTermTwo.set_query("termtwo"); + + // only search suggestion for namespace2. The correctly order should be + // {"termtwo", "termone"}. If we're not filtering out namespace1 when + // calculating our score, then it will be {"termone", "termtwo"}. 
+ SuggestionSpecProto suggestion_spec; + suggestion_spec.set_prefix("t"); + suggestion_spec.add_namespace_filters("namespace2"); + suggestion_spec.set_num_to_return(10); + suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( + TermMatchType::PREFIX); + + SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); + ASSERT_THAT(response.status(), ProtoIsOk()); + ASSERT_THAT(response.suggestions(), + ElementsAre(EqualsProto(suggestionTermTwo), + EqualsProto(suggestionTermOne))); +} + +TEST_F(IcingSearchEngineTest, SearchSuggestionsTest_DeletionTest) { + IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), + ProtoIsOk()); + + DocumentProto document1 = DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("Email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "fool") + .Build(); + DocumentProto document2 = DocumentBuilder() + .SetKey("namespace2", "uri2") + .SetSchema("Email") + .SetCreationTimestampMs(10) + .AddStringProperty("subject", "fool") + .Build(); + ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + + SuggestionResponse::Suggestion suggestionFool; + suggestionFool.set_query("fool"); + + // namespace1 has this suggestion + SuggestionSpecProto suggestion_spec; + suggestion_spec.set_prefix("f"); + suggestion_spec.add_namespace_filters("namespace1"); + suggestion_spec.set_num_to_return(10); + suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( + TermMatchType::PREFIX); + + SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); + ASSERT_THAT(response.status(), ProtoIsOk()); + ASSERT_THAT(response.suggestions(), + UnorderedElementsAre(EqualsProto(suggestionFool))); + + // namespace2 has this suggestion + suggestion_spec.clear_namespace_filters(); + 
suggestion_spec.add_namespace_filters("namespace2"); + response = icing.SearchSuggestions(suggestion_spec); + ASSERT_THAT(response.status(), ProtoIsOk()); + ASSERT_THAT(response.suggestions(), + UnorderedElementsAre(EqualsProto(suggestionFool))); + + // delete document from namespace 1 + EXPECT_THAT(icing.Delete("namespace1", "uri1").status(), ProtoIsOk()); + + // Now namespace1 will return empty + suggestion_spec.clear_namespace_filters(); + suggestion_spec.add_namespace_filters("namespace1"); + response = icing.SearchSuggestions(suggestion_spec); + ASSERT_THAT(response.status(), ProtoIsOk()); + ASSERT_THAT(response.suggestions(), IsEmpty()); + + // namespace2 still has this suggestion, so we can prove the reason of + // namespace 1 cannot find it is we filter it out, not it doesn't exist. + suggestion_spec.add_namespace_filters("namespace2"); + response = icing.SearchSuggestions(suggestion_spec); + ASSERT_THAT(response.status(), ProtoIsOk()); + ASSERT_THAT(response.suggestions(), + UnorderedElementsAre(EqualsProto(suggestionFool))); +} + +TEST_F(IcingSearchEngineTest, SearchSuggestionsTest_ExpiredTest) { + DocumentProto document1 = DocumentBuilder() + .SetKey("namespace1", "uri1") + .SetSchema("Email") + .SetCreationTimestampMs(100) + .SetTtlMs(500) + .AddStringProperty("subject", "fool") + .Build(); + DocumentProto document2 = DocumentBuilder() + .SetKey("namespace2", "uri2") + .SetSchema("Email") + .SetCreationTimestampMs(100) + .SetTtlMs(1000) + .AddStringProperty("subject", "fool") + .Build(); + { + auto fake_clock = std::make_unique<FakeClock>(); + fake_clock->SetSystemTimeMilliseconds(400); + + TestIcingSearchEngine icing(GetDefaultIcingOptions(), + std::make_unique<Filesystem>(), + std::make_unique<IcingFilesystem>(), + std::move(fake_clock), GetTestJniCache()); + EXPECT_THAT(icing.Initialize().status(), ProtoIsOk()); + ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(), + ProtoIsOk()); + + ASSERT_THAT(icing.Put(document1).status(), 
ProtoIsOk()); + ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk()); + + SuggestionResponse::Suggestion suggestionFool; + suggestionFool.set_query("fool"); + + // namespace1 has this suggestion + SuggestionSpecProto suggestion_spec; + suggestion_spec.set_prefix("f"); + suggestion_spec.add_namespace_filters("namespace1"); + suggestion_spec.set_num_to_return(10); + suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( + TermMatchType::PREFIX); + + SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); + ASSERT_THAT(response.status(), ProtoIsOk()); + ASSERT_THAT(response.suggestions(), + UnorderedElementsAre(EqualsProto(suggestionFool))); + + // namespace2 has this suggestion + suggestion_spec.clear_namespace_filters(); + suggestion_spec.add_namespace_filters("namespace2"); + response = icing.SearchSuggestions(suggestion_spec); + ASSERT_THAT(response.status(), ProtoIsOk()); + ASSERT_THAT(response.suggestions(), + UnorderedElementsAre(EqualsProto(suggestionFool))); + } + // We reinitialize here so we can feed in a fake clock this time + { + // Time needs to be past document1 creation time (100) + ttl (500) for it + // to count as "expired". document2 is not expired since its ttl is 1000. 
+ auto fake_clock = std::make_unique<FakeClock>(); + fake_clock->SetSystemTimeMilliseconds(800); + + TestIcingSearchEngine icing(GetDefaultIcingOptions(), + std::make_unique<Filesystem>(), + std::make_unique<IcingFilesystem>(), + std::move(fake_clock), GetTestJniCache()); + ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); + + SuggestionSpecProto suggestion_spec; + suggestion_spec.set_prefix("f"); + suggestion_spec.add_namespace_filters("namespace1"); + suggestion_spec.set_num_to_return(10); + suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( + TermMatchType::PREFIX); + + // Now namespace1 will return empty + suggestion_spec.clear_namespace_filters(); + suggestion_spec.add_namespace_filters("namespace1"); + SuggestionResponse response = icing.SearchSuggestions(suggestion_spec); + ASSERT_THAT(response.status(), ProtoIsOk()); + ASSERT_THAT(response.suggestions(), IsEmpty()); + + // namespace2 still has this suggestion + SuggestionResponse::Suggestion suggestionFool; + suggestionFool.set_query("fool"); + + suggestion_spec.add_namespace_filters("namespace2"); + response = icing.SearchSuggestions(suggestion_spec); + ASSERT_THAT(response.status(), ProtoIsOk()); + ASSERT_THAT(response.suggestions(), + UnorderedElementsAre(EqualsProto(suggestionFool))); + } +} + TEST_F(IcingSearchEngineTest, SearchSuggestionsTest_emptyPrefix) { IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache()); ASSERT_THAT(icing.Initialize().status(), ProtoIsOk()); @@ -8137,6 +8518,8 @@ TEST_F(IcingSearchEngineTest, SearchSuggestionsTest_emptyPrefix) { SuggestionSpecProto suggestion_spec; suggestion_spec.set_prefix(""); suggestion_spec.set_num_to_return(10); + suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( + TermMatchType::PREFIX); ASSERT_THAT(icing.SearchSuggestions(suggestion_spec).status(), ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); @@ -8149,6 +8532,8 @@ TEST_F(IcingSearchEngineTest, SearchSuggestionsTest_NonPositiveNumToReturn) { 
SuggestionSpecProto suggestion_spec; suggestion_spec.set_prefix("prefix"); suggestion_spec.set_num_to_return(0); + suggestion_spec.mutable_scoring_spec()->set_scoring_match_type( + TermMatchType::PREFIX); ASSERT_THAT(icing.SearchSuggestions(suggestion_spec).status(), ProtoStatusIs(StatusProto::INVALID_ARGUMENT)); @@ -8203,7 +8588,7 @@ TEST_F(IcingSearchEngineTest, MigrateToPortableFileBackedProtoLog) { EXPECT_THAT(init_result.initialize_stats().document_store_data_status(), Eq(InitializeStatsProto::NO_DATA_LOSS)); EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(), - Eq(InitializeStatsProto::NONE)); + Eq(InitializeStatsProto::LEGACY_DOCUMENT_LOG_FORMAT)); EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(), Eq(InitializeStatsProto::NONE)); EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(), diff --git a/icing/index/index-processor.cc b/icing/index/index-processor.cc index 1aae732..207c033 100644 --- a/icing/index/index-processor.cc +++ b/icing/index/index-processor.cc @@ -73,9 +73,23 @@ libtextclassifier3::Status IndexProcessor::IndexDocument( section.metadata.term_match_type, /*namespace_id=*/0); for (std::string_view token : section.token_sequence) { ++num_tokens; - std::string term = normalizer_.NormalizeTerm(token); - // Add this term to Hit buffer. - status = editor.BufferTerm(term.c_str()); + + switch (section.metadata.tokenizer) { + case StringIndexingConfig::TokenizerType::VERBATIM: + // data() is safe to use here because a token created from the + // VERBATIM tokenizer is the entire string value. The character at + // data() + token.length() is guaranteed to be a null char. 
+ status = editor.BufferTerm(token.data()); + break; + case StringIndexingConfig::TokenizerType::NONE: + ICING_LOG(WARNING) + << "Unexpected TokenizerType::NONE found when indexing document."; + [[fallthrough]]; + case StringIndexingConfig::TokenizerType::PLAIN: + std::string normalized_term = normalizer_.NormalizeTerm(token); + status = editor.BufferTerm(normalized_term.c_str()); + } + if (!status.ok()) { // We've encountered a failure. Bail out. We'll mark this doc as deleted // and signal a failure to the client. diff --git a/icing/index/index-processor.h b/icing/index/index-processor.h index c4b77b5..269e41c 100644 --- a/icing/index/index-processor.h +++ b/icing/index/index-processor.h @@ -69,8 +69,6 @@ class IndexProcessor { IndexProcessor(const Normalizer* normalizer, Index* index, const Clock* clock) : normalizer_(*normalizer), index_(index), clock_(*clock) {} - std::string NormalizeToken(const Token& token); - const Normalizer& normalizer_; Index* const index_; const Clock& clock_; diff --git a/icing/index/index-processor_benchmark.cc b/icing/index/index-processor_benchmark.cc index 6e072c7..1aad7d0 100644 --- a/icing/index/index-processor_benchmark.cc +++ b/icing/index/index-processor_benchmark.cc @@ -16,7 +16,6 @@ #include "gmock/gmock.h" #include "icing/document-builder.h" #include "icing/file/filesystem.h" -#include "icing/helpers/icu/icu-data-file-helper.h" #include "icing/index/index-processor.h" #include "icing/index/index.h" #include "icing/legacy/core/icing-string-util.h" @@ -24,6 +23,7 @@ #include "icing/schema/schema-util.h" #include "icing/schema/section-manager.h" #include "icing/testing/common-matchers.h" +#include "icing/testing/icu-data-file-helper.h" #include "icing/testing/test-data.h" #include "icing/testing/tmp-directory.h" #include "icing/tokenization/language-segmenter-factory.h" diff --git a/icing/index/index-processor_test.cc b/icing/index/index-processor_test.cc index 449bc3e..bd310de 100644 --- a/icing/index/index-processor_test.cc 
+++ b/icing/index/index-processor_test.cc @@ -30,7 +30,6 @@ #include "icing/absl_ports/str_join.h" #include "icing/document-builder.h" #include "icing/file/filesystem.h" -#include "icing/helpers/icu/icu-data-file-helper.h" #include "icing/index/hit/doc-hit-info.h" #include "icing/index/index.h" #include "icing/index/iterator/doc-hit-info-iterator.h" @@ -49,6 +48,7 @@ #include "icing/store/document-id.h" #include "icing/testing/common-matchers.h" #include "icing/testing/fake-clock.h" +#include "icing/testing/icu-data-file-helper.h" #include "icing/testing/random-string.h" #include "icing/testing/test-data.h" #include "icing/testing/tmp-directory.h" @@ -90,6 +90,8 @@ constexpr std::string_view kRepeatedProperty = "repeated"; constexpr std::string_view kSubProperty = "submessage"; constexpr std::string_view kNestedType = "NestedType"; constexpr std::string_view kNestedProperty = "nested"; +constexpr std::string_view kExactVerbatimProperty = "verbatimExact"; +constexpr std::string_view kPrefixedVerbatimProperty = "verbatimPrefixed"; constexpr DocumentId kDocumentId0 = 0; constexpr DocumentId kDocumentId1 = 1; @@ -98,6 +100,8 @@ constexpr SectionId kExactSectionId = 0; constexpr SectionId kPrefixedSectionId = 1; constexpr SectionId kRepeatedSectionId = 2; constexpr SectionId kNestedSectionId = 3; +constexpr SectionId kExactVerbatimSectionId = 4; +constexpr SectionId kPrefixedVerbatimSectionId = 5; using Cardinality = PropertyConfigProto::Cardinality; using DataType = PropertyConfigProto::DataType; @@ -106,21 +110,23 @@ using ::testing::Eq; using ::testing::IsEmpty; using ::testing::Test; -constexpr PropertyConfigProto_DataType_Code TYPE_STRING = - PropertyConfigProto_DataType_Code_STRING; -constexpr PropertyConfigProto_DataType_Code TYPE_BYTES = - PropertyConfigProto_DataType_Code_BYTES; +constexpr PropertyConfigProto::DataType::Code TYPE_STRING = + PropertyConfigProto::DataType::STRING; +constexpr PropertyConfigProto::DataType::Code TYPE_BYTES = + 
PropertyConfigProto::DataType::BYTES; -constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL = - PropertyConfigProto_Cardinality_Code_OPTIONAL; -constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REPEATED = - PropertyConfigProto_Cardinality_Code_REPEATED; +constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_OPTIONAL = + PropertyConfigProto::Cardinality::OPTIONAL; +constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_REPEATED = + PropertyConfigProto::Cardinality::REPEATED; -constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN = - StringIndexingConfig_TokenizerType_Code_PLAIN; +constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_PLAIN = + StringIndexingConfig::TokenizerType::PLAIN; +constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_VERBATIM = + StringIndexingConfig::TokenizerType::VERBATIM; -constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY; -constexpr TermMatchType_Code MATCH_PREFIX = TermMatchType_Code_PREFIX; +constexpr TermMatchType::Code MATCH_EXACT = TermMatchType::EXACT_ONLY; +constexpr TermMatchType::Code MATCH_PREFIX = TermMatchType::PREFIX; class IndexProcessorTest : public Test { protected: @@ -180,6 +186,16 @@ class IndexProcessorTest : public Test { .SetCardinality(CARDINALITY_REPEATED)) .AddProperty( PropertyConfigBuilder() + .SetName(kExactVerbatimProperty) + .SetDataTypeString(MATCH_EXACT, TOKENIZER_VERBATIM) + .SetCardinality(CARDINALITY_REPEATED)) + .AddProperty( + PropertyConfigBuilder() + .SetName(kPrefixedVerbatimProperty) + .SetDataTypeString(MATCH_PREFIX, TOKENIZER_VERBATIM) + .SetCardinality(CARDINALITY_REPEATED)) + .AddProperty( + PropertyConfigBuilder() .SetName(kSubProperty) .SetDataTypeDocument( kNestedType, /*index_nested_properties=*/true) @@ -797,6 +813,95 @@ TEST_F(IndexProcessorTest, IndexingDocMergeFailureResets) { EXPECT_THAT(index_->last_added_document_id(), Eq(doc_id)); } +TEST_F(IndexProcessorTest, ExactVerbatimProperty) { + 
DocumentProto document = + DocumentBuilder() + .SetKey("icing", "fake_type/1") + .SetSchema(std::string(kFakeType)) + .AddStringProperty(std::string(kExactVerbatimProperty), + "Hello, world!") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN( + TokenizedDocument tokenized_document, + TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(), + document)); + EXPECT_THAT(tokenized_document.num_tokens(), 1); + + EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0), + IsOk()); + EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0)); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<DocHitInfoIterator> itr, + index_->GetIterator("Hello, world!", kSectionIdMaskAll, + TermMatchType::EXACT_ONLY)); + std::vector<DocHitInfo> hits = GetHits(std::move(itr)); + std::unordered_map<SectionId, Hit::TermFrequency> expectedMap{ + {kExactVerbatimSectionId, 1}}; + + EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency( + kDocumentId0, expectedMap))); +} + +TEST_F(IndexProcessorTest, PrefixVerbatimProperty) { + DocumentProto document = + DocumentBuilder() + .SetKey("icing", "fake_type/1") + .SetSchema(std::string(kFakeType)) + .AddStringProperty(std::string(kPrefixedVerbatimProperty), + "Hello, world!") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN( + TokenizedDocument tokenized_document, + TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(), + document)); + EXPECT_THAT(tokenized_document.num_tokens(), 1); + + EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0), + IsOk()); + EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0)); + + // We expect to match the document we indexed as "Hello, w" is a prefix + // of "Hello, world!" 
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr, + index_->GetIterator("Hello, w", kSectionIdMaskAll, + TermMatchType::PREFIX)); + std::vector<DocHitInfo> hits = GetHits(std::move(itr)); + std::unordered_map<SectionId, Hit::TermFrequency> expectedMap{ + {kPrefixedVerbatimSectionId, 1}}; + + EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency( + kDocumentId0, expectedMap))); +} + +TEST_F(IndexProcessorTest, VerbatimPropertyDoesntMatchSubToken) { + DocumentProto document = + DocumentBuilder() + .SetKey("icing", "fake_type/1") + .SetSchema(std::string(kFakeType)) + .AddStringProperty(std::string(kPrefixedVerbatimProperty), + "Hello, world!") + .Build(); + ICING_ASSERT_OK_AND_ASSIGN( + TokenizedDocument tokenized_document, + TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(), + document)); + EXPECT_THAT(tokenized_document.num_tokens(), 1); + + EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0), + IsOk()); + EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0)); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<DocHitInfoIterator> itr, + index_->GetIterator("world", kSectionIdMaskAll, TermMatchType::PREFIX)); + std::vector<DocHitInfo> hits = GetHits(std::move(itr)); + + // We should not have hits for term "world" as the index processor should + // create a sole token "Hello, world! for the document. + EXPECT_THAT(hits, IsEmpty()); +} + } // namespace } // namespace lib diff --git a/icing/index/index.cc b/icing/index/index.cc index 1bdab21..02ba699 100644 --- a/icing/index/index.cc +++ b/icing/index/index.cc @@ -71,24 +71,6 @@ IcingDynamicTrie::Options GetMainLexiconOptions() { return IcingDynamicTrie::Options(); } -// Helper function to check if a term is in the given namespaces. -// TODO(tjbarron): Implement a method PropertyReadersAll.HasAnyProperty(). 
-bool IsTermInNamespaces( - const IcingDynamicTrie::PropertyReadersAll& property_reader, - uint32_t value_index, const std::vector<NamespaceId>& namespace_ids) { - if (namespace_ids.empty()) { - return true; - } - for (NamespaceId namespace_id : namespace_ids) { - if (property_reader.HasProperty(GetNamespacePropertyId(namespace_id), - value_index)) { - return true; - } - } - - return false; -} - enum class MergeAction { kTakeLiteTerm, kTakeMainTerm, kMergeTerms }; // Merge the TermMetadata from lite index and main index. If the term exists in @@ -137,7 +119,7 @@ std::vector<TermMetadata> MergeAndRankTermMetadatas( int total_est_hit_count = lite_term_itr->hit_count + main_term_itr->hit_count; PushToTermHeap(TermMetadata(std::move(lite_term_itr->content), - total_est_hit_count), + total_est_hit_count), num_to_return, merged_term_metadata_heap); ++lite_term_itr; ++main_term_itr; @@ -228,32 +210,26 @@ Index::GetIterator(const std::string& term, SectionIdMask section_id_mask, libtextclassifier3::StatusOr<std::vector<TermMetadata>> Index::FindLiteTermsByPrefix(const std::string& prefix, - const std::vector<NamespaceId>& namespace_ids) { + const NamespaceChecker* namespace_checker) { // Finds all the terms that start with the given prefix in the lexicon. IcingDynamicTrie::Iterator term_iterator(lite_index_->lexicon(), prefix.c_str()); - // A property reader to help check if a term has some property. - IcingDynamicTrie::PropertyReadersAll property_reader(lite_index_->lexicon()); - std::vector<TermMetadata> term_metadata_list; while (term_iterator.IsValid()) { uint32_t term_value_index = term_iterator.GetValueIndex(); - // Skips the terms that don't exist in the given namespaces. We won't skip - // any terms if namespace_ids is empty. 
- if (!IsTermInNamespaces(property_reader, term_value_index, namespace_ids)) { - term_iterator.Advance(); - continue; - } - ICING_ASSIGN_OR_RETURN( uint32_t term_id, term_id_codec_->EncodeTvi(term_value_index, TviType::LITE), absl_ports::InternalError("Failed to access terms in lexicon.")); - - term_metadata_list.emplace_back(term_iterator.GetKey(), - lite_index_->CountHits(term_id)); + ICING_ASSIGN_OR_RETURN(int hit_count, + lite_index_->CountHits(term_id, namespace_checker)); + if (hit_count > 0) { + // There is at least one document in the given namespace has this term. + term_metadata_list.push_back( + TermMetadata(term_iterator.GetKey(), hit_count)); + } term_iterator.Advance(); } @@ -261,21 +237,20 @@ Index::FindLiteTermsByPrefix(const std::string& prefix, } libtextclassifier3::StatusOr<std::vector<TermMetadata>> -Index::FindTermsByPrefix(const std::string& prefix, - const std::vector<NamespaceId>& namespace_ids, - int num_to_return) { +Index::FindTermsByPrefix(const std::string& prefix, int num_to_return, + TermMatchType::Code term_match_type, + const NamespaceChecker* namespace_checker) { std::vector<TermMetadata> term_metadata_list; if (num_to_return <= 0) { return term_metadata_list; } - // Get results from the LiteIndex. ICING_ASSIGN_OR_RETURN(std::vector<TermMetadata> lite_term_metadata_list, - FindLiteTermsByPrefix(prefix, namespace_ids)); + FindLiteTermsByPrefix(prefix, namespace_checker)); // Append results from the MainIndex. 
ICING_ASSIGN_OR_RETURN(std::vector<TermMetadata> main_term_metadata_list, - main_index_->FindTermsByPrefix(prefix, namespace_ids)); - + main_index_->FindTermsByPrefix(prefix, term_match_type, + namespace_checker)); return MergeAndRankTermMetadatas(std::move(lite_term_metadata_list), std::move(main_term_metadata_list), num_to_return); @@ -284,11 +259,7 @@ Index::FindTermsByPrefix(const std::string& prefix, IndexStorageInfoProto Index::GetStorageInfo() const { IndexStorageInfoProto storage_info; int64_t directory_size = filesystem_->GetDiskUsage(options_.base_dir.c_str()); - if (directory_size != Filesystem::kBadFileSize) { - storage_info.set_index_size(directory_size); - } else { - storage_info.set_index_size(-1); - } + storage_info.set_index_size(Filesystem::SanitizeFileSize(directory_size)); storage_info = lite_index_->GetStorageInfo(std::move(storage_info)); return main_index_->GetStorageInfo(std::move(storage_info)); } diff --git a/icing/index/index.h b/icing/index/index.h index 693cf04..5c53349 100644 --- a/icing/index/index.h +++ b/icing/index/index.h @@ -32,10 +32,12 @@ #include "icing/index/term-id-codec.h" #include "icing/index/term-metadata.h" #include "icing/legacy/index/icing-filesystem.h" +#include "icing/proto/debug.pb.h" #include "icing/proto/storage.pb.h" #include "icing/proto/term.pb.h" #include "icing/schema/section.h" #include "icing/store/document-id.h" +#include "icing/store/namespace-checker.h" #include "icing/store/namespace-id.h" #include "icing/util/crc32.h" @@ -142,9 +144,14 @@ class Index { // index. // verbosity > 0, more detailed debug information including raw postings // lists. 
- void GetDebugInfo(int verbosity, std::string* out) const { - lite_index_->GetDebugInfo(verbosity, out); - main_index_->GetDebugInfo(verbosity, out); + IndexDebugInfoProto GetDebugInfo(int verbosity) const { + IndexDebugInfoProto debug_info; + *debug_info.mutable_index_storage_info() = GetStorageInfo(); + *debug_info.mutable_lite_index_info() = + lite_index_->GetDebugInfo(verbosity); + *debug_info.mutable_main_index_info() = + main_index_->GetDebugInfo(verbosity); + return debug_info; } // Returns the byte size of the all the elements held in the index. This @@ -181,17 +188,17 @@ class Index { TermMatchType::Code term_match_type); // Finds terms with the given prefix in the given namespaces. If - // 'namespace_ids' is empty, returns results from all the namespaces. The - // input prefix must be normalized, otherwise inaccurate results may be - // returned. Results are not sorted specifically and are in their original - // order. Number of results are no more than 'num_to_return'. + // 'namespace_ids' is empty, returns results from all the namespaces. Results + // are sorted in decreasing order of hit count. Number of results are no more + // than 'num_to_return'. // // Returns: // A list of TermMetadata on success // INTERNAL_ERROR if failed to access term data. libtextclassifier3::StatusOr<std::vector<TermMetadata>> FindTermsByPrefix( - const std::string& prefix, const std::vector<NamespaceId>& namespace_ids, - int num_to_return); + const std::string& prefix, int num_to_return, + TermMatchType::Code term_match_type, + const NamespaceChecker* namespace_checker); // A class that can be used to add hits to the index. 
// @@ -267,7 +274,7 @@ class Index { filesystem_(filesystem) {} libtextclassifier3::StatusOr<std::vector<TermMetadata>> FindLiteTermsByPrefix( - const std::string& prefix, const std::vector<NamespaceId>& namespace_ids); + const std::string& prefix, const NamespaceChecker* namespace_checker); std::unique_ptr<LiteIndex> lite_index_; std::unique_ptr<MainIndex> main_index_; diff --git a/icing/index/index_test.cc b/icing/index/index_test.cc index 00d5ad6..8355c01 100644 --- a/icing/index/index_test.cc +++ b/icing/index/index_test.cc @@ -31,10 +31,12 @@ #include "icing/index/iterator/doc-hit-info-iterator.h" #include "icing/legacy/index/icing-filesystem.h" #include "icing/legacy/index/icing-mock-filesystem.h" +#include "icing/proto/debug.pb.h" #include "icing/proto/storage.pb.h" #include "icing/proto/term.pb.h" #include "icing/schema/section.h" #include "icing/store/document-id.h" +#include "icing/testing/always-true-namespace-checker-impl.h" #include "icing/testing/common-matchers.h" #include "icing/testing/random-string.h" #include "icing/testing/tmp-directory.h" @@ -89,22 +91,9 @@ constexpr DocumentId kDocumentId5 = 5; constexpr DocumentId kDocumentId6 = 6; constexpr DocumentId kDocumentId7 = 7; constexpr DocumentId kDocumentId8 = 8; -constexpr DocumentId kDocumentId9 = 9; -constexpr DocumentId kDocumentId10 = 10; -constexpr DocumentId kDocumentId11 = 11; -constexpr DocumentId kDocumentId12 = 12; constexpr SectionId kSectionId2 = 2; constexpr SectionId kSectionId3 = 3; -// The value returned by IndexBlock::ApproximateFullPostingListHitsForBlock( -// GetBlockSize(), -// GetPostingListIndexBits(posting_list_utils::min_posting_list_size())); -constexpr int kMinSizePlApproxHits = 3; -// The value returned by IndexBlock::ApproximateFullPostingListHitsForBlock( -// GetBlockSize(), -// GetPostingListIndexBits(2 * posting_list_utils::min_posting_list_size())); -constexpr int kSecondSmallestPlApproxHits = 7; - std::vector<DocHitInfo> GetHits(std::unique_ptr<DocHitInfoIterator> 
iterator) { std::vector<DocHitInfo> infos; while (iterator->Advance().ok()) { @@ -920,148 +909,82 @@ TEST_F(IndexTest, InvalidHitBufferSize) { TEST_F(IndexTest, FindTermByPrefixShouldReturnEmpty) { Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2, TermMatchType::PREFIX, /*namespace_id=*/0); + AlwaysTrueNamespaceCheckerImpl impl; EXPECT_THAT(edit.BufferTerm("fool"), IsOk()); EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk()); - EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"foo", /*namespace_ids=*/{0}, - /*num_to_return=*/0), + EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"foo", /*num_to_return=*/0, + TermMatchType::PREFIX, &impl), IsOkAndHolds(IsEmpty())); - EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"foo", /*namespace_ids=*/{0}, - /*num_to_return=*/-1), + EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"foo", + /*num_to_return=*/-1, + TermMatchType::PREFIX, &impl), IsOkAndHolds(IsEmpty())); ICING_ASSERT_OK(index_->Merge()); - EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"foo", /*namespace_ids=*/{0}, - /*num_to_return=*/0), + EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"foo", + /*num_to_return=*/0, + TermMatchType::PREFIX, &impl), IsOkAndHolds(IsEmpty())); - EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"foo", /*namespace_ids=*/{0}, - /*num_to_return=*/-1), + EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"foo", + /*num_to_return=*/-1, + TermMatchType::PREFIX, &impl), IsOkAndHolds(IsEmpty())); } TEST_F(IndexTest, FindTermByPrefixShouldReturnCorrectResult) { Index::Editor edit = index_->Edit( kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0); + AlwaysTrueNamespaceCheckerImpl impl; EXPECT_THAT(edit.BufferTerm("foo"), IsOk()); EXPECT_THAT(edit.BufferTerm("bar"), IsOk()); EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk()); // "b" should only match "bar" but not "foo". 
- EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"b", /*namespace_ids=*/{0}, - /*num_to_return=*/10), + EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"b", + /*num_to_return=*/10, + TermMatchType::PREFIX, &impl), IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("bar", 1)))); ICING_ASSERT_OK(index_->Merge()); // "b" should only match "bar" but not "foo". - EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"b", /*namespace_ids=*/{0}, - /*num_to_return=*/10), - IsOkAndHolds(UnorderedElementsAre( - EqualsTermMetadata("bar", kMinSizePlApproxHits)))); + EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"b", + /*num_to_return=*/10, + TermMatchType::PREFIX, &impl), + IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("bar", 1)))); } TEST_F(IndexTest, FindTermByPrefixShouldRespectNumToReturn) { Index::Editor edit = index_->Edit( kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0); + AlwaysTrueNamespaceCheckerImpl impl; EXPECT_THAT(edit.BufferTerm("fo"), IsOk()); EXPECT_THAT(edit.BufferTerm("foo"), IsOk()); EXPECT_THAT(edit.BufferTerm("fool"), IsOk()); EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk()); // We have 3 results but only 2 should be returned. - EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{0}, - /*num_to_return=*/2), + EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", + /*num_to_return=*/2, + TermMatchType::PREFIX, &impl), IsOkAndHolds(SizeIs(2))); ICING_ASSERT_OK(index_->Merge()); // We have 3 results but only 2 should be returned. 
- EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{0}, - /*num_to_return=*/2), + EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", + /*num_to_return=*/2, + TermMatchType::PREFIX, &impl), IsOkAndHolds(SizeIs(2))); } -TEST_F(IndexTest, FindTermByPrefixShouldReturnTermsInOneNamespace) { - Index::Editor edit1 = - index_->Edit(kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, - /*namespace_id=*/0); - EXPECT_THAT(edit1.BufferTerm("fo"), IsOk()); - EXPECT_THAT(edit1.BufferTerm("foo"), IsOk()); - EXPECT_THAT(edit1.IndexAllBufferedTerms(), IsOk()); - - Index::Editor edit2 = - index_->Edit(kDocumentId1, kSectionId2, TermMatchType::EXACT_ONLY, - /*namespace_id=*/1); - EXPECT_THAT(edit2.BufferTerm("fool"), IsOk()); - EXPECT_THAT(edit2.IndexAllBufferedTerms(), IsOk()); - - // namespace with id 0 has 2 results. - EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{0}, - /*num_to_return=*/10), - IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("fo", 1), - EqualsTermMetadata("foo", 1)))); - // namespace with id 1 has 1 result. - EXPECT_THAT( - index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{1}, - /*num_to_return=*/10), - IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("fool", 1)))); - - ICING_ASSERT_OK(index_->Merge()); - - // namespace with id 0 has 2 results. - EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{0}, - /*num_to_return=*/10), - IsOkAndHolds(UnorderedElementsAre( - EqualsTermMetadata("fo", kMinSizePlApproxHits), - EqualsTermMetadata("foo", kMinSizePlApproxHits)))); - // namespace with id 1 has 1 result. 
- EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{1}, - /*num_to_return=*/10), - IsOkAndHolds(UnorderedElementsAre( - EqualsTermMetadata("fool", kMinSizePlApproxHits)))); -} - -TEST_F(IndexTest, FindTermByPrefixShouldReturnTermsInMultipleNamespaces) { - Index::Editor edit1 = - index_->Edit(kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, - /*namespace_id=*/0); - EXPECT_THAT(edit1.BufferTerm("fo"), IsOk()); - EXPECT_THAT(edit1.IndexAllBufferedTerms(), IsOk()); - - Index::Editor edit2 = - index_->Edit(kDocumentId1, kSectionId2, TermMatchType::EXACT_ONLY, - /*namespace_id=*/1); - EXPECT_THAT(edit2.BufferTerm("foo"), IsOk()); - EXPECT_THAT(edit2.IndexAllBufferedTerms(), IsOk()); - - Index::Editor edit3 = - index_->Edit(kDocumentId2, kSectionId2, TermMatchType::EXACT_ONLY, - /*namespace_id=*/2); - EXPECT_THAT(edit3.BufferTerm("fool"), IsOk()); - EXPECT_THAT(edit3.IndexAllBufferedTerms(), IsOk()); - - // Should return "foo" and "fool" which are in namespaces with ids 1 and 2. 
- EXPECT_THAT( - index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{1, 2}, - /*num_to_return=*/10), - IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("foo", 1), - EqualsTermMetadata("fool", 1)))); - - ICING_ASSERT_OK(index_->Merge()); - - EXPECT_THAT( - index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{1, 2}, - /*num_to_return=*/10), - IsOkAndHolds(UnorderedElementsAre( - EqualsTermMetadata("foo", kMinSizePlApproxHits), - EqualsTermMetadata("fool", kMinSizePlApproxHits)))); -} - TEST_F(IndexTest, FindTermByPrefixShouldReturnTermsInAllNamespaces) { Index::Editor edit1 = index_->Edit(kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0); + AlwaysTrueNamespaceCheckerImpl impl; EXPECT_THAT(edit1.BufferTerm("fo"), IsOk()); EXPECT_THAT(edit1.IndexAllBufferedTerms(), IsOk()); @@ -1078,8 +1001,9 @@ TEST_F(IndexTest, FindTermByPrefixShouldReturnTermsInAllNamespaces) { EXPECT_THAT(edit3.IndexAllBufferedTerms(), IsOk()); // Should return "fo", "foo" and "fool" across all namespaces. - EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{}, - /*num_to_return=*/10), + EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", + /*num_to_return=*/10, + TermMatchType::PREFIX, &impl), IsOkAndHolds(UnorderedElementsAre( EqualsTermMetadata("fo", 1), EqualsTermMetadata("foo", 1), EqualsTermMetadata("fool", 1)))); @@ -1087,18 +1011,19 @@ TEST_F(IndexTest, FindTermByPrefixShouldReturnTermsInAllNamespaces) { ICING_ASSERT_OK(index_->Merge()); // Should return "fo", "foo" and "fool" across all namespaces. 
- EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{}, - /*num_to_return=*/10), + EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", + /*num_to_return=*/10, + TermMatchType::PREFIX, &impl), IsOkAndHolds(UnorderedElementsAre( - EqualsTermMetadata("fo", kMinSizePlApproxHits), - EqualsTermMetadata("foo", kMinSizePlApproxHits), - EqualsTermMetadata("fool", kMinSizePlApproxHits)))); + EqualsTermMetadata("fo", 1), EqualsTermMetadata("foo", 1), + EqualsTermMetadata("fool", 1)))); } TEST_F(IndexTest, FindTermByPrefixShouldReturnCorrectHitCount) { Index::Editor edit1 = index_->Edit(kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0); + AlwaysTrueNamespaceCheckerImpl impl; EXPECT_THAT(edit1.BufferTerm("foo"), IsOk()); EXPECT_THAT(edit1.BufferTerm("fool"), IsOk()); EXPECT_THAT(edit1.IndexAllBufferedTerms(), IsOk()); @@ -1110,20 +1035,19 @@ TEST_F(IndexTest, FindTermByPrefixShouldReturnCorrectHitCount) { EXPECT_THAT(edit2.IndexAllBufferedTerms(), IsOk()); // 'foo' has 1 hit, 'fool' has 2 hits. - EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{0}, - /*num_to_return=*/10), + EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", + /*num_to_return=*/10, + TermMatchType::PREFIX, &impl), IsOkAndHolds(ElementsAre(EqualsTermMetadata("fool", 2), EqualsTermMetadata("foo", 1)))); ICING_ASSERT_OK(index_->Merge()); - // foo's one hit should fit on a min-sized pl, fool's two hits should also fit - // on a min-sized pl. 
- EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{0}, - /*num_to_return=*/10), - IsOkAndHolds(UnorderedElementsAre( - EqualsTermMetadata("foo", kMinSizePlApproxHits), - EqualsTermMetadata("fool", kMinSizePlApproxHits)))); + EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", + /*num_to_return=*/10, + TermMatchType::PREFIX, &impl), + IsOkAndHolds(ElementsAre(EqualsTermMetadata("fool", 2), + EqualsTermMetadata("foo", 1)))); } TEST_F(IndexTest, FindTermByPrefixShouldReturnInOrder) { @@ -1132,6 +1056,7 @@ TEST_F(IndexTest, FindTermByPrefixShouldReturnInOrder) { Index::Editor edit1 = index_->Edit(kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0); + AlwaysTrueNamespaceCheckerImpl impl; EXPECT_THAT(edit1.BufferTerm("term-one"), IsOk()); EXPECT_THAT(edit1.BufferTerm("term-two"), IsOk()); EXPECT_THAT(edit1.BufferTerm("term-three"), IsOk()); @@ -1181,8 +1106,9 @@ TEST_F(IndexTest, FindTermByPrefixShouldReturnInOrder) { EXPECT_THAT(edit6.IndexAllBufferedTerms(), IsOk()); // verify the order in lite index is correct. - EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"t", /*namespace_ids=*/{0}, - /*num_to_return=*/10), + EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"t", + /*num_to_return=*/10, + TermMatchType::PREFIX, &impl), IsOkAndHolds(ElementsAre(EqualsTermMetadata("term-six", 6), EqualsTermMetadata("term-five", 5), EqualsTermMetadata("term-four", 4), @@ -1192,93 +1118,97 @@ TEST_F(IndexTest, FindTermByPrefixShouldReturnInOrder) { ICING_ASSERT_OK(index_->Merge()); - // Since most of term has same approx hit count, we don't verify order in the - // main index. 
- EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"t", /*namespace_ids=*/{0}, - /*num_to_return=*/10), - IsOkAndHolds(UnorderedElementsAre( - EqualsTermMetadata("term-six", kSecondSmallestPlApproxHits), - EqualsTermMetadata("term-five", kSecondSmallestPlApproxHits), - EqualsTermMetadata("term-four", kMinSizePlApproxHits), - EqualsTermMetadata("term-three", kMinSizePlApproxHits), - EqualsTermMetadata("term-two", kMinSizePlApproxHits), - EqualsTermMetadata("term-one", kMinSizePlApproxHits)))); - - // keep push terms to the lite index. For term 1-4, since they has same hit - // count kMinSizePlApproxHits, we will push 4 term-one, 3 term-two, 2 - // term-three and one term-four to make them in reverse order. And for term - // 5 & 6, we will push 2 term-five and one term-six. + EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"t", + /*num_to_return=*/10, + TermMatchType::PREFIX, &impl), + IsOkAndHolds(ElementsAre(EqualsTermMetadata("term-six", 6), + EqualsTermMetadata("term-five", 5), + EqualsTermMetadata("term-four", 4), + EqualsTermMetadata("term-three", 3), + EqualsTermMetadata("term-two", 2), + EqualsTermMetadata("term-one", 1)))); + + // keep push terms to the lite index. We will add 2 document to term-five, + // term-three and term-one. The output order should be 5-6-3-4-1-2. 
Index::Editor edit7 = index_->Edit(kDocumentId7, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0); EXPECT_THAT(edit7.BufferTerm("term-one"), IsOk()); - EXPECT_THAT(edit7.BufferTerm("term-two"), IsOk()); EXPECT_THAT(edit7.BufferTerm("term-three"), IsOk()); - EXPECT_THAT(edit7.BufferTerm("term-four"), IsOk()); + EXPECT_THAT(edit7.BufferTerm("term-five"), IsOk()); EXPECT_THAT(edit7.IndexAllBufferedTerms(), IsOk()); Index::Editor edit8 = index_->Edit(kDocumentId8, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0); EXPECT_THAT(edit8.BufferTerm("term-one"), IsOk()); - EXPECT_THAT(edit8.BufferTerm("term-two"), IsOk()); EXPECT_THAT(edit8.BufferTerm("term-three"), IsOk()); + EXPECT_THAT(edit8.BufferTerm("term-five"), IsOk()); EXPECT_THAT(edit8.IndexAllBufferedTerms(), IsOk()); - Index::Editor edit9 = - index_->Edit(kDocumentId9, kSectionId2, TermMatchType::EXACT_ONLY, - /*namespace_id=*/0); - EXPECT_THAT(edit9.BufferTerm("term-one"), IsOk()); - EXPECT_THAT(edit9.BufferTerm("term-two"), IsOk()); - EXPECT_THAT(edit9.IndexAllBufferedTerms(), IsOk()); + // verify the combination of lite index and main index is in correct order. + EXPECT_THAT( + index_->FindTermsByPrefix(/*prefix=*/"t", /*num_to_return=*/10, + TermMatchType::PREFIX, &impl), + IsOkAndHolds(ElementsAre( + EqualsTermMetadata("term-five", 7), EqualsTermMetadata("term-six", 6), + EqualsTermMetadata("term-three", 5), + EqualsTermMetadata("term-four", 4), EqualsTermMetadata("term-one", 3), + EqualsTermMetadata("term-two", 2)))); - Index::Editor edit10 = - index_->Edit(kDocumentId10, kSectionId2, TermMatchType::EXACT_ONLY, - /*namespace_id=*/0); - EXPECT_THAT(edit10.BufferTerm("term-one"), IsOk()); - EXPECT_THAT(edit10.IndexAllBufferedTerms(), IsOk()); + // Get the first three terms. 
+ EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"t", + /*num_to_return=*/3, + TermMatchType::PREFIX, &impl), + IsOkAndHolds(ElementsAre(EqualsTermMetadata("term-five", 7), + EqualsTermMetadata("term-six", 6), + EqualsTermMetadata("term-three", 5)))); +} - Index::Editor edit11 = - index_->Edit(kDocumentId11, kSectionId2, TermMatchType::EXACT_ONLY, +TEST_F(IndexTest, FindTermByPrefix_InTermMatchTypePrefix_ShouldReturnInOrder) { + Index::Editor edit1 = + index_->Edit(kDocumentId0, kSectionId2, TermMatchType::PREFIX, /*namespace_id=*/0); - EXPECT_THAT(edit11.BufferTerm("term-five"), IsOk()); - EXPECT_THAT(edit11.BufferTerm("term-six"), IsOk()); - EXPECT_THAT(edit11.IndexAllBufferedTerms(), IsOk()); + AlwaysTrueNamespaceCheckerImpl impl; + EXPECT_THAT(edit1.BufferTerm("fo"), IsOk()); + EXPECT_THAT(edit1.IndexAllBufferedTerms(), IsOk()); - Index::Editor edit12 = - index_->Edit(kDocumentId12, kSectionId2, TermMatchType::EXACT_ONLY, + Index::Editor edit2 = + index_->Edit(kDocumentId2, kSectionId2, TermMatchType::PREFIX, /*namespace_id=*/0); - EXPECT_THAT(edit12.BufferTerm("term-five"), IsOk()); - EXPECT_THAT(edit12.IndexAllBufferedTerms(), IsOk()); + EXPECT_THAT(edit2.BufferTerm("foo"), IsOk()); + EXPECT_THAT(edit2.IndexAllBufferedTerms(), IsOk()); - // verify the combination of lite index and main index is in correct order. 
- EXPECT_THAT( - index_->FindTermsByPrefix(/*prefix=*/"t", /*namespace_ids=*/{0}, - /*num_to_return=*/10), - IsOkAndHolds(ElementsAre( - EqualsTermMetadata("term-five", - kSecondSmallestPlApproxHits + 2), // 9 - EqualsTermMetadata("term-six", kSecondSmallestPlApproxHits + 1), // 8 - EqualsTermMetadata("term-one", kMinSizePlApproxHits + 4), // 7 - EqualsTermMetadata("term-two", kMinSizePlApproxHits + 3), // 6 - EqualsTermMetadata("term-three", kMinSizePlApproxHits + 2), // 5 - EqualsTermMetadata("term-four", kMinSizePlApproxHits + 1)))); // 4 + Index::Editor edit3 = + index_->Edit(kDocumentId3, kSectionId2, TermMatchType::PREFIX, + /*namespace_id=*/0); + EXPECT_THAT(edit3.BufferTerm("fool"), IsOk()); + EXPECT_THAT(edit3.IndexAllBufferedTerms(), IsOk()); - // Get the first three terms. - EXPECT_THAT( - index_->FindTermsByPrefix(/*prefix=*/"t", /*namespace_ids=*/{0}, - /*num_to_return=*/3), - IsOkAndHolds(ElementsAre( - EqualsTermMetadata("term-five", - kSecondSmallestPlApproxHits + 2), // 9 - EqualsTermMetadata("term-six", kSecondSmallestPlApproxHits + 1), // 8 - EqualsTermMetadata("term-one", kMinSizePlApproxHits + 4)))); // 7 + ICING_ASSERT_OK(index_->Merge()); + // verify the order in pls is correct + // "fo" { {doc0, exact_hit}, {doc1, prefix_hit}, {doc2, prefix_hit} } + // "foo" { {doc1, exact_hit}, {doc2, prefix_hit} } + // "fool" { {doc2, exact_hit} } + EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", + /*num_to_return=*/10, + TermMatchType::PREFIX, &impl), + IsOkAndHolds(ElementsAre(EqualsTermMetadata("fo", 3), + EqualsTermMetadata("foo", 2), + EqualsTermMetadata("fool", 1)))); + // Find by exact only, all terms should be equally. 
+ EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", /*num_to_return=*/10, + TermMatchType::EXACT_ONLY, &impl), + IsOkAndHolds(UnorderedElementsAre( + EqualsTermMetadata("fo", 1), EqualsTermMetadata("foo", 1), + EqualsTermMetadata("fool", 1)))); } -TEST_F(IndexTest, FindTermByPrefixShouldReturnApproximateHitCountForMain) { +TEST_F(IndexTest, FindTermByPrefixShouldReturnHitCountForMain) { Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0); + AlwaysTrueNamespaceCheckerImpl impl; EXPECT_THAT(edit.BufferTerm("foo"), IsOk()); EXPECT_THAT(edit.BufferTerm("fool"), IsOk()); EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk()); @@ -1313,25 +1243,26 @@ TEST_F(IndexTest, FindTermByPrefixShouldReturnApproximateHitCountForMain) { EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk()); // 'foo' has 1 hit, 'fool' has 8 hits. - EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{0}, - /*num_to_return=*/10), + EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", + /*num_to_return=*/10, + TermMatchType::PREFIX, &impl), IsOkAndHolds(ElementsAre(EqualsTermMetadata("fool", 8), EqualsTermMetadata("foo", 1)))); ICING_ASSERT_OK(index_->Merge()); - // foo's hits should fit on a single pl. fool's hits will need two pls. 
- EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{0}, - /*num_to_return=*/10), - IsOkAndHolds(UnorderedElementsAre( - EqualsTermMetadata("foo", kMinSizePlApproxHits), - EqualsTermMetadata("fool", kSecondSmallestPlApproxHits)))); + EXPECT_THAT( + index_->FindTermsByPrefix(/*prefix=*/"f", /*num_to_return=*/10, + TermMatchType::PREFIX, &impl), + IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("foo", 1), + EqualsTermMetadata("fool", 8)))); } TEST_F(IndexTest, FindTermByPrefixShouldReturnCombinedHitCount) { Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0); + AlwaysTrueNamespaceCheckerImpl impl; EXPECT_THAT(edit.BufferTerm("foo"), IsOk()); EXPECT_THAT(edit.BufferTerm("fool"), IsOk()); EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk()); @@ -1343,19 +1274,18 @@ TEST_F(IndexTest, FindTermByPrefixShouldReturnCombinedHitCount) { EXPECT_THAT(edit.BufferTerm("fool"), IsOk()); EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk()); - // 'foo' has 1 hit in the main index, 'fool' has 1 hit in the main index and - // 1 hit in the lite index. 
- EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{0}, - /*num_to_return=*/10), - IsOkAndHolds(ElementsAre( - EqualsTermMetadata("fool", kMinSizePlApproxHits + 1), - EqualsTermMetadata("foo", kMinSizePlApproxHits)))); + EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", /*num_to_return=*/10, + TermMatchType::PREFIX, &impl), + IsOkAndHolds(ElementsAre(EqualsTermMetadata("fool", 2), + EqualsTermMetadata("foo", 1)))); } TEST_F(IndexTest, FindTermByPrefixShouldReturnTermsFromBothIndices) { Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0); + AlwaysTrueNamespaceCheckerImpl impl; + EXPECT_THAT(edit.BufferTerm("foo"), IsOk()); EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk()); @@ -1368,10 +1298,10 @@ TEST_F(IndexTest, FindTermByPrefixShouldReturnTermsFromBothIndices) { // 'foo' has 1 hit in the main index, 'fool' has 1 hit in the lite index. EXPECT_THAT( - index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{0}, - /*num_to_return=*/10), - IsOkAndHolds(ElementsAre(EqualsTermMetadata("foo", kMinSizePlApproxHits), - EqualsTermMetadata("fool", 1)))); + index_->FindTermsByPrefix(/*prefix=*/"f", /*num_to_return=*/10, + TermMatchType::PREFIX, &impl), + IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("foo", 1), + EqualsTermMetadata("fool", 1)))); } TEST_F(IndexTest, GetElementsSize) { @@ -1465,12 +1395,14 @@ TEST_F(IndexTest, GetDebugInfo) { EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk()); edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::PREFIX, /*namespace_id=*/0); + index_->set_last_added_document_id(kDocumentId1); ASSERT_THAT(edit.BufferTerm("foot"), IsOk()); EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk()); ICING_ASSERT_OK(index_->Merge()); edit = index_->Edit(kDocumentId2, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0); + index_->set_last_added_document_id(kDocumentId2); ASSERT_THAT(edit.BufferTerm("footer"), IsOk()); 
EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk()); edit = index_->Edit(kDocumentId2, kSectionId3, TermMatchType::PREFIX, @@ -1478,40 +1410,45 @@ TEST_F(IndexTest, GetDebugInfo) { ASSERT_THAT(edit.BufferTerm("foo"), IsOk()); EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk()); - std::string out0; - index_->GetDebugInfo(/*verbosity=*/0, &out0); - EXPECT_THAT(out0, Not(IsEmpty())); + IndexDebugInfoProto out0 = index_->GetDebugInfo(/*verbosity=*/0); + EXPECT_FALSE(out0.main_index_info().has_flash_index_storage_info()); + EXPECT_THAT(out0.main_index_info().last_added_document_id(), + Eq(kDocumentId1)); + EXPECT_THAT(out0.lite_index_info().curr_size(), Eq(2)); + EXPECT_THAT(out0.lite_index_info().last_added_document_id(), + Eq(kDocumentId2)); - std::string out1; - index_->GetDebugInfo(/*verbosity=*/1, &out1); - EXPECT_THAT(out1, SizeIs(Gt(out0.size()))); + IndexDebugInfoProto out1 = index_->GetDebugInfo(/*verbosity=*/1); + EXPECT_THAT(out1.main_index_info().flash_index_storage_info(), + Not(IsEmpty())); // Add one more doc to the lite index. Debug strings should change. edit = index_->Edit(kDocumentId3, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0); + index_->set_last_added_document_id(kDocumentId3); ASSERT_THAT(edit.BufferTerm("far"), IsOk()); EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk()); - std::string out2; - index_->GetDebugInfo(/*verbosity=*/0, &out2); - EXPECT_THAT(out2, Ne(out0)); - - std::string out3; - index_->GetDebugInfo(/*verbosity=*/1, &out3); - EXPECT_THAT(out3, Ne(out1)); + IndexDebugInfoProto out2 = index_->GetDebugInfo(/*verbosity=*/0); + EXPECT_THAT(out2.lite_index_info().curr_size(), Eq(3)); + EXPECT_THAT(out2.lite_index_info().last_added_document_id(), + Eq(kDocumentId3)); // Merge into the man index. Debuug strings should change again. 
ICING_ASSERT_OK(index_->Merge()); - std::string out4; - index_->GetDebugInfo(/*verbosity=*/0, &out4); - EXPECT_THAT(out4, Ne(out0)); - EXPECT_THAT(out4, Ne(out2)); - - std::string out5; - index_->GetDebugInfo(/*verbosity=*/1, &out5); - EXPECT_THAT(out5, Ne(out1)); - EXPECT_THAT(out5, Ne(out3)); + IndexDebugInfoProto out3 = index_->GetDebugInfo(/*verbosity=*/0); + EXPECT_TRUE(out3.has_index_storage_info()); + EXPECT_THAT(out3.main_index_info().lexicon_info(), Not(IsEmpty())); + EXPECT_THAT(out3.main_index_info().last_added_document_id(), + Eq(kDocumentId3)); + EXPECT_THAT(out3.lite_index_info().curr_size(), Eq(0)); + EXPECT_THAT(out3.lite_index_info().hit_buffer_size(), Gt(0)); + EXPECT_THAT(out3.lite_index_info().last_added_document_id(), + Eq(kInvalidDocumentId)); + EXPECT_THAT(out3.lite_index_info().searchable_end(), Eq(0)); + EXPECT_THAT(out3.lite_index_info().index_crc(), Gt(0)); + EXPECT_THAT(out3.lite_index_info().lexicon_info(), Not(IsEmpty())); } TEST_F(IndexTest, BackfillingMultipleTermsSucceeds) { diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc index 43a846b..7c6d924 100644 --- a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc +++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc @@ -48,13 +48,13 @@ using ::testing::ElementsAreArray; using ::testing::Eq; using ::testing::IsEmpty; -constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL = - PropertyConfigProto_Cardinality_Code_OPTIONAL; +constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_OPTIONAL = + PropertyConfigProto::Cardinality::OPTIONAL; -constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN = - StringIndexingConfig_TokenizerType_Code_PLAIN; +constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_PLAIN = + StringIndexingConfig::TokenizerType::PLAIN; -constexpr TermMatchType_Code MATCH_EXACT = 
TermMatchType_Code_EXACT_ONLY; +constexpr TermMatchType::Code MATCH_EXACT = TermMatchType::EXACT_ONLY; class DocHitInfoIteratorSectionRestrictTest : public ::testing::Test { protected: diff --git a/icing/index/lite/doc-hit-info-iterator-term-lite.cc b/icing/index/lite/doc-hit-info-iterator-term-lite.cc index 08df4fc..f215d63 100644 --- a/icing/index/lite/doc-hit-info-iterator-term-lite.cc +++ b/icing/index/lite/doc-hit-info-iterator-term-lite.cc @@ -77,7 +77,8 @@ libtextclassifier3::Status DocHitInfoIteratorTermLiteExact::RetrieveMoreHits() { ICING_ASSIGN_OR_RETURN(uint32_t term_id, term_id_codec_->EncodeTvi(tvi, TviType::LITE)); lite_index_->AppendHits(term_id, section_restrict_mask_, - /*only_from_prefix_sections=*/false, &cached_hits_); + /*only_from_prefix_sections=*/false, + /*namespace_checker=*/nullptr, &cached_hits_); cached_hits_idx_ = 0; return libtextclassifier3::Status::OK; } @@ -100,7 +101,7 @@ DocHitInfoIteratorTermLitePrefix::RetrieveMoreHits() { term_id_codec_->EncodeTvi(it.GetValueIndex(), TviType::LITE)); lite_index_->AppendHits(term_id, section_restrict_mask_, /*only_from_prefix_sections=*/!exact_match, - &cached_hits_); + /*namespace_checker=*/nullptr, &cached_hits_); ++terms_matched; } if (terms_matched > 1) { diff --git a/icing/index/lite/lite-index.cc b/icing/index/lite/lite-index.cc index 9e4ac28..a5c6baf 100644 --- a/icing/index/lite/lite-index.cc +++ b/icing/index/lite/lite-index.cc @@ -336,9 +336,12 @@ libtextclassifier3::StatusOr<uint32_t> LiteIndex::GetTermId( int LiteIndex::AppendHits(uint32_t term_id, SectionIdMask section_id_mask, bool only_from_prefix_sections, + const NamespaceChecker* namespace_checker, std::vector<DocHitInfo>* hits_out) { int count = 0; DocumentId last_document_id = kInvalidDocumentId; + // Record whether the last document belongs to the given namespaces. 
+ bool last_document_in_namespace = false; for (uint32_t idx = Seek(term_id); idx < header_->cur_size(); idx++) { TermIdHitPair term_id_hit_pair( hit_buffer_.array_cast<TermIdHitPair>()[idx]); @@ -355,22 +358,31 @@ int LiteIndex::AppendHits(uint32_t term_id, SectionIdMask section_id_mask, } DocumentId document_id = hit.document_id(); if (document_id != last_document_id) { + last_document_id = document_id; + last_document_in_namespace = + namespace_checker == nullptr || + namespace_checker->BelongsToTargetNamespaces(document_id); + if (!last_document_in_namespace) { + // The document is removed or expired or not belongs to target + // namespaces. + continue; + } ++count; if (hits_out != nullptr) { hits_out->push_back(DocHitInfo(document_id)); } - last_document_id = document_id; } - if (hits_out != nullptr) { + if (hits_out != nullptr && last_document_in_namespace) { hits_out->back().UpdateSection(hit.section_id(), hit.term_frequency()); } } return count; } -int LiteIndex::CountHits(uint32_t term_id) { +libtextclassifier3::StatusOr<int> LiteIndex::CountHits( + uint32_t term_id, const NamespaceChecker* namespace_checker) { return AppendHits(term_id, kSectionIdMaskAll, - /*only_from_prefix_sections=*/false, + /*only_from_prefix_sections=*/false, namespace_checker, /*hits_out=*/nullptr); } @@ -379,15 +391,16 @@ bool LiteIndex::is_full() const { lexicon_.min_free_fraction() < (1.0 - kTrieFullFraction)); } -void LiteIndex::GetDebugInfo(int verbosity, std::string* out) const { - absl_ports::StrAppend( - out, IcingStringUtil::StringPrintf("Lite Index\nHit buffer %u/%u\n", - header_->cur_size(), - options_.hit_buffer_size)); - - // Lexicon. 
- out->append("Lexicon stats:\n"); - lexicon_.GetDebugInfo(verbosity, out); +IndexDebugInfoProto::LiteIndexDebugInfoProto LiteIndex::GetDebugInfo( + int verbosity) { + IndexDebugInfoProto::LiteIndexDebugInfoProto res; + res.set_curr_size(header_->cur_size()); + res.set_hit_buffer_size(options_.hit_buffer_size); + res.set_last_added_document_id(header_->last_added_docid()); + res.set_searchable_end(header_->searchable_end()); + res.set_index_crc(ComputeChecksum().Get()); + lexicon_.GetDebugInfo(verbosity, res.mutable_lexicon_info()); + return res; } libtextclassifier3::StatusOr<int64_t> LiteIndex::GetElementsSize() const { @@ -408,12 +421,8 @@ IndexStorageInfoProto LiteIndex::GetStorageInfo( IndexStorageInfoProto storage_info) const { int64_t header_and_hit_buffer_file_size = filesystem_->GetFileSize(hit_buffer_fd_.get()); - if (header_and_hit_buffer_file_size != Filesystem::kBadFileSize) { - storage_info.set_lite_index_hit_buffer_size( - header_and_hit_buffer_file_size); - } else { - storage_info.set_lite_index_hit_buffer_size(-1); - } + storage_info.set_lite_index_hit_buffer_size( + IcingFilesystem::SanitizeFileSize(header_and_hit_buffer_file_size)); int64_t lexicon_disk_usage = lexicon_.GetElementsSize(); if (lexicon_disk_usage != Filesystem::kBadFileSize) { storage_info.set_lite_index_lexicon_size(lexicon_disk_usage); diff --git a/icing/index/lite/lite-index.h b/icing/index/lite/lite-index.h index b134aba..378fc94 100644 --- a/icing/index/lite/lite-index.h +++ b/icing/index/lite/lite-index.h @@ -37,10 +37,12 @@ #include "icing/legacy/index/icing-lite-index-header.h" #include "icing/legacy/index/icing-lite-index-options.h" #include "icing/legacy/index/icing-mmapper.h" +#include "icing/proto/debug.pb.h" #include "icing/proto/storage.pb.h" #include "icing/proto/term.pb.h" #include "icing/schema/section.h" #include "icing/store/document-id.h" +#include "icing/store/namespace-checker.h" #include "icing/store/namespace-id.h" #include "icing/util/bit-util.h" #include 
"icing/util/crc32.h" @@ -140,13 +142,19 @@ class LiteIndex { // skipping hits in non-prefix sections if only_from_prefix_sections is true, // to hits_out. If hits_out is nullptr, no hits will be added. // + // Only those hits which belongs to the given namespaces will be counted and + // appended. A nullptr namespace checker will disable this check. + // // Returns the number of hits that would be added to hits_out. int AppendHits(uint32_t term_id, SectionIdMask section_id_mask, bool only_from_prefix_sections, + const NamespaceChecker* namespace_checker, std::vector<DocHitInfo>* hits_out); // Returns the hit count of the term. - int CountHits(uint32_t term_id); + // Only those hits which belongs to the given namespaces will be counted. + libtextclassifier3::StatusOr<int> CountHits( + uint32_t term_id, const NamespaceChecker* namespace_checker); // Check if buffer has reached its capacity. bool is_full() const; @@ -234,7 +242,7 @@ class LiteIndex { // Returns debug information for the index in out. // verbosity <= 0, simplest debug information - size of lexicon, hit buffer // verbosity > 0, more detailed debug information from the lexicon. - void GetDebugInfo(int verbosity, std::string* out) const; + IndexDebugInfoProto::LiteIndexDebugInfoProto GetDebugInfo(int verbosity); // Returns the byte size of all the elements held in the index. This excludes // the size of any internal metadata of the index, e.g. the index's header. diff --git a/icing/index/lite/lite-index_test.cc b/icing/index/lite/lite-index_test.cc new file mode 100644 index 0000000..825f830 --- /dev/null +++ b/icing/index/lite/lite-index_test.cc @@ -0,0 +1,110 @@ +// Copyright (C) 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "icing/index/lite/lite-index.h" + +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "icing/index/term-id-codec.h" +#include "icing/legacy/index/icing-mock-filesystem.h" +#include "icing/schema/section.h" +#include "icing/store/namespace-checker.h" +#include "icing/testing/common-matchers.h" +#include "icing/testing/tmp-directory.h" + +namespace icing { +namespace lib { + +namespace { + +using ::testing::Eq; +using ::testing::IsEmpty; +using ::testing::SizeIs; + +class AlwaysFalseNamespaceCheckerImpl : public NamespaceChecker { + public: + bool BelongsToTargetNamespaces(DocumentId document_id) const override { + return false; + } +}; + +class LiteIndexTest : public testing::Test { + protected: + void SetUp() override { + index_dir_ = GetTestTempDir() + "/test_dir"; + ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(index_dir_.c_str())); + + std::string lite_index_file_name = index_dir_ + "/test_file.lite-idx.index"; + LiteIndex::Options options(lite_index_file_name, + /*hit_buffer_want_merge_bytes=*/1024 * 1024); + ICING_ASSERT_OK_AND_ASSIGN(lite_index_, + LiteIndex::Create(options, &icing_filesystem_)); + + ICING_ASSERT_OK_AND_ASSIGN( + term_id_codec_, + TermIdCodec::Create( + IcingDynamicTrie::max_value_index(IcingDynamicTrie::Options()), + IcingDynamicTrie::max_value_index(options.lexicon_options))); + } + + void TearDown() override { + ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(index_dir_.c_str())); + } + + std::string index_dir_; + Filesystem filesystem_; + IcingFilesystem 
icing_filesystem_; + std::unique_ptr<LiteIndex> lite_index_; + std::unique_ptr<TermIdCodec> term_id_codec_; +}; + +constexpr NamespaceId kNamespace0 = 0; + +TEST_F(LiteIndexTest, LiteIndexAppendHits) { + ICING_ASSERT_OK_AND_ASSIGN( + uint32_t tvi, + lite_index_->InsertTerm("foo", TermMatchType::PREFIX, kNamespace0)); + ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id, + term_id_codec_->EncodeTvi(tvi, TviType::LITE)); + Hit doc_hit0(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency, + /*is_in_prefix_section=*/false); + Hit doc_hit1(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency, + /*is_in_prefix_section=*/false); + ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc_hit0)); + ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc_hit1)); + + std::vector<DocHitInfo> hits1; + lite_index_->AppendHits(foo_term_id, kSectionIdMaskAll, + /*only_from_prefix_sections=*/false, + /*namespace_checker=*/nullptr, &hits1); + EXPECT_THAT(hits1, SizeIs(1)); + EXPECT_THAT(hits1.back().document_id(), Eq(0)); + // Check that the hits are coming from section 0 and section 1. + EXPECT_THAT(hits1.back().hit_section_ids_mask(), Eq(0b11)); + + std::vector<DocHitInfo> hits2; + AlwaysFalseNamespaceCheckerImpl always_false_namespace_checker; + lite_index_->AppendHits(foo_term_id, kSectionIdMaskAll, + /*only_from_prefix_sections=*/false, + &always_false_namespace_checker, &hits2); + // Check that no hits are returned because they get skipped by the namespace + // checker. + EXPECT_THAT(hits2, IsEmpty()); +} + +} // namespace +} // namespace lib +} // namespace icing diff --git a/icing/index/main/flash-index-storage.h b/icing/index/main/flash-index-storage.h index 8d5b50b..6c6fbb8 100644 --- a/icing/index/main/flash-index-storage.h +++ b/icing/index/main/flash-index-storage.h @@ -159,6 +159,7 @@ class FlashIndexStorage { libtextclassifier3::Status Reset(); + // TODO(b/222349894) Convert the string output to a protocol buffer instead. 
void GetDebugInfo(int verbosity, std::string* out) const; private: diff --git a/icing/index/main/main-index.cc b/icing/index/main/main-index.cc index b185138..2d6007b 100644 --- a/icing/index/main/main-index.cc +++ b/icing/index/main/main-index.cc @@ -133,18 +133,10 @@ libtextclassifier3::StatusOr<int64_t> MainIndex::GetElementsSize() const { IndexStorageInfoProto MainIndex::GetStorageInfo( IndexStorageInfoProto storage_info) const { - int64_t lexicon_elt_size = main_lexicon_->GetElementsSize(); - if (lexicon_elt_size != IcingFilesystem::kBadFileSize) { - storage_info.set_main_index_lexicon_size(lexicon_elt_size); - } else { - storage_info.set_main_index_lexicon_size(-1); - } - int64_t index_elt_size = flash_index_storage_->GetElementsSize(); - if (lexicon_elt_size != IcingFilesystem::kBadFileSize) { - storage_info.set_main_index_storage_size(index_elt_size); - } else { - storage_info.set_main_index_storage_size(-1); - } + storage_info.set_main_index_lexicon_size( + IcingFilesystem::SanitizeFileSize(main_lexicon_->GetElementsSize())); + storage_info.set_main_index_storage_size( + Filesystem::SanitizeFileSize(flash_index_storage_->GetElementsSize())); storage_info.set_main_index_block_size(flash_index_storage_->block_size()); storage_info.set_num_blocks(flash_index_storage_->num_blocks()); storage_info.set_min_free_fraction(flash_index_storage_->min_free_fraction()); @@ -186,7 +178,7 @@ MainIndex::GetAccessorForPrefixTerm(const std::string& prefix) { if (!exact && !hits_in_prefix_section.HasProperty(main_itr.GetValueIndex())) { // Found it, but it doesn't have prefix hits. Exit early. No need to // retrieve the posting list because there's nothing there for us. 
- return libtextclassifier3::Status::OK; + return absl_ports::NotFoundError("The term doesn't have any prefix hits."); } PostingListIdentifier posting_list_id = PostingListIdentifier::kInvalid; memcpy(&posting_list_id, main_itr.GetValue(), sizeof(posting_list_id)); @@ -217,35 +209,45 @@ bool IsTermInNamespaces( libtextclassifier3::StatusOr<std::vector<TermMetadata>> MainIndex::FindTermsByPrefix(const std::string& prefix, - const std::vector<NamespaceId>& namespace_ids) { + TermMatchType::Code term_match_type, + const NamespaceChecker* namespace_checker) { // Finds all the terms that start with the given prefix in the lexicon. IcingDynamicTrie::Iterator term_iterator(*main_lexicon_, prefix.c_str()); - // A property reader to help check if a term has some property. - IcingDynamicTrie::PropertyReadersAll property_reader(*main_lexicon_); - std::vector<TermMetadata> term_metadata_list; while (term_iterator.IsValid()) { - uint32_t term_value_index = term_iterator.GetValueIndex(); + int count = 0; + DocumentId last_document_id = kInvalidDocumentId; - // Skips the terms that don't exist in the given namespaces. We won't skip - // any terms if namespace_ids is empty. - if (!IsTermInNamespaces(property_reader, term_value_index, namespace_ids)) { - term_iterator.Advance(); - continue; - } PostingListIdentifier posting_list_id = PostingListIdentifier::kInvalid; memcpy(&posting_list_id, term_iterator.GetValue(), sizeof(posting_list_id)); - // Getting the actual hit count would require reading the entire posting - // list chain. We take an approximation to avoid all of those IO ops. - // Because we are not reading the posting lists, it is impossible to - // differentiate between single max-size posting lists and chains of - // max-size posting lists. We assume that the impact on scoring is not - // significant. 
- int approx_hit_count = IndexBlock::ApproximateFullPostingListHitsForBlock( - flash_index_storage_->block_size(), - posting_list_id.posting_list_index_bits()); - term_metadata_list.emplace_back(term_iterator.GetKey(), approx_hit_count); + ICING_ASSIGN_OR_RETURN(PostingListAccessor pl_accessor, + PostingListAccessor::CreateFromExisting( + flash_index_storage_.get(), posting_list_id)); + ICING_ASSIGN_OR_RETURN(std::vector<Hit> hits, + pl_accessor.GetNextHitsBatch()); + for (const Hit& hit : hits) { + DocumentId document_id = hit.document_id(); + if (document_id != last_document_id) { + last_document_id = document_id; + if (term_match_type == TermMatchType::EXACT_ONLY && + hit.is_prefix_hit()) { + continue; + } + if (!namespace_checker->BelongsToTargetNamespaces(document_id)) { + // The document is removed or expired or not belongs to target + // namespaces. + continue; + } + // TODO(b/152934343) Add search type in SuggestionSpec to ask user to + // input search type, prefix or exact. And make different score strategy + // base on that. + ++count; + } + } + if (count > 0) { + term_metadata_list.push_back(TermMetadata(term_iterator.GetKey(), count)); + } term_iterator.Advance(); } @@ -605,16 +607,22 @@ libtextclassifier3::Status MainIndex::AddPrefixBackfillHits( return libtextclassifier3::Status::OK; } -void MainIndex::GetDebugInfo(int verbosity, std::string* out) const { +IndexDebugInfoProto::MainIndexDebugInfoProto MainIndex::GetDebugInfo( + int verbosity) const { + IndexDebugInfoProto::MainIndexDebugInfoProto res; + // Lexicon. 
- out->append("Main Lexicon stats:\n"); - main_lexicon_->GetDebugInfo(verbosity, out); + main_lexicon_->GetDebugInfo(verbosity, res.mutable_lexicon_info()); + + res.set_last_added_document_id(last_added_document_id()); if (verbosity <= 0) { - return; + return res; } - flash_index_storage_->GetDebugInfo(verbosity, out); + flash_index_storage_->GetDebugInfo(verbosity, + res.mutable_flash_index_storage_info()); + return res; } } // namespace lib diff --git a/icing/index/main/main-index.h b/icing/index/main/main-index.h index 919a5c5..abb0418 100644 --- a/icing/index/main/main-index.h +++ b/icing/index/main/main-index.h @@ -27,7 +27,9 @@ #include "icing/index/term-metadata.h" #include "icing/legacy/index/icing-dynamic-trie.h" #include "icing/legacy/index/icing-filesystem.h" +#include "icing/proto/debug.pb.h" #include "icing/proto/storage.pb.h" +#include "icing/store/namespace-checker.h" #include "icing/store/namespace-id.h" #include "icing/util/status-macros.h" @@ -71,17 +73,17 @@ class MainIndex { // Finds terms with the given prefix in the given namespaces. If // 'namespace_ids' is empty, returns results from all the namespaces. The // input prefix must be normalized, otherwise inaccurate results may be - // returned. Results are not sorted specifically and are in lexigraphical - // order. Number of results are no more than 'num_to_return'. - // - // The hit count returned with each TermMetadata is an approximation based of - // posting list size. + // returned. If term_match_type is EXACT, only exact hit will be counted and + // it is PREFIX, both prefix and exact hits will be counted. Results are not + // sorted specifically and are in lexigraphical order. Number of results are + // no more than 'num_to_return'. // // Returns: // A list of TermMetadata on success // INTERNAL_ERROR if failed to access term data. 
libtextclassifier3::StatusOr<std::vector<TermMetadata>> FindTermsByPrefix( - const std::string& prefix, const std::vector<NamespaceId>& namespace_ids); + const std::string& prefix, TermMatchType::Code term_match_type, + const NamespaceChecker* namespace_checker); struct LexiconMergeOutputs { // Maps from main_lexicon tvi for new branching point to the main_lexicon @@ -184,7 +186,8 @@ class MainIndex { // verbosity <= 0, simplest debug information - just the lexicon // verbosity > 0, more detailed debug information including raw postings // lists. - void GetDebugInfo(int verbosity, std::string* out) const; + IndexDebugInfoProto::MainIndexDebugInfoProto GetDebugInfo( + int verbosity) const; private: libtextclassifier3::Status Init(const std::string& index_directory, diff --git a/icing/index/main/main-index_test.cc b/icing/index/main/main-index_test.cc index 74139be..fa83d68 100644 --- a/icing/index/main/main-index_test.cc +++ b/icing/index/main/main-index_test.cc @@ -162,6 +162,34 @@ TEST_F(MainIndexTest, MainIndexGetAccessorForPrefixReturnsValidAccessor) { EXPECT_THAT(main_index->GetAccessorForPrefixTerm("foo"), IsOk()); } +TEST_F(MainIndexTest, MainIndexGetAccessorForPrefixReturnsNotFound) { + // 1. Index one doc in the Lite Index: + // - Doc0 {"foot" is_in_prefix_section=false} + ICING_ASSERT_OK_AND_ASSIGN( + uint32_t tvi, + lite_index_->InsertTerm("foot", TermMatchType::EXACT_ONLY, kNamespace0)); + ICING_ASSERT_OK_AND_ASSIGN(uint32_t foot_term_id, + term_id_codec_->EncodeTvi(tvi, TviType::LITE)); + + Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency, + /*is_in_prefix_section=*/false); + ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit)); + + // 2. Create the main index. It should have no entries in its lexicon. 
+ std::string main_index_file_name = index_dir_ + "/test_file.idx.index"; + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<MainIndex> main_index, + MainIndex::Create(main_index_file_name, &filesystem_, + &icing_filesystem_)); + + // 3. Merge the index. The main index should return not found when we search + // prefix contain "foo". + ICING_ASSERT_OK(Merge(*lite_index_, *term_id_codec_, main_index.get())); + // GetAccessorForPrefixTerm should return a valid accessor for "foo". + EXPECT_THAT(main_index->GetAccessorForPrefixTerm("foo"), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); +} + TEST_F(MainIndexTest, MainIndexGetAccessorForExactTermNotFound) { // Create the main index. It should have no entries in its lexicon. std::string main_index_file_name = index_dir_ + "/test_file.idx.index"; diff --git a/icing/legacy/index/icing-dynamic-trie.cc b/icing/legacy/index/icing-dynamic-trie.cc index baa043a..77876c4 100644 --- a/icing/legacy/index/icing-dynamic-trie.cc +++ b/icing/legacy/index/icing-dynamic-trie.cc @@ -70,6 +70,7 @@ #include <algorithm> #include <cerrno> #include <cinttypes> +#include <cstdint> #include <cstring> #include <memory> #include <utility> @@ -397,6 +398,8 @@ class IcingDynamicTrie::IcingDynamicTrieStorage { // storage. IcingScopedFd array_fds_[NUM_ARRAY_TYPES]; std::vector<IcingArrayStorage> array_storage_; + + // Legacy file system. Switch to use the new Filesystem class instead. 
const IcingFilesystem *filesystem_; }; @@ -1364,10 +1367,12 @@ uint32_t IcingDynamicTrie::size() const { return storage_->hdr().num_keys(); } -void IcingDynamicTrie::CollectStatsRecursive(const Node &node, - Stats *stats) const { +void IcingDynamicTrie::CollectStatsRecursive(const Node &node, Stats *stats, + uint32_t depth) const { if (node.is_leaf()) { stats->num_leaves++; + stats->sum_depth += depth; + stats->max_depth = max(stats->max_depth, depth); const char *suffix = storage_->GetSuffix(node.next_index()); stats->suffixes_used += strlen(suffix) + 1 + value_size(); if (!suffix[0]) { @@ -1379,13 +1384,16 @@ void IcingDynamicTrie::CollectStatsRecursive(const Node &node, for (; i < (1U << node.log2_num_children()); i++) { const Next &next = *storage_->GetNext(node.next_index(), i); if (next.node_index() == kInvalidNodeIndex) break; - CollectStatsRecursive(*storage_->GetNode(next.node_index()), stats); + CollectStatsRecursive(*storage_->GetNode(next.node_index()), stats, + depth + 1); } // At least one valid node in each next array if (i == 0) { ICING_LOG(FATAL) << "No valid node in 'next' array"; } + stats->sum_children += i; + stats->max_children = max(stats->max_children, i); stats->child_counts[i - 1]++; stats->wasted[node.log2_num_children()] += @@ -1467,9 +1475,12 @@ std::string IcingDynamicTrie::Stats::DumpStats(int verbosity) const { "Wasted total: %u\n" "Num intermediates %u num leaves %u " "suffixes used %u null %u\n" + "avg and max children for intermediates: %.3f, %u\n" + "avg and max depth for leaves: %.3f, %u\n" "Total next frag: %.3f%%\n", total_wasted, num_intermediates, num_leaves, suffixes_used, - null_suffixes, + null_suffixes, 1. * sum_children / num_intermediates, max_children, + 1. * sum_depth / num_leaves, max_depth, 100. 
* math_util::SafeDivide((total_free + total_wasted), num_nexts)); } IcingStringUtil::SStringAppendF( diff --git a/icing/legacy/index/icing-dynamic-trie.h b/icing/legacy/index/icing-dynamic-trie.h index 8821799..013b926 100644 --- a/icing/legacy/index/icing-dynamic-trie.h +++ b/icing/legacy/index/icing-dynamic-trie.h @@ -152,8 +152,13 @@ class IcingDynamicTrie : public IIcingStorage { uint32_t max_nodes; // Count of intermediate nodes. uint32_t num_intermediates; + // Total and maximum number of children of intermediate nodes. + uint32_t sum_children, max_children; + // Count of leaf nodes. uint32_t num_leaves; + // Total and maximum depth of leaf nodes. + uint32_t sum_depth, max_depth; // Next stats @@ -186,6 +191,7 @@ class IcingDynamicTrie : public IIcingStorage { uint32_t dirty_pages_nexts; uint32_t dirty_pages_suffixes; + // TODO(b/222349894) Convert the string output to a protocol buffer instead. std::string DumpStats(int verbosity) const; }; @@ -601,7 +607,8 @@ class IcingDynamicTrie : public IIcingStorage { static const uint32_t kInvalidSuffixIndex; // Stats helpers. - void CollectStatsRecursive(const Node &node, Stats *stats) const; + void CollectStatsRecursive(const Node &node, Stats *stats, + uint32_t depth = 0) const; // Helpers for Find and Insert. const Next *GetNextByChar(const Node *node, uint8_t key_char) const; diff --git a/icing/legacy/index/icing-filesystem.h b/icing/legacy/index/icing-filesystem.h index f645632..ce75a82 100644 --- a/icing/legacy/index/icing-filesystem.h +++ b/icing/legacy/index/icing-filesystem.h @@ -224,6 +224,11 @@ class IcingFilesystem { // Increments to_increment by size if size is valid, or sets to_increment // to kBadFileSize if either size or to_increment is kBadFileSize. static void IncrementByOrSetInvalid(uint64_t size, uint64_t *to_increment); + + // Return -1 if file_size is invalid. Otherwise, return file_size. + static int64_t SanitizeFileSize(int64_t file_size) { + return (file_size != kBadFileSize) ? 
file_size : -1; + } }; } // namespace lib diff --git a/icing/legacy/index/icing-flash-bitmap.h b/icing/legacy/index/icing-flash-bitmap.h index e3ba0e2..6bb9591 100644 --- a/icing/legacy/index/icing-flash-bitmap.h +++ b/icing/legacy/index/icing-flash-bitmap.h @@ -138,6 +138,7 @@ class IcingFlashBitmap { // Upgrade for version 18. bool UpgradeTo18(); + // Legacy file system. Switch to use the new Filesystem class instead. const IcingFilesystem *const filesystem_; std::string filename_; OpenType open_type_; diff --git a/icing/query/query-processor_benchmark.cc b/icing/query/query-processor_benchmark.cc index bdd40aa..e48fe78 100644 --- a/icing/query/query-processor_benchmark.cc +++ b/icing/query/query-processor_benchmark.cc @@ -16,7 +16,6 @@ #include "gmock/gmock.h" #include "third_party/absl/flags/flag.h" #include "icing/document-builder.h" -#include "icing/helpers/icu/icu-data-file-helper.h" #include "icing/index/index.h" #include "icing/proto/term.pb.h" #include "icing/query/query-processor.h" @@ -24,6 +23,7 @@ #include "icing/schema/section.h" #include "icing/store/document-id.h" #include "icing/testing/common-matchers.h" +#include "icing/testing/icu-data-file-helper.h" #include "icing/testing/test-data.h" #include "icing/testing/tmp-directory.h" #include "icing/tokenization/language-segmenter-factory.h" diff --git a/icing/query/query-processor_test.cc b/icing/query/query-processor_test.cc index daeb479..950f739 100644 --- a/icing/query/query-processor_test.cc +++ b/icing/query/query-processor_test.cc @@ -23,7 +23,6 @@ #include "gtest/gtest.h" #include "icing/document-builder.h" #include "icing/file/filesystem.h" -#include "icing/helpers/icu/icu-data-file-helper.h" #include "icing/index/hit/doc-hit-info.h" #include "icing/index/index.h" #include "icing/index/iterator/doc-hit-info-iterator-test-util.h" @@ -40,6 +39,7 @@ #include "icing/store/document-store.h" #include "icing/testing/common-matchers.h" #include "icing/testing/fake-clock.h" +#include 
"icing/testing/icu-data-file-helper.h" #include "icing/testing/jni-test-helpers.h" #include "icing/testing/test-data.h" #include "icing/testing/tmp-directory.h" @@ -61,16 +61,16 @@ using ::testing::SizeIs; using ::testing::Test; using ::testing::UnorderedElementsAre; -constexpr PropertyConfigProto_DataType_Code TYPE_STRING = - PropertyConfigProto_DataType_Code_STRING; +constexpr PropertyConfigProto::DataType::Code TYPE_STRING = + PropertyConfigProto::DataType::STRING; -constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL = - PropertyConfigProto_Cardinality_Code_OPTIONAL; +constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_OPTIONAL = + PropertyConfigProto::Cardinality::OPTIONAL; -constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN = - StringIndexingConfig_TokenizerType_Code_PLAIN; +constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_PLAIN = + StringIndexingConfig::TokenizerType::PLAIN; -constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY; +constexpr TermMatchType::Code MATCH_EXACT = TermMatchType::EXACT_ONLY; class QueryProcessorTest : public Test { protected: diff --git a/icing/query/suggestion-processor.cc b/icing/query/suggestion-processor.cc index 9c60810..cfa53f6 100644 --- a/icing/query/suggestion-processor.cc +++ b/icing/query/suggestion-processor.cc @@ -35,7 +35,7 @@ SuggestionProcessor::Create(Index* index, libtextclassifier3::StatusOr<std::vector<TermMetadata>> SuggestionProcessor::QuerySuggestions( const icing::lib::SuggestionSpecProto& suggestion_spec, - const std::vector<NamespaceId>& namespace_ids) { + const NamespaceChecker* namespace_checker) { // We use query tokenizer to tokenize the give prefix, and we only use the // last token to be the suggestion prefix. ICING_ASSIGN_OR_RETURN( @@ -73,8 +73,11 @@ SuggestionProcessor::QuerySuggestions( // lowercase. 
ICING_ASSIGN_OR_RETURN( std::vector<TermMetadata> terms, - index_.FindTermsByPrefix(normalizer_.NormalizeTerm(last_token), - namespace_ids, suggestion_spec.num_to_return())); + index_.FindTermsByPrefix( + normalizer_.NormalizeTerm(last_token), + suggestion_spec.num_to_return(), + suggestion_spec.scoring_spec().scoring_match_type(), + namespace_checker)); for (TermMetadata& term : terms) { term.content = query_prefix + term.content; @@ -90,4 +93,4 @@ SuggestionProcessor::SuggestionProcessor( normalizer_(*normalizer) {} } // namespace lib -} // namespace icing
\ No newline at end of file +} // namespace icing diff --git a/icing/query/suggestion-processor.h b/icing/query/suggestion-processor.h index b10dc84..088863e 100644 --- a/icing/query/suggestion-processor.h +++ b/icing/query/suggestion-processor.h @@ -48,7 +48,7 @@ class SuggestionProcessor { // INTERNAL_ERROR on all other errors libtextclassifier3::StatusOr<std::vector<TermMetadata>> QuerySuggestions( const SuggestionSpecProto& suggestion_spec, - const std::vector<NamespaceId>& namespace_ids); + const NamespaceChecker* namespace_checker); private: explicit SuggestionProcessor(Index* index, diff --git a/icing/query/suggestion-processor_test.cc b/icing/query/suggestion-processor_test.cc index 5e62277..ba4c90a 100644 --- a/icing/query/suggestion-processor_test.cc +++ b/icing/query/suggestion-processor_test.cc @@ -15,10 +15,11 @@ #include "icing/query/suggestion-processor.h" #include "gmock/gmock.h" -#include "icing/helpers/icu/icu-data-file-helper.h" #include "icing/store/document-store.h" +#include "icing/testing/always-true-namespace-checker-impl.h" #include "icing/testing/common-matchers.h" #include "icing/testing/fake-clock.h" +#include "icing/testing/icu-data-file-helper.h" #include "icing/testing/jni-test-helpers.h" #include "icing/testing/test-data.h" #include "icing/testing/tmp-directory.h" @@ -80,7 +81,6 @@ class SuggestionProcessorTest : public Test { DocumentStore::CreateResult create_result, DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_, schema_store_.get())); - document_store_ = std::move(create_result.document_store); } libtextclassifier3::Status AddTokenToIndex( @@ -93,7 +93,6 @@ class SuggestionProcessorTest : public Test { } void TearDown() override { - document_store_.reset(); filesystem_.DeleteDirectoryRecursively(test_dir_.c_str()); } @@ -103,7 +102,6 @@ class SuggestionProcessorTest : public Test { std::unique_ptr<Index> index_; std::unique_ptr<LanguageSegmenter> language_segmenter_; std::unique_ptr<Normalizer> normalizer_; - 
std::unique_ptr<DocumentStore> document_store_; std::unique_ptr<SchemaStore> schema_store_; std::unique_ptr<const JniCache> jni_cache_ = GetTestJniCache(); FakeClock fake_clock_; @@ -131,9 +129,10 @@ TEST_F(SuggestionProcessorTest, PrependedPrefixTokenTest) { "prefix token should be prepended to the suggestion f"); suggestion_spec.set_num_to_return(10); - ICING_ASSERT_OK_AND_ASSIGN(std::vector<TermMetadata> terms, - suggestion_processor->QuerySuggestions( - suggestion_spec, /*namespace_ids=*/{})); + AlwaysTrueNamespaceCheckerImpl impl; + ICING_ASSERT_OK_AND_ASSIGN( + std::vector<TermMetadata> terms, + suggestion_processor->QuerySuggestions(suggestion_spec, &impl)); EXPECT_THAT(terms.at(0).content, "prefix token should be prepended to the suggestion foo"); } @@ -152,9 +151,10 @@ TEST_F(SuggestionProcessorTest, NonExistentPrefixTest) { suggestion_spec.set_prefix("nonExistTerm"); suggestion_spec.set_num_to_return(10); - ICING_ASSERT_OK_AND_ASSIGN(std::vector<TermMetadata> terms, - suggestion_processor->QuerySuggestions( - suggestion_spec, /*namespace_ids=*/{})); + AlwaysTrueNamespaceCheckerImpl impl; + ICING_ASSERT_OK_AND_ASSIGN( + std::vector<TermMetadata> terms, + suggestion_processor->QuerySuggestions(suggestion_spec, &impl)); EXPECT_THAT(terms, IsEmpty()); } @@ -173,9 +173,10 @@ TEST_F(SuggestionProcessorTest, PrefixTrailingSpaceTest) { suggestion_spec.set_prefix("f "); suggestion_spec.set_num_to_return(10); - ICING_ASSERT_OK_AND_ASSIGN(std::vector<TermMetadata> terms, - suggestion_processor->QuerySuggestions( - suggestion_spec, /*namespace_ids=*/{})); + AlwaysTrueNamespaceCheckerImpl impl; + ICING_ASSERT_OK_AND_ASSIGN( + std::vector<TermMetadata> terms, + suggestion_processor->QuerySuggestions(suggestion_spec, &impl)); EXPECT_THAT(terms, IsEmpty()); } @@ -193,28 +194,26 @@ TEST_F(SuggestionProcessorTest, NormalizePrefixTest) { SuggestionSpecProto suggestion_spec; suggestion_spec.set_prefix("F"); suggestion_spec.set_num_to_return(10); + + 
AlwaysTrueNamespaceCheckerImpl impl; ICING_ASSERT_OK_AND_ASSIGN( std::vector<TermMetadata> terms, - suggestion_processor->QuerySuggestions(suggestion_spec, - /*namespace_ids=*/{})); + suggestion_processor->QuerySuggestions(suggestion_spec, &impl)); EXPECT_THAT(terms.at(0).content, "foo"); suggestion_spec.set_prefix("fO"); ICING_ASSERT_OK_AND_ASSIGN( - terms, suggestion_processor->QuerySuggestions(suggestion_spec, - /*namespace_ids=*/{})); + terms, suggestion_processor->QuerySuggestions(suggestion_spec, &impl)); EXPECT_THAT(terms.at(0).content, "foo"); suggestion_spec.set_prefix("Fo"); ICING_ASSERT_OK_AND_ASSIGN( - terms, suggestion_processor->QuerySuggestions(suggestion_spec, - /*namespace_ids=*/{})); + terms, suggestion_processor->QuerySuggestions(suggestion_spec, &impl)); EXPECT_THAT(terms.at(0).content, "foo"); suggestion_spec.set_prefix("FO"); ICING_ASSERT_OK_AND_ASSIGN( - terms, suggestion_processor->QuerySuggestions(suggestion_spec, - /*namespace_ids=*/{})); + terms, suggestion_processor->QuerySuggestions(suggestion_spec, &impl)); EXPECT_THAT(terms.at(0).content, "foo"); } @@ -235,9 +234,10 @@ TEST_F(SuggestionProcessorTest, OrOperatorPrefixTest) { suggestion_spec.set_prefix("f OR"); suggestion_spec.set_num_to_return(10); - ICING_ASSERT_OK_AND_ASSIGN(std::vector<TermMetadata> terms, - suggestion_processor->QuerySuggestions( - suggestion_spec, /*namespace_ids=*/{})); + AlwaysTrueNamespaceCheckerImpl impl; + ICING_ASSERT_OK_AND_ASSIGN( + std::vector<TermMetadata> terms, + suggestion_processor->QuerySuggestions(suggestion_spec, &impl)); // Last Operator token will be used to query suggestion EXPECT_THAT(terms.at(0).content, "f original"); @@ -256,19 +256,20 @@ TEST_F(SuggestionProcessorTest, ParenthesesOperatorPrefixTest) { suggestion_spec.set_prefix("{f}"); suggestion_spec.set_num_to_return(10); - ICING_ASSERT_OK_AND_ASSIGN(std::vector<TermMetadata> terms, - suggestion_processor->QuerySuggestions( - suggestion_spec, /*namespace_ids=*/{})); + 
AlwaysTrueNamespaceCheckerImpl impl; + ICING_ASSERT_OK_AND_ASSIGN( + std::vector<TermMetadata> terms, + suggestion_processor->QuerySuggestions(suggestion_spec, &impl)); EXPECT_THAT(terms, IsEmpty()); suggestion_spec.set_prefix("[f]"); - ICING_ASSERT_OK_AND_ASSIGN(terms, suggestion_processor->QuerySuggestions( - suggestion_spec, /*namespace_ids=*/{})); + ICING_ASSERT_OK_AND_ASSIGN( + terms, suggestion_processor->QuerySuggestions(suggestion_spec, &impl)); EXPECT_THAT(terms, IsEmpty()); suggestion_spec.set_prefix("(f)"); - ICING_ASSERT_OK_AND_ASSIGN(terms, suggestion_processor->QuerySuggestions( - suggestion_spec, /*namespace_ids=*/{})); + ICING_ASSERT_OK_AND_ASSIGN( + terms, suggestion_processor->QuerySuggestions(suggestion_spec, &impl)); EXPECT_THAT(terms, IsEmpty()); } @@ -286,15 +287,15 @@ TEST_F(SuggestionProcessorTest, OtherSpecialPrefixTest) { suggestion_spec.set_prefix("f:"); suggestion_spec.set_num_to_return(10); - ICING_ASSERT_OK_AND_ASSIGN(std::vector<TermMetadata> terms, - suggestion_processor->QuerySuggestions( - suggestion_spec, /*namespace_ids=*/{})); + AlwaysTrueNamespaceCheckerImpl impl; + ICING_ASSERT_OK_AND_ASSIGN( + std::vector<TermMetadata> terms, + suggestion_processor->QuerySuggestions(suggestion_spec, &impl)); EXPECT_THAT(terms, IsEmpty()); suggestion_spec.set_prefix("f-"); ICING_ASSERT_OK_AND_ASSIGN( - terms, suggestion_processor->QuerySuggestions(suggestion_spec, - /*namespace_ids=*/{})); + terms, suggestion_processor->QuerySuggestions(suggestion_spec, &impl)); EXPECT_THAT(terms, IsEmpty()); } @@ -312,9 +313,10 @@ TEST_F(SuggestionProcessorTest, InvalidPrefixTest) { suggestion_spec.set_prefix("OR OR - :"); suggestion_spec.set_num_to_return(10); - ICING_ASSERT_OK_AND_ASSIGN(std::vector<TermMetadata> terms, - suggestion_processor->QuerySuggestions( - suggestion_spec, /*namespace_ids=*/{})); + AlwaysTrueNamespaceCheckerImpl impl; + ICING_ASSERT_OK_AND_ASSIGN( + std::vector<TermMetadata> terms, + 
suggestion_processor->QuerySuggestions(suggestion_spec, &impl)); EXPECT_THAT(terms, IsEmpty()); } diff --git a/icing/result/result-retriever_test.cc b/icing/result/result-retriever_test.cc index 1c9684d..0d812e4 100644 --- a/icing/result/result-retriever_test.cc +++ b/icing/result/result-retriever_test.cc @@ -22,7 +22,6 @@ #include "gtest/gtest.h" #include "icing/document-builder.h" #include "icing/file/mock-filesystem.h" -#include "icing/helpers/icu/icu-data-file-helper.h" #include "icing/portable/equals-proto.h" #include "icing/portable/platform.h" #include "icing/proto/document.pb.h" @@ -36,6 +35,7 @@ #include "icing/store/document-id.h" #include "icing/testing/common-matchers.h" #include "icing/testing/fake-clock.h" +#include "icing/testing/icu-data-file-helper.h" #include "icing/testing/snippet-helpers.h" #include "icing/testing/test-data.h" #include "icing/testing/tmp-directory.h" @@ -55,14 +55,14 @@ using ::testing::IsEmpty; using ::testing::Return; using ::testing::SizeIs; -constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL = - PropertyConfigProto_Cardinality_Code_OPTIONAL; +constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_OPTIONAL = + PropertyConfigProto::Cardinality::OPTIONAL; -constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN = - StringIndexingConfig_TokenizerType_Code_PLAIN; +constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_PLAIN = + StringIndexingConfig::TokenizerType::PLAIN; -constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY; -constexpr TermMatchType_Code MATCH_PREFIX = TermMatchType_Code_PREFIX; +constexpr TermMatchType::Code MATCH_EXACT = TermMatchType::EXACT_ONLY; +constexpr TermMatchType::Code MATCH_PREFIX = TermMatchType::PREFIX; class ResultRetrieverTest : public testing::Test { protected: @@ -160,7 +160,7 @@ ResultSpecProto::SnippetSpecProto CreateSnippetSpec() { ResultSpecProto::SnippetSpecProto snippet_spec; 
snippet_spec.set_num_to_snippet(std::numeric_limits<int>::max()); snippet_spec.set_num_matches_per_property(std::numeric_limits<int>::max()); - snippet_spec.set_max_window_bytes(1024); + snippet_spec.set_max_window_utf32_length(1024); return snippet_spec; } @@ -362,8 +362,8 @@ TEST_F(ResultRetrieverTest, NotIgnoreErrors) { TEST_F(ResultRetrieverTest, IOErrorShouldReturnInternalError) { MockFilesystem mock_filesystem; - ON_CALL(mock_filesystem, OpenForRead(_)).WillByDefault(Return(false)); - + ON_CALL(mock_filesystem, PRead(A<int>(), A<void*>(), A<size_t>(), A<off_t>())) + .WillByDefault(Return(false)); ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, DocumentStore::Create(&mock_filesystem, test_dir_, &fake_clock_, diff --git a/icing/result/result-state-manager_test.cc b/icing/result/result-state-manager_test.cc index 32e45aa..8a9005d 100644 --- a/icing/result/result-state-manager_test.cc +++ b/icing/result/result-state-manager_test.cc @@ -849,7 +849,7 @@ TEST_F(ResultStateManagerTest, ShouldGetSnippetContext) { ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/1); result_spec.mutable_snippet_spec()->set_num_to_snippet(5); result_spec.mutable_snippet_spec()->set_num_matches_per_property(5); - result_spec.mutable_snippet_spec()->set_max_window_bytes(5); + result_spec.mutable_snippet_spec()->set_max_window_utf32_length(5); SearchSpecProto search_spec; search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); @@ -884,7 +884,7 @@ TEST_F(ResultStateManagerTest, ShouldGetDefaultSnippetContext) { // 0 indicates no snippeting result_spec.mutable_snippet_spec()->set_num_to_snippet(0); result_spec.mutable_snippet_spec()->set_num_matches_per_property(0); - result_spec.mutable_snippet_spec()->set_max_window_bytes(0); + result_spec.mutable_snippet_spec()->set_max_window_utf32_length(0); SearchSpecProto search_spec; search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); diff --git a/icing/result/result-state_test.cc 
b/icing/result/result-state_test.cc index f2121a5..d92fcfa 100644 --- a/icing/result/result-state_test.cc +++ b/icing/result/result-state_test.cc @@ -143,7 +143,7 @@ TEST_F(ResultStateTest, ShouldReturnSnippetContextAccordingToSpecs) { ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2); result_spec.mutable_snippet_spec()->set_num_to_snippet(5); result_spec.mutable_snippet_spec()->set_num_matches_per_property(5); - result_spec.mutable_snippet_spec()->set_max_window_bytes(5); + result_spec.mutable_snippet_spec()->set_max_window_utf32_length(5); SectionRestrictQueryTermsMap query_terms_map; query_terms_map.emplace("term1", std::unordered_set<std::string>()); @@ -178,7 +178,7 @@ TEST_F(ResultStateTest, NoSnippetingShouldReturnNull) { // stored. result_spec.mutable_snippet_spec()->set_num_to_snippet(0); result_spec.mutable_snippet_spec()->set_num_matches_per_property(5); - result_spec.mutable_snippet_spec()->set_max_window_bytes(5); + result_spec.mutable_snippet_spec()->set_max_window_utf32_length(5); SectionRestrictQueryTermsMap query_terms_map; query_terms_map.emplace("term1", std::unordered_set<std::string>()); diff --git a/icing/result/snippet-retriever.cc b/icing/result/snippet-retriever.cc index c46762e..bd1524e 100644 --- a/icing/result/snippet-retriever.cc +++ b/icing/result/snippet-retriever.cc @@ -41,6 +41,7 @@ #include "icing/transform/normalizer.h" #include "icing/util/character-iterator.h" #include "icing/util/i18n-utils.h" +#include "icing/util/logging.h" #include "icing/util/status-macros.h" namespace icing { @@ -75,6 +76,67 @@ inline std::string AddIndexToPath(int values_size, int index, kRBracket); } +// Returns a string of the normalized text of the input Token. Normalization +// is applied based on the Token's type. 
+std::string NormalizeToken(const Normalizer& normalizer, const Token& token) { + switch (token.type) { + case Token::Type::REGULAR: + return normalizer.NormalizeTerm(token.text); + case Token::Type::VERBATIM: + return std::string(token.text); + case Token::Type::QUERY_EXCLUSION: + [[fallthrough]]; + case Token::Type::QUERY_LEFT_PARENTHESES: + [[fallthrough]]; + case Token::Type::QUERY_RIGHT_PARENTHESES: + [[fallthrough]]; + case Token::Type::QUERY_OR: + [[fallthrough]]; + case Token::Type::QUERY_PROPERTY: + [[fallthrough]]; + case Token::Type::INVALID: + ICING_LOG(WARNING) << "Unable to normalize token of type: " + << static_cast<int>(token.type); + return std::string(token.text); + } +} + +// Returns a CharacterIterator for token's text, advancing one past the last +// matching character from the query term. +CharacterIterator FindMatchEnd(const Normalizer& normalizer, const Token& token, + const std::string& match_query_term) { + switch (token.type) { + case Token::Type::VERBATIM: { + // VERBATIM tokens are not normalized. This means the non-normalized + // matched query term must be either equal to or a prefix of the token's + // text. Therefore, the match must end at the end of the matched query + // term. 
+ CharacterIterator verbatim_match_end = + CharacterIterator(token.text, 0, 0, 0); + verbatim_match_end.AdvanceToUtf8(match_query_term.length()); + return verbatim_match_end; + } + case Token::Type::QUERY_EXCLUSION: + [[fallthrough]]; + case Token::Type::QUERY_LEFT_PARENTHESES: + [[fallthrough]]; + case Token::Type::QUERY_RIGHT_PARENTHESES: + [[fallthrough]]; + case Token::Type::QUERY_OR: + [[fallthrough]]; + case Token::Type::QUERY_PROPERTY: + [[fallthrough]]; + case Token::Type::INVALID: + ICING_LOG(WARNING) + << "Unexpected Token type " << static_cast<int>(token.type) + << " found when finding match end of query term and token."; + [[fallthrough]]; + case Token::Type::REGULAR: + return normalizer.FindNormalizedMatchEndPosition(token.text, + match_query_term); + } +} + class TokenMatcher { public: virtual ~TokenMatcher() = default; @@ -102,15 +164,16 @@ class TokenMatcherExact : public TokenMatcher { normalizer_(normalizer) {} CharacterIterator Matches(Token token) const override { - std::string s = normalizer_.NormalizeTerm(token.text); + std::string s = NormalizeToken(normalizer_, token); auto itr = unrestricted_query_terms_.find(s); if (itr == unrestricted_query_terms_.end()) { itr = restricted_query_terms_.find(s); } if (itr != unrestricted_query_terms_.end() && itr != restricted_query_terms_.end()) { - return normalizer_.FindNormalizedMatchEndPosition(token.text, *itr); + return FindMatchEnd(normalizer_, token, *itr); } + return CharacterIterator(token.text, -1, -1, -1); } @@ -131,19 +194,17 @@ class TokenMatcherPrefix : public TokenMatcher { normalizer_(normalizer) {} CharacterIterator Matches(Token token) const override { - std::string s = normalizer_.NormalizeTerm(token.text); + std::string s = NormalizeToken(normalizer_, token); for (const std::string& query_term : unrestricted_query_terms_) { if (query_term.length() <= s.length() && s.compare(0, query_term.length(), query_term) == 0) { - return normalizer_.FindNormalizedMatchEndPosition(token.text, - 
query_term); + return FindMatchEnd(normalizer_, token, query_term); } } for (const std::string& query_term : restricted_query_terms_) { if (query_term.length() <= s.length() && s.compare(0, query_term.length(), query_term) == 0) { - return normalizer_.FindNormalizedMatchEndPosition(token.text, - query_term); + return FindMatchEnd(normalizer_, token, query_term); } } return CharacterIterator(token.text, -1, -1, -1); @@ -184,7 +245,7 @@ libtextclassifier3::StatusOr<CharacterIterator> DetermineWindowStart( const ResultSpecProto::SnippetSpecProto& snippet_spec, std::string_view value, int window_start_min_exclusive_utf32, Tokenizer::Iterator* iterator) { - if (!iterator->ResetToTokenAfter(window_start_min_exclusive_utf32)) { + if (!iterator->ResetToTokenStartingAfter(window_start_min_exclusive_utf32)) { return absl_ports::InternalError( "Couldn't reset tokenizer to determine snippet window!"); } @@ -219,7 +280,7 @@ libtextclassifier3::StatusOr<CharacterIterator> DetermineWindowEnd( const ResultSpecProto::SnippetSpecProto& snippet_spec, std::string_view value, int window_end_max_exclusive_utf32, Tokenizer::Iterator* iterator) { - if (!iterator->ResetToTokenBefore(window_end_max_exclusive_utf32)) { + if (!iterator->ResetToTokenEndingBefore(window_end_max_exclusive_utf32)) { return absl_ports::InternalError( "Couldn't reset tokenizer to determine snippet window!"); } @@ -283,9 +344,9 @@ libtextclassifier3::StatusOr<SnippetMatchProto> RetrieveMatch( int match_len_utf32 = end_itr.utf32_index() - match_pos_utf32; int match_mid_utf32 = match_pos_utf32 + match_len_utf32 / 2; int window_start_min_exclusive_utf32 = - (match_mid_utf32 - snippet_spec.max_window_bytes() / 2) - 1; + (match_mid_utf32 - snippet_spec.max_window_utf32_length() / 2) - 1; int window_end_max_exclusive_utf32 = - match_mid_utf32 + (snippet_spec.max_window_bytes() + 1) / 2; + match_mid_utf32 + (snippet_spec.max_window_utf32_length() + 1) / 2; 
snippet_match.set_exact_match_byte_position(start_itr.utf8_index()); snippet_match.set_exact_match_utf16_position(start_itr.utf16_index()); @@ -296,7 +357,7 @@ libtextclassifier3::StatusOr<SnippetMatchProto> RetrieveMatch( // Only include windows if it'll at least include the matched text. Otherwise, // it'll just be an empty string anyways. - if (snippet_spec.max_window_bytes() >= match_len_utf32) { + if (snippet_spec.max_window_utf32_length() >= match_len_utf32) { // Find the beginning of the window. ICING_ASSIGN_OR_RETURN( CharacterIterator window_start, @@ -337,8 +398,13 @@ libtextclassifier3::StatusOr<SnippetMatchProto> RetrieveMatch( // DetermineWindowStart/End may change the position of the iterator. So, // reset the iterator back to the original position. - bool success = (match_pos_utf32 > 0) ? iterator->ResetToTokenAfter(match_pos_utf32 - 1) - : iterator->ResetToStart(); + bool success = false; + if (match_pos_utf32 > 0) { + success = iterator->ResetToTokenStartingAfter(match_pos_utf32 - 1); + } else { + success = iterator->ResetToStart(); + } + if (!success) { return absl_ports::InternalError( "Couldn't reset tokenizer to determine snippet window!"); diff --git a/icing/result/snippet-retriever_test.cc b/icing/result/snippet-retriever_test.cc index f811941..0de2295 100644 --- a/icing/result/snippet-retriever_test.cc +++ b/icing/result/snippet-retriever_test.cc @@ -22,7 +22,6 @@ #include "gtest/gtest.h" #include "icing/document-builder.h" #include "icing/file/mock-filesystem.h" -#include "icing/helpers/icu/icu-data-file-helper.h" #include "icing/portable/equals-proto.h" #include "icing/portable/platform.h" #include "icing/proto/document.pb.h" @@ -37,6 +36,7 @@ #include "icing/store/key-mapper.h" #include "icing/testing/common-matchers.h" #include "icing/testing/fake-clock.h" +#include "icing/testing/icu-data-file-helper.h" #include "icing/testing/jni-test-helpers.h" #include "icing/testing/snippet-helpers.h" #include "icing/testing/test-data.h" @@ -58,16 
+58,18 @@ using ::testing::Eq; using ::testing::IsEmpty; using ::testing::SizeIs; -constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL = - PropertyConfigProto_Cardinality_Code_OPTIONAL; -constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REPEATED = - PropertyConfigProto_Cardinality_Code_REPEATED; +constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_OPTIONAL = + PropertyConfigProto::Cardinality::OPTIONAL; +constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_REPEATED = + PropertyConfigProto::Cardinality::REPEATED; -constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN = - StringIndexingConfig_TokenizerType_Code_PLAIN; +constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_PLAIN = + StringIndexingConfig::TokenizerType::PLAIN; +constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_VERBATIM = + StringIndexingConfig::TokenizerType::VERBATIM; -constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY; -constexpr TermMatchType_Code MATCH_PREFIX = TermMatchType_Code_PREFIX; +constexpr TermMatchType::Code MATCH_EXACT = TermMatchType::EXACT_ONLY; +constexpr TermMatchType::Code MATCH_PREFIX = TermMatchType::PREFIX; std::vector<std::string_view> GetPropertyPaths(const SnippetProto& snippet) { std::vector<std::string_view> paths; @@ -131,7 +133,7 @@ class SnippetRetrieverTest : public testing::Test { snippet_spec_.set_num_to_snippet(std::numeric_limits<int32_t>::max()); snippet_spec_.set_num_matches_per_property( std::numeric_limits<int32_t>::max()); - snippet_spec_.set_max_window_bytes(64); + snippet_spec_.set_max_window_utf32_length(64); } void TearDown() override { @@ -178,7 +180,7 @@ TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowSizeSmallerThanMatch) { // Window starts at the beginning of "three" and ends in the middle of // "three". 
len=4, orig_window= "thre" - snippet_spec_.set_max_window_bytes(4); + snippet_spec_.set_max_window_utf32_length(4); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); @@ -204,7 +206,7 @@ TEST_F(SnippetRetrieverTest, // Window starts at the beginning of "three" and at the exact end of // "three". len=5, orig_window= "three" - snippet_spec_.set_max_window_bytes(5); + snippet_spec_.set_max_window_utf32_length(5); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); @@ -230,7 +232,7 @@ TEST_F(SnippetRetrieverTest, // Window starts at the beginning of "four" and at the exact end of // "four". len=4, orig_window= "four" - snippet_spec_.set_max_window_bytes(4); + snippet_spec_.set_max_window_utf32_length(4); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); @@ -262,7 +264,7 @@ TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowStartsInWhitespace) { // 1. untrimmed, no-shifting window will be (2,17). // 2. trimmed, no-shifting window [4,13) "two three" // 3. trimmed, shifted window [4,18) "two three four" - snippet_spec_.set_max_window_bytes(14); + snippet_spec_.set_max_window_utf32_length(14); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); @@ -295,7 +297,7 @@ TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowStartsMidToken) { // 1. untrimmed, no-shifting window will be (1,18). // 2. trimmed, no-shifting window [4,18) "two three four" // 3. trimmed, shifted window [4,20) "two three four.." 
- snippet_spec_.set_max_window_bytes(16); + snippet_spec_.set_max_window_utf32_length(16); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); @@ -321,7 +323,7 @@ TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowEndsInPunctuation) { // Window ends in the middle of all the punctuation and window starts at 0. // len=20, orig_window="one two three four.." - snippet_spec_.set_max_window_bytes(20); + snippet_spec_.set_max_window_utf32_length(20); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); @@ -349,7 +351,7 @@ TEST_F(SnippetRetrieverTest, // Window ends in the middle of all the punctuation and window starts at 0. // len=26, orig_window="pside down in Australia¿" - snippet_spec_.set_max_window_bytes(24); + snippet_spec_.set_max_window_utf32_length(24); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); @@ -377,7 +379,7 @@ TEST_F(SnippetRetrieverTest, // Window ends in the middle of all the punctuation and window starts at 0. // len=26, orig_window="upside down in Australia¿ " - snippet_spec_.set_max_window_bytes(26); + snippet_spec_.set_max_window_utf32_length(26); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); @@ -410,7 +412,7 @@ TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowStartsBeforeValueStart) { // 1. untrimmed, no-shifting window will be (-2,21). // 2. trimmed, no-shifting window [0,21) "one two three four..." // 3. trimmed, shifted window [0,22) "one two three four...." 
- snippet_spec_.set_max_window_bytes(22); + snippet_spec_.set_max_window_utf32_length(22); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); @@ -436,7 +438,7 @@ TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowEndsInWhitespace) { // Window ends before "five" but after all the punctuation // len=26, orig_window="one two three four.... " - snippet_spec_.set_max_window_bytes(26); + snippet_spec_.set_max_window_utf32_length(26); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); @@ -469,7 +471,7 @@ TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowEndsMidToken) { // 1. untrimmed, no-shifting window will be ((-7,26). // 2. trimmed, no-shifting window [0,26) "one two three four...." // 3. trimmed, shifted window [0,27) "one two three four.... five" - snippet_spec_.set_max_window_bytes(32); + snippet_spec_.set_max_window_utf32_length(32); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); @@ -495,7 +497,7 @@ TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowSizeEqualToValueSize) { // Max window size equals the size of the value. // len=34, orig_window="one two three four.... five" - snippet_spec_.set_max_window_bytes(34); + snippet_spec_.set_max_window_utf32_length(34); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); @@ -521,7 +523,7 @@ TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowSizeLargerThanValueSize) { // Max window size exceeds the size of the value. // len=36, orig_window="one two three four.... 
five" - snippet_spec_.set_max_window_bytes(36); + snippet_spec_.set_max_window_utf32_length(36); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); @@ -555,7 +557,7 @@ TEST_F(SnippetRetrieverTest, SnippetingWindowMatchAtTextStart) { // 1. untrimmed, no-shifting window will be (-10,19). // 2. trimmed, no-shifting window [0,19) "one two three four." // 3. trimmed, shifted window [0,27) "one two three four.... five" - snippet_spec_.set_max_window_bytes(28); + snippet_spec_.set_max_window_utf32_length(28); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); @@ -589,7 +591,7 @@ TEST_F(SnippetRetrieverTest, SnippetingWindowMatchAtTextEnd) { // 1. untrimmed, no-shifting window will be (10,39). // 2. trimmed, no-shifting window [14,31) "four.... five six" // 3. trimmed, shifted window [4,31) "two three four.... five six" - snippet_spec_.set_max_window_bytes(28); + snippet_spec_.set_max_window_utf32_length(28); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); @@ -623,7 +625,7 @@ TEST_F(SnippetRetrieverTest, SnippetingWindowMatchAtTextStartShortText) { // 1. untrimmed, no-shifting window will be (-10,19). // 2. trimmed, no-shifting window [0, 19) "one two three four." // 3. trimmed, shifted window [0, 22) "one two three four...." - snippet_spec_.set_max_window_bytes(28); + snippet_spec_.set_max_window_utf32_length(28); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); @@ -657,7 +659,7 @@ TEST_F(SnippetRetrieverTest, SnippetingWindowMatchAtTextEndShortText) { // 1. untrimmed, no-shifting window will be (1,30). // 2. trimmed, no-shifting window [4, 22) "two three four...." // 3. trimmed, shifted window [0, 22) "one two three four...." 
- snippet_spec_.set_max_window_bytes(28); + snippet_spec_.set_max_window_utf32_length(28); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); @@ -721,7 +723,7 @@ TEST_F(SnippetRetrieverTest, SimpleSnippetingNoWindowing) { .AddStringProperty("body", "Only a fool would match this content.") .Build(); - snippet_spec_.set_max_window_bytes(0); + snippet_spec_.set_max_window_utf32_length(0); SectionIdMask section_mask = 0b00000011; SectionRestrictQueryTermsMap query_terms{{"", {"foo"}}}; @@ -1473,7 +1475,7 @@ TEST_F(SnippetRetrieverTest, CJKSnippetWindowTest) { // 1. untrimmed, no-shifting window will be (0,7). // 2. trimmed, no-shifting window [1, 6) "每天走路去". // 3. trimmed, shifted window [0, 6) "我每天走路去" - snippet_spec_.set_max_window_bytes(6); + snippet_spec_.set_max_window_utf32_length(6); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( query_terms, MATCH_PREFIX, snippet_spec_, document, section_mask); @@ -1572,7 +1574,7 @@ TEST_F(SnippetRetrieverTest, Utf16MultiCodeUnitWindowTest) { // UTF8 idx: 9 22 // UTF16 idx: 5 12 // UTF32 idx: 3 7 - snippet_spec_.set_max_window_bytes(6); + snippet_spec_.set_max_window_utf32_length(6); SnippetProto snippet = snippet_retriever_->RetrieveSnippet( query_terms, MATCH_PREFIX, snippet_spec_, document, section_mask); @@ -1596,6 +1598,117 @@ TEST_F(SnippetRetrieverTest, Utf16MultiCodeUnitWindowTest) { EXPECT_THAT(match_proto.window_utf16_length(), Eq(7)); } +TEST_F(SnippetRetrieverTest, SnippettingVerbatimAscii) { + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("verbatimType") + .AddProperty(PropertyConfigBuilder() + .SetName("verbatim") + .SetDataTypeString(MATCH_EXACT, + TOKENIZER_VERBATIM) + .SetCardinality(CARDINALITY_REPEATED))) + .Build(); + ICING_ASSERT_OK(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/true)); + ICING_ASSERT_OK_AND_ASSIGN( + snippet_retriever_, + 
SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(), + normalizer_.get())); + + DocumentProto document = DocumentBuilder() + .SetKey("icing", "verbatim/1") + .SetSchema("verbatimType") + .AddStringProperty("verbatim", "Hello, world!") + .Build(); + + SectionIdMask section_mask = 0b00000001; + SectionRestrictQueryTermsMap query_terms{{"", {"Hello, world!"}}}; + + snippet_spec_.set_max_window_utf32_length(13); + SnippetProto snippet = snippet_retriever_->RetrieveSnippet( + query_terms, MATCH_EXACT, snippet_spec_, document, section_mask); + + // There should only be one snippet entry and match, the verbatim token in its + // entirety. + ASSERT_THAT(snippet.entries(), SizeIs(1)); + + const SnippetProto::EntryProto* entry = &snippet.entries(0); + ASSERT_THAT(entry->snippet_matches(), SizeIs(1)); + ASSERT_THAT(entry->property_name(), "verbatim"); + + const SnippetMatchProto& match_proto = entry->snippet_matches(0); + // We expect the match to begin at position 0, and to span the entire token + // which contains 13 characters. + EXPECT_THAT(match_proto.window_byte_position(), Eq(0)); + EXPECT_THAT(match_proto.window_utf16_length(), Eq(13)); + + // We expect the submatch to begin at position 0 of the verbatim token and + // span the length of our query term "Hello, world!", which has utf-16 length + // of 13. The submatch length is equal to the window length as the query the + // snippet is retrieved with an exact term match. 
+ EXPECT_THAT(match_proto.exact_match_utf16_position(), Eq(0)); + EXPECT_THAT(match_proto.submatch_utf16_length(), Eq(13)); +} + +TEST_F(SnippetRetrieverTest, SnippettingVerbatimCJK) { + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("verbatimType") + .AddProperty(PropertyConfigBuilder() + .SetName("verbatim") + .SetDataTypeString(MATCH_PREFIX, + TOKENIZER_VERBATIM) + .SetCardinality(CARDINALITY_REPEATED))) + .Build(); + ICING_ASSERT_OK(schema_store_->SetSchema( + schema, /*ignore_errors_and_delete_documents=*/true)); + ICING_ASSERT_OK_AND_ASSIGN( + snippet_retriever_, + SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(), + normalizer_.get())); + + // String: "我每天走路去上班。" + // ^ ^ ^ ^^ + // UTF8 idx: 0 3 9 15 18 + // UTF16 idx: 0 1 3 5 6 + // UTF32 idx: 0 1 3 5 6 + // Breaks into segments: "我", "每天", "走路", "去", "上班" + std::string chinese_string = "我每天走路去上班。"; + DocumentProto document = DocumentBuilder() + .SetKey("icing", "verbatim/1") + .SetSchema("verbatimType") + .AddStringProperty("verbatim", chinese_string) + .Build(); + + SectionIdMask section_mask = 0b00000001; + SectionRestrictQueryTermsMap query_terms{{"", {"我每"}}}; + + snippet_spec_.set_max_window_utf32_length(9); + SnippetProto snippet = snippet_retriever_->RetrieveSnippet( + query_terms, MATCH_PREFIX, snippet_spec_, document, section_mask); + + // There should only be one snippet entry and match, the verbatim token in its + // entirety. + ASSERT_THAT(snippet.entries(), SizeIs(1)); + + const SnippetProto::EntryProto* entry = &snippet.entries(0); + ASSERT_THAT(entry->snippet_matches(), SizeIs(1)); + ASSERT_THAT(entry->property_name(), "verbatim"); + + const SnippetMatchProto& match_proto = entry->snippet_matches(0); + // We expect the match to begin at position 0, and to span the entire token + // which has utf-16 length of 9. 
+ EXPECT_THAT(match_proto.window_byte_position(), Eq(0)); + EXPECT_THAT(match_proto.window_utf16_length(), Eq(9)); + + // We expect the submatch to begin at position 0 of the verbatim token and + // span the length of our query term "我每", which has utf-16 length of 2. + EXPECT_THAT(match_proto.exact_match_utf16_position(), Eq(0)); + EXPECT_THAT(match_proto.submatch_utf16_length(), Eq(2)); +} + } // namespace } // namespace lib diff --git a/icing/schema/schema-store.cc b/icing/schema/schema-store.cc index 67528ab..acc5030 100644 --- a/icing/schema/schema-store.cc +++ b/icing/schema/schema-store.cc @@ -268,7 +268,7 @@ libtextclassifier3::Status SchemaStore::UpdateHeader(const Crc32& checksum) { libtextclassifier3::Status SchemaStore::ResetSchemaTypeMapper() { // TODO(b/139734457): Replace ptr.reset()->Delete->Create flow with Reset(). schema_type_mapper_.reset(); - // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR + // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR // that can support error logging. 
libtextclassifier3::Status status = KeyMapper<SchemaTypeId>::Delete( filesystem_, MakeSchemaTypeMapperFilename(base_dir_)); @@ -464,11 +464,8 @@ libtextclassifier3::Status SchemaStore::PersistToDisk() { SchemaStoreStorageInfoProto SchemaStore::GetStorageInfo() const { SchemaStoreStorageInfoProto storage_info; int64_t directory_size = filesystem_.GetDiskUsage(base_dir_.c_str()); - if (directory_size != Filesystem::kBadFileSize) { - storage_info.set_schema_store_size(directory_size); - } else { - storage_info.set_schema_store_size(-1); - } + storage_info.set_schema_store_size( + Filesystem::SanitizeFileSize(directory_size)); ICING_ASSIGN_OR_RETURN(const SchemaProto* schema, GetSchema(), storage_info); storage_info.set_num_schema_types(schema->types_size()); int total_sections = 0; @@ -496,5 +493,17 @@ SchemaStore::GetSectionMetadata(const std::string& schema_type) const { return section_manager_->GetMetadataList(schema_type); } +libtextclassifier3::StatusOr<SchemaDebugInfoProto> SchemaStore::GetDebugInfo() + const { + SchemaDebugInfoProto debug_info; + if (has_schema_successfully_set_) { + ICING_ASSIGN_OR_RETURN(const SchemaProto* schema, GetSchema()); + *debug_info.mutable_schema() = *schema; + } + ICING_ASSIGN_OR_RETURN(Crc32 crc, ComputeChecksum()); + debug_info.set_crc(crc.Get()); + return debug_info; +} + } // namespace lib } // namespace icing diff --git a/icing/schema/schema-store.h b/icing/schema/schema-store.h index 6b6528d..2d3aca7 100644 --- a/icing/schema/schema-store.h +++ b/icing/schema/schema-store.h @@ -26,6 +26,7 @@ #include "icing/text_classifier/lib3/utils/base/statusor.h" #include "icing/file/file-backed-proto.h" #include "icing/file/filesystem.h" +#include "icing/proto/debug.pb.h" #include "icing/proto/document.pb.h" #include "icing/proto/logging.pb.h" #include "icing/proto/schema.pb.h" @@ -137,9 +138,7 @@ class SchemaStore { // Persists and updates checksum of subcomponents. ~SchemaStore(); - // Retrieve the current schema if it exists. 
Caller does not get ownership of - // the schema proto and modifying the returned pointer does not affect the - // underlying schema proto. + // Retrieve the current schema if it exists. // // Returns: // SchemaProto* if exists @@ -258,6 +257,13 @@ class SchemaStore { // that field will be set to -1. SchemaStoreStorageInfoProto GetStorageInfo() const; + // Get debug information for the schema store. + // + // Returns: + // SchemaDebugInfoProto on success + // INTERNAL_ERROR on IO errors, crc compute error + libtextclassifier3::StatusOr<SchemaDebugInfoProto> GetDebugInfo() const; + private: // Use SchemaStore::Create instead. explicit SchemaStore(const Filesystem* filesystem, std::string base_dir, diff --git a/icing/schema/schema-store_test.cc b/icing/schema/schema-store_test.cc index be7170f..113084e 100644 --- a/icing/schema/schema-store_test.cc +++ b/icing/schema/schema-store_test.cc @@ -44,23 +44,24 @@ using ::icing::lib::portable_equals_proto::EqualsProto; using ::testing::ElementsAre; using ::testing::Eq; using ::testing::Ge; +using ::testing::Gt; using ::testing::Not; using ::testing::Pointee; -constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL = - PropertyConfigProto_Cardinality_Code_OPTIONAL; -constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REPEATED = - PropertyConfigProto_Cardinality_Code_REPEATED; +constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_OPTIONAL = + PropertyConfigProto::Cardinality::OPTIONAL; +constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_REPEATED = + PropertyConfigProto::Cardinality::REPEATED; -constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN = - StringIndexingConfig_TokenizerType_Code_PLAIN; +constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_PLAIN = + StringIndexingConfig::TokenizerType::PLAIN; -constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY; +constexpr TermMatchType::Code MATCH_EXACT = TermMatchType::EXACT_ONLY; -constexpr 
PropertyConfigProto_DataType_Code TYPE_STRING = - PropertyConfigProto_DataType_Code_STRING; -constexpr PropertyConfigProto_DataType_Code TYPE_DOUBLE = - PropertyConfigProto_DataType_Code_DOUBLE; +constexpr PropertyConfigProto::DataType::Code TYPE_STRING = + PropertyConfigProto::DataType::STRING; +constexpr PropertyConfigProto::DataType::Code TYPE_DOUBLE = + PropertyConfigProto::DataType::DOUBLE; class SchemaStoreTest : public ::testing::Test { protected: @@ -868,6 +869,38 @@ TEST_F(SchemaStoreTest, SchemaStoreStorageInfoProto) { EXPECT_THAT(storage_info.num_schema_types_sections_exhausted(), Eq(1)); } +TEST_F(SchemaStoreTest, GetDebugInfo) { + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); + + // Set schema + ASSERT_THAT( + schema_store->SetSchema(schema_), + IsOkAndHolds(EqualsSetSchemaResult(SchemaStore::SetSchemaResult{ + .success = true, + .schema_types_new_by_name = {schema_.types(0).schema_type()}}))); + + // Check debug info + ICING_ASSERT_OK_AND_ASSIGN(SchemaDebugInfoProto out, + schema_store->GetDebugInfo()); + EXPECT_THAT(out.schema(), EqualsProto(schema_)); + EXPECT_THAT(out.crc(), Gt(0)); +} + +TEST_F(SchemaStoreTest, GetDebugInfoForEmptySchemaStore) { + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_)); + + // Check debug info before setting a schema + ICING_ASSERT_OK_AND_ASSIGN(SchemaDebugInfoProto out, + schema_store->GetDebugInfo()); + SchemaDebugInfoProto expected_out; + expected_out.set_crc(0); + EXPECT_THAT(out, EqualsProto(expected_out)); +} + } // namespace } // namespace lib diff --git a/icing/schema/schema-util_test.cc b/icing/schema/schema-util_test.cc index 26ef4c7..f28a2f8 100644 --- a/icing/schema/schema-util_test.cc +++ b/icing/schema/schema-util_test.cc @@ -38,32 +38,32 @@ constexpr char kEmailType[] = "EmailMessage"; constexpr char kMessageType[] = "Text"; 
constexpr char kPersonType[] = "Person"; -constexpr PropertyConfigProto_DataType_Code TYPE_DOCUMENT = - PropertyConfigProto_DataType_Code_DOCUMENT; -constexpr PropertyConfigProto_DataType_Code TYPE_STRING = - PropertyConfigProto_DataType_Code_STRING; -constexpr PropertyConfigProto_DataType_Code TYPE_INT = - PropertyConfigProto_DataType_Code_INT64; -constexpr PropertyConfigProto_DataType_Code TYPE_DOUBLE = - PropertyConfigProto_DataType_Code_DOUBLE; - -constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_UNKNOWN = - PropertyConfigProto_Cardinality_Code_UNKNOWN; -constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REQUIRED = - PropertyConfigProto_Cardinality_Code_REQUIRED; -constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL = - PropertyConfigProto_Cardinality_Code_OPTIONAL; -constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REPEATED = - PropertyConfigProto_Cardinality_Code_REPEATED; - -constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_NONE = - StringIndexingConfig_TokenizerType_Code_NONE; -constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN = - StringIndexingConfig_TokenizerType_Code_PLAIN; - -constexpr TermMatchType_Code MATCH_UNKNOWN = TermMatchType_Code_UNKNOWN; -constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY; -constexpr TermMatchType_Code MATCH_PREFIX = TermMatchType_Code_PREFIX; +constexpr PropertyConfigProto::DataType::Code TYPE_DOCUMENT = + PropertyConfigProto::DataType::DOCUMENT; +constexpr PropertyConfigProto::DataType::Code TYPE_STRING = + PropertyConfigProto::DataType::STRING; +constexpr PropertyConfigProto::DataType::Code TYPE_INT = + PropertyConfigProto::DataType::INT64; +constexpr PropertyConfigProto::DataType::Code TYPE_DOUBLE = + PropertyConfigProto::DataType::DOUBLE; + +constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_UNKNOWN = + PropertyConfigProto::Cardinality::UNKNOWN; +constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_REQUIRED = + 
PropertyConfigProto::Cardinality::REQUIRED; +constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_OPTIONAL = + PropertyConfigProto::Cardinality::OPTIONAL; +constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_REPEATED = + PropertyConfigProto::Cardinality::REPEATED; + +constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_NONE = + StringIndexingConfig::TokenizerType::NONE; +constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_PLAIN = + StringIndexingConfig::TokenizerType::PLAIN; + +constexpr TermMatchType::Code MATCH_UNKNOWN = TermMatchType::UNKNOWN; +constexpr TermMatchType::Code MATCH_EXACT = TermMatchType::EXACT_ONLY; +constexpr TermMatchType::Code MATCH_PREFIX = TermMatchType::PREFIX; TEST(SchemaUtilTest, DependencyGraphAlphabeticalOrder) { // Create a schema with the following dependencies: diff --git a/icing/scoring/scorer_test.cc b/icing/scoring/scorer_test.cc index f22a31a..fef612d 100644 --- a/icing/scoring/scorer_test.cc +++ b/icing/scoring/scorer_test.cc @@ -40,11 +40,11 @@ namespace lib { namespace { using ::testing::Eq; -constexpr PropertyConfigProto_DataType_Code TYPE_STRING = - PropertyConfigProto_DataType_Code_STRING; +constexpr PropertyConfigProto::DataType::Code TYPE_STRING = + PropertyConfigProto::DataType::STRING; -constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REQUIRED = - PropertyConfigProto_Cardinality_Code_REQUIRED; +constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_REQUIRED = + PropertyConfigProto::Cardinality::REQUIRED; class ScorerTest : public testing::Test { protected: diff --git a/icing/scoring/scoring-processor_test.cc b/icing/scoring/scoring-processor_test.cc index 7e5cb0f..f169039 100644 --- a/icing/scoring/scoring-processor_test.cc +++ b/icing/scoring/scoring-processor_test.cc @@ -34,14 +34,16 @@ namespace lib { namespace { using ::testing::ElementsAre; +using ::testing::Eq; +using ::testing::Gt; using ::testing::IsEmpty; using ::testing::SizeIs; -constexpr 
PropertyConfigProto_DataType_Code TYPE_STRING = - PropertyConfigProto_DataType_Code_STRING; +constexpr PropertyConfigProto::DataType::Code TYPE_STRING = + PropertyConfigProto::DataType::STRING; -constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL = - PropertyConfigProto_Cardinality_Code_OPTIONAL; +constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_OPTIONAL = + PropertyConfigProto::Cardinality::OPTIONAL; class ScoringProcessorTest : public testing::Test { protected: @@ -789,6 +791,77 @@ TEST_F(ScoringProcessorTest, ElementsAre(EqualsScoredDocumentHit(expected_scored_doc_hit))); } +TEST_F(ScoringProcessorTest, + ShouldScoreByRelevanceScore_WithZeroPropertyWeight) { + DocumentProto document1 = + CreateDocument("icing", "email/1", kDefaultScore, + /*creation_timestamp_ms=*/kDefaultCreationTimestampMs); + DocumentProto document2 = + CreateDocument("icing", "email/2", kDefaultScore, + /*creation_timestamp_ms=*/kDefaultCreationTimestampMs); + + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId document_id1, + document_store()->Put(document1, /*num_tokens=*/1)); + ICING_ASSERT_OK_AND_ASSIGN( + DocumentId document_id2, + document_store()->Put(document2, /*num_tokens=*/1)); + + // Document 1 contains the term "foo" 1 time in the "body" property + SectionId body_section_id = 0; + DocHitInfo doc_hit_info1(document_id1); + doc_hit_info1.UpdateSection(body_section_id, /*hit_term_frequency=*/1); + + // Document 2 contains the term "foo" 1 time in the "subject" property + SectionId subject_section_id = 1; + DocHitInfo doc_hit_info2(document_id2); + doc_hit_info2.UpdateSection(subject_section_id, /*hit_term_frequency=*/1); + + // Creates input doc_hit_infos and expected output scored_document_hits + std::vector<DocHitInfo> doc_hit_infos = {doc_hit_info1, doc_hit_info2}; + + // Creates a dummy DocHitInfoIterator with 2 results for the query "foo" + std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator = + std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, 
"foo"); + + ScoringSpecProto spec_proto; + spec_proto.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE); + + // Sets property weight for "body" to 0.0. + PropertyWeight body_property_weight = + CreatePropertyWeight(/*path=*/"body", /*weight=*/0.0); + // Sets property weight for "subject" to 1.0. + PropertyWeight subject_property_weight = + CreatePropertyWeight(/*path=*/"subject", /*weight=*/1.0); + *spec_proto.add_type_property_weights() = CreateTypePropertyWeights( + /*schema_type=*/"email", {body_property_weight, subject_property_weight}); + + // Creates a ScoringProcessor + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<ScoringProcessor> scoring_processor, + ScoringProcessor::Create(spec_proto, document_store(), schema_store())); + + std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>> + query_term_iterators; + query_term_iterators["foo"] = + std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo"); + + std::vector<ScoredDocumentHit> scored_document_hits = + scoring_processor->Score(std::move(doc_hit_info_iterator), + /*num_to_score=*/2, &query_term_iterators); + + // We expect document1 to have a score of 0.0 as the query term "foo" matches + // in the "body" property which has a weight of 0.0. This is a result of the + // weighted term frequency being scaled down to 0.0 for the hit. We expect + // document2 to have a positive score as the query term "foo" matches in the + // "subject" property which has a weight of 1.0. 
+ EXPECT_THAT(scored_document_hits, SizeIs(2)); + EXPECT_THAT(scored_document_hits.at(0).document_id(), Eq(document_id1)); + EXPECT_THAT(scored_document_hits.at(0).score(), Eq(0.0)); + EXPECT_THAT(scored_document_hits.at(1).document_id(), Eq(document_id2)); + EXPECT_THAT(scored_document_hits.at(1).score(), Gt(0.0)); +} + TEST_F(ScoringProcessorTest, ShouldScoreByCreationTimestamp) { DocumentProto document1 = CreateDocument("icing", "email/1", kDefaultScore, diff --git a/icing/scoring/section-weights.cc b/icing/scoring/section-weights.cc index c4afe7f..ed7cd5e 100644 --- a/icing/scoring/section-weights.cc +++ b/icing/scoring/section-weights.cc @@ -27,10 +27,14 @@ namespace lib { namespace { -// Normalizes all weights in the map to be in range (0.0, 1.0], where the max -// weight is normalized to 1.0. +// Normalizes all weights in the map to be in range [0.0, 1.0], where the max +// weight is normalized to 1.0. In the case that all weights are equal to 0.0, +// the normalized weight for each will be 0.0. inline void NormalizeSectionWeights( double max_weight, std::unordered_map<SectionId, double>& section_weights) { + if (max_weight == 0.0) { + return; + } for (auto& raw_weight : section_weights) { raw_weight.second = raw_weight.second / max_weight; } @@ -70,11 +74,11 @@ SectionWeights::Create(const SchemaStore* schema_store, type_property_weights.property_weights()) { double property_path_weight = property_weight.weight(); - // Return error on negative and zero weights. - if (property_path_weight <= 0.0) { + // Return error on negative weights. + if (property_path_weight < 0.0) { return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf( - "Property weight for property path \"%s\" is negative or zero. " - "Negative and zero weights are invalid.", + "Property weight for property path \"%s\" is negative. 
Negative " + "weights are invalid.", property_weight.path().c_str())); } property_paths_weights.insert( @@ -116,7 +120,7 @@ inline SectionWeights::NormalizedSectionWeights SectionWeights::ExtractNormalizedSectionWeights( const std::unordered_map<std::string, double>& raw_weights, const std::vector<SectionMetadata>& metadata_list) { - double max_weight = 0.0; + double max_weight = -std::numeric_limits<double>::infinity(); std::unordered_map<SectionId, double> section_weights; for (const SectionMetadata& section_metadata : metadata_list) { std::string_view metadata_path = section_metadata.path; @@ -132,10 +136,11 @@ SectionWeights::ExtractNormalizedSectionWeights( NormalizeSectionWeights(max_weight, section_weights); // Set normalized default weight to 1.0 in case there is no section - // metadata and max_weight is 0.0 (we should not see this case). - double normalized_default_weight = max_weight == 0.0 - ? kDefaultSectionWeight - : kDefaultSectionWeight / max_weight; + // metadata and max_weight is -INF (we should not see this case). + double normalized_default_weight = + max_weight == -std::numeric_limits<double>::infinity() + ? 
kDefaultSectionWeight + : kDefaultSectionWeight / max_weight; SectionWeights::NormalizedSectionWeights normalized_section_weights = SectionWeights::NormalizedSectionWeights(); normalized_section_weights.section_weights = std::move(section_weights); diff --git a/icing/scoring/section-weights_test.cc b/icing/scoring/section-weights_test.cc index b90c3d5..330faee 100644 --- a/icing/scoring/section-weights_test.cc +++ b/icing/scoring/section-weights_test.cc @@ -48,13 +48,13 @@ class SectionWeightsTest : public testing::Test { SchemaTypeConfigProto sender_schema = SchemaTypeConfigBuilder() .SetType("sender") - .AddProperty(PropertyConfigBuilder() - .SetName("name") - .SetDataTypeString( - TermMatchType::PREFIX, - StringIndexingConfig::TokenizerType::PLAIN) - .SetCardinality( - PropertyConfigProto_Cardinality_Code_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("name") + .SetDataTypeString( + TermMatchType::PREFIX, + StringIndexingConfig::TokenizerType::PLAIN) + .SetCardinality(PropertyConfigProto::Cardinality::OPTIONAL)) .Build(); SchemaTypeConfigProto email_schema = SchemaTypeConfigBuilder() @@ -65,24 +65,22 @@ class SectionWeightsTest : public testing::Test { .SetDataTypeString( TermMatchType::PREFIX, StringIndexingConfig::TokenizerType::PLAIN) - .SetDataType(PropertyConfigProto_DataType_Code_STRING) - .SetCardinality( - PropertyConfigProto_Cardinality_Code_OPTIONAL)) + .SetDataType(PropertyConfigProto::DataType::STRING) + .SetCardinality(PropertyConfigProto::Cardinality::OPTIONAL)) .AddProperty( PropertyConfigBuilder() .SetName("body") .SetDataTypeString( TermMatchType::PREFIX, StringIndexingConfig::TokenizerType::PLAIN) - .SetDataType(PropertyConfigProto_DataType_Code_STRING) - .SetCardinality( - PropertyConfigProto_Cardinality_Code_OPTIONAL)) - .AddProperty(PropertyConfigBuilder() - .SetName("sender") - .SetDataTypeDocument( - "sender", /*index_nested_properties=*/true) - .SetCardinality( - PropertyConfigProto_Cardinality_Code_OPTIONAL)) + 
.SetDataType(PropertyConfigProto::DataType::STRING) + .SetCardinality(PropertyConfigProto::Cardinality::OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("sender") + .SetDataTypeDocument("sender", + /*index_nested_properties=*/true) + .SetCardinality(PropertyConfigProto::Cardinality::OPTIONAL)) .Build(); SchemaProto schema = SchemaBuilder().AddType(sender_schema).AddType(email_schema).Build(); @@ -171,20 +169,79 @@ TEST_F(SectionWeightsTest, ShouldFailWithNegativeWeights) { StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); } -TEST_F(SectionWeightsTest, ShouldFailWithZeroWeight) { +TEST_F(SectionWeightsTest, ShouldAcceptZeroWeight) { ScoringSpecProto spec_proto; TypePropertyWeights *type_property_weights = spec_proto.add_type_property_weights(); - type_property_weights->set_schema_type("sender"); + type_property_weights->set_schema_type("email"); - PropertyWeight *property_weight = + PropertyWeight *body_property_weight = type_property_weights->add_property_weights(); - property_weight->set_weight(0.0); - property_weight->set_path("name"); + body_property_weight->set_weight(2.0); + body_property_weight->set_path("body"); - EXPECT_THAT(SectionWeights::Create(schema_store(), spec_proto).status(), - StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT)); + PropertyWeight *subject_property_weight = + type_property_weights->add_property_weights(); + subject_property_weight->set_weight(0.0); + subject_property_weight->set_path("subject"); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SectionWeights> section_weights, + SectionWeights::Create(schema_store(), spec_proto)); + ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId email_schema_type_id, + schema_store()->GetSchemaTypeId("email")); + + // Normalized weight for "body" property. + EXPECT_THAT(section_weights->GetNormalizedSectionWeight(email_schema_type_id, + /*section_id=*/0), + Eq(1.0)); + // Normalized weight for "subject" property. 
+ EXPECT_THAT(section_weights->GetNormalizedSectionWeight(email_schema_type_id, + /*section_id=*/2), + Eq(0.0)); +} + +TEST_F(SectionWeightsTest, ShouldNormalizeToZeroWhenAllWeightsZero) { + ScoringSpecProto spec_proto; + + TypePropertyWeights *type_property_weights = + spec_proto.add_type_property_weights(); + type_property_weights->set_schema_type("email"); + + PropertyWeight *body_property_weight = + type_property_weights->add_property_weights(); + body_property_weight->set_weight(0.0); + body_property_weight->set_path("body"); + + PropertyWeight *sender_property_weight = + type_property_weights->add_property_weights(); + sender_property_weight->set_weight(0.0); + sender_property_weight->set_path("sender.name"); + + PropertyWeight *subject_property_weight = + type_property_weights->add_property_weights(); + subject_property_weight->set_weight(0.0); + subject_property_weight->set_path("subject"); + + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SectionWeights> section_weights, + SectionWeights::Create(schema_store(), spec_proto)); + ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId email_schema_type_id, + schema_store()->GetSchemaTypeId("email")); + + // Normalized weight for "body" property. + EXPECT_THAT(section_weights->GetNormalizedSectionWeight(email_schema_type_id, + /*section_id=*/0), + Eq(0.0)); + // Normalized weight for "sender.name" property (the nested property). + EXPECT_THAT(section_weights->GetNormalizedSectionWeight(email_schema_type_id, + /*section_id=*/1), + Eq(0.0)); + // Normalized weight for "subject" property. 
+ EXPECT_THAT(section_weights->GetNormalizedSectionWeight(email_schema_type_id, + /*section_id=*/2), + Eq(0.0)); } TEST_F(SectionWeightsTest, ShouldReturnDefaultIfTypePropertyWeightsNotSet) { diff --git a/icing/store/document-log-creator.cc b/icing/store/document-log-creator.cc index 5e0426e..5e23a8e 100644 --- a/icing/store/document-log-creator.cc +++ b/icing/store/document-log-creator.cc @@ -72,19 +72,20 @@ DocumentLogCreator::Create(const Filesystem* filesystem, bool v1_exists = filesystem->FileExists(MakeDocumentLogFilenameV1(base_dir).c_str()); - bool regen_derived_files = false; + bool new_file = false; + int preexisting_file_version = kCurrentVersion; if (v0_exists && !v1_exists) { ICING_RETURN_IF_ERROR(MigrateFromV0ToV1(filesystem, base_dir)); // Need to regenerate derived files since documents may be written to a // different file offset in the log. - regen_derived_files = true; + preexisting_file_version = 0; } else if (!v1_exists) { // First time initializing a v1 log. There are no existing derived files at // this point, so we should generate some. "regenerate" here also means // "generate for the first time", i.e. we shouldn't expect there to be any // existing derived files. - regen_derived_files = true; + new_file = true; } ICING_ASSIGN_OR_RETURN( @@ -96,7 +97,7 @@ DocumentLogCreator::Create(const Filesystem* filesystem, /*compress_in=*/true))); CreateResult create_result = {std::move(log_create_result), - regen_derived_files}; + preexisting_file_version, new_file}; return create_result; } diff --git a/icing/store/document-log-creator.h b/icing/store/document-log-creator.h index 51cf497..be8feed 100644 --- a/icing/store/document-log-creator.h +++ b/icing/store/document-log-creator.h @@ -30,14 +30,20 @@ namespace lib { // be necessary. 
class DocumentLogCreator { public: + // Version 0 refers to FileBackedProtoLog + // Version 1 refers to PortableFileBackedProtoLog with kFileFormatVersion = 0 + static constexpr int32_t kCurrentVersion = 1; struct CreateResult { // The create result passed up from the PortableFileBackedProtoLog::Create. // Contains the document log. PortableFileBackedProtoLog<DocumentWrapper>::CreateResult log_create_result; - // Whether the caller needs to also regenerate/generate any derived files - // based off of the initialized document log. - bool regen_derived_files; + // The version number of the pre-existing document log file. + // If there is no document log file, it will be set to kCurrentVersion. + int preexisting_file_version; + + // Whether the created file is new. + bool new_file; }; // Creates the document log in the base_dir. Will create one if it doesn't diff --git a/icing/store/document-store.cc b/icing/store/document-store.cc index 226a96b..8c8369c 100644 --- a/icing/store/document-store.cc +++ b/icing/store/document-store.cc @@ -164,6 +164,32 @@ int64_t CalculateExpirationTimestampMs(int64_t creation_timestamp_ms, return expiration_timestamp_ms; } +InitializeStatsProto::RecoveryCause GetRecoveryCause( + const DocumentLogCreator::CreateResult& create_result, + bool force_recovery_and_revalidate_documents) { + if (force_recovery_and_revalidate_documents) { + return InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC; + } else if (create_result.log_create_result.has_data_loss()) { + return InitializeStatsProto::DATA_LOSS; + } else if (create_result.preexisting_file_version != + DocumentLogCreator::kCurrentVersion) { + return InitializeStatsProto::LEGACY_DOCUMENT_LOG_FORMAT; + } + return InitializeStatsProto::NONE; +} + +InitializeStatsProto::DocumentStoreDataStatus GetDataStatus( + DataLoss data_loss) { + switch (data_loss) { + case DataLoss::PARTIAL: + return InitializeStatsProto::PARTIAL_LOSS; + case DataLoss::COMPLETE: + return InitializeStatsProto::COMPLETE_LOSS; 
+      case DataLoss::NONE: +      return InitializeStatsProto::NO_DATA_LOSS; +  } +} + } // namespace DocumentStore::DocumentStore(const Filesystem* filesystem, @@ -236,44 +262,34 @@ libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize( std::move(create_result_or).ValueOrDie(); document_log_ = std::move(create_result.log_create_result.proto_log); - - if (create_result.regen_derived_files || - force_recovery_and_revalidate_documents || - create_result.log_create_result.has_data_loss()) { + InitializeStatsProto::RecoveryCause recovery_cause = + GetRecoveryCause(create_result, force_recovery_and_revalidate_documents); + + if (recovery_cause != InitializeStatsProto::NONE || create_result.new_file) { + ICING_LOG(WARNING) << "Starting Document Store Recovery with cause=" + << recovery_cause << ", and create result { new_file=" + << create_result.new_file << ", preexisting_file_version=" + << create_result.preexisting_file_version << ", data_loss=" + << create_result.log_create_result.data_loss << "} and kCurrentVersion=" + << DocumentLogCreator::kCurrentVersion; // We can't rely on any existing derived files. Recreate them from scratch. // Currently happens if: // 1) This is a new log and we don't have derived files yet // 2) Client wanted us to force a regeneration. // 3) Log has some data loss, can't rely on existing derived data. - if (create_result.log_create_result.has_data_loss() && - initialize_stats != nullptr) { - ICING_LOG(WARNING) - << "Data loss in document log, regenerating derived files."; - initialize_stats->set_document_store_recovery_cause( - InitializeStatsProto::DATA_LOSS); - - if (create_result.log_create_result.data_loss == DataLoss::PARTIAL) { - // Ground truth is partially lost. - initialize_stats->set_document_store_data_status( - InitializeStatsProto::PARTIAL_LOSS); - } else { - // Ground truth is completely lost. 
- initialize_stats->set_document_store_data_status( - InitializeStatsProto::COMPLETE_LOSS); - } - } - std::unique_ptr<Timer> document_recovery_timer = clock_.GetNewTimer(); libtextclassifier3::Status status = RegenerateDerivedFiles(force_recovery_and_revalidate_documents); if (initialize_stats != nullptr && - (force_recovery_and_revalidate_documents || - create_result.log_create_result.has_data_loss())) { + recovery_cause != InitializeStatsProto::NONE) { // Only consider it a recovery if the client forced a recovery or there // was data loss. Otherwise, this could just be the first time we're // initializing and generating derived files. initialize_stats->set_document_store_recovery_latency_ms( document_recovery_timer->GetElapsedMilliseconds()); + initialize_stats->set_document_store_recovery_cause(recovery_cause); + initialize_stats->set_document_store_data_status( + GetDataStatus(create_result.log_create_result.data_loss)); } if (!status.ok()) { ICING_LOG(ERROR) @@ -282,13 +298,13 @@ libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize( } } else { if (!InitializeExistingDerivedFiles().ok()) { - ICING_VLOG(1) + ICING_LOG(WARNING) << "Couldn't find derived files or failed to initialize them, " "regenerating derived files for DocumentStore."; std::unique_ptr<Timer> document_recovery_timer = clock_.GetNewTimer(); libtextclassifier3::Status status = RegenerateDerivedFiles( - /*force_recovery_and_revalidate_documents*/ false); - if (initialize_stats != nullptr && num_documents() > 0) { + /*force_recovery_and_revalidate_documents=*/false); + if (initialize_stats != nullptr) { initialize_stats->set_document_store_recovery_cause( InitializeStatsProto::IO_ERROR); initialize_stats->set_document_store_recovery_latency_ms( @@ -415,7 +431,19 @@ libtextclassifier3::Status DocumentStore::RegenerateDerivedFiles( // Iterates through document log auto iterator = document_log_->GetIterator(); auto iterator_status = iterator.Advance(); + 
libtextclassifier3::StatusOr<int64_t> element_size = + document_log_->GetElementsFileSize(); + libtextclassifier3::StatusOr<int64_t> disk_usage = + document_log_->GetDiskUsage(); + if (element_size.ok() && disk_usage.ok()) { + ICING_VLOG(1) << "Starting recovery of document store. Document store " + "elements file size:" + << element_size.ValueOrDie() + << ", disk usage=" << disk_usage.ValueOrDie(); + } while (iterator_status.ok()) { + ICING_VLOG(2) << "Attempting to read document at offset=" + << iterator.GetOffset(); libtextclassifier3::StatusOr<DocumentWrapper> document_wrapper_or = document_log_->ReadProto(iterator.GetOffset()); @@ -530,7 +558,7 @@ libtextclassifier3::Status DocumentStore::RegenerateDerivedFiles( libtextclassifier3::Status DocumentStore::ResetDocumentKeyMapper() { // TODO(b/139734457): Replace ptr.reset()->Delete->Create flow with Reset(). document_key_mapper_.reset(); - // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR + // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR // that can support error logging. libtextclassifier3::Status status = KeyMapper<DocumentId>::Delete(*filesystem_, base_dir_); @@ -540,7 +568,7 @@ libtextclassifier3::Status DocumentStore::ResetDocumentKeyMapper() { return status; } - // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN + // TODO(b/216487496): Implement a more robust version of TC_ASSIGN_OR_RETURN // that can support error logging. auto document_key_mapper_or = KeyMapper<DocumentId>::Create(*filesystem_, base_dir_, kUriMapperMaxSize); @@ -556,7 +584,7 @@ libtextclassifier3::Status DocumentStore::ResetDocumentKeyMapper() { libtextclassifier3::Status DocumentStore::ResetDocumentIdMapper() { // TODO(b/139734457): Replace ptr.reset()->Delete->Create flow with Reset(). 
document_id_mapper_.reset(); - // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR + // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR // that can support error logging. libtextclassifier3::Status status = FileBackedVector<int64_t>::Delete( *filesystem_, MakeDocumentIdMapperFilename(base_dir_)); @@ -565,7 +593,7 @@ libtextclassifier3::Status DocumentStore::ResetDocumentIdMapper() { << "Failed to delete old document_id mapper"; return status; } - // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN + // TODO(b/216487496): Implement a more robust version of TC_ASSIGN_OR_RETURN // that can support error logging. auto document_id_mapper_or = FileBackedVector<int64_t>::Create( *filesystem_, MakeDocumentIdMapperFilename(base_dir_), @@ -618,7 +646,7 @@ libtextclassifier3::Status DocumentStore::ResetFilterCache() { libtextclassifier3::Status DocumentStore::ResetNamespaceMapper() { // TODO(b/139734457): Replace ptr.reset()->Delete->Create flow with Reset(). namespace_mapper_.reset(); - // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR + // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR // that can support error logging. libtextclassifier3::Status status = KeyMapper<NamespaceId>::Delete( *filesystem_, MakeNamespaceMapperFilename(base_dir_)); @@ -638,7 +666,7 @@ libtextclassifier3::Status DocumentStore::ResetNamespaceMapper() { libtextclassifier3::Status DocumentStore::ResetCorpusMapper() { // TODO(b/139734457): Replace ptr.reset()->Delete->Create flow with Reset(). corpus_mapper_.reset(); - // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR + // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR // that can support error logging. 
libtextclassifier3::Status status = KeyMapper<CorpusId>::Delete( *filesystem_, MakeCorpusMapperFilename(base_dir_)); @@ -1749,5 +1777,63 @@ libtextclassifier3::Status DocumentStore::SetUsageScores( return usage_store_->SetUsageScores(document_id, usage_scores); } +libtextclassifier3::StatusOr< + google::protobuf::RepeatedPtrField<DocumentDebugInfoProto::CorpusInfo>> +DocumentStore::CollectCorpusInfo() const { + google::protobuf::RepeatedPtrField<DocumentDebugInfoProto::CorpusInfo> + corpus_info; + libtextclassifier3::StatusOr<const SchemaProto*> schema_proto_or = + schema_store_->GetSchema(); + if (!schema_proto_or.ok()) { + return corpus_info; + } + // Maps from CorpusId to the corresponding protocol buffer in the result. + std::unordered_map<CorpusId, DocumentDebugInfoProto::CorpusInfo*> info_map; + std::unordered_map<NamespaceId, std::string> namespace_id_to_namespace = + namespace_mapper_->GetValuesToKeys(); + const SchemaProto* schema_proto = schema_proto_or.ValueOrDie(); + for (DocumentId document_id = 0; document_id < filter_cache_->num_elements(); + ++document_id) { + if (!InternalDoesDocumentExist(document_id)) { + continue; + } + ICING_ASSIGN_OR_RETURN(const DocumentFilterData* filter_data, + filter_cache_->Get(document_id)); + ICING_ASSIGN_OR_RETURN(const DocumentAssociatedScoreData* score_data, + score_cache_->Get(document_id)); + const std::string& name_space = + namespace_id_to_namespace[filter_data->namespace_id()]; + const std::string& schema = + schema_proto->types()[filter_data->schema_type_id()].schema_type(); + auto iter = info_map.find(score_data->corpus_id()); + if (iter == info_map.end()) { + DocumentDebugInfoProto::CorpusInfo* entry = corpus_info.Add(); + entry->set_namespace_(name_space); + entry->set_schema(schema); + iter = info_map.insert({score_data->corpus_id(), entry}).first; + } + iter->second->set_total_documents(iter->second->total_documents() + 1); + iter->second->set_total_token(iter->second->total_token() + + 
score_data->length_in_tokens()); + } + return corpus_info; +} + +libtextclassifier3::StatusOr<DocumentDebugInfoProto> +DocumentStore::GetDebugInfo(int verbosity) const { + DocumentDebugInfoProto debug_info; + *debug_info.mutable_document_storage_info() = GetStorageInfo(); + ICING_ASSIGN_OR_RETURN(Crc32 crc, ComputeChecksum()); + debug_info.set_crc(crc.Get()); + if (verbosity > 0) { + ICING_ASSIGN_OR_RETURN(google::protobuf::RepeatedPtrField< + DocumentDebugInfoProto::CorpusInfo> + corpus_info, + CollectCorpusInfo()); + *debug_info.mutable_corpus_info() = std::move(corpus_info); + } + return debug_info; +} + } // namespace lib } // namespace icing diff --git a/icing/store/document-store.h b/icing/store/document-store.h index c85c989..e6d2e5c 100644 --- a/icing/store/document-store.h +++ b/icing/store/document-store.h @@ -27,6 +27,7 @@ #include "icing/file/file-backed-vector.h" #include "icing/file/filesystem.h" #include "icing/file/portable-file-backed-proto-log.h" +#include "icing/proto/debug.pb.h" #include "icing/proto/document.pb.h" #include "icing/proto/document_wrapper.pb.h" #include "icing/proto/logging.pb.h" @@ -422,6 +423,17 @@ class DocumentStore { // INTERNAL_ERROR on compute error libtextclassifier3::StatusOr<Crc32> ComputeChecksum() const; + // Get debug information for the document store. + // verbosity <= 0, simplest debug information + // verbosity > 0, also return the total number of documents and tokens in each + // (namespace, schema type) pair. + // + // Returns: + // DocumentDebugInfoProto on success + // INTERNAL_ERROR on IO errors, crc compute error + libtextclassifier3::StatusOr<DocumentDebugInfoProto> GetDebugInfo( + int verbosity) const; + private: // Use DocumentStore::Create() to instantiate. DocumentStore(const Filesystem* filesystem, std::string_view base_dir, @@ -696,6 +708,13 @@ class DocumentStore { // the document_id_mapper somehow became larger than the filter cache. 
DocumentStorageInfoProto CalculateDocumentStatusCounts( DocumentStorageInfoProto storage_info) const; + + // Returns: + // - on success, a RepeatedPtrField for CorpusInfo collected. + // - OUT_OF_RANGE, this should never happen. + libtextclassifier3::StatusOr<google::protobuf::RepeatedPtrField< + DocumentDebugInfoProto::CorpusInfo>> + CollectCorpusInfo() const; }; } // namespace lib diff --git a/icing/store/document-store_benchmark.cc b/icing/store/document-store_benchmark.cc index 77da928..fc3fd9d 100644 --- a/icing/store/document-store_benchmark.cc +++ b/icing/store/document-store_benchmark.cc @@ -64,13 +64,13 @@ namespace lib { namespace { -constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL = - PropertyConfigProto_Cardinality_Code_OPTIONAL; +constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_OPTIONAL = + PropertyConfigProto::Cardinality::OPTIONAL; -constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN = - StringIndexingConfig_TokenizerType_Code_PLAIN; +constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_PLAIN = + StringIndexingConfig::TokenizerType::PLAIN; -constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY; +constexpr TermMatchType::Code MATCH_EXACT = TermMatchType::EXACT_ONLY; class DestructibleDirectory { public: diff --git a/icing/store/document-store_test.cc b/icing/store/document-store_test.cc index a506eea..a30b4e4 100644 --- a/icing/store/document-store_test.cc +++ b/icing/store/document-store_test.cc @@ -29,7 +29,6 @@ #include "icing/file/filesystem.h" #include "icing/file/memory-mapped-file.h" #include "icing/file/mock-filesystem.h" -#include "icing/helpers/icu/icu-data-file-helper.h" #include "icing/portable/equals-proto.h" #include "icing/portable/platform.h" #include "icing/proto/document.pb.h" @@ -45,6 +44,7 @@ #include "icing/store/namespace-id.h" #include "icing/testing/common-matchers.h" #include "icing/testing/fake-clock.h" +#include "icing/testing/icu-data-file-helper.h" 
#include "icing/testing/test-data.h" #include "icing/testing/tmp-directory.h" #include "icing/tokenization/language-segmenter-factory.h" @@ -85,16 +85,16 @@ const NamespaceStorageInfoProto& GetNamespaceStorageInfo( return std::move(NamespaceStorageInfoProto()); } -constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL = - PropertyConfigProto_Cardinality_Code_OPTIONAL; +constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_OPTIONAL = + PropertyConfigProto::Cardinality::OPTIONAL; -constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN = - StringIndexingConfig_TokenizerType_Code_PLAIN; +constexpr StringIndexingConfig::TokenizerType::Code TOKENIZER_PLAIN = + StringIndexingConfig::TokenizerType::PLAIN; -constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY; +constexpr TermMatchType::Code MATCH_EXACT = TermMatchType::EXACT_ONLY; -constexpr PropertyConfigProto_DataType_Code TYPE_INT = - PropertyConfigProto_DataType_Code_INT64; +constexpr PropertyConfigProto::DataType::Code TYPE_INT = + PropertyConfigProto::DataType::INT64; UsageReport CreateUsageReport(std::string name_space, std::string uri, int64 timestamp_ms, @@ -3170,15 +3170,6 @@ TEST_F(DocumentStoreTest, DetectCompleteDataLoss) { ASSERT_THAT(create_result.data_loss, Eq(DataLoss::COMPLETE)); } -// TODO(b/185845269) Re-enable this test by copying over a full valid set of -// document store files. Right now this test only includes the score_cache and -// the document store header. -// -// This causes a problem now because this cl changes behavior to not consider an -// InitializeExistingDerivedFiles failure to be a recovery if there is nothing -// to recover because the doocument store is empty. 
-#define DISABLE_BACKWARDS_COMPAT_TEST -#ifndef DISABLE_BACKWARDS_COMPAT_TEST TEST_F(DocumentStoreTest, LoadScoreCacheAndInitializeSuccessfully) { // The directory testdata/score_cache_without_length_in_tokens/document_store // contains only the scoring_cache and the document_store_header (holding the @@ -3194,29 +3185,26 @@ TEST_F(DocumentStoreTest, LoadScoreCacheAndInitializeSuccessfully) { // Get src files std::string document_store_without_length_in_tokens; - if (IsAndroidPlatform() || IsIosPlatform()) { + if (IsAndroidArm() || IsIosPlatform()) { document_store_without_length_in_tokens = GetTestFilePath( "icing/testdata/score_cache_without_length_in_tokens/" "document_store_android_ios_compatible"); + } else if (IsAndroidX86()) { + document_store_without_length_in_tokens = GetTestFilePath( + "icing/testdata/score_cache_without_length_in_tokens/" + "document_store_android_x86"); } else { document_store_without_length_in_tokens = GetTestFilePath( "icing/testdata/score_cache_without_length_in_tokens/" "document_store"); } - std::vector<std::string> document_store_files; Filesystem filesystem; - filesystem.ListDirectory(document_store_without_length_in_tokens.c_str(), - &document_store_files); - - ICING_LOG(INFO) << "Copying files " << document_store_without_length_in_tokens - << ' ' << document_store_files.size(); - for (size_t i = 0; i != document_store_files.size(); i++) { - std::string src = absl_ports::StrCat( - document_store_without_length_in_tokens, "/", document_store_files[i]); - std::string dst = - absl_ports::StrCat(document_store_dir_, "/", document_store_files[i]); - ASSERT_THAT(filesystem_.CopyFile(src.c_str(), dst.c_str()), true); - } + ICING_LOG(INFO) << "Copying files " + << document_store_without_length_in_tokens; + ASSERT_THAT( + filesystem.CopyDirectory(document_store_without_length_in_tokens.c_str(), + document_store_dir_.c_str(), /*recursive=*/true), + true); InitializeStatsProto initialize_stats; ICING_ASSERT_OK_AND_ASSIGN( @@ -3227,12 
+3215,11 @@ TEST_F(DocumentStoreTest, LoadScoreCacheAndInitializeSuccessfully) { &initialize_stats)); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); - // The store_cache trigger regeneration because its element size is - // inconsistent: expected 20 (current new size), actual 12 (as per the v0 - // score_cache). - EXPECT_TRUE(initialize_stats.has_document_store_recovery_cause()); + // The document log is using the legacy v0 format so that a migration is + // needed, which will also trigger regeneration. + EXPECT_EQ(initialize_stats.document_store_recovery_cause(), + InitializeStatsProto::LEGACY_DOCUMENT_LOG_FORMAT); } -#endif // DISABLE_BACKWARDS_COMPAT_TEST TEST_F(DocumentStoreTest, DocumentStoreStorageInfo) { ICING_ASSERT_OK_AND_ASSIGN( @@ -3422,18 +3409,22 @@ TEST_F(DocumentStoreTest, InitializeForceRecoveryUpdatesTypeIds) { { // Create the document store the second time and force recovery + InitializeStatsProto initialize_stats; ICING_ASSERT_OK_AND_ASSIGN( DocumentStore::CreateResult create_result, - DocumentStore::Create( - &filesystem_, document_store_dir_, &fake_clock_, schema_store.get(), - /*force_recovery_and_revalidate_documents=*/true)); + DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, + schema_store.get(), + /*force_recovery_and_revalidate_documents=*/true, + &initialize_stats)); std::unique_ptr<DocumentStore> doc_store = std::move(create_result.document_store); // Ensure that the type id of the email document has been correctly updated. ICING_ASSERT_OK_AND_ASSIGN(DocumentFilterData filter_data, doc_store->GetDocumentFilterData(docid)); - ASSERT_THAT(filter_data.schema_type_id(), Eq(1)); + EXPECT_THAT(filter_data.schema_type_id(), Eq(1)); + EXPECT_THAT(initialize_stats.document_store_recovery_cause(), + Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC)); } } @@ -3841,7 +3832,8 @@ TEST_F(DocumentStoreTest, MigrateToPortableFileBackedProtoLog) { // Check that we didn't lose anything. 
A migration also doesn't technically // count as a recovery. EXPECT_THAT(create_result.data_loss, Eq(DataLoss::NONE)); - EXPECT_FALSE(initialize_stats.has_document_store_recovery_cause()); + EXPECT_EQ(initialize_stats.document_store_recovery_cause(), + InitializeStatsProto::LEGACY_DOCUMENT_LOG_FORMAT); // Document 1 and 3 were put normally, and document 2 was deleted in our // testdata files. @@ -3864,6 +3856,164 @@ TEST_F(DocumentStoreTest, MigrateToPortableFileBackedProtoLog) { } #endif // DISABLE_BACKWARDS_COMPAT_TEST +TEST_F(DocumentStoreTest, GetDebugInfo) { + SchemaProto schema = + SchemaBuilder() + .AddType(SchemaTypeConfigBuilder() + .SetType("email") + .AddProperty( + PropertyConfigBuilder() + .SetName("subject") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL)) + .AddProperty( + PropertyConfigBuilder() + .SetName("body") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .AddType(SchemaTypeConfigBuilder().SetType("person").AddProperty( + PropertyConfigBuilder() + .SetName("name") + .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN) + .SetCardinality(CARDINALITY_OPTIONAL))) + .Build(); + std::string schema_store_dir = schema_store_dir_ + "_custom"; + filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str()); + filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str()); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_)); + + ICING_ASSERT_OK(schema_store->SetSchema(schema)); + + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, + schema_store.get())); + std::unique_ptr<DocumentStore> document_store = + std::move(create_result.document_store); + + DocumentProto document1 = DocumentBuilder() + .SetKey("namespace1", "email/1") + .SetSchema("email") + .AddStringProperty("subject", 
"aa bb cc") + .AddStringProperty("body", "dd ee") + .SetCreationTimestampMs(1) + .Build(); + ICING_ASSERT_OK(document_store->Put(document1, 5)); + + DocumentProto document2 = DocumentBuilder() + .SetKey("namespace2", "email/2") + .SetSchema("email") + .AddStringProperty("subject", "aa bb") + .AddStringProperty("body", "cc") + .SetCreationTimestampMs(1) + .Build(); + ICING_ASSERT_OK(document_store->Put(document2, 3)); + + DocumentProto document3 = DocumentBuilder() + .SetKey("namespace2", "email/3") + .SetSchema("email") + .AddStringProperty("subject", "aa") + .AddStringProperty("body", "") + .SetCreationTimestampMs(1) + .Build(); + ICING_ASSERT_OK(document_store->Put(document3, 1)); + + DocumentProto document4 = DocumentBuilder() + .SetKey("namespace1", "person/1") + .SetSchema("person") + .AddStringProperty("name", "test test") + .SetCreationTimestampMs(1) + .Build(); + ICING_ASSERT_OK(document_store->Put(document4, 2)); + + ICING_ASSERT_OK_AND_ASSIGN(DocumentDebugInfoProto out1, + document_store->GetDebugInfo(/*verbosity=*/1)); + EXPECT_THAT(out1.crc(), Gt(0)); + EXPECT_THAT(out1.document_storage_info().num_alive_documents(), Eq(4)); + EXPECT_THAT(out1.document_storage_info().num_deleted_documents(), Eq(0)); + EXPECT_THAT(out1.document_storage_info().num_expired_documents(), Eq(0)); + + DocumentDebugInfoProto::CorpusInfo info1, info2, info3; + info1.set_namespace_("namespace1"); + info1.set_schema("email"); + info1.set_total_documents(1); // document1 + info1.set_total_token(5); + + info2.set_namespace_("namespace2"); + info2.set_schema("email"); + info2.set_total_documents(2); // document2 and document3 + info2.set_total_token(4); // 3 + 1 + + info3.set_namespace_("namespace1"); + info3.set_schema("person"); + info3.set_total_documents(1); // document4 + info3.set_total_token(2); + + EXPECT_THAT(out1.corpus_info(), + UnorderedElementsAre(EqualsProto(info1), EqualsProto(info2), + EqualsProto(info3))); + + // Delete document3. 
+ ICING_ASSERT_OK(document_store->Delete("namespace2", "email/3")); + ICING_ASSERT_OK_AND_ASSIGN(DocumentDebugInfoProto out2, + document_store->GetDebugInfo(/*verbosity=*/1)); + EXPECT_THAT(out2.crc(), Gt(0)); + EXPECT_THAT(out2.crc(), Not(Eq(out1.crc()))); + EXPECT_THAT(out2.document_storage_info().num_alive_documents(), Eq(3)); + EXPECT_THAT(out2.document_storage_info().num_deleted_documents(), Eq(1)); + EXPECT_THAT(out2.document_storage_info().num_expired_documents(), Eq(0)); + info2.set_total_documents(1); // document2 + info2.set_total_token(3); + EXPECT_THAT(out2.corpus_info(), + UnorderedElementsAre(EqualsProto(info1), EqualsProto(info2), + EqualsProto(info3))); + + ICING_ASSERT_OK_AND_ASSIGN(DocumentDebugInfoProto out3, + document_store->GetDebugInfo(/*verbosity=*/0)); + EXPECT_THAT(out3.corpus_info(), IsEmpty()); +} + +TEST_F(DocumentStoreTest, GetDebugInfoWithoutSchema) { + std::string schema_store_dir = schema_store_dir_ + "_custom"; + filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str()); + filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str()); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<SchemaStore> schema_store, + SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_)); + + ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, + schema_store.get())); + std::unique_ptr<DocumentStore> document_store = + std::move(create_result.document_store); + ICING_ASSERT_OK_AND_ASSIGN(DocumentDebugInfoProto out, + document_store->GetDebugInfo(/*verbosity=*/1)); + EXPECT_THAT(out.crc(), Gt(0)); + EXPECT_THAT(out.document_storage_info().num_alive_documents(), Eq(0)); + EXPECT_THAT(out.document_storage_info().num_deleted_documents(), Eq(0)); + EXPECT_THAT(out.document_storage_info().num_expired_documents(), Eq(0)); + EXPECT_THAT(out.corpus_info(), IsEmpty()); +} + +TEST_F(DocumentStoreTest, GetDebugInfoForEmptyDocumentStore) { + 
ICING_ASSERT_OK_AND_ASSIGN( + DocumentStore::CreateResult create_result, + DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_, + schema_store_.get())); + std::unique_ptr<DocumentStore> document_store = + std::move(create_result.document_store); + ICING_ASSERT_OK_AND_ASSIGN(DocumentDebugInfoProto out, + document_store->GetDebugInfo(/*verbosity=*/1)); + EXPECT_THAT(out.crc(), Gt(0)); + EXPECT_THAT(out.document_storage_info().num_alive_documents(), Eq(0)); + EXPECT_THAT(out.document_storage_info().num_deleted_documents(), Eq(0)); + EXPECT_THAT(out.document_storage_info().num_expired_documents(), Eq(0)); + EXPECT_THAT(out.corpus_info(), IsEmpty()); +} + } // namespace } // namespace lib diff --git a/icing/store/namespace-checker-impl.h b/icing/store/namespace-checker-impl.h new file mode 100644 index 0000000..bcd0643 --- /dev/null +++ b/icing/store/namespace-checker-impl.h @@ -0,0 +1,51 @@ +// Copyright (C) 2021 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef ICING_STORE_NAMESPACE_CHECKER_IMPL_H_ +#define ICING_STORE_NAMESPACE_CHECKER_IMPL_H_ + +#include "icing/store/document-id.h" +#include "icing/store/document-store.h" +#include "icing/store/namespace-checker.h" +#include "icing/store/namespace-id.h" + +namespace icing { +namespace lib { + +class NamespaceCheckerImpl : public NamespaceChecker { + public: + explicit NamespaceCheckerImpl( + const DocumentStore* document_store, + std::unordered_set<NamespaceId> target_namespace_ids) + : document_store_(*document_store), + target_namespace_ids_(std::move(target_namespace_ids)) {} + + bool BelongsToTargetNamespaces(DocumentId document_id) const override { + if (target_namespace_ids_.empty()) { + return true; + } + auto document_filter_data_or_ = + document_store_.GetDocumentFilterData(document_id); + return document_filter_data_or_.ok() && + target_namespace_ids_.count( + document_filter_data_or_.ValueOrDie().namespace_id())> 0; + } + const DocumentStore& document_store_; + std::unordered_set<NamespaceId> target_namespace_ids_; +}; + +} // namespace lib +} // namespace icing + +#endif // ICING_STORE_NAMESPACE_CHECKER_IMPL_H_
\ No newline at end of file diff --git a/icing/store/namespace-checker.h b/icing/store/namespace-checker.h new file mode 100644 index 0000000..8812ab1 --- /dev/null +++ b/icing/store/namespace-checker.h @@ -0,0 +1,42 @@ +// Copyright (C) 2021 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ICING_STORE_NAMESPACE_CHECKER_H_ +#define ICING_STORE_NAMESPACE_CHECKER_H_ + +#include "icing/store/document-id.h" + +namespace icing { +namespace lib { + +class NamespaceChecker { + public: + virtual ~NamespaceChecker() = default; + + // Check whether the given document id belongs to the target namespaces. 
+ // Returns: + // On success, + // - true: the given document id belongs to the target namespaces + // - false: the given document id doesn't belong to the target namespaces + // OUT_OF_RANGE if document_id is negative or exceeds previously seen + // DocumentIds + // NOT_FOUND if the document or the filter data is not found + // INTERNAL_ERROR on all other errors + virtual bool BelongsToTargetNamespaces(DocumentId document_id) const = 0; +}; + +} // namespace lib +} // namespace icing + +#endif // ICING_STORE_NAMESPACE_CHECKER_H_ diff --git a/icing/testing/always-true-namespace-checker-impl.h b/icing/testing/always-true-namespace-checker-impl.h new file mode 100644 index 0000000..f7744b6 --- /dev/null +++ b/icing/testing/always-true-namespace-checker-impl.h @@ -0,0 +1,34 @@ +// Copyright (C) 2021 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ICING_TESTING_ALWAYS_TRUE_NAMESPACE_CHECKER_IMPL_H_ +#define ICING_TESTING_ALWAYS_TRUE_NAMESPACE_CHECKER_IMPL_H_ + +#include "icing/store/document-id.h" +#include "icing/store/namespace-checker.h" + +namespace icing { +namespace lib { + +class AlwaysTrueNamespaceCheckerImpl : public NamespaceChecker { + public: + bool BelongsToTargetNamespaces(DocumentId document_id) const override { + return true; + } +}; + +} // namespace lib +} // namespace icing + +#endif // ICING_TESTING_ALWAYS_TRUE_NAMESPACE_CHECKER_IMPL_H_
\ No newline at end of file diff --git a/icing/helpers/icu/icu-data-file-helper.cc b/icing/testing/icu-data-file-helper.cc index 6607c40..aaeb738 100644 --- a/icing/helpers/icu/icu-data-file-helper.cc +++ b/icing/testing/icu-data-file-helper.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "icing/helpers/icu/icu-data-file-helper.h" +#include "icing/testing/icu-data-file-helper.h" #include <sys/mman.h> diff --git a/icing/helpers/icu/icu-data-file-helper.h b/icing/testing/icu-data-file-helper.h index 90f5bc7..d0276e7 100644 --- a/icing/helpers/icu/icu-data-file-helper.h +++ b/icing/testing/icu-data-file-helper.h @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifndef ICING_HELPERS_ICU_ICU_DATA_FILE_HELPER -#define ICING_HELPERS_ICU_ICU_DATA_FILE_HELPER +#ifndef ICING_TESTING_ICU_DATA_FILE_HELPER +#define ICING_TESTING_ICU_DATA_FILE_HELPER #include "icing/text_classifier/lib3/utils/base/status.h" @@ -40,4 +40,4 @@ libtextclassifier3::Status SetUpICUDataFile( } // namespace lib } // namespace icing -#endif // ICING_HELPERS_ICU_ICU_DATA_FILE_HELPER +#endif // ICING_TESTING_ICU_DATA_FILE_HELPER diff --git a/icing/testing/random-string.h b/icing/testing/random-string.h index 3165bf6..fd8d87b 100644 --- a/icing/testing/random-string.h +++ b/icing/testing/random-string.h @@ -15,6 +15,7 @@ #ifndef ICING_TESTING_RANDOM_STRING_H_ #define ICING_TESTING_RANDOM_STRING_H_ +#include <algorithm> #include <random> #include <string> diff --git a/icing/tokenization/icu/icu-language-segmenter.cc b/icing/tokenization/icu/icu-language-segmenter.cc index 598ede7..8e0f789 100644 --- a/icing/tokenization/icu/icu-language-segmenter.cc +++ b/icing/tokenization/icu/icu-language-segmenter.cc @@ -59,34 +59,35 @@ class IcuLanguageSegmenterIterator : public LanguageSegmenter::Iterator { ~IcuLanguageSegmenterIterator() { 
ubrk_close(break_iterator_); - utext_close(&u_text_); + utext_close(u_text_); } // Advances to the next term. Returns false if it has reached the end. bool Advance() override { - // Prerequisite check - if (term_end_index_exclusive_ == UBRK_DONE) { - return false; - } + while (true) { + // Prerequisite check + if (term_end_index_exclusive_ == UBRK_DONE) { + return false; + } - if (term_end_index_exclusive_ == 0) { - // First Advance() call - term_start_index_ = ubrk_first(break_iterator_); - } else { - term_start_index_ = term_end_index_exclusive_; - } - term_end_index_exclusive_ = ubrk_next(break_iterator_); + if (term_end_index_exclusive_ == 0) { + // First Advance() call + term_start_index_ = ubrk_first(break_iterator_); + } else { + term_start_index_ = term_end_index_exclusive_; + } + term_end_index_exclusive_ = ubrk_next(break_iterator_); - // Reached the end - if (term_end_index_exclusive_ == UBRK_DONE) { - MarkAsDone(); - return false; - } + // Reached the end + if (term_end_index_exclusive_ == UBRK_DONE) { + MarkAsDone(); + return false; + } - if (!IsValidSegment()) { - return Advance(); + if (IsValidSegment()) { + return true; + } } - return true; } // Returns the current term. 
It can be called only when Advance() returns @@ -253,7 +254,7 @@ class IcuLanguageSegmenterIterator : public LanguageSegmenter::Iterator { : break_iterator_(nullptr), text_(text), locale_(locale), - u_text_(UTEXT_INITIALIZER), + u_text_(nullptr), offset_iterator_(text), term_start_index_(0), term_end_index_exclusive_(0) {} @@ -261,10 +262,13 @@ class IcuLanguageSegmenterIterator : public LanguageSegmenter::Iterator { // Returns true on success bool Initialize() { UErrorCode status = U_ZERO_ERROR; - utext_openUTF8(&u_text_, text_.data(), text_.length(), &status); + u_text_ = utext_openUTF8(nullptr, text_.data(), text_.length(), &status); + if (u_text_ == nullptr) { + return false; + } break_iterator_ = ubrk_open(UBRK_WORD, locale_.data(), /*text=*/nullptr, /*textLength=*/0, &status); - ubrk_setUText(break_iterator_, &u_text_, &status); + ubrk_setUText(break_iterator_, u_text_, &status); return !U_FAILURE(status); } @@ -322,8 +326,8 @@ class IcuLanguageSegmenterIterator : public LanguageSegmenter::Iterator { std::string_view locale_; // A thin wrapper around the input UTF8 text, needed by break_iterator_. - // utext_close() must be called after using. - UText u_text_; + // Allocated by calling utext_openUtf8() and freed by calling utext_close(). + UText* u_text_; // Offset iterator. This iterator is not guaranteed to point to any particular // character, but is guaranteed to point to a valid UTF character sequence. 
diff --git a/icing/tokenization/icu/icu-language-segmenter_test.cc b/icing/tokenization/icu/icu-language-segmenter_test.cc index 3090087..fe0b96e 100644 --- a/icing/tokenization/icu/icu-language-segmenter_test.cc +++ b/icing/tokenization/icu/icu-language-segmenter_test.cc @@ -21,8 +21,8 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" #include "icing/absl_ports/str_cat.h" -#include "icing/helpers/icu/icu-data-file-helper.h" #include "icing/testing/common-matchers.h" +#include "icing/testing/icu-data-file-helper.h" #include "icing/testing/icu-i18n-test-utils.h" #include "icing/testing/jni-test-helpers.h" #include "icing/testing/test-data.h" diff --git a/icing/tokenization/language-segmenter-iterator_test.cc b/icing/tokenization/language-segmenter-iterator_test.cc index d293581..3aff45c 100644 --- a/icing/tokenization/language-segmenter-iterator_test.cc +++ b/icing/tokenization/language-segmenter-iterator_test.cc @@ -15,9 +15,9 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" #include "icing/absl_ports/str_cat.h" -#include "icing/helpers/icu/icu-data-file-helper.h" #include "icing/portable/platform.h" #include "icing/testing/common-matchers.h" +#include "icing/testing/icu-data-file-helper.h" #include "icing/testing/jni-test-helpers.h" #include "icing/testing/test-data.h" #include "icing/tokenization/language-segmenter-factory.h" diff --git a/icing/tokenization/language-segmenter_benchmark.cc b/icing/tokenization/language-segmenter_benchmark.cc index bd86169..6f7d4df 100644 --- a/icing/tokenization/language-segmenter_benchmark.cc +++ b/icing/tokenization/language-segmenter_benchmark.cc @@ -14,8 +14,8 @@ #include "testing/base/public/benchmark.h" #include "gmock/gmock.h" -#include "icing/helpers/icu/icu-data-file-helper.h" #include "icing/testing/common-matchers.h" +#include "icing/testing/icu-data-file-helper.h" #include "icing/testing/test-data.h" #include "icing/tokenization/language-segmenter-factory.h" #include "icing/tokenization/language-segmenter.h" 
diff --git a/icing/tokenization/plain-tokenizer.cc b/icing/tokenization/plain-tokenizer.cc index 13fe550..7a1949f 100644 --- a/icing/tokenization/plain-tokenizer.cc +++ b/icing/tokenization/plain-tokenizer.cc @@ -66,9 +66,9 @@ class PlainTokenIterator : public Tokenizer::Iterator { Token GetToken() const override { if (current_term_.empty()) { - return Token(Token::INVALID); + return Token(Token::Type::INVALID); } - return Token(Token::REGULAR, current_term_); + return Token(Token::Type::REGULAR, current_term_); } libtextclassifier3::StatusOr<CharacterIterator> CalculateTokenStart() @@ -81,8 +81,8 @@ class PlainTokenIterator : public Tokenizer::Iterator { return base_iterator_->CalculateTermEndExclusive(); } - bool ResetToTokenAfter(int32_t offset) override { - if (!base_iterator_->ResetToTermStartingAfterUtf32(offset).ok()) { + bool ResetToTokenStartingAfter(int32_t utf32_offset) override { + if (!base_iterator_->ResetToTermStartingAfterUtf32(utf32_offset).ok()) { return false; } current_term_ = base_iterator_->GetTerm(); @@ -93,15 +93,17 @@ class PlainTokenIterator : public Tokenizer::Iterator { return true; } - bool ResetToTokenBefore(int32_t offset) override { + bool ResetToTokenEndingBefore(int32_t utf32_offset) override { ICING_ASSIGN_OR_RETURN( - offset, base_iterator_->ResetToTermEndingBeforeUtf32(offset), false); + utf32_offset, + base_iterator_->ResetToTermEndingBeforeUtf32(utf32_offset), false); current_term_ = base_iterator_->GetTerm(); while (!IsValidTerm(current_term_)) { // Haven't found a valid term yet. Retrieve the term prior to this one // from the segmenter. 
ICING_ASSIGN_OR_RETURN( - offset, base_iterator_->ResetToTermEndingBeforeUtf32(offset), false); + utf32_offset, + base_iterator_->ResetToTermEndingBeforeUtf32(utf32_offset), false); current_term_ = base_iterator_->GetTerm(); } return true; diff --git a/icing/tokenization/plain-tokenizer_test.cc b/icing/tokenization/plain-tokenizer_test.cc index 7490bfa..c48b51e 100644 --- a/icing/tokenization/plain-tokenizer_test.cc +++ b/icing/tokenization/plain-tokenizer_test.cc @@ -18,9 +18,9 @@ #include "gmock/gmock.h" #include "icing/absl_ports/str_cat.h" -#include "icing/helpers/icu/icu-data-file-helper.h" #include "icing/portable/platform.h" #include "icing/testing/common-matchers.h" +#include "icing/testing/icu-data-file-helper.h" #include "icing/testing/icu-i18n-test-utils.h" #include "icing/testing/jni-test-helpers.h" #include "icing/testing/test-data.h" @@ -68,26 +68,27 @@ TEST_F(PlainTokenizerTest, Simple) { EXPECT_THAT(plain_tokenizer->TokenizeAll(""), IsOkAndHolds(IsEmpty())); - EXPECT_THAT(plain_tokenizer->TokenizeAll("Hello World"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "Hello"), - EqualsToken(Token::REGULAR, "World")))); + EXPECT_THAT( + plain_tokenizer->TokenizeAll("Hello World"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "Hello"), + EqualsToken(Token::Type::REGULAR, "World")))); EXPECT_THAT( plain_tokenizer->TokenizeAll( "Lorem ipsum dolor sit amet, consectetur adipiscing elit. 
" "Duis efficitur iaculis auctor."), - IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "Lorem"), - EqualsToken(Token::REGULAR, "ipsum"), - EqualsToken(Token::REGULAR, "dolor"), - EqualsToken(Token::REGULAR, "sit"), - EqualsToken(Token::REGULAR, "amet"), - EqualsToken(Token::REGULAR, "consectetur"), - EqualsToken(Token::REGULAR, "adipiscing"), - EqualsToken(Token::REGULAR, "elit"), - EqualsToken(Token::REGULAR, "Duis"), - EqualsToken(Token::REGULAR, "efficitur"), - EqualsToken(Token::REGULAR, "iaculis"), - EqualsToken(Token::REGULAR, "auctor")))); + IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "Lorem"), + EqualsToken(Token::Type::REGULAR, "ipsum"), + EqualsToken(Token::Type::REGULAR, "dolor"), + EqualsToken(Token::Type::REGULAR, "sit"), + EqualsToken(Token::Type::REGULAR, "amet"), + EqualsToken(Token::Type::REGULAR, "consectetur"), + EqualsToken(Token::Type::REGULAR, "adipiscing"), + EqualsToken(Token::Type::REGULAR, "elit"), + EqualsToken(Token::Type::REGULAR, "Duis"), + EqualsToken(Token::Type::REGULAR, "efficitur"), + EqualsToken(Token::Type::REGULAR, "iaculis"), + EqualsToken(Token::Type::REGULAR, "auctor")))); } TEST_F(PlainTokenizerTest, Whitespace) { @@ -107,16 +108,18 @@ TEST_F(PlainTokenizerTest, Whitespace) { // 0x0009 is horizontal tab, considered as a whitespace std::string text_with_horizontal_tab = absl_ports::StrCat("Hello", UCharToString(0x0009), "World"); - EXPECT_THAT(plain_tokenizer->TokenizeAll(text_with_horizontal_tab), - IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "Hello"), - EqualsToken(Token::REGULAR, "World")))); + EXPECT_THAT( + plain_tokenizer->TokenizeAll(text_with_horizontal_tab), + IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "Hello"), + EqualsToken(Token::Type::REGULAR, "World")))); // 0x000B is vertical tab, considered as a whitespace std::string text_with_vertical_tab = absl_ports::StrCat("Hello", UCharToString(0x000B), "World"); - 
EXPECT_THAT(plain_tokenizer->TokenizeAll(text_with_vertical_tab), - IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "Hello"), - EqualsToken(Token::REGULAR, "World")))); + EXPECT_THAT( + plain_tokenizer->TokenizeAll(text_with_vertical_tab), + IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "Hello"), + EqualsToken(Token::Type::REGULAR, "World")))); } TEST_F(PlainTokenizerTest, Punctuation) { @@ -131,38 +134,39 @@ TEST_F(PlainTokenizerTest, Punctuation) { language_segmenter.get())); // Half-width punctuation marks are filtered out. - EXPECT_THAT(plain_tokenizer->TokenizeAll( - "Hello, World! Hello: World. \"Hello\" World?"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "Hello"), - EqualsToken(Token::REGULAR, "World"), - EqualsToken(Token::REGULAR, "Hello"), - EqualsToken(Token::REGULAR, "World"), - EqualsToken(Token::REGULAR, "Hello"), - EqualsToken(Token::REGULAR, "World")))); + EXPECT_THAT( + plain_tokenizer->TokenizeAll( + "Hello, World! Hello: World. \"Hello\" World?"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "Hello"), + EqualsToken(Token::Type::REGULAR, "World"), + EqualsToken(Token::Type::REGULAR, "Hello"), + EqualsToken(Token::Type::REGULAR, "World"), + EqualsToken(Token::Type::REGULAR, "Hello"), + EqualsToken(Token::Type::REGULAR, "World")))); // Full-width punctuation marks are filtered out. 
std::vector<std::string_view> exp_tokens; if (IsCfStringTokenization()) { EXPECT_THAT( plain_tokenizer->TokenizeAll("你好,世界!你好:世界。“你好”世界?"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "你"), - EqualsToken(Token::REGULAR, "好"), - EqualsToken(Token::REGULAR, "世界"), - EqualsToken(Token::REGULAR, "你"), - EqualsToken(Token::REGULAR, "好"), - EqualsToken(Token::REGULAR, "世界"), - EqualsToken(Token::REGULAR, "你"), - EqualsToken(Token::REGULAR, "好"), - EqualsToken(Token::REGULAR, "世界")))); + IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "你"), + EqualsToken(Token::Type::REGULAR, "好"), + EqualsToken(Token::Type::REGULAR, "世界"), + EqualsToken(Token::Type::REGULAR, "你"), + EqualsToken(Token::Type::REGULAR, "好"), + EqualsToken(Token::Type::REGULAR, "世界"), + EqualsToken(Token::Type::REGULAR, "你"), + EqualsToken(Token::Type::REGULAR, "好"), + EqualsToken(Token::Type::REGULAR, "世界")))); } else { EXPECT_THAT( plain_tokenizer->TokenizeAll("你好,世界!你好:世界。“你好”世界?"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "你好"), - EqualsToken(Token::REGULAR, "世界"), - EqualsToken(Token::REGULAR, "你好"), - EqualsToken(Token::REGULAR, "世界"), - EqualsToken(Token::REGULAR, "你好"), - EqualsToken(Token::REGULAR, "世界")))); + IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "你好"), + EqualsToken(Token::Type::REGULAR, "世界"), + EqualsToken(Token::Type::REGULAR, "你好"), + EqualsToken(Token::Type::REGULAR, "世界"), + EqualsToken(Token::Type::REGULAR, "你好"), + EqualsToken(Token::Type::REGULAR, "世界")))); } } @@ -180,14 +184,16 @@ TEST_F(PlainTokenizerTest, SpecialCharacters) { // Right now we don't have special logic for these characters, just output // them as tokens. 
- EXPECT_THAT(plain_tokenizer->TokenizeAll("1+1"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "1"), - EqualsToken(Token::REGULAR, "+"), - EqualsToken(Token::REGULAR, "1")))); + EXPECT_THAT( + plain_tokenizer->TokenizeAll("1+1"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "1"), + EqualsToken(Token::Type::REGULAR, "+"), + EqualsToken(Token::Type::REGULAR, "1")))); - EXPECT_THAT(plain_tokenizer->TokenizeAll("$50"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "$"), - EqualsToken(Token::REGULAR, "50")))); + EXPECT_THAT( + plain_tokenizer->TokenizeAll("$50"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "$"), + EqualsToken(Token::Type::REGULAR, "50")))); } TEST_F(PlainTokenizerTest, CJKT) { @@ -203,12 +209,13 @@ TEST_F(PlainTokenizerTest, CJKT) { tokenizer_factory::CreateIndexingTokenizer( StringIndexingConfig::TokenizerType::PLAIN, language_segmenter.get())); - EXPECT_THAT(plain_tokenizer->TokenizeAll("我每天走路去上班。"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "我"), - EqualsToken(Token::REGULAR, "每天"), - EqualsToken(Token::REGULAR, "走路"), - EqualsToken(Token::REGULAR, "去"), - EqualsToken(Token::REGULAR, "上班")))); + EXPECT_THAT( + plain_tokenizer->TokenizeAll("我每天走路去上班。"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "我"), + EqualsToken(Token::Type::REGULAR, "每天"), + EqualsToken(Token::Type::REGULAR, "走路"), + EqualsToken(Token::Type::REGULAR, "去"), + EqualsToken(Token::Type::REGULAR, "上班")))); // Japanese options = language_segmenter_factory::SegmenterOptions(ULOC_JAPANESE, jni_cache_.get()); @@ -220,41 +227,44 @@ TEST_F(PlainTokenizerTest, CJKT) { StringIndexingConfig::TokenizerType::PLAIN, language_segmenter.get())); if (IsCfStringTokenization()) { - EXPECT_THAT(plain_tokenizer->TokenizeAll("私は毎日仕事に歩いています。"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "私"), - EqualsToken(Token::REGULAR, "は"), - EqualsToken(Token::REGULAR, "毎日"), - EqualsToken(Token::REGULAR, "仕事"), - 
EqualsToken(Token::REGULAR, "に"), - EqualsToken(Token::REGULAR, "歩い"), - EqualsToken(Token::REGULAR, "て"), - EqualsToken(Token::REGULAR, "い"), - EqualsToken(Token::REGULAR, "ます")))); + EXPECT_THAT( + plain_tokenizer->TokenizeAll("私は毎日仕事に歩いています。"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "私"), + EqualsToken(Token::Type::REGULAR, "は"), + EqualsToken(Token::Type::REGULAR, "毎日"), + EqualsToken(Token::Type::REGULAR, "仕事"), + EqualsToken(Token::Type::REGULAR, "に"), + EqualsToken(Token::Type::REGULAR, "歩い"), + EqualsToken(Token::Type::REGULAR, "て"), + EqualsToken(Token::Type::REGULAR, "い"), + EqualsToken(Token::Type::REGULAR, "ます")))); } else { - EXPECT_THAT(plain_tokenizer->TokenizeAll("私は毎日仕事に歩いています。"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "私"), - EqualsToken(Token::REGULAR, "は"), - EqualsToken(Token::REGULAR, "毎日"), - EqualsToken(Token::REGULAR, "仕事"), - EqualsToken(Token::REGULAR, "に"), - EqualsToken(Token::REGULAR, "歩"), - EqualsToken(Token::REGULAR, "い"), - EqualsToken(Token::REGULAR, "てい"), - EqualsToken(Token::REGULAR, "ます")))); + EXPECT_THAT( + plain_tokenizer->TokenizeAll("私は毎日仕事に歩いています。"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "私"), + EqualsToken(Token::Type::REGULAR, "は"), + EqualsToken(Token::Type::REGULAR, "毎日"), + EqualsToken(Token::Type::REGULAR, "仕事"), + EqualsToken(Token::Type::REGULAR, "に"), + EqualsToken(Token::Type::REGULAR, "歩"), + EqualsToken(Token::Type::REGULAR, "い"), + EqualsToken(Token::Type::REGULAR, "てい"), + EqualsToken(Token::Type::REGULAR, "ます")))); } // Khmer - EXPECT_THAT(plain_tokenizer->TokenizeAll("ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "ញុំ"), - EqualsToken(Token::REGULAR, "ដើរទៅ"), - EqualsToken(Token::REGULAR, "ធ្វើការ"), - EqualsToken(Token::REGULAR, "រាល់ថ្ងៃ")))); - // Korean EXPECT_THAT( - plain_tokenizer->TokenizeAll("나는 매일 출근합니다."), - IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "나는"), - EqualsToken(Token::REGULAR, 
"매일"), - EqualsToken(Token::REGULAR, "출근합니다")))); + plain_tokenizer->TokenizeAll("ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "ញុំ"), + EqualsToken(Token::Type::REGULAR, "ដើរទៅ"), + EqualsToken(Token::Type::REGULAR, "ធ្វើការ"), + EqualsToken(Token::Type::REGULAR, "រាល់ថ្ងៃ")))); + // Korean + EXPECT_THAT(plain_tokenizer->TokenizeAll("나는 매일 출근합니다."), + IsOkAndHolds(ElementsAre( + EqualsToken(Token::Type::REGULAR, "나는"), + EqualsToken(Token::Type::REGULAR, "매일"), + EqualsToken(Token::Type::REGULAR, "출근합니다")))); // Thai // DIFFERENCE!! Disagreement over how to segment "ทุกวัน" (iOS groups). @@ -264,23 +274,24 @@ TEST_F(PlainTokenizerTest, CJKT) { std::vector<Token> tokens, plain_tokenizer->TokenizeAll("ฉันเดินไปทำงานทุกวัน")); - EXPECT_THAT(tokens, ElementsAre(EqualsToken(Token::REGULAR, "ฉัน"), - EqualsToken(Token::REGULAR, "เดิน"), - EqualsToken(Token::REGULAR, "ไป"), - EqualsToken(Token::REGULAR, "ทำงาน"), - EqualsToken(Token::REGULAR, "ทุกวัน"))); + EXPECT_THAT(tokens, ElementsAre(EqualsToken(Token::Type::REGULAR, "ฉัน"), + EqualsToken(Token::Type::REGULAR, "เดิน"), + EqualsToken(Token::Type::REGULAR, "ไป"), + EqualsToken(Token::Type::REGULAR, "ทำงาน"), + EqualsToken(Token::Type::REGULAR, "ทุกวัน"))); } else { - EXPECT_THAT(plain_tokenizer->TokenizeAll("ฉันเดินไปทำงานทุกวัน"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "ฉัน"), - EqualsToken(Token::REGULAR, "เดิน"), - EqualsToken(Token::REGULAR, "ไป"), - EqualsToken(Token::REGULAR, "ทำงาน"), - EqualsToken(Token::REGULAR, "ทุก"), - EqualsToken(Token::REGULAR, "วัน")))); + EXPECT_THAT( + plain_tokenizer->TokenizeAll("ฉันเดินไปทำงานทุกวัน"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "ฉัน"), + EqualsToken(Token::Type::REGULAR, "เดิน"), + EqualsToken(Token::Type::REGULAR, "ไป"), + EqualsToken(Token::Type::REGULAR, "ทำงาน"), + EqualsToken(Token::Type::REGULAR, "ทุก"), + EqualsToken(Token::Type::REGULAR, "วัน")))); } } -TEST_F(PlainTokenizerTest, 
ResetToTokenAfterSimple) { +TEST_F(PlainTokenizerTest, ResetToTokenStartingAfterSimple) { language_segmenter_factory::SegmenterOptions options(ULOC_US, jni_cache_.get()); ICING_ASSERT_OK_AND_ASSIGN( @@ -294,13 +305,13 @@ TEST_F(PlainTokenizerTest, ResetToTokenAfterSimple) { constexpr std::string_view kText = "f b"; auto iterator = plain_tokenizer->Tokenize(kText).ValueOrDie(); - EXPECT_TRUE(iterator->ResetToTokenAfter(0)); - EXPECT_THAT(iterator->GetToken(), EqualsToken(Token::REGULAR, "b")); + EXPECT_TRUE(iterator->ResetToTokenStartingAfter(0)); + EXPECT_THAT(iterator->GetToken(), EqualsToken(Token::Type::REGULAR, "b")); - EXPECT_FALSE(iterator->ResetToTokenAfter(2)); + EXPECT_FALSE(iterator->ResetToTokenStartingAfter(2)); } -TEST_F(PlainTokenizerTest, ResetToTokenBeforeSimple) { +TEST_F(PlainTokenizerTest, ResetToTokenEndingBeforeSimple) { language_segmenter_factory::SegmenterOptions options(ULOC_US, jni_cache_.get()); ICING_ASSERT_OK_AND_ASSIGN( @@ -314,13 +325,13 @@ TEST_F(PlainTokenizerTest, ResetToTokenBeforeSimple) { constexpr std::string_view kText = "f b"; auto iterator = plain_tokenizer->Tokenize(kText).ValueOrDie(); - EXPECT_TRUE(iterator->ResetToTokenBefore(2)); - EXPECT_THAT(iterator->GetToken(), EqualsToken(Token::REGULAR, "f")); + EXPECT_TRUE(iterator->ResetToTokenEndingBefore(2)); + EXPECT_THAT(iterator->GetToken(), EqualsToken(Token::Type::REGULAR, "f")); - EXPECT_FALSE(iterator->ResetToTokenBefore(0)); + EXPECT_FALSE(iterator->ResetToTokenEndingBefore(0)); } -TEST_F(PlainTokenizerTest, ResetToTokenAfter) { +TEST_F(PlainTokenizerTest, ResetToTokenStartingAfter) { language_segmenter_factory::SegmenterOptions options(ULOC_US, jni_cache_.get()); ICING_ASSERT_OK_AND_ASSIGN( @@ -332,11 +343,12 @@ TEST_F(PlainTokenizerTest, ResetToTokenAfter) { language_segmenter.get())); constexpr std::string_view kText = " foo . bar baz.. 
bat "; - EXPECT_THAT(plain_tokenizer->TokenizeAll(kText), - IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "foo"), - EqualsToken(Token::REGULAR, "bar"), - EqualsToken(Token::REGULAR, "baz"), - EqualsToken(Token::REGULAR, "bat")))); + EXPECT_THAT( + plain_tokenizer->TokenizeAll(kText), + IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "foo"), + EqualsToken(Token::Type::REGULAR, "bar"), + EqualsToken(Token::Type::REGULAR, "baz"), + EqualsToken(Token::Type::REGULAR, "bat")))); std::vector<std::string> expected_text = { "foo", // 0: " foo . bar" "bar", // 1: "foo . bar " @@ -359,19 +371,19 @@ TEST_F(PlainTokenizerTest, ResetToTokenAfter) { auto iterator = plain_tokenizer->Tokenize(kText).ValueOrDie(); EXPECT_TRUE(iterator->Advance()); - EXPECT_THAT(iterator->GetToken(), EqualsToken(Token::REGULAR, "foo")); + EXPECT_THAT(iterator->GetToken(), EqualsToken(Token::Type::REGULAR, "foo")); for (int i = 0; i < kText.length(); ++i) { if (i < expected_text.size()) { - EXPECT_TRUE(iterator->ResetToTokenAfter(i)); + EXPECT_TRUE(iterator->ResetToTokenStartingAfter(i)); EXPECT_THAT(iterator->GetToken(), - EqualsToken(Token::REGULAR, expected_text[i])); + EqualsToken(Token::Type::REGULAR, expected_text[i])); } else { - EXPECT_FALSE(iterator->ResetToTokenAfter(i)); + EXPECT_FALSE(iterator->ResetToTokenStartingAfter(i)); } } } -TEST_F(PlainTokenizerTest, ResetToTokenBefore) { +TEST_F(PlainTokenizerTest, ResetToTokenEndingBefore) { language_segmenter_factory::SegmenterOptions options(ULOC_US, jni_cache_.get()); ICING_ASSERT_OK_AND_ASSIGN( @@ -383,11 +395,12 @@ TEST_F(PlainTokenizerTest, ResetToTokenBefore) { language_segmenter.get())); constexpr std::string_view kText = " foo . bar baz.. 
bat "; - EXPECT_THAT(plain_tokenizer->TokenizeAll(kText), - IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "foo"), - EqualsToken(Token::REGULAR, "bar"), - EqualsToken(Token::REGULAR, "baz"), - EqualsToken(Token::REGULAR, "bat")))); + EXPECT_THAT( + plain_tokenizer->TokenizeAll(kText), + IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "foo"), + EqualsToken(Token::Type::REGULAR, "bar"), + EqualsToken(Token::Type::REGULAR, "baz"), + EqualsToken(Token::Type::REGULAR, "bat")))); std::vector<std::string> expected_text = { "bat", // 20: "baz.. bat " "baz", // 19: " baz.. bat" @@ -410,15 +423,16 @@ TEST_F(PlainTokenizerTest, ResetToTokenBefore) { auto iterator = plain_tokenizer->Tokenize(kText).ValueOrDie(); EXPECT_TRUE(iterator->Advance()); - EXPECT_THAT(iterator->GetToken(), EqualsToken(Token::REGULAR, "foo")); + EXPECT_THAT(iterator->GetToken(), EqualsToken(Token::Type::REGULAR, "foo")); for (int i = kText.length() - 1; i >= 0; --i) { int expected_index = kText.length() - 1 - i; if (expected_index < expected_text.size()) { - EXPECT_TRUE(iterator->ResetToTokenBefore(i)); - EXPECT_THAT(iterator->GetToken(), - EqualsToken(Token::REGULAR, expected_text[expected_index])); + EXPECT_TRUE(iterator->ResetToTokenEndingBefore(i)); + EXPECT_THAT( + iterator->GetToken(), + EqualsToken(Token::Type::REGULAR, expected_text[expected_index])); } else { - EXPECT_FALSE(iterator->ResetToTokenBefore(i)); + EXPECT_FALSE(iterator->ResetToTokenEndingBefore(i)); } } } diff --git a/icing/tokenization/raw-query-tokenizer.cc b/icing/tokenization/raw-query-tokenizer.cc index 2d461ee..8a27103 100644 --- a/icing/tokenization/raw-query-tokenizer.cc +++ b/icing/tokenization/raw-query-tokenizer.cc @@ -422,7 +422,7 @@ std::pair<TermType, std::string_view> GetTerm(std::string_view text, // and [(cat OR)]. This helps assert extra rule 3: "OR" is ignored if there's no // valid token on its right. 
void RemoveLastTokenIfOrOperator(std::vector<Token>* tokens) { - if (!tokens->empty() && tokens->back().type == Token::QUERY_OR) { + if (!tokens->empty() && tokens->back().type == Token::Type::QUERY_OR) { tokens->pop_back(); } } @@ -436,11 +436,11 @@ libtextclassifier3::Status OutputOrOperatorToken(std::vector<Token>* tokens) { } Token::Type last_token_type = tokens->back().type; switch (last_token_type) { - case Token::REGULAR: - case Token::QUERY_RIGHT_PARENTHESES: - tokens->emplace_back(Token::QUERY_OR); + case Token::Type::REGULAR: + case Token::Type::QUERY_RIGHT_PARENTHESES: + tokens->emplace_back(Token::Type::QUERY_OR); break; - case Token::QUERY_OR: + case Token::Type::QUERY_OR: // Ignores "OR" because there's already an "OR", e.g. "term1 OR OR term2" break; default: @@ -481,21 +481,21 @@ libtextclassifier3::Status OutputToken(State new_state, GetErrorMessage(ERROR_NON_ASCII_AS_PROPERTY_NAME)); } } - tokens->emplace_back(Token::QUERY_PROPERTY, current_term); + tokens->emplace_back(Token::Type::QUERY_PROPERTY, current_term); } else { - tokens->emplace_back(Token::REGULAR, current_term); + tokens->emplace_back(Token::Type::REGULAR, current_term); } break; case LEFT_PARENTHESES: - tokens->emplace_back(Token::QUERY_LEFT_PARENTHESES); + tokens->emplace_back(Token::Type::QUERY_LEFT_PARENTHESES); break; case RIGHT_PARENTHESES: // Ignores "OR" if it's followed by right parentheses. 
RemoveLastTokenIfOrOperator(tokens); - tokens->emplace_back(Token::QUERY_RIGHT_PARENTHESES); + tokens->emplace_back(Token::Type::QUERY_RIGHT_PARENTHESES); break; case EXCLUSION_OPERATOR: - tokens->emplace_back(Token::QUERY_EXCLUSION); + tokens->emplace_back(Token::Type::QUERY_EXCLUSION); break; case OR_OPERATOR: return OutputOrOperatorToken(tokens); @@ -648,7 +648,7 @@ class RawQueryTokenIterator : public Tokenizer::Iterator { Token GetToken() const override { if (current_ < 0 || current_ >= tokens_.size()) { - return Token(Token::INVALID); + return Token(Token::Type::INVALID); } return tokens_.at(current_); } diff --git a/icing/tokenization/raw-query-tokenizer_test.cc b/icing/tokenization/raw-query-tokenizer_test.cc index 500efa0..c6d981d 100644 --- a/icing/tokenization/raw-query-tokenizer_test.cc +++ b/icing/tokenization/raw-query-tokenizer_test.cc @@ -16,9 +16,9 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" -#include "icing/helpers/icu/icu-data-file-helper.h" #include "icing/portable/platform.h" #include "icing/testing/common-matchers.h" +#include "icing/testing/icu-data-file-helper.h" #include "icing/testing/test-data.h" #include "icing/tokenization/language-segmenter-factory.h" #include "icing/tokenization/tokenizer-factory.h" @@ -59,13 +59,15 @@ TEST_F(RawQueryTokenizerTest, Simple) { tokenizer_factory::CreateQueryTokenizer(tokenizer_factory::RAW_QUERY, language_segmenter.get())); - EXPECT_THAT(raw_query_tokenizer->TokenizeAll("Hello World!"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "Hello"), - EqualsToken(Token::REGULAR, "World")))); + EXPECT_THAT( + raw_query_tokenizer->TokenizeAll("Hello World!"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "Hello"), + EqualsToken(Token::Type::REGULAR, "World")))); - EXPECT_THAT(raw_query_tokenizer->TokenizeAll("hElLo WORLD"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "hElLo"), - EqualsToken(Token::REGULAR, "WORLD")))); + EXPECT_THAT( + 
raw_query_tokenizer->TokenizeAll("hElLo WORLD"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "hElLo"), + EqualsToken(Token::Type::REGULAR, "WORLD")))); } TEST_F(RawQueryTokenizerTest, Parentheses) { @@ -80,82 +82,82 @@ TEST_F(RawQueryTokenizerTest, Parentheses) { EXPECT_THAT(raw_query_tokenizer->TokenizeAll("()"), IsOkAndHolds(ElementsAre( - EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::QUERY_RIGHT_PARENTHESES, "")))); + EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, "")))); EXPECT_THAT(raw_query_tokenizer->TokenizeAll("( )"), IsOkAndHolds(ElementsAre( - EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::QUERY_RIGHT_PARENTHESES, "")))); + EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, "")))); EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(term1 term2)"), IsOkAndHolds(ElementsAre( - EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::REGULAR, "term1"), - EqualsToken(Token::REGULAR, "term2"), - EqualsToken(Token::QUERY_RIGHT_PARENTHESES, "")))); + EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::Type::REGULAR, "term1"), + EqualsToken(Token::Type::REGULAR, "term2"), + EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, "")))); EXPECT_THAT(raw_query_tokenizer->TokenizeAll("((term1 term2) (term3 term4))"), IsOkAndHolds(ElementsAre( - EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::REGULAR, "term1"), - EqualsToken(Token::REGULAR, "term2"), - EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""), - EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::REGULAR, "term3"), - EqualsToken(Token::REGULAR, "term4"), - EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""), - EqualsToken(Token::QUERY_RIGHT_PARENTHESES, "")))); + EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), + 
EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::Type::REGULAR, "term1"), + EqualsToken(Token::Type::REGULAR, "term2"), + EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""), + EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::Type::REGULAR, "term3"), + EqualsToken(Token::Type::REGULAR, "term4"), + EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""), + EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, "")))); EXPECT_THAT(raw_query_tokenizer->TokenizeAll("term1(term2)"), IsOkAndHolds(ElementsAre( - EqualsToken(Token::REGULAR, "term1"), - EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::REGULAR, "term2"), - EqualsToken(Token::QUERY_RIGHT_PARENTHESES, "")))); + EqualsToken(Token::Type::REGULAR, "term1"), + EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::Type::REGULAR, "term2"), + EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, "")))); - EXPECT_THAT( - raw_query_tokenizer->TokenizeAll("(term1)term2"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::REGULAR, "term1"), - EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""), - EqualsToken(Token::REGULAR, "term2")))); + EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(term1)term2"), + IsOkAndHolds(ElementsAre( + EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::Type::REGULAR, "term1"), + EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""), + EqualsToken(Token::Type::REGULAR, "term2")))); EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(term1)(term2)"), IsOkAndHolds(ElementsAre( - EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::REGULAR, "term1"), - EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""), - EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::REGULAR, "term2"), - EqualsToken(Token::QUERY_RIGHT_PARENTHESES, "")))); - - EXPECT_THAT( - raw_query_tokenizer->TokenizeAll("(term1)-term2"), - 
IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::REGULAR, "term1"), - EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""), - EqualsToken(Token::QUERY_EXCLUSION, ""), - EqualsToken(Token::REGULAR, "term2")))); + EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::Type::REGULAR, "term1"), + EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""), + EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::Type::REGULAR, "term2"), + EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, "")))); + + EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(term1)-term2"), + IsOkAndHolds(ElementsAre( + EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::Type::REGULAR, "term1"), + EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""), + EqualsToken(Token::Type::QUERY_EXCLUSION, ""), + EqualsToken(Token::Type::REGULAR, "term2")))); - EXPECT_THAT( - raw_query_tokenizer->TokenizeAll("(term1)OR term2"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::REGULAR, "term1"), - EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""), - EqualsToken(Token::QUERY_OR, ""), - EqualsToken(Token::REGULAR, "term2")))); + EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(term1)OR term2"), + IsOkAndHolds(ElementsAre( + EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::Type::REGULAR, "term1"), + EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""), + EqualsToken(Token::Type::QUERY_OR, ""), + EqualsToken(Token::Type::REGULAR, "term2")))); EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(term1)OR(term2)"), IsOkAndHolds(ElementsAre( - EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::REGULAR, "term1"), - EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""), - EqualsToken(Token::QUERY_OR, ""), - EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::REGULAR, "term2"), - EqualsToken(Token::QUERY_RIGHT_PARENTHESES, "")))); + 
EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::Type::REGULAR, "term1"), + EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""), + EqualsToken(Token::Type::QUERY_OR, ""), + EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::Type::REGULAR, "term2"), + EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, "")))); EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(term1):term2"), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT, @@ -180,44 +182,49 @@ TEST_F(RawQueryTokenizerTest, Exclustion) { tokenizer_factory::CreateQueryTokenizer(tokenizer_factory::RAW_QUERY, language_segmenter.get())); - EXPECT_THAT(raw_query_tokenizer->TokenizeAll("-term1"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_EXCLUSION, ""), - EqualsToken(Token::REGULAR, "term1")))); + EXPECT_THAT( + raw_query_tokenizer->TokenizeAll("-term1"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::QUERY_EXCLUSION, ""), + EqualsToken(Token::Type::REGULAR, "term1")))); EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(-term1)"), IsOkAndHolds(ElementsAre( - EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::QUERY_EXCLUSION, ""), - EqualsToken(Token::REGULAR, "term1"), - EqualsToken(Token::QUERY_RIGHT_PARENTHESES, "")))); + EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::Type::QUERY_EXCLUSION, ""), + EqualsToken(Token::Type::REGULAR, "term1"), + EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, "")))); // Exclusion operator is ignored - EXPECT_THAT(raw_query_tokenizer->TokenizeAll("- term1"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1")))); + EXPECT_THAT( + raw_query_tokenizer->TokenizeAll("- term1"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "term1")))); // Exclusion operator is ignored - EXPECT_THAT(raw_query_tokenizer->TokenizeAll("term1- term2"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1"), - EqualsToken(Token::REGULAR, "term2")))); + EXPECT_THAT( + 
raw_query_tokenizer->TokenizeAll("term1- term2"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "term1"), + EqualsToken(Token::Type::REGULAR, "term2")))); // Exclusion operator is ignored EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(term1 -)"), IsOkAndHolds(ElementsAre( - EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::REGULAR, "term1"), - EqualsToken(Token::QUERY_RIGHT_PARENTHESES, "")))); + EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::Type::REGULAR, "term1"), + EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, "")))); // First exclusion operator is ignored - EXPECT_THAT(raw_query_tokenizer->TokenizeAll("--term1"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_EXCLUSION, ""), - EqualsToken(Token::REGULAR, "term1")))); + EXPECT_THAT( + raw_query_tokenizer->TokenizeAll("--term1"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::QUERY_EXCLUSION, ""), + EqualsToken(Token::Type::REGULAR, "term1")))); // First "-" is exclusion operator, second is not and will be discarded. // In other words, exclusion only applies to the term right after it. 
- EXPECT_THAT(raw_query_tokenizer->TokenizeAll("-term1-term2"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_EXCLUSION, ""), - EqualsToken(Token::REGULAR, "term1"), - EqualsToken(Token::REGULAR, "term2")))); + EXPECT_THAT( + raw_query_tokenizer->TokenizeAll("-term1-term2"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::QUERY_EXCLUSION, ""), + EqualsToken(Token::Type::REGULAR, "term1"), + EqualsToken(Token::Type::REGULAR, "term2")))); EXPECT_THAT(raw_query_tokenizer->TokenizeAll("-(term1)"), StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT, @@ -249,73 +256,75 @@ TEST_F(RawQueryTokenizerTest, PropertyRestriction) { tokenizer_factory::CreateQueryTokenizer(tokenizer_factory::RAW_QUERY, language_segmenter.get())); - EXPECT_THAT( - raw_query_tokenizer->TokenizeAll("property1:term1"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_PROPERTY, "property1"), - EqualsToken(Token::REGULAR, "term1")))); + EXPECT_THAT(raw_query_tokenizer->TokenizeAll("property1:term1"), + IsOkAndHolds(ElementsAre( + EqualsToken(Token::Type::QUERY_PROPERTY, "property1"), + EqualsToken(Token::Type::REGULAR, "term1")))); EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(property1:term1)"), IsOkAndHolds(ElementsAre( - EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::QUERY_PROPERTY, "property1"), - EqualsToken(Token::REGULAR, "term1"), - EqualsToken(Token::QUERY_RIGHT_PARENTHESES, "")))); + EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::Type::QUERY_PROPERTY, "property1"), + EqualsToken(Token::Type::REGULAR, "term1"), + EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, "")))); // Colon is ignored - EXPECT_THAT(raw_query_tokenizer->TokenizeAll(":term1"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1")))); + EXPECT_THAT( + raw_query_tokenizer->TokenizeAll(":term1"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "term1")))); // Colon is ignored EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(:term1)"), 
IsOkAndHolds(ElementsAre( - EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::REGULAR, "term1"), - EqualsToken(Token::QUERY_RIGHT_PARENTHESES, "")))); + EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::Type::REGULAR, "term1"), + EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, "")))); // Colon is ignored - EXPECT_THAT(raw_query_tokenizer->TokenizeAll("term1:"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1")))); + EXPECT_THAT( + raw_query_tokenizer->TokenizeAll("term1:"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "term1")))); // property name can be a path EXPECT_THAT(raw_query_tokenizer->TokenizeAll("email.title:hello"), - IsOkAndHolds( - ElementsAre(EqualsToken(Token::QUERY_PROPERTY, "email.title"), - EqualsToken(Token::REGULAR, "hello")))); + IsOkAndHolds(ElementsAre( + EqualsToken(Token::Type::QUERY_PROPERTY, "email.title"), + EqualsToken(Token::Type::REGULAR, "hello")))); // The first colon ":" triggers property restriction, the second colon is used // as a word connector per ICU's rule // (https://unicode.org/reports/tr29/#Word_Boundaries). - EXPECT_THAT( - raw_query_tokenizer->TokenizeAll("property:foo:bar"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_PROPERTY, "property"), - EqualsToken(Token::REGULAR, "foo:bar")))); + EXPECT_THAT(raw_query_tokenizer->TokenizeAll("property:foo:bar"), + IsOkAndHolds(ElementsAre( + EqualsToken(Token::Type::QUERY_PROPERTY, "property"), + EqualsToken(Token::Type::REGULAR, "foo:bar")))); // Property restriction only applies to the term right after it. // Note: "term1:term2" is not a term but 2 terms because word connectors // don't apply to numbers and alphabets. 
- EXPECT_THAT( - raw_query_tokenizer->TokenizeAll("property1:term1:term2"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_PROPERTY, "property1"), - EqualsToken(Token::REGULAR, "term1"), - EqualsToken(Token::REGULAR, "term2")))); + EXPECT_THAT(raw_query_tokenizer->TokenizeAll("property1:term1:term2"), + IsOkAndHolds(ElementsAre( + EqualsToken(Token::Type::QUERY_PROPERTY, "property1"), + EqualsToken(Token::Type::REGULAR, "term1"), + EqualsToken(Token::Type::REGULAR, "term2")))); - EXPECT_THAT( - raw_query_tokenizer->TokenizeAll("property1:今天:天气"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_PROPERTY, "property1"), - EqualsToken(Token::REGULAR, "今天"), - EqualsToken(Token::REGULAR, "天气")))); + EXPECT_THAT(raw_query_tokenizer->TokenizeAll("property1:今天:天气"), + IsOkAndHolds(ElementsAre( + EqualsToken(Token::Type::QUERY_PROPERTY, "property1"), + EqualsToken(Token::Type::REGULAR, "今天"), + EqualsToken(Token::Type::REGULAR, "天气")))); - EXPECT_THAT( - raw_query_tokenizer->TokenizeAll("property1:term1-"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_PROPERTY, "property1"), - EqualsToken(Token::REGULAR, "term1")))); + EXPECT_THAT(raw_query_tokenizer->TokenizeAll("property1:term1-"), + IsOkAndHolds(ElementsAre( + EqualsToken(Token::Type::QUERY_PROPERTY, "property1"), + EqualsToken(Token::Type::REGULAR, "term1")))); // Multiple continuous colons will still be recognized as a property // restriction operator - EXPECT_THAT( - raw_query_tokenizer->TokenizeAll("property1::term1"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_PROPERTY, "property1"), - EqualsToken(Token::REGULAR, "term1")))); + EXPECT_THAT(raw_query_tokenizer->TokenizeAll("property1::term1"), + IsOkAndHolds(ElementsAre( + EqualsToken(Token::Type::QUERY_PROPERTY, "property1"), + EqualsToken(Token::Type::REGULAR, "term1")))); EXPECT_THAT( raw_query_tokenizer->TokenizeAll("property1:(term1)"), @@ -345,105 +354,109 @@ TEST_F(RawQueryTokenizerTest, OR) { 
tokenizer_factory::CreateQueryTokenizer(tokenizer_factory::RAW_QUERY, language_segmenter.get())); - EXPECT_THAT(raw_query_tokenizer->TokenizeAll("term1 OR term2"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1"), - EqualsToken(Token::QUERY_OR, ""), - EqualsToken(Token::REGULAR, "term2")))); + EXPECT_THAT( + raw_query_tokenizer->TokenizeAll("term1 OR term2"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "term1"), + EqualsToken(Token::Type::QUERY_OR, ""), + EqualsToken(Token::Type::REGULAR, "term2")))); // Two continuous "OR"s are treated as one - EXPECT_THAT(raw_query_tokenizer->TokenizeAll("term1 OR OR term2"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1"), - EqualsToken(Token::QUERY_OR, ""), - EqualsToken(Token::REGULAR, "term2")))); - EXPECT_THAT( - raw_query_tokenizer->TokenizeAll("(term1) OR term2"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::REGULAR, "term1"), - EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""), - EqualsToken(Token::QUERY_OR, ""), - EqualsToken(Token::REGULAR, "term2")))); + raw_query_tokenizer->TokenizeAll("term1 OR OR term2"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "term1"), + EqualsToken(Token::Type::QUERY_OR, ""), + EqualsToken(Token::Type::REGULAR, "term2")))); + + EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(term1) OR term2"), + IsOkAndHolds(ElementsAre( + EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::Type::REGULAR, "term1"), + EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""), + EqualsToken(Token::Type::QUERY_OR, ""), + EqualsToken(Token::Type::REGULAR, "term2")))); EXPECT_THAT(raw_query_tokenizer->TokenizeAll("term1 OR (term2)"), IsOkAndHolds(ElementsAre( - EqualsToken(Token::REGULAR, "term1"), - EqualsToken(Token::QUERY_OR, ""), - EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::REGULAR, "term2"), - EqualsToken(Token::QUERY_RIGHT_PARENTHESES, "")))); + 
EqualsToken(Token::Type::REGULAR, "term1"), + EqualsToken(Token::Type::QUERY_OR, ""), + EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::Type::REGULAR, "term2"), + EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, "")))); EXPECT_THAT(raw_query_tokenizer->TokenizeAll("((term1) OR (term2))"), IsOkAndHolds(ElementsAre( - EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::REGULAR, "term1"), - EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""), - EqualsToken(Token::QUERY_OR, ""), - EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::REGULAR, "term2"), - EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""), - EqualsToken(Token::QUERY_RIGHT_PARENTHESES, "")))); + EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::Type::REGULAR, "term1"), + EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""), + EqualsToken(Token::Type::QUERY_OR, ""), + EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::Type::REGULAR, "term2"), + EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""), + EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, "")))); // Only "OR" (all in uppercase) is the operator EXPECT_THAT( raw_query_tokenizer->TokenizeAll("term1 or term2 Or term3 oR term4"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1"), - EqualsToken(Token::REGULAR, "or"), - EqualsToken(Token::REGULAR, "term2"), - EqualsToken(Token::REGULAR, "Or"), - EqualsToken(Token::REGULAR, "term3"), - EqualsToken(Token::REGULAR, "oR"), - EqualsToken(Token::REGULAR, "term4")))); + IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "term1"), + EqualsToken(Token::Type::REGULAR, "or"), + EqualsToken(Token::Type::REGULAR, "term2"), + EqualsToken(Token::Type::REGULAR, "Or"), + EqualsToken(Token::Type::REGULAR, "term3"), + EqualsToken(Token::Type::REGULAR, "oR"), + EqualsToken(Token::Type::REGULAR, 
"term4")))); // "OR" is ignored - EXPECT_THAT(raw_query_tokenizer->TokenizeAll("OR term1"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1")))); + EXPECT_THAT( + raw_query_tokenizer->TokenizeAll("OR term1"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "term1")))); // "OR" is ignored - EXPECT_THAT(raw_query_tokenizer->TokenizeAll("term1 OR"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1")))); + EXPECT_THAT( + raw_query_tokenizer->TokenizeAll("term1 OR"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "term1")))); // "OR" is ignored EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(OR term1)"), IsOkAndHolds(ElementsAre( - EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::REGULAR, "term1"), - EqualsToken(Token::QUERY_RIGHT_PARENTHESES, "")))); + EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::Type::REGULAR, "term1"), + EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, "")))); // "OR" is ignored EXPECT_THAT(raw_query_tokenizer->TokenizeAll("( OR term1)"), IsOkAndHolds(ElementsAre( - EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::REGULAR, "term1"), - EqualsToken(Token::QUERY_RIGHT_PARENTHESES, "")))); + EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::Type::REGULAR, "term1"), + EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, "")))); // "OR" is ignored EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(term1 OR)"), IsOkAndHolds(ElementsAre( - EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::REGULAR, "term1"), - EqualsToken(Token::QUERY_RIGHT_PARENTHESES, "")))); + EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::Type::REGULAR, "term1"), + EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, "")))); // "OR" is ignored EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(term1 OR )"), IsOkAndHolds(ElementsAre( - EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), - 
EqualsToken(Token::REGULAR, "term1"), - EqualsToken(Token::QUERY_RIGHT_PARENTHESES, "")))); + EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::Type::REGULAR, "term1"), + EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, "")))); // "OR" is ignored EXPECT_THAT(raw_query_tokenizer->TokenizeAll("( OR )"), IsOkAndHolds(ElementsAre( - EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::QUERY_RIGHT_PARENTHESES, "")))); + EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, "")))); EXPECT_THAT(raw_query_tokenizer->TokenizeAll("term1 OR(term2)"), IsOkAndHolds(ElementsAre( - EqualsToken(Token::REGULAR, "term1"), - EqualsToken(Token::QUERY_OR, ""), - EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::REGULAR, "term2"), - EqualsToken(Token::QUERY_RIGHT_PARENTHESES, "")))); + EqualsToken(Token::Type::REGULAR, "term1"), + EqualsToken(Token::Type::QUERY_OR, ""), + EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::Type::REGULAR, "term2"), + EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, "")))); EXPECT_THAT( raw_query_tokenizer->TokenizeAll("term1 OR-term2"), @@ -472,31 +485,31 @@ TEST_F(RawQueryTokenizerTest, CJKT) { if (IsCfStringTokenization()) { EXPECT_THAT( raw_query_tokenizer->TokenizeAll("-今天天气很好"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_EXCLUSION, ""), - EqualsToken(Token::REGULAR, "今天"), - EqualsToken(Token::REGULAR, "天气"), - EqualsToken(Token::REGULAR, "很"), - EqualsToken(Token::REGULAR, "好")))); + IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::QUERY_EXCLUSION, ""), + EqualsToken(Token::Type::REGULAR, "今天"), + EqualsToken(Token::Type::REGULAR, "天气"), + EqualsToken(Token::Type::REGULAR, "很"), + EqualsToken(Token::Type::REGULAR, "好")))); } else { EXPECT_THAT( raw_query_tokenizer->TokenizeAll("-今天天气很好"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_EXCLUSION, ""), - EqualsToken(Token::REGULAR, "今天"), - 
EqualsToken(Token::REGULAR, "天气"), - EqualsToken(Token::REGULAR, "很好")))); + IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::QUERY_EXCLUSION, ""), + EqualsToken(Token::Type::REGULAR, "今天"), + EqualsToken(Token::Type::REGULAR, "天气"), + EqualsToken(Token::Type::REGULAR, "很好")))); } if (IsCfStringTokenization()) { EXPECT_THAT(raw_query_tokenizer->TokenizeAll("property1:你好"), - IsOkAndHolds( - ElementsAre(EqualsToken(Token::QUERY_PROPERTY, "property1"), - EqualsToken(Token::REGULAR, "你"), - EqualsToken(Token::REGULAR, "好")))); + IsOkAndHolds(ElementsAre( + EqualsToken(Token::Type::QUERY_PROPERTY, "property1"), + EqualsToken(Token::Type::REGULAR, "你"), + EqualsToken(Token::Type::REGULAR, "好")))); } else { EXPECT_THAT(raw_query_tokenizer->TokenizeAll("property1:你好"), - IsOkAndHolds( - ElementsAre(EqualsToken(Token::QUERY_PROPERTY, "property1"), - EqualsToken(Token::REGULAR, "你好")))); + IsOkAndHolds(ElementsAre( + EqualsToken(Token::Type::QUERY_PROPERTY, "property1"), + EqualsToken(Token::Type::REGULAR, "你好")))); } EXPECT_THAT( @@ -504,10 +517,11 @@ TEST_F(RawQueryTokenizerTest, CJKT) { StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT, HasSubstr("Characters in property name must all be ASCII"))); - EXPECT_THAT(raw_query_tokenizer->TokenizeAll("cat OR ねこ"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "cat"), - EqualsToken(Token::QUERY_OR, ""), - EqualsToken(Token::REGULAR, "ねこ")))); + EXPECT_THAT( + raw_query_tokenizer->TokenizeAll("cat OR ねこ"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "cat"), + EqualsToken(Token::Type::QUERY_OR, ""), + EqualsToken(Token::Type::REGULAR, "ねこ")))); EXPECT_THAT( raw_query_tokenizer->TokenizeAll("cat ORねこ"), @@ -543,40 +557,45 @@ TEST_F(RawQueryTokenizerTest, OtherChars) { language_segmenter.get())); // Comma is ignored - EXPECT_THAT(raw_query_tokenizer->TokenizeAll(",term1, ,"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1")))); + EXPECT_THAT( + 
raw_query_tokenizer->TokenizeAll(",term1, ,"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "term1")))); EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(,term1),"), IsOkAndHolds(ElementsAre( - EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::REGULAR, "term1"), - EqualsToken(Token::QUERY_RIGHT_PARENTHESES, "")))); + EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::Type::REGULAR, "term1"), + EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, "")))); // Exclusion operator and comma are ignored - EXPECT_THAT(raw_query_tokenizer->TokenizeAll("-,term1"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1")))); + EXPECT_THAT( + raw_query_tokenizer->TokenizeAll("-,term1"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "term1")))); - EXPECT_THAT(raw_query_tokenizer->TokenizeAll("-term1,"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_EXCLUSION, ""), - EqualsToken(Token::REGULAR, "term1")))); + EXPECT_THAT( + raw_query_tokenizer->TokenizeAll("-term1,"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::QUERY_EXCLUSION, ""), + EqualsToken(Token::Type::REGULAR, "term1")))); // Colon and comma are ignored - EXPECT_THAT(raw_query_tokenizer->TokenizeAll("property1:,term1"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "property1"), - EqualsToken(Token::REGULAR, "term1")))); - EXPECT_THAT( - raw_query_tokenizer->TokenizeAll("property1:term1,term2"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_PROPERTY, "property1"), - EqualsToken(Token::REGULAR, "term1"), - EqualsToken(Token::REGULAR, "term2")))); + raw_query_tokenizer->TokenizeAll("property1:,term1"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "property1"), + EqualsToken(Token::Type::REGULAR, "term1")))); + + EXPECT_THAT(raw_query_tokenizer->TokenizeAll("property1:term1,term2"), + IsOkAndHolds(ElementsAre( + EqualsToken(Token::Type::QUERY_PROPERTY, "property1"), + EqualsToken(Token::Type::REGULAR, 
"term1"), + EqualsToken(Token::Type::REGULAR, "term2")))); // This is a special case for OR, unknown chars are treated the same as // whitespaces before and after OR. - EXPECT_THAT(raw_query_tokenizer->TokenizeAll("term1,OR,term2"), - IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1"), - EqualsToken(Token::QUERY_OR, ""), - EqualsToken(Token::REGULAR, "term2")))); + EXPECT_THAT( + raw_query_tokenizer->TokenizeAll("term1,OR,term2"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::REGULAR, "term1"), + EqualsToken(Token::Type::QUERY_OR, ""), + EqualsToken(Token::Type::REGULAR, "term2")))); } TEST_F(RawQueryTokenizerTest, Mix) { @@ -593,37 +612,38 @@ TEST_F(RawQueryTokenizerTest, Mix) { EXPECT_THAT(raw_query_tokenizer->TokenizeAll( "こんにちはgood afternoon, title:今天 OR (ในวันนี้ -B12)"), IsOkAndHolds(ElementsAre( - EqualsToken(Token::REGULAR, "こんにちは"), - EqualsToken(Token::REGULAR, "good"), - EqualsToken(Token::REGULAR, "afternoon"), - EqualsToken(Token::QUERY_PROPERTY, "title"), - EqualsToken(Token::REGULAR, "今天"), - EqualsToken(Token::QUERY_OR, ""), - EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::REGULAR, "ใน"), - EqualsToken(Token::REGULAR, "วันนี้"), - EqualsToken(Token::QUERY_EXCLUSION, ""), - EqualsToken(Token::REGULAR, "B12"), - EqualsToken(Token::QUERY_RIGHT_PARENTHESES, "")))); + EqualsToken(Token::Type::REGULAR, "こんにちは"), + EqualsToken(Token::Type::REGULAR, "good"), + EqualsToken(Token::Type::REGULAR, "afternoon"), + EqualsToken(Token::Type::QUERY_PROPERTY, "title"), + EqualsToken(Token::Type::REGULAR, "今天"), + EqualsToken(Token::Type::QUERY_OR, ""), + EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::Type::REGULAR, "ใน"), + EqualsToken(Token::Type::REGULAR, "วันนี้"), + EqualsToken(Token::Type::QUERY_EXCLUSION, ""), + EqualsToken(Token::Type::REGULAR, "B12"), + EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, "")))); } else { ICING_ASSERT_OK_AND_ASSIGN( std::vector<Token> tokens, 
raw_query_tokenizer->TokenizeAll( "こんにちはgood afternoon, title:今天 OR (ในวันนี้ -B12)")); - EXPECT_THAT(tokens, - ElementsAre(EqualsToken(Token::REGULAR, "こんにちは"), - EqualsToken(Token::REGULAR, "good"), - EqualsToken(Token::REGULAR, "afternoon"), - EqualsToken(Token::QUERY_PROPERTY, "title"), - EqualsToken(Token::REGULAR, "今天"), - EqualsToken(Token::QUERY_OR, ""), - EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""), - EqualsToken(Token::REGULAR, "ใน"), - EqualsToken(Token::REGULAR, "วัน"), - EqualsToken(Token::REGULAR, "นี้"), - EqualsToken(Token::QUERY_EXCLUSION, ""), - EqualsToken(Token::REGULAR, "B12"), - EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""))); + EXPECT_THAT( + tokens, + ElementsAre(EqualsToken(Token::Type::REGULAR, "こんにちは"), + EqualsToken(Token::Type::REGULAR, "good"), + EqualsToken(Token::Type::REGULAR, "afternoon"), + EqualsToken(Token::Type::QUERY_PROPERTY, "title"), + EqualsToken(Token::Type::REGULAR, "今天"), + EqualsToken(Token::Type::QUERY_OR, ""), + EqualsToken(Token::Type::QUERY_LEFT_PARENTHESES, ""), + EqualsToken(Token::Type::REGULAR, "ใน"), + EqualsToken(Token::Type::REGULAR, "วัน"), + EqualsToken(Token::Type::REGULAR, "นี้"), + EqualsToken(Token::Type::QUERY_EXCLUSION, ""), + EqualsToken(Token::Type::REGULAR, "B12"), + EqualsToken(Token::Type::QUERY_RIGHT_PARENTHESES, ""))); } } diff --git a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.cc b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.cc index b936f2b..cb474c6 100644 --- a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.cc +++ b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter.cc @@ -43,45 +43,46 @@ class ReverseJniLanguageSegmenterIterator : public LanguageSegmenter::Iterator { // Advances to the next term. Returns false if it has reached the end. 
bool Advance() override { - // Prerequisite check - if (IsDone()) { - return false; - } + while (true) { + // Prerequisite check + if (IsDone()) { + return false; + } - if (term_end_exclusive_.utf16_index() == 0) { - int first = break_iterator_->First(); - if (!term_start_.MoveToUtf16(first)) { - // First is guaranteed to succeed and return a position within bonds. So - // the only possible failure could be an invalid sequence. Mark as DONE - // and return. + if (term_end_exclusive_.utf16_index() == 0) { + int first = break_iterator_->First(); + if (!term_start_.MoveToUtf16(first)) { + // First is guaranteed to succeed and return a position within bonds. + // So the only possible failure could be an invalid sequence. Mark as + // DONE and return. + MarkAsDone(); + return false; + } + } else { + term_start_ = term_end_exclusive_; + } + + int next_utf16_index_exclusive = break_iterator_->Next(); + // Reached the end + if (next_utf16_index_exclusive == ReverseJniBreakIterator::kDone) { + MarkAsDone(); + return false; + } + if (!term_end_exclusive_.MoveToUtf16(next_utf16_index_exclusive)) { + // next_utf16_index_exclusive is guaranteed to be within bonds thanks to + // the check for kDone above. So the only possible failure could be an + // invalid sequence. Mark as DONE and return. MarkAsDone(); return false; } - } else { - term_start_ = term_end_exclusive_; - } - - int next_utf16_index_exclusive = break_iterator_->Next(); - // Reached the end - if (next_utf16_index_exclusive == ReverseJniBreakIterator::kDone) { - MarkAsDone(); - return false; - } - if (!term_end_exclusive_.MoveToUtf16(next_utf16_index_exclusive)) { - // next_utf16_index_exclusive is guaranteed to be within bonds thanks to - // the check for kDone above. So the only possible failure could be an - // invalid sequence. Mark as DONE and return. - MarkAsDone(); - return false; - } - // Check if the current term is valid. We consider any term valid if its - // first character is valid. 
If it's not valid, then we need to advance to - // the next term. - if (IsValidTerm()) { - return true; + // Check if the current term is valid. We consider any term valid if its + // first character is valid. If it's not valid, then we need to advance to + // the next term. + if (IsValidTerm()) { + return true; + } } - return Advance(); } // Returns the current term. It can be called only when Advance() returns diff --git a/icing/tokenization/token.h b/icing/tokenization/token.h index dda9efc..0c268be 100644 --- a/icing/tokenization/token.h +++ b/icing/tokenization/token.h @@ -21,11 +21,14 @@ namespace icing { namespace lib { struct Token { - enum Type { + enum class Type { // Common types REGULAR, // A token without special meanings, the value of it will be // indexed or searched directly + VERBATIM, // A token that should be indexed and searched without any + // modifications to the raw text + // Types only used in raw query QUERY_OR, // Indicates OR logic between its left and right tokens QUERY_EXCLUSION, // Indicates exclusion operation on next token diff --git a/icing/tokenization/tokenizer-factory.cc b/icing/tokenization/tokenizer-factory.cc index 9b59acf..b2508f7 100644 --- a/icing/tokenization/tokenizer-factory.cc +++ b/icing/tokenization/tokenizer-factory.cc @@ -23,6 +23,7 @@ #include "icing/tokenization/plain-tokenizer.h" #include "icing/tokenization/raw-query-tokenizer.h" #include "icing/tokenization/tokenizer.h" +#include "icing/tokenization/verbatim-tokenizer.h" #include "icing/util/status-macros.h" namespace icing { @@ -38,6 +39,8 @@ CreateIndexingTokenizer(StringIndexingConfig::TokenizerType::Code type, switch (type) { case StringIndexingConfig::TokenizerType::PLAIN: return std::make_unique<PlainTokenizer>(lang_segmenter); + case StringIndexingConfig::TokenizerType::VERBATIM: + return std::make_unique<VerbatimTokenizer>(); case StringIndexingConfig::TokenizerType::NONE: [[fallthrough]]; default: diff --git a/icing/tokenization/tokenizer.h 
b/icing/tokenization/tokenizer.h index b4f0c6e..2bc18cc 100644 --- a/icing/tokenization/tokenizer.h +++ b/icing/tokenization/tokenizer.h @@ -43,6 +43,7 @@ class Tokenizer { enum Type { // Index tokenizers PLAIN, // Used to tokenize plain text input + VERBATIM, // Used to tokenize the input text in verbatim // Query tokenizers RAW_QUERY, // Used to tokenize raw queries @@ -83,22 +84,26 @@ class Tokenizer { // offset. // Ex. // auto iterator = tokenizer.Tokenize("foo bar baz").ValueOrDie(); - // iterator.ResetToTokenAfter(4); + // iterator.ResetToTokenStartingAfter(4); // // The first full token starting after position 4 (the 'b' in "bar") is // // "baz". // PrintToken(iterator.GetToken()); // prints "baz" - virtual bool ResetToTokenAfter(int32_t offset) { return false; } + virtual bool ResetToTokenStartingAfter(int32_t utf32_offset) { + return false; + } // Sets the tokenizer to point at the first token that *ends* *before* // offset. Returns false if there are no valid tokens ending // before offset. // Ex. // auto iterator = tokenizer.Tokenize("foo bar baz").ValueOrDie(); - // iterator.ResetToTokenBefore(4); + // iterator.ResetToTokenEndingBefore(4); // // The first full token ending before position 4 (the 'b' in "bar") is // // "foo". // PrintToken(iterator.GetToken()); // prints "foo" - virtual bool ResetToTokenBefore(int32_t offset) { return false; } + virtual bool ResetToTokenEndingBefore(int32_t utf32_offset) { + return false; + } virtual bool ResetToStart() { return false; } }; diff --git a/icing/tokenization/verbatim-tokenizer.cc b/icing/tokenization/verbatim-tokenizer.cc new file mode 100644 index 0000000..0d3a320 --- /dev/null +++ b/icing/tokenization/verbatim-tokenizer.cc @@ -0,0 +1,139 @@ +// Copyright (C) 2021 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "icing/tokenization/verbatim-tokenizer.h" + +#include "icing/text_classifier/lib3/utils/base/statusor.h" +#include "icing/util/character-iterator.h" +#include "icing/util/status-macros.h" + +namespace icing { +namespace lib { + +class VerbatimTokenIterator : public Tokenizer::Iterator { + public: + explicit VerbatimTokenIterator(std::string_view text) + : term_(std::move(text)) {} + + bool Advance() override { + if (term_.empty() || has_advanced_to_end_) { + return false; + } + + has_advanced_to_end_ = true; + return true; + } + + Token GetToken() const override { + if (term_.empty() || !has_advanced_to_end_) { + return Token(Token::Type::INVALID); + } + + return Token(Token::Type::VERBATIM, term_); + } + + libtextclassifier3::StatusOr<CharacterIterator> CalculateTokenStart() + override { + if (term_.empty()) { + return absl_ports::AbortedError( + "Could not calculate start of empty token."); + } + + return CharacterIterator(term_, 0, 0, 0); + } + + libtextclassifier3::StatusOr<CharacterIterator> CalculateTokenEndExclusive() + override { + if (term_.empty()) { + return absl_ports::AbortedError( + "Could not calculate end of empty token."); + } + + if (token_end_iterator_.utf8_index() >= 0) { + return token_end_iterator_; + } + + bool moved_to_token_end = token_end_iterator_.MoveToUtf8(term_.length()); + if (moved_to_token_end) { + return token_end_iterator_; + } else { + return absl_ports::AbortedError("Could not move to end of token."); + } + } + + bool ResetToTokenStartingAfter(int32_t utf32_offset) override { + // We can only reset 
to the sole verbatim token, so we must have a negative + // offset for it to be considered the token after. + if (utf32_offset < 0) { + // Because we are now at the sole verbatim token, we should ensure we can + // no longer advance past it. + has_advanced_to_end_ = true; + return true; + } + return false; + } + + bool ResetToTokenEndingBefore(int32_t utf32_offset) override { + // We can only reset to the sole verbatim token, so we must have an offset + // after the end of the token for the reset to be valid. This means the + // provided utf-32 offset must be equal to or greater than the utf-32 length + // of the token. + if (token_end_iterator_.utf8_index() < 0) { + // Moves one index past the end of the term. + bool moved_to_token_end = token_end_iterator_.MoveToUtf8(term_.length()); + if (!moved_to_token_end) { + // We're unable to reset as we failed to move to the end of the term. + return false; + } + } + + if (utf32_offset >= token_end_iterator_.utf32_index()) { + // Because we are now at the sole verbatim token, we should ensure we can + // no longer advance past it. 
+ has_advanced_to_end_ = true; + return true; + } + return false; + } + + bool ResetToStart() override { + has_advanced_to_end_ = true; + return true; + } + + private: + std::string_view term_; + CharacterIterator token_end_iterator_ = CharacterIterator(term_, -1, -1, -1); + // Used to determine whether we have advanced on the sole verbatim token + bool has_advanced_to_end_ = false; +}; + +libtextclassifier3::StatusOr<std::unique_ptr<Tokenizer::Iterator>> +VerbatimTokenizer::Tokenize(std::string_view text) const { + return std::make_unique<VerbatimTokenIterator>(text); +} + +libtextclassifier3::StatusOr<std::vector<Token>> VerbatimTokenizer::TokenizeAll( + std::string_view text) const { + ICING_ASSIGN_OR_RETURN(std::unique_ptr<Tokenizer::Iterator> iterator, + Tokenize(text)); + std::vector<Token> tokens; + while (iterator->Advance()) { + tokens.push_back(iterator->GetToken()); + } + return tokens; +} + +} // namespace lib +} // namespace icing diff --git a/icing/tokenization/verbatim-tokenizer.h b/icing/tokenization/verbatim-tokenizer.h new file mode 100644 index 0000000..8404cf1 --- /dev/null +++ b/icing/tokenization/verbatim-tokenizer.h @@ -0,0 +1,41 @@ +// Copyright (C) 2021 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef ICING_TOKENIZATION_VERBATIM_H_ +#define ICING_TOKENIZATION_VERBATIM_H_ + +#include <memory> +#include <string_view> +#include <vector> + +#include "icing/text_classifier/lib3/utils/base/statusor.h" +#include "icing/tokenization/tokenizer.h" + +namespace icing { +namespace lib { + +// Provides verbatim tokenization on input text +class VerbatimTokenizer : public Tokenizer { + public: + libtextclassifier3::StatusOr<std::unique_ptr<Tokenizer::Iterator>> Tokenize( + std::string_view text) const override; + + libtextclassifier3::StatusOr<std::vector<Token>> TokenizeAll( + std::string_view text) const override; +}; + +} // namespace lib +} // namespace icing + +#endif // ICING_TOKENIZATION_VERBATIM_H_ diff --git a/icing/tokenization/verbatim-tokenizer_test.cc b/icing/tokenization/verbatim-tokenizer_test.cc new file mode 100644 index 0000000..e38c7aa --- /dev/null +++ b/icing/tokenization/verbatim-tokenizer_test.cc @@ -0,0 +1,209 @@ +// Copyright (C) 2021 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <string_view> + +#include "gmock/gmock.h" +#include "icing/portable/platform.h" +#include "icing/testing/common-matchers.h" +#include "icing/testing/icu-data-file-helper.h" +#include "icing/testing/jni-test-helpers.h" +#include "icing/testing/test-data.h" +#include "icing/tokenization/language-segmenter-factory.h" +#include "icing/tokenization/tokenizer-factory.h" +#include "icing/util/character-iterator.h" +#include "unicode/uloc.h" + +namespace icing { +namespace lib { +namespace { +using ::testing::ElementsAre; +using ::testing::Eq; +using ::testing::IsEmpty; + +class VerbatimTokenizerTest : public ::testing::Test { + protected: + void SetUp() override { + if (!IsCfStringTokenization() && !IsReverseJniTokenization()) { + ICING_ASSERT_OK( + // File generated via icu_data_file rule in //icing/BUILD. + icu_data_file_helper::SetUpICUDataFile( + GetTestFilePath("icing/icu.dat"))); + } + + jni_cache_ = GetTestJniCache(); + language_segmenter_factory::SegmenterOptions options(ULOC_US, + jni_cache_.get()); + ICING_ASSERT_OK_AND_ASSIGN( + language_segmenter_, + language_segmenter_factory::Create(std::move(options))); + } + + std::unique_ptr<const JniCache> jni_cache_; + std::unique_ptr<LanguageSegmenter> language_segmenter_; +}; + +TEST_F(VerbatimTokenizerTest, Empty) { + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Tokenizer> verbatim_tokenizer, + tokenizer_factory::CreateIndexingTokenizer( + StringIndexingConfig::TokenizerType::VERBATIM, + language_segmenter_.get())); + + EXPECT_THAT(verbatim_tokenizer->TokenizeAll(""), IsOkAndHolds(IsEmpty())); +} + +TEST_F(VerbatimTokenizerTest, Simple) { + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Tokenizer> verbatim_tokenizer, + tokenizer_factory::CreateIndexingTokenizer( + StringIndexingConfig::TokenizerType::VERBATIM, + language_segmenter_.get())); + + EXPECT_THAT( + verbatim_tokenizer->TokenizeAll("foo bar"), + IsOkAndHolds(ElementsAre(EqualsToken(Token::Type::VERBATIM, "foo bar")))); +} + 
+TEST_F(VerbatimTokenizerTest, Punctuation) {
+  ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Tokenizer> verbatim_tokenizer,
+                             tokenizer_factory::CreateIndexingTokenizer(
+                                 StringIndexingConfig::TokenizerType::VERBATIM,
+                                 language_segmenter_.get()));
+
+  EXPECT_THAT(verbatim_tokenizer->TokenizeAll("Hello, world!"),
+              IsOkAndHolds(ElementsAre(
+                  EqualsToken(Token::Type::VERBATIM, "Hello, world!"))));
+}
+
+TEST_F(VerbatimTokenizerTest, InvalidTokenBeforeAdvancing) {
+  ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Tokenizer> verbatim_tokenizer,
+                             tokenizer_factory::CreateIndexingTokenizer(
+                                 StringIndexingConfig::TokenizerType::VERBATIM,
+                                 language_segmenter_.get()));
+
+  constexpr std::string_view kText = "Hello, world!";
+  auto token_iterator = verbatim_tokenizer->Tokenize(kText).ValueOrDie();
+
+  // We should get an invalid token if we get the token before advancing.
+  EXPECT_THAT(token_iterator->GetToken(),
+              EqualsToken(Token::Type::INVALID, ""));
+}
+
+TEST_F(VerbatimTokenizerTest, ResetToTokenEndingBefore) {
+  ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Tokenizer> verbatim_tokenizer,
+                             tokenizer_factory::CreateIndexingTokenizer(
+                                 StringIndexingConfig::TokenizerType::VERBATIM,
+                                 language_segmenter_.get()));
+
+  constexpr std::string_view kText = "Hello, world!";
+  auto token_iterator = verbatim_tokenizer->Tokenize(kText).ValueOrDie();
+
+  // Reset to the sole verbatim token. We provide an offset of 13 as it
+  // is larger than the final index (12) of the verbatim token.
+  EXPECT_TRUE(token_iterator->ResetToTokenEndingBefore(13));
+  EXPECT_THAT(token_iterator->GetToken(),
+              EqualsToken(Token::Type::VERBATIM, "Hello, world!"));
+
+  // Ensure our cached character iterator properly maintains the end of the
+  // verbatim token.
+ EXPECT_TRUE(token_iterator->ResetToTokenEndingBefore(13)); + EXPECT_THAT(token_iterator->GetToken(), + EqualsToken(Token::Type::VERBATIM, "Hello, world!")); + + // We should not be able to reset with an offset before or within + // the verbatim token's utf-32 length. + EXPECT_FALSE(token_iterator->ResetToTokenEndingBefore(0)); + EXPECT_FALSE(token_iterator->ResetToTokenEndingBefore(12)); +} + +TEST_F(VerbatimTokenizerTest, ResetToTokenStartingAfter) { + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Tokenizer> verbatim_tokenizer, + tokenizer_factory::CreateIndexingTokenizer( + StringIndexingConfig::TokenizerType::VERBATIM, + language_segmenter_.get())); + + constexpr std::string_view kText = "Hello, world!"; + auto token_iterator = verbatim_tokenizer->Tokenize(kText).ValueOrDie(); + + // Get token without resetting + EXPECT_TRUE(token_iterator->Advance()); + EXPECT_THAT(token_iterator->GetToken(), + EqualsToken(Token::Type::VERBATIM, "Hello, world!")); + + // We expect a sole verbatim token, so it's not possible to reset after the + // start of the token. + EXPECT_FALSE(token_iterator->ResetToTokenStartingAfter(1)); + + // We expect to be reset to the sole verbatim token when the offset is + // negative. 
+ EXPECT_TRUE(token_iterator->ResetToTokenStartingAfter(-1)); + EXPECT_THAT(token_iterator->GetToken(), + EqualsToken(Token::Type::VERBATIM, "Hello, world!")); +} + +TEST_F(VerbatimTokenizerTest, ResetToStart) { + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Tokenizer> verbatim_tokenizer, + tokenizer_factory::CreateIndexingTokenizer( + StringIndexingConfig::TokenizerType::VERBATIM, + language_segmenter_.get())); + + constexpr std::string_view kText = "Hello, world!"; + auto token_iterator = verbatim_tokenizer->Tokenize(kText).ValueOrDie(); + + // Get token without resetting + EXPECT_TRUE(token_iterator->Advance()); + EXPECT_THAT(token_iterator->GetToken(), + EqualsToken(Token::Type::VERBATIM, "Hello, world!")); + + // Retrieve token again after resetting to start + EXPECT_TRUE(token_iterator->ResetToStart()); + EXPECT_THAT(token_iterator->GetToken(), + EqualsToken(Token::Type::VERBATIM, "Hello, world!")); +} + +TEST_F(VerbatimTokenizerTest, CalculateTokenStart) { + ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Tokenizer> verbatim_tokenizer, + tokenizer_factory::CreateIndexingTokenizer( + StringIndexingConfig::TokenizerType::VERBATIM, + language_segmenter_.get())); + + constexpr std::string_view kText = "Hello, world!"; + auto token_iterator = verbatim_tokenizer->Tokenize(kText).ValueOrDie(); + + ICING_ASSERT_OK_AND_ASSIGN(CharacterIterator start_character_iterator, + token_iterator->CalculateTokenStart()); + + // We should retrieve the character 'H', the first character of the token. 
+  EXPECT_THAT(start_character_iterator.GetCurrentChar(), Eq('H'));
+}
+
+TEST_F(VerbatimTokenizerTest, CalculateTokenEnd) {
+  ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Tokenizer> verbatim_tokenizer,
+                             tokenizer_factory::CreateIndexingTokenizer(
+                                 StringIndexingConfig::TokenizerType::VERBATIM,
+                                 language_segmenter_.get()));
+
+  constexpr std::string_view kText = "Hello, world!";
+  auto token_iterator = verbatim_tokenizer->Tokenize(kText).ValueOrDie();
+
+  ICING_ASSERT_OK_AND_ASSIGN(CharacterIterator end_character_iterator,
+                             token_iterator->CalculateTokenEndExclusive());
+
+  // We should retrieve the null character, as the returned character
+  // iterator will be set one past the end of the token.
+  EXPECT_THAT(end_character_iterator.GetCurrentChar(), Eq('\0'));
+}
+
+}  // namespace
+}  // namespace lib
+}  // namespace icing
diff --git a/icing/transform/icu/icu-normalizer_benchmark.cc b/icing/transform/icu/icu-normalizer_benchmark.cc
index 8d09be2..fdd4c70 100644
--- a/icing/transform/icu/icu-normalizer_benchmark.cc
+++ b/icing/transform/icu/icu-normalizer_benchmark.cc
@@ -14,8 +14,8 @@
 #include "testing/base/public/benchmark.h"
 #include "gmock/gmock.h"
-#include "icing/helpers/icu/icu-data-file-helper.h"
 #include "icing/testing/common-matchers.h"
+#include "icing/testing/icu-data-file-helper.h"
 #include "icing/testing/test-data.h"
 #include "icing/transform/normalizer-factory.h"
 #include "icing/transform/normalizer.h"
diff --git a/icing/transform/icu/icu-normalizer_test.cc b/icing/transform/icu/icu-normalizer_test.cc
index a46fcc7..143da17 100644
--- a/icing/transform/icu/icu-normalizer_test.cc
+++ b/icing/transform/icu/icu-normalizer_test.cc
@@ -16,8 +16,8 @@
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
-#include "icing/helpers/icu/icu-data-file-helper.h"
 #include "icing/testing/common-matchers.h"
+#include "icing/testing/icu-data-file-helper.h"
 #include "icing/testing/icu-i18n-test-utils.h"
 #include "icing/testing/test-data.h"
 #include 
"icing/transform/normalizer-factory.h" diff --git a/icing/util/character-iterator.cc b/icing/util/character-iterator.cc index d483031..0ab1e50 100644 --- a/icing/util/character-iterator.cc +++ b/icing/util/character-iterator.cc @@ -49,6 +49,8 @@ bool CharacterIterator::MoveToUtf8(int desired_utf8_index) { } bool CharacterIterator::AdvanceToUtf8(int desired_utf8_index) { + ResetToStartIfNecessary(); + if (desired_utf8_index > text_.length()) { // Enforce the requirement. return false; @@ -120,6 +122,8 @@ bool CharacterIterator::MoveToUtf16(int desired_utf16_index) { } bool CharacterIterator::AdvanceToUtf16(int desired_utf16_index) { + ResetToStartIfNecessary(); + UChar32 uchar32 = cached_current_char_; while (utf16_index_ < desired_utf16_index) { uchar32 = @@ -190,6 +194,8 @@ bool CharacterIterator::MoveToUtf32(int desired_utf32_index) { } bool CharacterIterator::AdvanceToUtf32(int desired_utf32_index) { + ResetToStartIfNecessary(); + UChar32 uchar32 = cached_current_char_; while (utf32_index_ < desired_utf32_index) { uchar32 = @@ -249,5 +255,15 @@ bool CharacterIterator::RewindToUtf32(int desired_utf32_index) { return true; } +void CharacterIterator::ResetToStartIfNecessary() { + if (utf8_index_ < 0 || utf16_index_ < 0 || utf32_index_ < 0) { + utf8_index_ = 0; + utf16_index_ = 0; + utf32_index_ = 0; + cached_current_char_ = + i18n_utils::GetUChar32At(text_.data(), text_.length(), 0); + } +} + } // namespace lib } // namespace icing diff --git a/icing/util/character-iterator.h b/icing/util/character-iterator.h index c7569a7..893718a 100644 --- a/icing/util/character-iterator.h +++ b/icing/util/character-iterator.h @@ -99,6 +99,10 @@ class CharacterIterator { } private: + // Resets the character iterator to the start of the text if any of the + // indices are negative. 
+ void ResetToStartIfNecessary(); + std::string_view text_; UChar32 cached_current_char_; int utf8_index_; diff --git a/icing/util/character-iterator_test.cc b/icing/util/character-iterator_test.cc index 445f837..195a47b 100644 --- a/icing/util/character-iterator_test.cc +++ b/icing/util/character-iterator_test.cc @@ -231,5 +231,36 @@ TEST(CharacterIteratorTest, InvalidUtf) { EXPECT_THAT(iterator, Eq(exp_iterator)); } +TEST(CharacterIteratorTest, MoveToUtfNegativeIndex) { + constexpr std::string_view kText = "¿Dónde está la biblioteca?"; + + CharacterIterator iterator_utf8(kText, /*utf8_index=*/-1, /*utf16_index=*/0, + /*utf32_index=*/0); + // We should be able to successfully move when the index is negative. + EXPECT_THAT(iterator_utf8.MoveToUtf8(0), IsTrue()); + // The character cache should be reset and contain the first character when + // resetting to index 0. + EXPECT_THAT(UCharToString(iterator_utf8.GetCurrentChar()), Eq("¿")); + EXPECT_THAT(iterator_utf8.utf8_index(), Eq(0)); + EXPECT_THAT(iterator_utf8.utf16_index(), Eq(0)); + EXPECT_THAT(iterator_utf8.utf32_index(), Eq(0)); + + CharacterIterator iterator_utf16(kText, /*utf8_index=*/0, /*utf16_index=*/-1, + /*utf32_index=*/0); + EXPECT_THAT(iterator_utf16.MoveToUtf16(1), IsTrue()); + EXPECT_THAT(iterator_utf16.GetCurrentChar(), Eq('D')); + EXPECT_THAT(iterator_utf16.utf8_index(), Eq(2)); + EXPECT_THAT(iterator_utf16.utf16_index(), Eq(1)); + EXPECT_THAT(iterator_utf16.utf32_index(), Eq(1)); + + CharacterIterator iterator_utf32(kText, /*utf8_index=*/0, /*utf16_index=*/0, + /*utf32_index=*/-1); + EXPECT_THAT(iterator_utf32.MoveToUtf32(2), IsTrue()); + EXPECT_THAT(UCharToString(iterator_utf32.GetCurrentChar()), Eq("ó")); + EXPECT_THAT(iterator_utf32.utf8_index(), Eq(3)); + EXPECT_THAT(iterator_utf32.utf16_index(), Eq(2)); + EXPECT_THAT(iterator_utf32.utf32_index(), Eq(2)); +} + } // namespace lib } // namespace icing diff --git a/icing/util/document-validator_test.cc b/icing/util/document-validator_test.cc 
index cb013d7..2261c37 100644 --- a/icing/util/document-validator_test.cc +++ b/icing/util/document-validator_test.cc @@ -46,15 +46,15 @@ constexpr char kPropertyEmails[] = "emails"; constexpr char kDefaultNamespace[] = "icing"; constexpr char kDefaultString[] = "This is a string."; -constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL = - PropertyConfigProto_Cardinality_Code_OPTIONAL; -constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REQUIRED = - PropertyConfigProto_Cardinality_Code_REQUIRED; -constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REPEATED = - PropertyConfigProto_Cardinality_Code_REPEATED; - -constexpr PropertyConfigProto_DataType_Code TYPE_STRING = - PropertyConfigProto_DataType_Code_STRING; +constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_OPTIONAL = + PropertyConfigProto::Cardinality::OPTIONAL; +constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_REQUIRED = + PropertyConfigProto::Cardinality::REQUIRED; +constexpr PropertyConfigProto::Cardinality::Code CARDINALITY_REPEATED = + PropertyConfigProto::Cardinality::REPEATED; + +constexpr PropertyConfigProto::DataType::Code TYPE_STRING = + PropertyConfigProto::DataType::STRING; class DocumentValidatorTest : public ::testing::Test { protected: diff --git a/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java b/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java index cb28331..a46814c 100644 --- a/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java +++ b/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java @@ -53,7 +53,9 @@ import com.google.android.icing.proto.StringIndexingConfig; import com.google.android.icing.proto.StringIndexingConfig.TokenizerType; import com.google.android.icing.proto.SuggestionResponse; import com.google.android.icing.proto.SuggestionSpecProto; +import 
com.google.android.icing.proto.SuggestionSpecProto.SuggestionScoringSpecProto; import com.google.android.icing.proto.TermMatchType; +import com.google.android.icing.proto.TermMatchType.Code; import com.google.android.icing.proto.UsageReport; import com.google.android.icing.IcingSearchEngine; import java.io.File; @@ -650,7 +652,14 @@ public final class IcingSearchEngineTest { assertStatusOk(icingSearchEngine.put(emailDocument2).getStatus()); SuggestionSpecProto suggestionSpec = - SuggestionSpecProto.newBuilder().setPrefix("f").setNumToReturn(10).build(); + SuggestionSpecProto.newBuilder() + .setPrefix("f") + .setNumToReturn(10) + .setScoringSpec( + SuggestionScoringSpecProto.newBuilder() + .setScoringMatchType(Code.EXACT_ONLY) + .build()) + .build(); SuggestionResponse response = icingSearchEngine.searchSuggestions(suggestionSpec); assertStatusOk(response.getStatus()); diff --git a/proto/icing/proto/debug.proto b/proto/icing/proto/debug.proto new file mode 100644 index 0000000..504ae43 --- /dev/null +++ b/proto/icing/proto/debug.proto @@ -0,0 +1,127 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +syntax = "proto2"; + +package icing.lib; + +import "icing/proto/schema.proto"; +import "icing/proto/status.proto"; +import "icing/proto/storage.proto"; + +option java_package = "com.google.android.icing.proto"; +option java_multiple_files = true; +option objc_class_prefix = "ICNG"; + +// Next tag: 4 +message IndexDebugInfoProto { + // Storage information of the index. + optional IndexStorageInfoProto index_storage_info = 1; + + message MainIndexDebugInfoProto { + // Information about the main lexicon. + // TODO(b/222349894) Convert the string output to a protocol buffer instead. + optional string lexicon_info = 1; + + // Last added document id. + optional uint32 last_added_document_id = 2; + + // If verbosity > 0, return information about the posting list storage. + // TODO(b/222349894) Convert the string output to a protocol buffer instead. + optional string flash_index_storage_info = 3; + } + optional MainIndexDebugInfoProto main_index_info = 2; + + message LiteIndexDebugInfoProto { + // Current number of hits. + optional uint32 curr_size = 1; + + // The maximum possible number of hits. + optional uint32 hit_buffer_size = 2; + + // Last added document id. + optional uint32 last_added_document_id = 3; + + // The first position in the hit buffer that is not sorted yet, + // or curr_size if all hits are sorted. + optional uint32 searchable_end = 4; + + // The most recent checksum of the lite index, by calling + // LiteIndex::ComputeChecksum(). + optional uint32 index_crc = 5; + + // Information about the lite lexicon. + // TODO(b/222349894) Convert the string output to a protocol buffer instead. + optional string lexicon_info = 6; + } + optional LiteIndexDebugInfoProto lite_index_info = 3; +} + +// Next tag: 4 +message DocumentDebugInfoProto { + // Storage information of the document store. + optional DocumentStorageInfoProto document_storage_info = 1; + + // The most recent checksum of the document store, by calling + // DocumentStore::ComputeChecksum(). 
+ optional uint32 crc = 2; + + message CorpusInfo { + optional string namespace = 1; + optional string schema = 2; + optional uint32 total_documents = 3; + optional uint32 total_token = 4; + } + + // If verbosity > 0, return the total number of documents and tokens in each + // (namespace, schema type) pair. + // Note that deleted and expired documents are skipped in the output. + repeated CorpusInfo corpus_info = 3; +} + +// Next tag: 3 +message SchemaDebugInfoProto { + // Copy of the SchemaProto if it has been set in the schema store. + // Modifying this does not affect the Schema that IcingSearchEngine holds. + optional SchemaProto schema = 1; + + // The most recent checksum of the schema store, by calling + // SchemaStore::ComputeChecksum(). + optional uint32 crc = 2; +} + +// Next tag: 4 +message DebugInfoProto { + // Debug information of the index. + optional IndexDebugInfoProto index_info = 1; + + // Debug information of the document store. + optional DocumentDebugInfoProto document_info = 2; + + // Debug information of the schema store. + optional SchemaDebugInfoProto schema_info = 3; +} + +// Next tag: 3 +message DebugInfoResultProto { + // Status code can be one of: + // OK + // FAILED_PRECONDITION + // + // See status.proto for more details. + optional StatusProto status = 1; + + // Debug information for Icing. + optional DebugInfoProto debug_info = 2; +} diff --git a/proto/icing/proto/document.proto b/proto/icing/proto/document.proto index 2e8321b..1a501e7 100644 --- a/proto/icing/proto/document.proto +++ b/proto/icing/proto/document.proto @@ -209,7 +209,7 @@ message DeleteBySchemaTypeResultProto { } // Result of a call to IcingSearchEngine.DeleteByQuery -// Next tag: 4 +// Next tag: 5 message DeleteByQueryResultProto { // Status code can be one of: // OK @@ -226,5 +226,18 @@ message DeleteByQueryResultProto { // Stats for delete execution performance. 
optional DeleteByQueryStatsProto delete_by_query_stats = 3; + // Used by DeleteByQueryResultProto to return information about deleted + // documents. + message DocumentGroupInfo { + optional string namespace = 1; + optional string schema = 2; + repeated string uris = 3; + } + + // Additional return message that shows the uris of the deleted documents, if + // users set return_deleted_document_info to true. + // The result is grouped by the corresponding namespace and type. + repeated DocumentGroupInfo deleted_documents = 4; + reserved 2; } diff --git a/proto/icing/proto/initialize.proto b/proto/icing/proto/initialize.proto index ab2556d..7fe1e6f 100644 --- a/proto/icing/proto/initialize.proto +++ b/proto/icing/proto/initialize.proto @@ -30,19 +30,6 @@ message IcingSearchEngineOptions { // the index saved by the last instance. optional string base_dir = 1; - // The maximum number of tokens to be allowed per document. If a document - // exceeds this number of tokens, then only the first max_tokens_per_doc - // will be indexed. - // - // Clients may use this value to prevent the possibility of a select few - // documents from exhausting limits in the index that are shared between all - // documents (ie max allowed index size). - // - // Valid values: [1, INT_MAX], Current default is 1/5 of the default of - // max_document_size. - // Optional. - optional int32 max_tokens_per_doc = 2 [default = 13107]; - // The maximum allowable token length. All tokens in excess of this size // will be truncated to max_token_length before being indexed. // @@ -70,6 +57,8 @@ message IcingSearchEngineOptions { // Valid values: [1, INT_MAX] // Optional. 
optional int32 index_merge_size = 4 [default = 1048576]; // 1 MiB + + reserved 2; } // Result of a call to IcingSearchEngine.Initialize diff --git a/proto/icing/proto/logging.proto b/proto/icing/proto/logging.proto index 2f1f271..0a7c4a6 100644 --- a/proto/icing/proto/logging.proto +++ b/proto/icing/proto/logging.proto @@ -46,6 +46,9 @@ message InitializeStatsProto { // Random I/O errors. IO_ERROR = 4; + + // The document log is using legacy format. + LEGACY_DOCUMENT_LOG_FORMAT = 5; } // Possible recovery causes for document store: diff --git a/proto/icing/proto/schema.proto b/proto/icing/proto/schema.proto index c611cbf..ffb6f2c 100644 --- a/proto/icing/proto/schema.proto +++ b/proto/icing/proto/schema.proto @@ -91,6 +91,14 @@ message StringIndexingConfig { // Tokenization for plain text. PLAIN = 1; + + // Tokenizes text in verbatim. This means no normalization or segmentation + // is applied to string values that are tokenized using this type. + // Therefore, the output token is equivalent to the raw string text. For + // example, "Hello, world!" would be tokenized as "Hello, world!" + // preserving punctuation and capitalization, and not creating separate + // tokens between the space. + VERBATIM = 2; } } optional TokenizerType.Code tokenizer_type = 2; diff --git a/proto/icing/proto/scoring.proto b/proto/icing/proto/scoring.proto index a3a64df..71c943e 100644 --- a/proto/icing/proto/scoring.proto +++ b/proto/icing/proto/scoring.proto @@ -116,8 +116,9 @@ message PropertyWeight { // specified, the property weight is discarded. optional string path = 1; - // Property weight, valid values are positive. Zero and negative weights are - // invalid and will result in an error. By default, a property is given a raw, - // pre-normalized weight of 1.0. + // Property weight, valid values are positive and zero. Setting a zero + // property weight will remove scoring contribution for a query term match in + // the property. 
Negative weights are invalid and will result in an error. + // By default, a property is given a raw, pre-normalized weight of 1.0. optional double weight = 2; } diff --git a/proto/icing/proto/search.proto b/proto/icing/proto/search.proto index c712ab2..f005c76 100644 --- a/proto/icing/proto/search.proto +++ b/proto/icing/proto/search.proto @@ -85,16 +85,16 @@ message ResultSpecProto { // have snippet information provided. If set to 0, snippeting is disabled. optional int32 num_matches_per_property = 2; - // How large of a window to provide. Windows start at max_window_bytes / 2 - // bytes before the middle of the matching token and end at max_window_bytes - // / 2 bytes after the middle of the matching token. Windowing respects - // token boundaries. - // Therefore, the returned window may be smaller than requested. Setting - // max_window_bytes to 0 will disable windowing information. If matches - // enabled is also set to false, then snippeting is disabled. - // Ex. max_window_bytes = 16. "foo bar baz bat rat" with a query of "baz" + // How large of a window to provide. Windows start at + // max_window_utf32_length / 2 bytes before the middle of the matching token + // and end at max_window_utf32_length / 2 bytes after the middle of the + // matching token. Windowing respects token boundaries. Therefore, the + // returned window may be smaller than requested. Setting + // max_window_utf32_length to 0 will disable windowing information. If + // matches enabled is also set to false, then snippeting is disabled. Ex. + // max_window_utf32_length = 16. "foo bar baz bat rat" with a query of "baz" // will return a window of "bar baz bat" which is only 11 bytes long. 
- optional int32 max_window_bytes = 3; + optional int32 max_window_utf32_length = 3; } optional SnippetSpecProto snippet_spec = 3; @@ -309,7 +309,7 @@ message GetResultSpecProto { repeated TypePropertyMask type_property_masks = 1; } -// Next tag: 4 +// Next tag: 5 message SuggestionSpecProto { // REQUIRED: The "raw" prefix string that users may type. For example, "f" // will search for suggested query that start with "f" like "foo", "fool". @@ -323,6 +323,23 @@ message SuggestionSpecProto { // REQUIRED: The number of suggestions to be returned. optional int32 num_to_return = 3; + + // Indicates how the suggestion terms should be scored and ranked. + message SuggestionScoringSpecProto { + // TermMatchType.Code=UNKNOWN + // Should never purposely be set and may lead to undefined behavior. This is + // used for backwards compatibility reasons. + // + // TermMatchType.Code=EXACT_ONLY + // Only exact hits will be counted to score a suggestion term. + // + // TermMatchType.Code=PREFIX + // Both exact hits and prefix hits will be counted to score a suggestion + // term. + optional TermMatchType.Code scoring_match_type = 1; + } + + optional SuggestionScoringSpecProto scoring_spec = 4; } // Next tag: 3 diff --git a/synced_AOSP_CL_number.txt b/synced_AOSP_CL_number.txt index 7e0431b..73d349b 100644 --- a/synced_AOSP_CL_number.txt +++ b/synced_AOSP_CL_number.txt @@ -1 +1 @@ -set(synced_AOSP_CL_number=404879391) +set(synced_AOSP_CL_number=436284873) |