aboutsummaryrefslogtreecommitdiff
path: root/icing
diff options
context:
space:
mode:
authorAlexander Dorokhine <adorokhine@google.com>2020-11-05 15:28:28 -0800
committerAlexander Dorokhine <adorokhine@google.com>2020-11-05 15:29:03 -0800
commite111bb917a63282b8e11012acb8f97cba882f342 (patch)
tree1c59a59fa2c918d9a3f512a0e4b7d4934f6bb07b /icing
parent71b8eddc99c6337ff304a2f3cd0588c42239202f (diff)
downloadicing-e111bb917a63282b8e11012acb8f97cba882f342.tar.gz
Update icing from upstream.
Change-Id: Ia63a77142ec717c0d9a81ec0a5c1267381858200
Diffstat (limited to 'icing')
-rw-r--r--icing/absl_ports/str_join.cc41
-rw-r--r--icing/absl_ports/str_join.h4
-rw-r--r--icing/file/file-backed-proto-log.h34
-rw-r--r--icing/file/file-backed-proto-log_test.cc42
-rw-r--r--icing/icing-search-engine-with-icu-file_test.cc7
-rw-r--r--icing/icing-search-engine.cc163
-rw-r--r--icing/icing-search-engine.h22
-rw-r--r--icing/icing-search-engine_fuzz_test.cc7
-rw-r--r--icing/icing-search-engine_test.cc798
-rw-r--r--icing/index/index-processor.cc27
-rw-r--r--icing/index/index-processor.h8
-rw-r--r--icing/index/index-processor_benchmark.cc6
-rw-r--r--icing/index/index-processor_test.cc8
-rw-r--r--icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc6
-rw-r--r--icing/index/main/doc-hit-info-iterator-term-main.cc8
-rw-r--r--icing/query/query-processor_test.cc6
-rw-r--r--icing/result/result-retriever_test.cc12
-rw-r--r--icing/result/snippet-retriever_test.cc12
-rw-r--r--icing/schema/schema-store.cc21
-rw-r--r--icing/schema/schema-store.h10
-rw-r--r--icing/schema/schema-store_test.cc12
-rw-r--r--icing/schema/schema-util.cc48
-rw-r--r--icing/schema/schema-util.h32
-rw-r--r--icing/schema/schema-util_test.cc201
-rw-r--r--icing/schema/section-manager.cc56
-rw-r--r--icing/schema/section-manager_test.cc306
-rw-r--r--icing/schema/section.h8
-rw-r--r--icing/store/document-store.cc60
-rw-r--r--icing/store/document-store.h33
-rw-r--r--icing/store/document-store_test.cc24
-rw-r--r--icing/testing/common-matchers.h3
-rw-r--r--icing/testing/schema-generator.h8
-rw-r--r--icing/tokenization/icu/icu-language-segmenter.cc2
-rw-r--r--icing/tokenization/plain-tokenizer_test.cc80
-rw-r--r--icing/tokenization/raw-query-tokenizer.cc400
-rw-r--r--icing/tokenization/raw-query-tokenizer_test.cc10
-rw-r--r--icing/tokenization/tokenizer-factory.cc6
-rw-r--r--icing/tokenization/tokenizer-factory.h2
38 files changed, 1853 insertions, 680 deletions
diff --git a/icing/absl_ports/str_join.cc b/icing/absl_ports/str_join.cc
new file mode 100644
index 0000000..2d105ca
--- /dev/null
+++ b/icing/absl_ports/str_join.cc
@@ -0,0 +1,41 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/absl_ports/str_join.h"
+
+namespace icing {
+namespace lib {
+namespace absl_ports {
+
+std::vector<std::string_view> StrSplit(std::string_view text,
+ std::string_view sep) {
+ std::vector<std::string_view> substrings;
+ size_t separator_position = text.find(sep);
+ size_t current_start = 0;
+ size_t current_end = separator_position;
+ while (separator_position != std::string_view::npos) {
+ substrings.push_back(
+ text.substr(current_start, current_end - current_start));
+ current_start = current_end + sep.length();
+ separator_position = text.find(sep, current_start);
+ current_end = separator_position;
+ }
+ current_end = text.length();
+ substrings.push_back(text.substr(current_start, current_end - current_start));
+ return substrings;
+}
+
+} // namespace absl_ports
+} // namespace lib
+} // namespace icing
diff --git a/icing/absl_ports/str_join.h b/icing/absl_ports/str_join.h
index 7c8936a..f66a977 100644
--- a/icing/absl_ports/str_join.h
+++ b/icing/absl_ports/str_join.h
@@ -17,6 +17,7 @@
#include <string>
#include <string_view>
+#include <vector>
#include "icing/absl_ports/str_cat.h"
@@ -104,6 +105,9 @@ std::string StrJoin(const Container& container, std::string_view sep) {
return absl_ports::StrJoin(container, sep, DefaultFormatter());
}
+std::vector<std::string_view> StrSplit(std::string_view text,
+ std::string_view sep);
+
} // namespace absl_ports
} // namespace lib
} // namespace icing
diff --git a/icing/file/file-backed-proto-log.h b/icing/file/file-backed-proto-log.h
index 95511ac..aa5a031 100644
--- a/icing/file/file-backed-proto-log.h
+++ b/icing/file/file-backed-proto-log.h
@@ -168,11 +168,15 @@ class FileBackedProtoLog {
// A successfully initialized log.
std::unique_ptr<FileBackedProtoLog<ProtoT>> proto_log;
- // Whether there was some data loss while initializing from a previous
- // state. This can happen if the file is corrupted or some previously added
- // data was unpersisted. This may be used to signal that any derived data
- // off of the proto log may need to be regenerated.
- bool data_loss;
+ // The data status after initializing from a previous state. Data loss can
+ // happen if the file is corrupted or some previously added data was
+ // unpersisted. This may be used to signal that any derived data off of the
+ // proto log may need to be regenerated.
+ enum DataStatus { NO_DATA_LOSS, PARTIAL_LOSS, COMPLETE_LOSS } data_status;
+
+ bool has_data_loss() {
+ return data_status == PARTIAL_LOSS || data_status == COMPLETE_LOSS;
+ }
};
// Factory method to create, initialize, and return a FileBackedProtoLog. Will
@@ -182,10 +186,11 @@ class FileBackedProtoLog {
// added data was unpersisted, the log will rewind to the last-good state. The
// log saves these checkpointed "good" states when PersistToDisk() is called
// or the log is safely destructed. If the log rewinds successfully to the
- // last-good state, then the returned CreateResult.data_loss indicates
- // there was some data loss so that any derived data may know that it
- // needs to be updated. If the log re-initializes successfully without any
- // data loss, the boolean will be false.
+ // last-good state, then the returned CreateResult.data_status indicates
+ // whether it has a data loss and what kind of data loss it is (partial or
+ // complete) so that any derived data may know that it needs to be updated. If
+ // the log re-initializes successfully without any data loss,
+ // CreateResult.data_status will be NO_DATA_LOSS.
//
// Params:
// filesystem: Handles system level calls
@@ -511,7 +516,7 @@ FileBackedProtoLog<ProtoT>::InitializeNewFile(const Filesystem* filesystem,
std::unique_ptr<FileBackedProtoLog<ProtoT>>(
new FileBackedProtoLog<ProtoT>(filesystem, file_path,
std::move(header))),
- /*data_loss=*/false};
+ /*data_status=*/CreateResult::NO_DATA_LOSS};
return create_result;
}
@@ -561,15 +566,14 @@ FileBackedProtoLog<ProtoT>::InitializeExistingFile(const Filesystem* filesystem,
}
header->max_proto_size = options.max_proto_size;
- bool data_loss = false;
+ typename CreateResult::DataStatus data_status = CreateResult::NO_DATA_LOSS;
ICING_ASSIGN_OR_RETURN(Crc32 calculated_log_checksum,
ComputeChecksum(filesystem, file_path, Crc32(),
sizeof(Header), file_size));
// Double check that the log checksum is the same as the one that was
// persisted last time. If not, we start recovery logic.
if (header->log_checksum != calculated_log_checksum.Get()) {
- // Need to rewind the proto log since the checksums don't match
- data_loss = true;
+ // Need to rewind the proto log since the checksums don't match.
// Worst case, we have to rewind the entire log back to just the header
int64_t last_known_good = sizeof(Header);
@@ -585,10 +589,12 @@ FileBackedProtoLog<ProtoT>::InitializeExistingFile(const Filesystem* filesystem,
// Check if it matches our last rewind state. If so, this becomes our last
// good state and we can safely truncate and recover from here.
last_known_good = header->rewind_offset;
+ data_status = CreateResult::PARTIAL_LOSS;
} else {
// Otherwise, we're going to truncate the entire log and this resets the
// checksum to an empty log state.
header->log_checksum = 0;
+ data_status = CreateResult::COMPLETE_LOSS;
}
if (!filesystem->Truncate(file_path.c_str(), last_known_good)) {
@@ -604,7 +610,7 @@ FileBackedProtoLog<ProtoT>::InitializeExistingFile(const Filesystem* filesystem,
std::unique_ptr<FileBackedProtoLog<ProtoT>>(
new FileBackedProtoLog<ProtoT>(filesystem, file_path,
std::move(header))),
- data_loss};
+ data_status};
return create_result;
}
diff --git a/icing/file/file-backed-proto-log_test.cc b/icing/file/file-backed-proto-log_test.cc
index fad5248..7410d2b 100644
--- a/icing/file/file-backed-proto-log_test.cc
+++ b/icing/file/file-backed-proto-log_test.cc
@@ -77,7 +77,7 @@ TEST_F(FileBackedProtoLogTest, Initialize) {
FileBackedProtoLog<DocumentProto>::Options(compress_,
max_proto_size_)));
EXPECT_THAT(create_result.proto_log, NotNull());
- EXPECT_FALSE(create_result.data_loss);
+ EXPECT_FALSE(create_result.has_data_loss());
// Can't recreate the same file with different options.
ASSERT_THAT(FileBackedProtoLog<DocumentProto>::Create(
@@ -96,7 +96,7 @@ TEST_F(FileBackedProtoLogTest, WriteProtoTooLarge) {
FileBackedProtoLog<DocumentProto>::Options(compress_,
max_proto_size)));
auto proto_log = std::move(create_result.proto_log);
- ASSERT_FALSE(create_result.data_loss);
+ ASSERT_FALSE(create_result.has_data_loss());
DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
@@ -113,7 +113,7 @@ TEST_F(FileBackedProtoLogTest, ReadProtoWrongKProtoMagic) {
FileBackedProtoLog<DocumentProto>::Options(compress_,
max_proto_size_)));
auto proto_log = std::move(create_result.proto_log);
- ASSERT_FALSE(create_result.data_loss);
+ ASSERT_FALSE(create_result.has_data_loss());
// Write a proto
DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
@@ -147,7 +147,7 @@ TEST_F(FileBackedProtoLogTest, ReadWriteUncompressedProto) {
FileBackedProtoLog<DocumentProto>::Options(
/*compress_in=*/false, max_proto_size_)));
auto proto_log = std::move(create_result.proto_log);
- ASSERT_FALSE(create_result.data_loss);
+ ASSERT_FALSE(create_result.has_data_loss());
// Write the first proto
DocumentProto document1 =
@@ -194,7 +194,7 @@ TEST_F(FileBackedProtoLogTest, ReadWriteUncompressedProto) {
FileBackedProtoLog<DocumentProto>::Options(
/*compress_in=*/false, max_proto_size_)));
auto recreated_proto_log = std::move(create_result.proto_log);
- ASSERT_FALSE(create_result.data_loss);
+ ASSERT_FALSE(create_result.has_data_loss());
// Write a third proto
DocumentProto document3 =
@@ -216,7 +216,7 @@ TEST_F(FileBackedProtoLogTest, ReadWriteCompressedProto) {
FileBackedProtoLog<DocumentProto>::Options(
/*compress_in=*/true, max_proto_size_)));
auto proto_log = std::move(create_result.proto_log);
- ASSERT_FALSE(create_result.data_loss);
+ ASSERT_FALSE(create_result.has_data_loss());
// Write the first proto
DocumentProto document1 =
@@ -263,7 +263,7 @@ TEST_F(FileBackedProtoLogTest, ReadWriteCompressedProto) {
FileBackedProtoLog<DocumentProto>::Options(
/*compress_in=*/true, max_proto_size_)));
auto recreated_proto_log = std::move(create_result.proto_log);
- ASSERT_FALSE(create_result.data_loss);
+ ASSERT_FALSE(create_result.has_data_loss());
// Write a third proto
DocumentProto document3 =
@@ -283,7 +283,7 @@ TEST_F(FileBackedProtoLogTest, CorruptHeader) {
FileBackedProtoLog<DocumentProto>::Options(compress_,
max_proto_size_)));
auto recreated_proto_log = std::move(create_result.proto_log);
- EXPECT_FALSE(create_result.data_loss);
+ EXPECT_FALSE(create_result.has_data_loss());
int corrupt_offset =
offsetof(FileBackedProtoLog<DocumentProto>::Header, rewind_offset);
@@ -312,7 +312,7 @@ TEST_F(FileBackedProtoLogTest, CorruptContent) {
FileBackedProtoLog<DocumentProto>::Options(compress_,
max_proto_size_)));
auto proto_log = std::move(create_result.proto_log);
- EXPECT_FALSE(create_result.data_loss);
+ EXPECT_FALSE(create_result.has_data_loss());
DocumentProto document =
DocumentBuilder().SetKey("namespace1", "uri1").Build();
@@ -338,7 +338,7 @@ TEST_F(FileBackedProtoLogTest, CorruptContent) {
FileBackedProtoLog<DocumentProto>::Options(compress_,
max_proto_size_)));
auto proto_log = std::move(create_result.proto_log);
- ASSERT_TRUE(create_result.data_loss);
+ ASSERT_TRUE(create_result.has_data_loss());
// Lost everything in the log since the rewind position doesn't help if
// there's been data corruption within the persisted region
@@ -363,7 +363,7 @@ TEST_F(FileBackedProtoLogTest, PersistToDisk) {
FileBackedProtoLog<DocumentProto>::Options(compress_,
max_proto_size_)));
auto proto_log = std::move(create_result.proto_log);
- ASSERT_FALSE(create_result.data_loss);
+ ASSERT_FALSE(create_result.has_data_loss());
// Write and persist the first proto
ICING_ASSERT_OK_AND_ASSIGN(document1_offset,
@@ -407,7 +407,7 @@ TEST_F(FileBackedProtoLogTest, PersistToDisk) {
FileBackedProtoLog<DocumentProto>::Options(compress_,
max_proto_size_)));
auto proto_log = std::move(create_result.proto_log);
- ASSERT_TRUE(create_result.data_loss);
+ ASSERT_TRUE(create_result.has_data_loss());
// Check that everything was persisted across instances
ASSERT_THAT(proto_log->ReadProto(document1_offset),
@@ -433,7 +433,7 @@ TEST_F(FileBackedProtoLogTest, Iterator) {
FileBackedProtoLog<DocumentProto>::Options(compress_,
max_proto_size_)));
auto proto_log = std::move(create_result.proto_log);
- ASSERT_FALSE(create_result.data_loss);
+ ASSERT_FALSE(create_result.has_data_loss());
{
// Empty iterator
@@ -484,7 +484,7 @@ TEST_F(FileBackedProtoLogTest, ComputeChecksum) {
FileBackedProtoLog<DocumentProto>::Options(compress_,
max_proto_size_)));
auto proto_log = std::move(create_result.proto_log);
- ASSERT_FALSE(create_result.data_loss);
+ ASSERT_FALSE(create_result.has_data_loss());
ICING_EXPECT_OK(proto_log->WriteProto(document));
@@ -502,7 +502,7 @@ TEST_F(FileBackedProtoLogTest, ComputeChecksum) {
FileBackedProtoLog<DocumentProto>::Options(compress_,
max_proto_size_)));
auto proto_log = std::move(create_result.proto_log);
- ASSERT_FALSE(create_result.data_loss);
+ ASSERT_FALSE(create_result.has_data_loss());
// Checksum should be consistent across instances
EXPECT_THAT(proto_log->ComputeChecksum(), IsOkAndHolds(Eq(checksum)));
@@ -528,7 +528,7 @@ TEST_F(FileBackedProtoLogTest, EraseProtoShouldSetZero) {
FileBackedProtoLog<DocumentProto>::Options(compress_,
max_proto_size_)));
auto proto_log = std::move(create_result.proto_log);
- ASSERT_FALSE(create_result.data_loss);
+ ASSERT_FALSE(create_result.has_data_loss());
// Writes and erases proto
ICING_ASSERT_OK_AND_ASSIGN(int64_t document1_offset,
@@ -561,7 +561,7 @@ TEST_F(FileBackedProtoLogTest, EraseProtoShouldReturnNotFound) {
FileBackedProtoLog<DocumentProto>::Options(compress_,
max_proto_size_)));
auto proto_log = std::move(create_result.proto_log);
- ASSERT_FALSE(create_result.data_loss);
+ ASSERT_FALSE(create_result.has_data_loss());
// Writes 2 protos
ICING_ASSERT_OK_AND_ASSIGN(int64_t document1_offset,
@@ -603,7 +603,7 @@ TEST_F(FileBackedProtoLogTest, ChecksumShouldBeCorrectWithErasedProto) {
FileBackedProtoLog<DocumentProto>::Options(compress_,
max_proto_size_)));
auto proto_log = std::move(create_result.proto_log);
- ASSERT_FALSE(create_result.data_loss);
+ ASSERT_FALSE(create_result.has_data_loss());
// Writes 3 protos
ICING_ASSERT_OK_AND_ASSIGN(int64_t document1_offset,
@@ -631,7 +631,7 @@ TEST_F(FileBackedProtoLogTest, ChecksumShouldBeCorrectWithErasedProto) {
FileBackedProtoLog<DocumentProto>::Options(compress_,
max_proto_size_)));
auto proto_log = std::move(create_result.proto_log);
- ASSERT_FALSE(create_result.data_loss);
+ ASSERT_FALSE(create_result.has_data_loss());
// Erases the 2nd proto that is now before the rewind position. Checksum is
// updated.
@@ -651,7 +651,7 @@ TEST_F(FileBackedProtoLogTest, ChecksumShouldBeCorrectWithErasedProto) {
FileBackedProtoLog<DocumentProto>::Options(compress_,
max_proto_size_)));
auto proto_log = std::move(create_result.proto_log);
- ASSERT_FALSE(create_result.data_loss);
+ ASSERT_FALSE(create_result.has_data_loss());
// Append a new document which is after the rewind position.
ICING_ASSERT_OK(proto_log->WriteProto(document4));
@@ -673,7 +673,7 @@ TEST_F(FileBackedProtoLogTest, ChecksumShouldBeCorrectWithErasedProto) {
FileBackedProtoLog<DocumentProto>::Options(compress_,
max_proto_size_)));
auto proto_log = std::move(create_result.proto_log);
- EXPECT_FALSE(create_result.data_loss);
+ EXPECT_FALSE(create_result.has_data_loss());
}
}
diff --git a/icing/icing-search-engine-with-icu-file_test.cc b/icing/icing-search-engine-with-icu-file_test.cc
index 32ac9e6..1cb8620 100644
--- a/icing/icing-search-engine-with-icu-file_test.cc
+++ b/icing/icing-search-engine-with-icu-file_test.cc
@@ -63,9 +63,10 @@ SchemaProto CreateMessageSchema() {
body->set_property_name("body");
body->set_data_type(PropertyConfigProto::DataType::STRING);
body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- body->mutable_indexing_config()->set_term_match_type(TermMatchType::PREFIX);
- body->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
+ body->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::PREFIX);
+ body->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
return schema;
}
diff --git a/icing/icing-search-engine.cc b/icing/icing-search-engine.cc
index fdec473..08ceafd 100644
--- a/icing/icing-search-engine.cc
+++ b/icing/icing-search-engine.cc
@@ -35,6 +35,7 @@
#include "icing/legacy/index/icing-filesystem.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/initialize.pb.h"
+#include "icing/proto/logging.pb.h"
#include "icing/proto/optimize.pb.h"
#include "icing/proto/persist.pb.h"
#include "icing/proto/reset.pb.h"
@@ -59,6 +60,7 @@
#include "icing/util/crc32.h"
#include "icing/util/logging.h"
#include "icing/util/status-macros.h"
+#include "icing/util/timer.h"
#include "unicode/uloc.h"
namespace icing {
@@ -150,6 +152,10 @@ std::string MakeSchemaDirectoryPath(const std::string& base_dir) {
void TransformStatus(const libtextclassifier3::Status& internal_status,
StatusProto* status_proto) {
StatusProto::Code code;
+ if (!internal_status.ok()) {
+ ICING_LOG(WARNING) << "Error: " << internal_status.error_code()
+ << ", Message: " << internal_status.error_message();
+ }
switch (internal_status.CanonicalCode()) {
case libtextclassifier3::StatusCode::OK:
code = StatusProto::OK;
@@ -257,45 +263,101 @@ InitializeResultProto IcingSearchEngine::InternalInitialize() {
ICING_VLOG(1) << "Initializing IcingSearchEngine in dir: "
<< options_.base_dir();
+ // Measure the latency of the initialization process.
+ Timer initialize_timer;
+
InitializeResultProto result_proto;
StatusProto* result_status = result_proto.mutable_status();
+ NativeInitializeStats* initialize_stats =
+ result_proto.mutable_native_initialize_stats();
if (initialized_) {
// Already initialized.
result_status->set_code(StatusProto::OK);
+ initialize_stats->set_latency_ms(initialize_timer.GetElapsedMilliseconds());
+ initialize_stats->set_num_documents(document_store_->num_documents());
return result_proto;
}
// Releases result / query cache if any
result_state_manager_.InvalidateAllResultStates();
- libtextclassifier3::Status status = InitializeMembers();
+ libtextclassifier3::Status status = InitializeMembers(initialize_stats);
if (!status.ok()) {
TransformStatus(status, result_status);
+ initialize_stats->set_latency_ms(initialize_timer.GetElapsedMilliseconds());
return result_proto;
}
// Even if each subcomponent initialized fine independently, we need to
// check if they're consistent with each other.
if (!CheckConsistency().ok()) {
- ICING_VLOG(1)
- << "IcingSearchEngine in inconsistent state, regenerating all "
- "derived data";
- status = RegenerateDerivedFiles();
+ // The total checksum doesn't match the stored value, it could be one of the
+ // following cases:
+ // 1. Icing is initialized the first time in this directory.
+ // 2. Non-checksumed changes have been made to some files.
+ if (index_->last_added_document_id() == kInvalidDocumentId &&
+ document_store_->last_added_document_id() == kInvalidDocumentId &&
+ absl_ports::IsNotFound(schema_store_->GetSchema().status())) {
+ // First time initialize. Not recovering but creating all the files.
+ // We need to explicitly clear the recovery-related fields because some
+ // sub-components may not be able to tell if the storage is being
+ // initialized the first time or has lost some files. Sub-components may
+ // already have set these fields in earlier steps.
+ *initialize_stats = NativeInitializeStats();
+ status = RegenerateDerivedFiles();
+ } else {
+ ICING_VLOG(1)
+ << "IcingSearchEngine in inconsistent state, regenerating all "
+ "derived data";
+ // Total checksum mismatch may not be the root cause of document store
+ // recovery. Preserve the root cause that was set by the document store.
+ bool should_log_document_store_recovery_cause =
+ initialize_stats->document_store_recovery_cause() ==
+ NativeInitializeStats::NONE;
+ if (should_log_document_store_recovery_cause) {
+ initialize_stats->set_document_store_recovery_cause(
+ NativeInitializeStats::TOTAL_CHECKSUM_MISMATCH);
+ }
+ initialize_stats->set_index_restoration_cause(
+ NativeInitializeStats::TOTAL_CHECKSUM_MISMATCH);
+ status = RegenerateDerivedFiles(initialize_stats,
+ should_log_document_store_recovery_cause);
+ }
} else {
- status = RestoreIndex();
+ DocumentId last_stored_document_id =
+ document_store_->last_added_document_id();
+ DocumentId last_indexed_document_id = index_->last_added_document_id();
+ if (last_stored_document_id != last_indexed_document_id) {
+ if (last_stored_document_id == kInvalidDocumentId) {
+ // Document store is empty but index is not. Reset the index.
+ status = index_->Reset();
+ } else {
+ // Index is inconsistent with the document store, we need to restore the
+ // index.
+ initialize_stats->set_index_restoration_cause(
+ NativeInitializeStats::INCONSISTENT_WITH_GROUND_TRUTH);
+ Timer index_restore_timer;
+ status = RestoreIndexIfNeeded();
+ initialize_stats->set_index_restoration_latency_ms(
+ index_restore_timer.GetElapsedMilliseconds());
+ }
+ }
}
if (status.ok() || absl_ports::IsDataLoss(status)) {
initialized_ = true;
}
TransformStatus(status, result_status);
+ initialize_stats->set_latency_ms(initialize_timer.GetElapsedMilliseconds());
return result_proto;
}
-libtextclassifier3::Status IcingSearchEngine::InitializeMembers() {
+libtextclassifier3::Status IcingSearchEngine::InitializeMembers(
+ NativeInitializeStats* initialize_stats) {
+ ICING_RETURN_ERROR_IF_NULL(initialize_stats);
ICING_RETURN_IF_ERROR(InitializeOptions());
- ICING_RETURN_IF_ERROR(InitializeSchemaStore());
- ICING_RETURN_IF_ERROR(InitializeDocumentStore());
+ ICING_RETURN_IF_ERROR(InitializeSchemaStore(initialize_stats));
+ ICING_RETURN_IF_ERROR(InitializeDocumentStore(initialize_stats));
// TODO(b/156383798) : Resolve how to specify the locale.
language_segmenter_factory::SegmenterOptions segmenter_options(
@@ -306,7 +368,7 @@ libtextclassifier3::Status IcingSearchEngine::InitializeMembers() {
TC3_ASSIGN_OR_RETURN(normalizer_,
normalizer_factory::Create(options_.max_token_length()));
- ICING_RETURN_IF_ERROR(InitializeIndex());
+ ICING_RETURN_IF_ERROR(InitializeIndex(initialize_stats));
return libtextclassifier3::Status::OK;
}
@@ -323,7 +385,10 @@ libtextclassifier3::Status IcingSearchEngine::InitializeOptions() {
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::Status IcingSearchEngine::InitializeSchemaStore() {
+libtextclassifier3::Status IcingSearchEngine::InitializeSchemaStore(
+ NativeInitializeStats* initialize_stats) {
+ ICING_RETURN_ERROR_IF_NULL(initialize_stats);
+
const std::string schema_store_dir =
MakeSchemaDirectoryPath(options_.base_dir());
// Make sure the sub-directory exists
@@ -332,12 +397,16 @@ libtextclassifier3::Status IcingSearchEngine::InitializeSchemaStore() {
absl_ports::StrCat("Could not create directory: ", schema_store_dir));
}
ICING_ASSIGN_OR_RETURN(
- schema_store_, SchemaStore::Create(filesystem_.get(), schema_store_dir));
+ schema_store_, SchemaStore::Create(filesystem_.get(), schema_store_dir,
+ initialize_stats));
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::Status IcingSearchEngine::InitializeDocumentStore() {
+libtextclassifier3::Status IcingSearchEngine::InitializeDocumentStore(
+ NativeInitializeStats* initialize_stats) {
+ ICING_RETURN_ERROR_IF_NULL(initialize_stats);
+
const std::string document_dir =
MakeDocumentDirectoryPath(options_.base_dir());
// Make sure the sub-directory exists
@@ -348,12 +417,15 @@ libtextclassifier3::Status IcingSearchEngine::InitializeDocumentStore() {
ICING_ASSIGN_OR_RETURN(
document_store_,
DocumentStore::Create(filesystem_.get(), document_dir, clock_.get(),
- schema_store_.get()));
+ schema_store_.get(), initialize_stats));
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::Status IcingSearchEngine::InitializeIndex() {
+libtextclassifier3::Status IcingSearchEngine::InitializeIndex(
+ NativeInitializeStats* initialize_stats) {
+ ICING_RETURN_ERROR_IF_NULL(initialize_stats);
+
const std::string index_dir = MakeIndexDirectoryPath(options_.base_dir());
// Make sure the sub-directory exists
if (!filesystem_->CreateDirectoryRecursively(index_dir.c_str())) {
@@ -371,11 +443,18 @@ libtextclassifier3::Status IcingSearchEngine::InitializeIndex() {
absl_ports::StrCat("Could not recreate directory: ", index_dir));
}
+ initialize_stats->set_index_restoration_cause(
+ NativeInitializeStats::IO_ERROR);
+
// Try recreating it from scratch and re-indexing everything.
ICING_ASSIGN_OR_RETURN(index_,
Index::Create(index_options, filesystem_.get(),
icing_filesystem_.get()));
- ICING_RETURN_IF_ERROR(RestoreIndex());
+
+ Timer restore_timer;
+ ICING_RETURN_IF_ERROR(RestoreIndexIfNeeded());
+ initialize_stats->set_index_restoration_latency_ms(
+ restore_timer.GetElapsedMilliseconds());
} else {
// Index was created fine.
index_ = std::move(index_or).ValueOrDie();
@@ -414,11 +493,25 @@ libtextclassifier3::Status IcingSearchEngine::CheckConsistency() {
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::Status IcingSearchEngine::RegenerateDerivedFiles() {
+libtextclassifier3::Status IcingSearchEngine::RegenerateDerivedFiles(
+ NativeInitializeStats* initialize_stats, bool log_document_store_stats) {
+ // Measure the latency of the data recovery. The cause of the recovery should
+ // be logged by the caller.
+ Timer timer;
ICING_RETURN_IF_ERROR(
document_store_->UpdateSchemaStore(schema_store_.get()));
+ if (initialize_stats != nullptr && log_document_store_stats) {
+ initialize_stats->set_document_store_recovery_latency_ms(
+ timer.GetElapsedMilliseconds());
+ }
+ // Restart timer.
+ timer = Timer();
ICING_RETURN_IF_ERROR(index_->Reset());
- ICING_RETURN_IF_ERROR(RestoreIndex());
+ ICING_RETURN_IF_ERROR(RestoreIndexIfNeeded());
+ if (initialize_stats != nullptr) {
+ initialize_stats->set_index_restoration_latency_ms(
+ timer.GetElapsedMilliseconds());
+ }
const std::string header_file =
MakeHeaderFilename(options_.base_dir().c_str());
@@ -513,7 +606,7 @@ SetSchemaResultProto IcingSearchEngine::SetSchema(
return result_proto;
}
- status = RestoreIndex();
+ status = RestoreIndexIfNeeded();
if (!status.ok()) {
TransformStatus(status, result_status);
return result_proto;
@@ -580,8 +673,12 @@ PutResultProto IcingSearchEngine::Put(const DocumentProto& document) {
PutResultProto IcingSearchEngine::Put(DocumentProto&& document) {
ICING_VLOG(1) << "Writing document to document store";
+ Timer put_timer;
+
PutResultProto result_proto;
StatusProto* result_status = result_proto.mutable_status();
+ NativePutDocumentStats* put_document_stats =
+ result_proto.mutable_native_put_document_stats();
// Lock must be acquired before validation because the DocumentStore uses
// the schema file to validate, and the schema could be changed in
@@ -590,12 +687,14 @@ PutResultProto IcingSearchEngine::Put(DocumentProto&& document) {
if (!initialized_) {
result_status->set_code(StatusProto::FAILED_PRECONDITION);
result_status->set_message("IcingSearchEngine has not been initialized!");
+ put_document_stats->set_latency_ms(put_timer.GetElapsedMilliseconds());
return result_proto;
}
- auto document_id_or = document_store_->Put(document);
+ auto document_id_or = document_store_->Put(document, put_document_stats);
if (!document_id_or.ok()) {
TransformStatus(document_id_or.status(), result_status);
+ put_document_stats->set_latency_ms(put_timer.GetElapsedMilliseconds());
return result_proto;
}
DocumentId document_id = document_id_or.ValueOrDie();
@@ -605,13 +704,17 @@ PutResultProto IcingSearchEngine::Put(DocumentProto&& document) {
index_.get(), CreateIndexProcessorOptions(options_));
if (!index_processor_or.ok()) {
TransformStatus(index_processor_or.status(), result_status);
+ put_document_stats->set_latency_ms(put_timer.GetElapsedMilliseconds());
return result_proto;
}
std::unique_ptr<IndexProcessor> index_processor =
std::move(index_processor_or).ValueOrDie();
- auto status = index_processor->IndexDocument(document, document_id);
+ auto status =
+ index_processor->IndexDocument(document, document_id, put_document_stats);
+
TransformStatus(status, result_status);
+ put_document_stats->set_latency_ms(put_timer.GetElapsedMilliseconds());
return result_proto;
}
@@ -889,7 +992,7 @@ OptimizeResultProto IcingSearchEngine::Optimize() {
return result_proto;
}
- libtextclassifier3::Status index_restoration_status = RestoreIndex();
+ libtextclassifier3::Status index_restoration_status = RestoreIndexIfNeeded();
if (!index_restoration_status.ok()) {
status = absl_ports::Annotate(
absl_ports::InternalError(
@@ -1300,19 +1403,21 @@ libtextclassifier3::Status IcingSearchEngine::OptimizeDocumentStore() {
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::Status IcingSearchEngine::RestoreIndex() {
+libtextclassifier3::Status IcingSearchEngine::RestoreIndexIfNeeded() {
DocumentId last_stored_document_id =
document_store_->last_added_document_id();
DocumentId last_indexed_document_id = index_->last_added_document_id();
- if (last_stored_document_id == kInvalidDocumentId) {
- // Nothing to index. Make sure the index is also empty.
- if (last_indexed_document_id != kInvalidDocumentId) {
- ICING_RETURN_IF_ERROR(index_->Reset());
- }
+ if (last_stored_document_id == last_indexed_document_id) {
+ // No need to recover.
return libtextclassifier3::Status::OK;
}
+ if (last_stored_document_id == kInvalidDocumentId) {
+ // Document store is empty but index is not. Reset the index.
+ return index_->Reset();
+ }
+
// TruncateTo ensures that the index does not hold any data that is not
// present in the ground truth. If the document store lost some documents,
// TruncateTo will ensure that the index does not contain any hits from those
@@ -1323,7 +1428,7 @@ libtextclassifier3::Status IcingSearchEngine::RestoreIndex() {
DocumentId first_document_to_reindex =
(last_indexed_document_id != kInvalidDocumentId)
? index_->last_added_document_id() + 1
- : 0;
+ : kMinDocumentId;
ICING_ASSIGN_OR_RETURN(
std::unique_ptr<IndexProcessor> index_processor,
diff --git a/icing/icing-search-engine.h b/icing/icing-search-engine.h
index 58b8df2..70a9c07 100644
--- a/icing/icing-search-engine.h
+++ b/icing/icing-search-engine.h
@@ -451,10 +451,12 @@ class IcingSearchEngine {
//
// Returns:
// OK on success
+ // FAILED_PRECONDITION if initialize_stats is null
// RESOURCE_EXHAUSTED if the index runs out of storage
// NOT_FOUND if some Document's schema type is not in the SchemaStore
// INTERNAL on any I/O errors
- libtextclassifier3::Status InitializeMembers()
+ libtextclassifier3::Status InitializeMembers(
+ NativeInitializeStats* initialize_stats)
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Do any validation/setup required for the given IcingSearchEngineOptions
@@ -470,8 +472,10 @@ class IcingSearchEngine {
//
// Returns:
// OK on success
+ // FAILED_PRECONDITION if initialize_stats is null
// INTERNAL on I/O error
- libtextclassifier3::Status InitializeSchemaStore()
+ libtextclassifier3::Status InitializeSchemaStore(
+ NativeInitializeStats* initialize_stats)
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Do any initialization/recovery necessary to create a DocumentStore
@@ -479,8 +483,10 @@ class IcingSearchEngine {
//
// Returns:
// OK on success
+ // FAILED_PRECONDITION if initialize_stats is null
// INTERNAL on I/O error
- libtextclassifier3::Status InitializeDocumentStore()
+ libtextclassifier3::Status InitializeDocumentStore(
+ NativeInitializeStats* initialize_stats)
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Do any initialization/recovery necessary to create a DocumentStore
@@ -488,10 +494,12 @@ class IcingSearchEngine {
//
// Returns:
// OK on success
+ // FAILED_PRECONDITION if initialize_stats is null
// RESOURCE_EXHAUSTED if the index runs out of storage
// NOT_FOUND if some Document's schema type is not in the SchemaStore
// INTERNAL on I/O error
- libtextclassifier3::Status InitializeIndex()
+ libtextclassifier3::Status InitializeIndex(
+ NativeInitializeStats* initialize_stats)
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Many of the internal components rely on other components' derived data.
@@ -514,7 +522,9 @@ class IcingSearchEngine {
// Returns:
// OK on success
// INTERNAL_ERROR on any IO errors
- libtextclassifier3::Status RegenerateDerivedFiles()
+ libtextclassifier3::Status RegenerateDerivedFiles(
+ NativeInitializeStats* initialize_stats = nullptr,
+ bool log_document_store_stats = false)
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Optimizes the DocumentStore by removing any unneeded documents (i.e.
@@ -543,7 +553,7 @@ class IcingSearchEngine {
// RESOURCE_EXHAUSTED if the index fills up before finishing indexing
// NOT_FOUND if some Document's schema type is not in the SchemaStore
// INTERNAL_ERROR on any IO errors
- libtextclassifier3::Status RestoreIndex()
+ libtextclassifier3::Status RestoreIndexIfNeeded()
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Computes the combined checksum of the IcingSearchEngine - includes all its
diff --git a/icing/icing-search-engine_fuzz_test.cc b/icing/icing-search-engine_fuzz_test.cc
index d31f836..1f59c6e 100644
--- a/icing/icing-search-engine_fuzz_test.cc
+++ b/icing/icing-search-engine_fuzz_test.cc
@@ -44,9 +44,10 @@ SchemaProto SetTypes() {
body->set_property_name("body");
body->set_data_type(PropertyConfigProto::DataType::STRING);
body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- body->mutable_indexing_config()->set_term_match_type(TermMatchType::PREFIX);
- body->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
+ body->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::PREFIX);
+ body->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
return schema;
}
diff --git a/icing/icing-search-engine_test.cc b/icing/icing-search-engine_test.cc
index 06e89f2..b642a94 100644
--- a/icing/icing-search-engine_test.cc
+++ b/icing/icing-search-engine_test.cc
@@ -41,6 +41,7 @@
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
#include "icing/testing/jni-test-helpers.h"
+#include "icing/testing/random-string.h"
#include "icing/testing/snippet-helpers.h"
#include "icing/testing/test-data.h"
#include "icing/testing/tmp-directory.h"
@@ -161,9 +162,10 @@ SchemaProto CreateMessageSchema() {
body->set_property_name("body");
body->set_data_type(PropertyConfigProto::DataType::STRING);
body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- body->mutable_indexing_config()->set_term_match_type(TermMatchType::PREFIX);
- body->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
+ body->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::PREFIX);
+ body->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
return schema;
}
@@ -177,16 +179,18 @@ SchemaProto CreateEmailSchema() {
body->set_property_name("body");
body->set_data_type(PropertyConfigProto::DataType::STRING);
body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- body->mutable_indexing_config()->set_term_match_type(TermMatchType::PREFIX);
- body->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
+ body->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::PREFIX);
+ body->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
auto* subj = type->add_properties();
subj->set_property_name("subject");
subj->set_data_type(PropertyConfigProto::DataType::STRING);
subj->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- subj->mutable_indexing_config()->set_term_match_type(TermMatchType::PREFIX);
- subj->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
+ subj->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::PREFIX);
+ subj->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
return schema;
}
@@ -423,10 +427,7 @@ TEST_F(IcingSearchEngineTest,
body->set_schema_type("Person");
body->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- body->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- body->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
+ body->mutable_document_indexing_config()->set_index_nested_properties(true);
type = schema.add_types();
type->set_schema_type("Person");
@@ -436,10 +437,7 @@ TEST_F(IcingSearchEngineTest,
body->set_schema_type("Message");
body->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- body->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- body->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
+ body->mutable_document_indexing_config()->set_index_nested_properties(true);
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
@@ -735,7 +733,7 @@ TEST_F(IcingSearchEngineTest, SetSchemaTriggersIndexRestorationAndReturnsOk) {
SchemaProto schema_with_no_indexed_property = CreateMessageSchema();
schema_with_no_indexed_property.mutable_types(0)
->mutable_properties(0)
- ->clear_indexing_config();
+ ->clear_string_indexing_config();
EXPECT_THAT(icing.SetSchema(schema_with_no_indexed_property).status(),
ProtoIsOk());
@@ -1775,10 +1773,10 @@ TEST_F(IcingSearchEngineTest, DeleteBySchemaType) {
property->set_property_name("subject");
property->set_data_type(PropertyConfigProto::DataType::STRING);
property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- property->mutable_indexing_config()->set_term_match_type(
+ property->mutable_string_indexing_config()->set_term_match_type(
TermMatchType::EXACT_ONLY);
- property->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
+ property->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
// Add an message type
type = schema.add_types();
type->set_schema_type("message");
@@ -1786,10 +1784,10 @@ TEST_F(IcingSearchEngineTest, DeleteBySchemaType) {
property->set_property_name("body");
property->set_data_type(PropertyConfigProto::DataType::STRING);
property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- property->mutable_indexing_config()->set_term_match_type(
+ property->mutable_string_indexing_config()->set_term_match_type(
TermMatchType::EXACT_ONLY);
- property->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
+ property->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
DocumentProto document1 =
DocumentBuilder()
.SetKey("namespace1", "uri1")
@@ -2205,20 +2203,20 @@ TEST_F(IcingSearchEngineTest, SetSchemaShouldWorkAfterOptimization) {
new_property2->set_property_name("property2");
new_property2->set_data_type(PropertyConfigProto::DataType::STRING);
new_property2->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- new_property2->mutable_indexing_config()->set_term_match_type(
+ new_property2->mutable_string_indexing_config()->set_term_match_type(
TermMatchType::PREFIX);
- new_property2->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
+ new_property2->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
SchemaProto schema3 = SchemaProto(schema2);
auto new_property3 = schema3.mutable_types(0)->add_properties();
new_property3->set_property_name("property3");
new_property3->set_data_type(PropertyConfigProto::DataType::STRING);
new_property3->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- new_property3->mutable_indexing_config()->set_term_match_type(
+ new_property3->mutable_string_indexing_config()->set_term_match_type(
TermMatchType::PREFIX);
- new_property3->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
+ new_property3->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
{
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
@@ -2463,9 +2461,10 @@ TEST_F(IcingSearchEngineTest, SearchIncludesDocumentsBeforeTtl) {
body->set_property_name("body");
body->set_data_type(PropertyConfigProto::DataType::STRING);
body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- body->mutable_indexing_config()->set_term_match_type(TermMatchType::PREFIX);
- body->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
+ body->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::PREFIX);
+ body->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
DocumentProto document = DocumentBuilder()
.SetKey("namespace", "uri")
@@ -2513,9 +2512,10 @@ TEST_F(IcingSearchEngineTest, SearchDoesntIncludeDocumentsPastTtl) {
body->set_property_name("body");
body->set_data_type(PropertyConfigProto::DataType::STRING);
body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- body->mutable_indexing_config()->set_term_match_type(TermMatchType::PREFIX);
- body->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
+ body->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::PREFIX);
+ body->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
DocumentProto document = DocumentBuilder()
.SetKey("namespace", "uri")
@@ -2613,10 +2613,10 @@ TEST_F(IcingSearchEngineTest, SearchWorksAfterSchemaTypesCompatiblyModified) {
property->set_property_name("body");
property->set_data_type(PropertyConfigProto::DataType::STRING);
property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- property->mutable_indexing_config()->set_term_match_type(
+ property->mutable_string_indexing_config()->set_term_match_type(
TermMatchType::PREFIX);
- property->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
+ property->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
@@ -2869,10 +2869,10 @@ TEST_F(IcingSearchEngineTest, RecoverFromInconsistentSchemaStore) {
property->set_property_name("body");
property->set_data_type(PropertyConfigProto::DataType::STRING);
property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- property->mutable_indexing_config()->set_term_match_type(
+ property->mutable_string_indexing_config()->set_term_match_type(
TermMatchType::PREFIX);
- property->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
+ property->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
property = type->add_properties();
property->set_property_name("additional");
@@ -2916,19 +2916,19 @@ TEST_F(IcingSearchEngineTest, RecoverFromInconsistentSchemaStore) {
property->set_property_name("body");
property->set_data_type(PropertyConfigProto::DataType::STRING);
property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- property->mutable_indexing_config()->set_term_match_type(
+ property->mutable_string_indexing_config()->set_term_match_type(
TermMatchType::PREFIX);
- property->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
+ property->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
property = type->add_properties();
property->set_property_name("additional");
property->set_data_type(PropertyConfigProto::DataType::STRING);
property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- property->mutable_indexing_config()->set_term_match_type(
+ property->mutable_string_indexing_config()->set_term_match_type(
TermMatchType::PREFIX);
- property->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
+ property->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
@@ -3705,9 +3705,10 @@ TEST_F(IcingSearchEngineTest, SetSchemaCanDetectPreviousSchemaWasLost) {
body->set_property_name("body");
body->set_data_type(PropertyConfigProto::DataType::STRING);
body->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- body->mutable_indexing_config()->set_term_match_type(TermMatchType::PREFIX);
- body->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
+ body->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::PREFIX);
+ body->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
// Make an incompatible schema, a previously OPTIONAL field is REQUIRED
SchemaProto incompatible_schema = schema;
@@ -4218,10 +4219,10 @@ TEST_F(IcingSearchEngineTest, Hyphens) {
prop->set_property_name("foo");
prop->set_data_type(PropertyConfigProto::DataType::STRING);
prop->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- prop->mutable_indexing_config()->set_term_match_type(
+ prop->mutable_string_indexing_config()->set_term_match_type(
TermMatchType::EXACT_ONLY);
- prop->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
+ prop->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
DocumentProto document_one =
@@ -4281,7 +4282,7 @@ TEST_F(IcingSearchEngineTest, RestoreIndex) {
EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
}
- // 2. Delete the index file to trigger RestoreIndex.
+ // 2. Delete the index file to trigger RestoreIndexIfNeeded.
std::string idx_subdir = GetIndexDir() + "/idx";
filesystem()->DeleteDirectoryRecursively(idx_subdir.c_str());
@@ -4474,7 +4475,7 @@ TEST_F(IcingSearchEngineTest, IndexingDocMergeFailureResets) {
EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
}
- // 2. Delete the index file to trigger RestoreIndex.
+ // 2. Delete the index file to trigger RestoreIndexIfNeeded.
std::string idx_subdir = GetIndexDir() + "/idx";
filesystem()->DeleteDirectoryRecursively(idx_subdir.c_str());
@@ -4522,6 +4523,691 @@ TEST_F(IcingSearchEngineTest, IndexingDocMergeFailureResets) {
}
}
+TEST_F(IcingSearchEngineTest, InitializeShouldLogFunctionLatency) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats().latency_ms(),
+ Gt(0));
+}
+
+TEST_F(IcingSearchEngineTest, InitializeShouldLogNumberOfDocuments) {
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("icing", "fake_type/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .Build();
+
+ {
+ // Initialize and put a document.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(
+ initialize_result_proto.native_initialize_stats().num_documents(),
+ Eq(0));
+
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ }
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(
+ initialize_result_proto.native_initialize_stats().num_documents(),
+ Eq(1));
+
+ // Put another document.
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ }
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(
+ initialize_result_proto.native_initialize_stats().num_documents(),
+ Eq(2));
+ }
+}
+
+TEST_F(IcingSearchEngineTest,
+ InitializeShouldNotLogRecoveryCauseForFirstTimeInitialize) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .document_store_recovery_cause(),
+ Eq(NativeInitializeStats::NONE));
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .document_store_recovery_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .document_store_data_status(),
+ Eq(NativeInitializeStats::NO_DATA_LOSS));
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .index_restoration_cause(),
+ Eq(NativeInitializeStats::NONE));
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .index_restoration_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .schema_store_recovery_cause(),
+ Eq(NativeInitializeStats::NONE));
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .schema_store_recovery_latency_ms(),
+ Eq(0));
+}
+
+TEST_F(IcingSearchEngineTest, InitializeShouldLogRecoveryCausePartialDataLoss) {
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .Build();
+
+ {
+ // Initialize and put a document.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ }
+
+ {
+ // Append a non-checksummed document. This will mess up the checksum of the
+ // proto log, forcing it to rewind and later return a DATA_LOSS error.
+ const std::string serialized_document = document.SerializeAsString();
+ const std::string document_log_file =
+ absl_ports::StrCat(GetDocumentDir(), "/document_log");
+
+ int64_t file_size = filesystem()->GetFileSize(document_log_file.c_str());
+ filesystem()->PWrite(document_log_file.c_str(), file_size,
+ serialized_document.data(),
+ serialized_document.size());
+ }
+
+ {
+ // Document store will rewind to previous checkpoint. The cause should be
+ // DATA_LOSS and the data status should be PARTIAL_LOSS.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .document_store_recovery_cause(),
+ Eq(NativeInitializeStats::DATA_LOSS));
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .document_store_recovery_latency_ms(),
+ Gt(0));
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .document_store_data_status(),
+ Eq(NativeInitializeStats::PARTIAL_LOSS));
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .index_restoration_cause(),
+ Eq(NativeInitializeStats::NONE));
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .index_restoration_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .schema_store_recovery_cause(),
+ Eq(NativeInitializeStats::NONE));
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .schema_store_recovery_latency_ms(),
+ Eq(0));
+ }
+}
+
+TEST_F(IcingSearchEngineTest,
+ InitializeShouldLogRecoveryCauseCompleteDataLoss) {
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("icing", "fake_type/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .Build();
+
+ {
+ // Initialize and put a document.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ }
+
+ {
+ // Modify the document log checksum to trigger a complete document log
+ // rewind.
+ const std::string document_log_file =
+ absl_ports::StrCat(GetDocumentDir(), "/document_log");
+
+ FileBackedProtoLog<DocumentWrapper>::Header document_log_header;
+ filesystem()->PRead(document_log_file.c_str(), &document_log_header,
+ sizeof(FileBackedProtoLog<DocumentWrapper>::Header),
+ /*offset=*/0);
+ // Set a garbage checksum.
+ document_log_header.log_checksum = 10;
+ document_log_header.header_checksum =
+ document_log_header.CalculateHeaderChecksum();
+ filesystem()->PWrite(document_log_file.c_str(), /*offset=*/0,
+ &document_log_header,
+ sizeof(FileBackedProtoLog<DocumentWrapper>::Header));
+ }
+
+ {
+ // Document store will completely rewind. The cause should be DATA_LOSS and
+ // the data status should be COMPLETE_LOSS.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .document_store_recovery_cause(),
+ Eq(NativeInitializeStats::DATA_LOSS));
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .document_store_recovery_latency_ms(),
+ Gt(0));
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .document_store_data_status(),
+ Eq(NativeInitializeStats::COMPLETE_LOSS));
+ // The complete rewind of ground truth causes the mismatch of total
+ // checksum, so index should be restored.
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .index_restoration_cause(),
+ Eq(NativeInitializeStats::TOTAL_CHECKSUM_MISMATCH));
+ // Here we don't check index_restoration_latency_ms because the index
+ // restoration is super fast when document store is empty. We won't get a
+ // latency that is greater than 1 ms.
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .schema_store_recovery_cause(),
+ Eq(NativeInitializeStats::NONE));
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .schema_store_recovery_latency_ms(),
+ Eq(0));
+ }
+}
+
+TEST_F(IcingSearchEngineTest,
+ InitializeShouldLogRecoveryCauseInconsistentWithGroundTruth) {
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .Build();
+ {
+ // Initialize and put a document.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ }
+
+ {
+ // Delete the index file to trigger RestoreIndexIfNeeded.
+ std::string idx_subdir = GetIndexDir() + "/idx";
+ filesystem()->DeleteDirectoryRecursively(idx_subdir.c_str());
+ }
+
+ {
+ // Index is empty but ground truth is not. Index should be restored due to
+ // the inconsistency.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .index_restoration_cause(),
+ Eq(NativeInitializeStats::INCONSISTENT_WITH_GROUND_TRUTH));
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .index_restoration_latency_ms(),
+ Gt(0));
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .document_store_recovery_cause(),
+ Eq(NativeInitializeStats::NONE));
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .document_store_recovery_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .document_store_data_status(),
+ Eq(NativeInitializeStats::NO_DATA_LOSS));
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .schema_store_recovery_cause(),
+ Eq(NativeInitializeStats::NONE));
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .schema_store_recovery_latency_ms(),
+ Eq(0));
+ }
+}
+
+TEST_F(IcingSearchEngineTest,
+ InitializeShouldLogRecoveryCauseTotalChecksumMismatch) {
+ {
+ // Initialize and index some documents.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // We need to index enough documents to make
+ // DocumentStore::UpdateSchemaStore() run longer than 1 ms.
+ for (int i = 0; i < 50; ++i) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/" + std::to_string(i))
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .Build();
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ }
+ }
+
+ {
+ // Change the header's checksum value to a random value.
+ uint32_t invalid_checksum = 1;
+ filesystem()->PWrite(GetHeaderFilename().c_str(),
+ offsetof(IcingSearchEngine::Header, checksum),
+ &invalid_checksum, sizeof(invalid_checksum));
+ }
+
+ {
+ // Both document store and index should be recovered from checksum mismatch.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .index_restoration_cause(),
+ Eq(NativeInitializeStats::TOTAL_CHECKSUM_MISMATCH));
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .index_restoration_latency_ms(),
+ Gt(0));
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .document_store_recovery_cause(),
+ Eq(NativeInitializeStats::TOTAL_CHECKSUM_MISMATCH));
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .document_store_recovery_latency_ms(),
+ Gt(0));
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .document_store_data_status(),
+ Eq(NativeInitializeStats::NO_DATA_LOSS));
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .schema_store_recovery_cause(),
+ Eq(NativeInitializeStats::NONE));
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .schema_store_recovery_latency_ms(),
+ Eq(0));
+ }
+}
+
+TEST_F(IcingSearchEngineTest, InitializeShouldLogRecoveryCauseIndexIOError) {
+ {
+ // Initialize and index some documents.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // We need to index enough documents to make RestoreIndexIfNeeded() run
+ // longer than 1 ms.
+ for (int i = 0; i < 50; ++i) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/" + std::to_string(i))
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .Build();
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ }
+ }
+
+ // lambda to fail OpenForWrite on lite index hit buffer once.
+ bool has_failed_already = false;
+ auto open_write_lambda = [this, &has_failed_already](const char* filename) {
+ std::string lite_index_buffer_file_path =
+ absl_ports::StrCat(GetIndexDir(), "/idx/lite.hb");
+ std::string filename_string(filename);
+ if (!has_failed_already && filename_string == lite_index_buffer_file_path) {
+ has_failed_already = true;
+ return -1;
+ }
+ return this->filesystem()->OpenForWrite(filename);
+ };
+
+ auto mock_icing_filesystem = std::make_unique<IcingMockFilesystem>();
+ // This fails Index::Create() once.
+ ON_CALL(*mock_icing_filesystem, OpenForWrite)
+ .WillByDefault(open_write_lambda);
+
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::move(mock_icing_filesystem),
+ std::make_unique<FakeClock>(), GetTestJniCache());
+
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .index_restoration_cause(),
+ Eq(NativeInitializeStats::IO_ERROR));
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .index_restoration_latency_ms(),
+ Gt(0));
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .document_store_recovery_cause(),
+ Eq(NativeInitializeStats::NONE));
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .document_store_recovery_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .document_store_data_status(),
+ Eq(NativeInitializeStats::NO_DATA_LOSS));
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .schema_store_recovery_cause(),
+ Eq(NativeInitializeStats::NONE));
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .schema_store_recovery_latency_ms(),
+ Eq(0));
+}
+
+TEST_F(IcingSearchEngineTest, InitializeShouldLogRecoveryCauseDocStoreIOError) {
+ {
+ // Initialize and index some documents.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // We need to index enough documents to make RestoreIndexIfNeeded() run
+ // longer than 1 ms.
+ for (int i = 0; i < 50; ++i) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/" + std::to_string(i))
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .Build();
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ }
+ }
+
+ // lambda to fail Read on document store header once.
+ bool has_failed_already = false;
+ auto read_lambda = [this, &has_failed_already](const char* filename,
+ void* buf, size_t buf_size) {
+ std::string document_store_header_file_path =
+ absl_ports::StrCat(GetDocumentDir(), "/document_store_header");
+ std::string filename_string(filename);
+ if (!has_failed_already &&
+ filename_string == document_store_header_file_path) {
+ has_failed_already = true;
+ return false;
+ }
+ return this->filesystem()->Read(filename, buf, buf_size);
+ };
+
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ // This fails DocumentStore::InitializeDerivedFiles() once.
+ ON_CALL(*mock_filesystem, Read(A<const char*>(), _, _))
+ .WillByDefault(read_lambda);
+
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::move(mock_filesystem),
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
+
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .document_store_recovery_cause(),
+ Eq(NativeInitializeStats::IO_ERROR));
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .document_store_recovery_latency_ms(),
+ Gt(0));
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .document_store_data_status(),
+ Eq(NativeInitializeStats::NO_DATA_LOSS));
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .index_restoration_cause(),
+ Eq(NativeInitializeStats::NONE));
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .index_restoration_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .schema_store_recovery_cause(),
+ Eq(NativeInitializeStats::NONE));
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .schema_store_recovery_latency_ms(),
+ Eq(0));
+}
+
+TEST_F(IcingSearchEngineTest,
+ InitializeShouldLogRecoveryCauseSchemaStoreIOError) {
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ }
+
+ {
+ // Delete the schema store header file to trigger an I/O error.
+ std::string schema_store_header_file_path =
+ GetSchemaDir() + "/schema_store_header";
+ filesystem()->DeleteFile(schema_store_header_file_path.c_str());
+ }
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .schema_store_recovery_cause(),
+ Eq(NativeInitializeStats::IO_ERROR));
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .schema_store_recovery_latency_ms(),
+ Gt(0));
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .document_store_recovery_cause(),
+ Eq(NativeInitializeStats::NONE));
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .document_store_recovery_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .document_store_data_status(),
+ Eq(NativeInitializeStats::NO_DATA_LOSS));
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .index_restoration_cause(),
+ Eq(NativeInitializeStats::NONE));
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .index_restoration_latency_ms(),
+ Eq(0));
+ }
+}
+
+TEST_F(IcingSearchEngineTest, InitializeShouldLogNumberOfSchemaTypes) {
+ {
+ // Initialize an empty storage.
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ // There should be 0 schema types.
+ EXPECT_THAT(
+ initialize_result_proto.native_initialize_stats().num_schema_types(),
+ Eq(0));
+
+ // Set a schema with one type config.
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ }
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ // There should be 1 schema type.
+ EXPECT_THAT(
+ initialize_result_proto.native_initialize_stats().num_schema_types(),
+ Eq(1));
+
+ // Create and set a schema with two type configs: Email and Message.
+ SchemaProto schema = CreateEmailSchema();
+
+ auto type = schema.add_types();
+ type->set_schema_type("Message");
+ auto body = type->add_properties();
+ body->set_property_name("body");
+ body->set_data_type(PropertyConfigProto::DataType::STRING);
+ body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ body->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::PREFIX);
+ body->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ }
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(
+ initialize_result_proto.native_initialize_stats().num_schema_types(),
+ Eq(2));
+ }
+}
+
+TEST_F(IcingSearchEngineTest, PutDocumentShouldLogFunctionLatency) {
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .Build();
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ PutResultProto put_result_proto = icing.Put(document);
+ EXPECT_THAT(put_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(put_result_proto.native_put_document_stats().latency_ms(), Gt(0));
+}
+
+TEST_F(IcingSearchEngineTest, PutDocumentShouldLogDocumentStoreStats) {
+ // Create a large enough document so that document_store_latency_ms can be
+ // longer than 1 ms.
+ std::default_random_engine random;
+ std::string random_string_10000 =
+ RandomString(kAlNumAlphabet, /*len=*/10000, &random);
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .AddStringProperty("body", random_string_10000)
+ .Build();
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ PutResultProto put_result_proto = icing.Put(document);
+ EXPECT_THAT(put_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(
+ put_result_proto.native_put_document_stats().document_store_latency_ms(),
+ Gt(0));
+ EXPECT_THAT(put_result_proto.native_put_document_stats().document_size(),
+ Eq(document.ByteSizeLong()));
+}
+
+TEST_F(IcingSearchEngineTest, PutDocumentShouldLogIndexingStats) {
+ // Create a large enough document so that index_latency_ms can be longer than
+ // 1 ms.
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .Build();
+
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ PutResultProto put_result_proto = icing.Put(document);
+ EXPECT_THAT(put_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(put_result_proto.native_put_document_stats().index_latency_ms(),
+ Gt(0));
+ // No merge should happen.
+ EXPECT_THAT(
+ put_result_proto.native_put_document_stats().index_merge_latency_ms(),
+ Eq(0));
+ // Number of tokens should not exceed.
+ EXPECT_FALSE(put_result_proto.native_put_document_stats()
+ .tokenization_stats()
+ .exceeded_max_token_num());
+ // kIpsumText has 137 tokens.
+ EXPECT_THAT(put_result_proto.native_put_document_stats()
+ .tokenization_stats()
+ .num_tokens_indexed(),
+ Eq(137));
+}
+
+TEST_F(IcingSearchEngineTest, PutDocumentShouldLogWhetherNumTokensExceeds) {
+ // Create a document with 2 tokens.
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .Build();
+
+ // Create an icing instance with max_tokens_per_doc = 1.
+ IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+ icing_options.set_max_tokens_per_doc(1);
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ PutResultProto put_result_proto = icing.Put(document);
+ EXPECT_THAT(put_result_proto.status(), ProtoIsOk());
+ // Number of tokens(2) exceeds the max allowed value(1).
+ EXPECT_TRUE(put_result_proto.native_put_document_stats()
+ .tokenization_stats()
+ .exceeded_max_token_num());
+ EXPECT_THAT(put_result_proto.native_put_document_stats()
+ .tokenization_stats()
+ .num_tokens_indexed(),
+ Eq(1));
+}
+
+TEST_F(IcingSearchEngineTest, PutDocumentShouldLogIndexMergeLatency) {
+ // Create 2 large enough documents so that index_merge_latency_ms can be
+ // longer than 1 ms.
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("icing", "fake_type/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .Build();
+
+ // Create an icing instance with index_merge_size = document1's size.
+ IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+ icing_options.set_index_merge_size(document1.ByteSizeLong());
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
+
+ // Putting document2 should trigger an index merge.
+ PutResultProto put_result_proto = icing.Put(document2);
+ EXPECT_THAT(put_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(
+ put_result_proto.native_put_document_stats().index_merge_latency_ms(),
+ Gt(0));
+}
+
} // namespace
} // namespace lib
} // namespace icing
diff --git a/icing/index/index-processor.cc b/icing/index/index-processor.cc
index 47111ad..9e57993 100644
--- a/icing/index/index-processor.cc
+++ b/icing/index/index-processor.cc
@@ -37,6 +37,7 @@
#include "icing/tokenization/tokenizer.h"
#include "icing/transform/normalizer.h"
#include "icing/util/status-macros.h"
+#include "icing/util/timer.h"
namespace icing {
namespace lib {
@@ -56,7 +57,10 @@ IndexProcessor::Create(const SchemaStore* schema_store,
}
libtextclassifier3::Status IndexProcessor::IndexDocument(
- const DocumentProto& document, DocumentId document_id) {
+ const DocumentProto& document, DocumentId document_id,
+ NativePutDocumentStats* put_document_stats) {
+ Timer index_timer;
+
if (index_->last_added_document_id() != kInvalidDocumentId &&
document_id <= index_->last_added_document_id()) {
return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
@@ -80,6 +84,12 @@ libtextclassifier3::Status IndexProcessor::IndexDocument(
tokenizer->Tokenize(subcontent));
while (itr->Advance()) {
if (++num_tokens > options_.max_tokens_per_document) {
+ if (put_document_stats != nullptr) {
+ put_document_stats->mutable_tokenization_stats()
+ ->set_exceeded_max_token_num(true);
+ put_document_stats->mutable_tokenization_stats()
+ ->set_num_tokens_indexed(options_.max_tokens_per_document);
+ }
switch (options_.token_limit_behavior) {
case Options::TokenLimitBehavior::kReturnError:
return absl_ports::ResourceExhaustedError(
@@ -106,10 +116,20 @@ libtextclassifier3::Status IndexProcessor::IndexDocument(
}
}
+ if (put_document_stats != nullptr) {
+ put_document_stats->set_index_latency_ms(
+ index_timer.GetElapsedMilliseconds());
+ put_document_stats->mutable_tokenization_stats()->set_num_tokens_indexed(
+ num_tokens);
+ }
+
// Merge if necessary.
if (overall_status.ok() && index_->WantsMerge()) {
ICING_VLOG(1) << "Merging the index at docid " << document_id << ".";
+
+ Timer merge_timer;
libtextclassifier3::Status merge_status = index_->Merge();
+
if (!merge_status.ok()) {
ICING_LOG(ERROR) << "Index merging failed. Clearing index.";
if (!index_->Reset().ok()) {
@@ -123,6 +143,11 @@ libtextclassifier3::Status IndexProcessor::IndexDocument(
merge_status.error_code(), merge_status.error_message().c_str()));
}
}
+
+ if (put_document_stats != nullptr) {
+ put_document_stats->set_index_merge_latency_ms(
+ merge_timer.GetElapsedMilliseconds());
+ }
}
return overall_status;
diff --git a/icing/index/index-processor.h b/icing/index/index-processor.h
index 083efea..91719d0 100644
--- a/icing/index/index-processor.h
+++ b/icing/index/index-processor.h
@@ -69,6 +69,9 @@ class IndexProcessor {
// Indexing a document *may* trigger an index merge. If a merge fails, then
// all content in the index will be lost.
//
+ // If put_document_stats is present, the fields related to indexing will be
+ // populated.
+ //
// Returns:
// INVALID_ARGUMENT if document_id is less than the document_id of a
// previously indexed document or tokenization fails.
@@ -77,8 +80,9 @@ class IndexProcessor {
// cleared as a result.
// NOT_FOUND if there is no definition for the document's schema type.
// INTERNAL_ERROR if any other errors occur
- libtextclassifier3::Status IndexDocument(const DocumentProto& document,
- DocumentId document_id);
+ libtextclassifier3::Status IndexDocument(
+ const DocumentProto& document, DocumentId document_id,
+ NativePutDocumentStats* put_document_stats = nullptr);
private:
IndexProcessor(const SchemaStore* schema_store,
diff --git a/icing/index/index-processor_benchmark.cc b/icing/index/index-processor_benchmark.cc
index a9b298e..584cb9b 100644
--- a/icing/index/index-processor_benchmark.cc
+++ b/icing/index/index-processor_benchmark.cc
@@ -76,10 +76,10 @@ void CreateFakeTypeConfig(SchemaTypeConfigProto* type_config) {
IcingStringUtil::StringPrintf("p%d", i)); // p0 - p9
property->set_data_type(PropertyConfigProto::DataType::STRING);
property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- property->mutable_indexing_config()->set_term_match_type(
+ property->mutable_string_indexing_config()->set_term_match_type(
TermMatchType::EXACT_ONLY);
- property->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
+ property->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
}
}
diff --git a/icing/index/index-processor_test.cc b/icing/index/index-processor_test.cc
index 84c822b..e193842 100644
--- a/icing/index/index-processor_test.cc
+++ b/icing/index/index-processor_test.cc
@@ -166,9 +166,10 @@ class IndexProcessorTest : public Test {
prop->set_property_name(std::string(name));
prop->set_data_type(type);
prop->set_cardinality(cardinality);
- prop->mutable_indexing_config()->set_term_match_type(term_match_type);
- prop->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
+ prop->mutable_string_indexing_config()->set_term_match_type(
+ term_match_type);
+ prop->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
}
static void AddNonIndexedProperty(std::string_view name, DataType::Code type,
@@ -210,6 +211,7 @@ class IndexProcessorTest : public Test {
prop->set_data_type(DataType::DOCUMENT);
prop->set_cardinality(Cardinality::OPTIONAL);
prop->set_schema_type(std::string(kNestedType));
+ prop->mutable_document_indexing_config()->set_index_nested_properties(true);
// Add nested type
type_config = schema.add_types();
diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
index df79c6d..b29217c 100644
--- a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
@@ -64,10 +64,10 @@ class DocHitInfoIteratorSectionRestrictTest : public ::testing::Test {
property->set_property_name(indexed_property_);
property->set_data_type(PropertyConfigProto::DataType::STRING);
property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- property->mutable_indexing_config()->set_term_match_type(
+ property->mutable_string_indexing_config()->set_term_match_type(
TermMatchType::EXACT_ONLY);
- property->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
+ property->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
// First and only indexed property, so it gets the first id of 0
indexed_section_id_ = 0;
diff --git a/icing/index/main/doc-hit-info-iterator-term-main.cc b/icing/index/main/doc-hit-info-iterator-term-main.cc
index b3696b7..a60764d 100644
--- a/icing/index/main/doc-hit-info-iterator-term-main.cc
+++ b/icing/index/main/doc-hit-info-iterator-term-main.cc
@@ -54,8 +54,12 @@ libtextclassifier3::Status DocHitInfoIteratorTermMain::Advance() {
// next posting list in the chain.
libtextclassifier3::Status status = RetrieveMoreHits();
if (!status.ok()) {
- ICING_LOG(ERROR) << "Failed to retrieve more hits "
- << status.error_message();
+ if (!absl_ports::IsNotFound(status)) {
+ // NOT_FOUND is expected to happen (not every term will be in the main
+ // index!). Other errors are worth logging.
+ ICING_LOG(ERROR) << "Failed to retrieve more hits "
+ << status.error_message();
+ }
return absl_ports::ResourceExhaustedError(
"No more DocHitInfos in iterator");
}
diff --git a/icing/query/query-processor_test.cc b/icing/query/query-processor_test.cc
index 2edc624..16bd120 100644
--- a/icing/query/query-processor_test.cc
+++ b/icing/query/query-processor_test.cc
@@ -70,10 +70,10 @@ void AddIndexedProperty(SchemaTypeConfigProto* type_config, std::string name) {
property_config->set_property_name(name);
property_config->set_data_type(PropertyConfigProto::DataType::STRING);
property_config->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- property_config->mutable_indexing_config()->set_term_match_type(
+ property_config->mutable_string_indexing_config()->set_term_match_type(
TermMatchType::EXACT_ONLY);
- property_config->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
+ property_config->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
}
void AddUnindexedProperty(SchemaTypeConfigProto* type_config,
diff --git a/icing/result/result-retriever_test.cc b/icing/result/result-retriever_test.cc
index 0d2c2c5..1d1f824 100644
--- a/icing/result/result-retriever_test.cc
+++ b/icing/result/result-retriever_test.cc
@@ -77,18 +77,18 @@ class ResultRetrieverTest : public testing::Test {
prop_config->set_property_name("subject");
prop_config->set_data_type(PropertyConfigProto::DataType::STRING);
prop_config->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- prop_config->mutable_indexing_config()->set_term_match_type(
+ prop_config->mutable_string_indexing_config()->set_term_match_type(
TermMatchType::PREFIX);
- prop_config->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
+ prop_config->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
prop_config = type_config->add_properties();
prop_config->set_property_name("body");
prop_config->set_data_type(PropertyConfigProto::DataType::STRING);
prop_config->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- prop_config->mutable_indexing_config()->set_term_match_type(
+ prop_config->mutable_string_indexing_config()->set_term_match_type(
TermMatchType::EXACT_ONLY);
- prop_config->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
+ prop_config->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
}
diff --git a/icing/result/snippet-retriever_test.cc b/icing/result/snippet-retriever_test.cc
index 676ea92..e552cf2 100644
--- a/icing/result/snippet-retriever_test.cc
+++ b/icing/result/snippet-retriever_test.cc
@@ -76,18 +76,18 @@ class SnippetRetrieverTest : public testing::Test {
prop_config->set_property_name("subject");
prop_config->set_data_type(PropertyConfigProto::DataType::STRING);
prop_config->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- prop_config->mutable_indexing_config()->set_term_match_type(
+ prop_config->mutable_string_indexing_config()->set_term_match_type(
TermMatchType::PREFIX);
- prop_config->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
+ prop_config->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
prop_config = type_config->add_properties();
prop_config->set_property_name("body");
prop_config->set_data_type(PropertyConfigProto::DataType::STRING);
prop_config->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- prop_config->mutable_indexing_config()->set_term_match_type(
+ prop_config->mutable_string_indexing_config()->set_term_match_type(
TermMatchType::EXACT_ONLY);
- prop_config->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
+ prop_config->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
ICING_ASSERT_OK(schema_store_->SetSchema(schema));
ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create(
diff --git a/icing/schema/schema-store.cc b/icing/schema/schema-store.cc
index 34ccf22..9173031 100644
--- a/icing/schema/schema-store.cc
+++ b/icing/schema/schema-store.cc
@@ -39,6 +39,7 @@
#include "icing/util/crc32.h"
#include "icing/util/logging.h"
#include "icing/util/status-macros.h"
+#include "icing/util/timer.h"
namespace icing {
namespace lib {
@@ -103,12 +104,13 @@ std::unordered_set<SchemaTypeId> SchemaTypeIdsChanged(
} // namespace
libtextclassifier3::StatusOr<std::unique_ptr<SchemaStore>> SchemaStore::Create(
- const Filesystem* filesystem, const std::string& base_dir) {
+ const Filesystem* filesystem, const std::string& base_dir,
+ NativeInitializeStats* initialize_stats) {
ICING_RETURN_ERROR_IF_NULL(filesystem);
std::unique_ptr<SchemaStore> schema_store =
std::unique_ptr<SchemaStore>(new SchemaStore(filesystem, base_dir));
- ICING_RETURN_IF_ERROR(schema_store->Initialize());
+ ICING_RETURN_IF_ERROR(schema_store->Initialize(initialize_stats));
return schema_store;
}
@@ -125,7 +127,8 @@ SchemaStore::~SchemaStore() {
}
}
-libtextclassifier3::Status SchemaStore::Initialize() {
+libtextclassifier3::Status SchemaStore::Initialize(
+ NativeInitializeStats* initialize_stats) {
auto schema_proto_or = GetSchema();
if (absl_ports::IsNotFound(schema_proto_or.status())) {
// Don't have an existing schema proto, that's fine
@@ -139,10 +142,22 @@ libtextclassifier3::Status SchemaStore::Initialize() {
ICING_VLOG(3)
<< "Couldn't find derived files or failed to initialize them, "
"regenerating derived files for SchemaStore.";
+ Timer regenerate_timer;
+ if (initialize_stats != nullptr) {
+ initialize_stats->set_schema_store_recovery_cause(
+ NativeInitializeStats::IO_ERROR);
+ }
ICING_RETURN_IF_ERROR(RegenerateDerivedFiles());
+ if (initialize_stats != nullptr) {
+ initialize_stats->set_schema_store_recovery_latency_ms(
+ regenerate_timer.GetElapsedMilliseconds());
+ }
}
initialized_ = true;
+ if (initialize_stats != nullptr) {
+ initialize_stats->set_num_schema_types(type_config_map_.size());
+ }
return libtextclassifier3::Status::OK;
}
diff --git a/icing/schema/schema-store.h b/icing/schema/schema-store.h
index f5c6588..76f36b4 100644
--- a/icing/schema/schema-store.h
+++ b/icing/schema/schema-store.h
@@ -27,6 +27,7 @@
#include "icing/file/file-backed-proto.h"
#include "icing/file/filesystem.h"
#include "icing/proto/document.pb.h"
+#include "icing/proto/logging.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/schema/schema-util.h"
#include "icing/schema/section-manager.h"
@@ -104,12 +105,16 @@ class SchemaStore {
// outlive the created SchemaStore instance. The base_dir must already exist.
// There does not need to be an existing schema already.
//
+ // If initialize_stats is present, the fields related to SchemaStore will be
+ // populated.
+ //
// Returns:
// A SchemaStore on success
// FAILED_PRECONDITION on any null pointer input
// INTERNAL_ERROR on any IO errors
static libtextclassifier3::StatusOr<std::unique_ptr<SchemaStore>> Create(
- const Filesystem* filesystem, const std::string& base_dir);
+ const Filesystem* filesystem, const std::string& base_dir,
+ NativeInitializeStats* initialize_stats = nullptr);
// Not copyable
SchemaStore(const SchemaStore&) = delete;
@@ -229,7 +234,8 @@ class SchemaStore {
// Returns:
// OK on success
// INTERNAL_ERROR on IO error
- libtextclassifier3::Status Initialize();
+ libtextclassifier3::Status Initialize(
+ NativeInitializeStats* initialize_stats);
// Creates sub-components and verifies the integrity of each sub-component.
//
diff --git a/icing/schema/schema-store_test.cc b/icing/schema/schema-store_test.cc
index 957fd89..4a458b2 100644
--- a/icing/schema/schema-store_test.cc
+++ b/icing/schema/schema-store_test.cc
@@ -56,10 +56,10 @@ class SchemaStoreTest : public ::testing::Test {
property->set_property_name("subject");
property->set_data_type(PropertyConfigProto::DataType::STRING);
property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- property->mutable_indexing_config()->set_term_match_type(
+ property->mutable_string_indexing_config()->set_term_match_type(
TermMatchType::EXACT_ONLY);
- property->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
+ property->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
}
void TearDown() override {
@@ -444,10 +444,10 @@ TEST_F(SchemaStoreTest, SetSchemaThatRequiresReindexingOk) {
// Make a previously unindexed property indexed
property = schema.mutable_types(0)->mutable_properties(0);
- property->mutable_indexing_config()->set_term_match_type(
+ property->mutable_string_indexing_config()->set_term_match_type(
TermMatchType::EXACT_ONLY);
- property->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
+ property->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
// With a new indexed property, we'll need to reindex
result.index_incompatible = true;
diff --git a/icing/schema/schema-util.cc b/icing/schema/schema-util.cc
index 12f7c4c..a755e88 100644
--- a/icing/schema/schema-util.cc
+++ b/icing/schema/schema-util.cc
@@ -37,10 +37,6 @@ namespace lib {
namespace {
-// Data types that can be indexed. This follows rule 11 of SchemaUtil::Validate
-static std::unordered_set<PropertyConfigProto::DataType::Code>
- kIndexableDataTypes = {PropertyConfigProto::DataType::STRING};
-
bool IsCardinalityCompatible(const PropertyConfigProto& old_property,
const PropertyConfigProto& new_property) {
if (old_property.cardinality() < new_property.cardinality()) {
@@ -91,8 +87,8 @@ bool IsPropertyCompatible(const PropertyConfigProto& old_property,
IsCardinalityCompatible(old_property, new_property);
}
-bool IsTermMatchTypeCompatible(const IndexingConfig& old_indexed,
- const IndexingConfig& new_indexed) {
+bool IsTermMatchTypeCompatible(const StringIndexingConfig& old_indexed,
+ const StringIndexingConfig& new_indexed) {
return old_indexed.term_match_type() == new_indexed.term_match_type() &&
old_indexed.tokenizer_type() == new_indexed.tokenizer_type();
}
@@ -162,9 +158,11 @@ libtextclassifier3::Status SchemaUtil::Validate(const SchemaProto& schema) {
ICING_RETURN_IF_ERROR(ValidateCardinality(property_config.cardinality(),
schema_type, property_name));
- ICING_RETURN_IF_ERROR(
- ValidateIndexingConfig(property_config.indexing_config(), data_type,
- schema_type, property_name));
+ if (data_type == PropertyConfigProto::DataType::STRING) {
+ ICING_RETURN_IF_ERROR(ValidateStringIndexingConfig(
+ property_config.string_indexing_config(), data_type, schema_type,
+ property_name));
+ }
}
}
@@ -239,33 +237,26 @@ libtextclassifier3::Status SchemaUtil::ValidateCardinality(
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::Status SchemaUtil::ValidateIndexingConfig(
- const IndexingConfig& config, PropertyConfigProto::DataType::Code data_type,
- std::string_view schema_type, std::string_view property_name) {
+libtextclassifier3::Status SchemaUtil::ValidateStringIndexingConfig(
+ const StringIndexingConfig& config,
+ PropertyConfigProto::DataType::Code data_type, std::string_view schema_type,
+ std::string_view property_name) {
if (config.term_match_type() == TermMatchType::UNKNOWN &&
- config.tokenizer_type() != IndexingConfig::TokenizerType::NONE) {
+ config.tokenizer_type() != StringIndexingConfig::TokenizerType::NONE) {
// They set a tokenizer type, but no term match type.
return absl_ports::InvalidArgumentError(absl_ports::StrCat(
- "Indexed property '", schema_type, ".", property_name,
+ "Indexed string property '", schema_type, ".", property_name,
"' cannot have a term match type UNKNOWN"));
}
if (config.term_match_type() != TermMatchType::UNKNOWN &&
- config.tokenizer_type() == IndexingConfig::TokenizerType::NONE) {
+ config.tokenizer_type() == StringIndexingConfig::TokenizerType::NONE) {
// They set a term match type, but no tokenizer type
return absl_ports::InvalidArgumentError(
- absl_ports::StrCat("Indexed property '", property_name,
+ absl_ports::StrCat("Indexed string property '", property_name,
"' cannot have a tokenizer type of NONE"));
}
- if (config.term_match_type() != TermMatchType::UNKNOWN &&
- kIndexableDataTypes.find(data_type) == kIndexableDataTypes.end()) {
- // They want this section indexed, but it's not an indexable data type.
- return absl_ports::InvalidArgumentError(absl_ports::StrCat(
- "Cannot index non-string data type for schema property '", schema_type,
- ".", property_name, "'"));
- }
-
return libtextclassifier3::Status::OK;
}
@@ -293,7 +284,7 @@ SchemaUtil::ParsedPropertyConfigs SchemaUtil::ParsePropertyConfigs(
// A non-default term_match_type indicates that this property is meant to be
// indexed.
- if (property_config.indexing_config().term_match_type() !=
+ if (property_config.string_indexing_config().term_match_type() !=
TermMatchType::UNKNOWN) {
parsed_property_configs.num_indexed_properties++;
}
@@ -368,14 +359,15 @@ const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta(
// A non-default term_match_type indicates that this property is meant to
// be indexed.
- if (old_property_config.indexing_config().term_match_type() !=
+ if (old_property_config.string_indexing_config().term_match_type() !=
TermMatchType::UNKNOWN) {
++old_indexed_properties;
}
// Any change in the indexed property requires a reindexing
- if (!IsTermMatchTypeCompatible(old_property_config.indexing_config(),
- new_property_config->indexing_config())) {
+ if (!IsTermMatchTypeCompatible(
+ old_property_config.string_indexing_config(),
+ new_property_config->string_indexing_config())) {
schema_delta.index_incompatible = true;
}
}
diff --git a/icing/schema/schema-util.h b/icing/schema/schema-util.h
index dfa3aa2..ccb2eea 100644
--- a/icing/schema/schema-util.h
+++ b/icing/schema/schema-util.h
@@ -80,7 +80,7 @@ class SchemaUtil {
// 9. PropertyConfigProtos.schema_type's must correspond to a
// SchemaTypeConfigProto.schema_type
// 10. Property names can only be alphanumeric.
- // 11. Only STRING data types are indexed
+ // 11. Any STRING data types have a valid string_indexing_config
//
// Returns:
// ALREADY_EXISTS for case 1 and 2
@@ -137,6 +137,20 @@ class SchemaUtil {
static const SchemaDelta ComputeCompatibilityDelta(
const SchemaProto& old_schema, const SchemaProto& new_schema);
+ // Validates the 'property_name' field.
+ // 1. Can't be an empty string
+ // 2. Can only contain alphanumeric characters
+ //
+ // NOTE: schema_type is only used for logging. It is not necessary to populate
+ // it.
+ //
+ // RETURNS:
+ // - OK if property_name is valid
+ // - INVALID_ARGUMENT if property name is empty or contains a
+ // non-alphanumeric character.
+ static libtextclassifier3::Status ValidatePropertyName(
+ std::string_view property_name, std::string_view schema_type = "");
+
private:
// Validates the 'schema_type' field
//
@@ -146,16 +160,6 @@ class SchemaUtil {
static libtextclassifier3::Status ValidateSchemaType(
std::string_view schema_type);
- // Validates the 'property_name' field.
- // 1. Can't be an empty string
- // 2. Can only contain alphanumeric characters
- //
- // Returns:
- // INVALID_ARGUMENT if any of the rules are not followed
- // OK on success
- static libtextclassifier3::Status ValidatePropertyName(
- std::string_view property_name, std::string_view schema_type);
-
// Validates the 'data_type' field.
//
// Returns:
@@ -174,15 +178,15 @@ class SchemaUtil {
PropertyConfigProto::Cardinality::Code cardinality,
std::string_view schema_type, std::string_view property_name);
- // Checks that the 'indexing_config' satisfies the following rules:
+ // Checks that the 'string_indexing_config' satisfies the following rules:
// 1. Only STRING data types can be indexed
// 2. An indexed property must have a valid tokenizer type
//
// Returns:
// INVALID_ARGUMENT if any of the rules are not followed
// OK on success
- static libtextclassifier3::Status ValidateIndexingConfig(
- const IndexingConfig& config,
+ static libtextclassifier3::Status ValidateStringIndexingConfig(
+ const StringIndexingConfig& config,
PropertyConfigProto::DataType::Code data_type,
std::string_view schema_type, std::string_view property_name);
};
diff --git a/icing/schema/schema-util_test.cc b/icing/schema/schema-util_test.cc
index 6012989..ed3bde7 100644
--- a/icing/schema/schema-util_test.cc
+++ b/icing/schema/schema-util_test.cc
@@ -486,17 +486,17 @@ TEST_F(SchemaUtilTest, ChangingIndexedPropertiesMakesIndexIncompatible) {
schema_delta.index_incompatible = true;
// New schema gained a new indexed property.
- old_property->mutable_indexing_config()->set_term_match_type(
+ old_property->mutable_string_indexing_config()->set_term_match_type(
TermMatchType::UNKNOWN);
- new_property->mutable_indexing_config()->set_term_match_type(
+ new_property->mutable_string_indexing_config()->set_term_match_type(
TermMatchType::EXACT_ONLY);
EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema),
Eq(schema_delta));
// New schema lost an indexed property.
- old_property->mutable_indexing_config()->set_term_match_type(
+ old_property->mutable_string_indexing_config()->set_term_match_type(
TermMatchType::EXACT_ONLY);
- new_property->mutable_indexing_config()->set_term_match_type(
+ new_property->mutable_string_indexing_config()->set_term_match_type(
TermMatchType::UNKNOWN);
EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema),
Eq(schema_delta));
@@ -527,7 +527,7 @@ TEST_F(SchemaUtilTest, AddingNewIndexedPropertyMakesIndexIncompatible) {
new_property->set_property_name("NewIndexedProperty");
new_property->set_data_type(PropertyConfigProto::DataType::STRING);
new_property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- new_property->mutable_indexing_config()->set_term_match_type(
+ new_property->mutable_string_indexing_config()->set_term_match_type(
TermMatchType::EXACT_ONLY);
SchemaUtil::SchemaDelta schema_delta;
@@ -583,15 +583,15 @@ TEST_F(SchemaUtilTest, ValidateStringIndexingConfigShouldHaveTermMatchType) {
prop->set_property_name("Foo");
prop->set_data_type(PropertyConfigProto::DataType::STRING);
prop->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- prop->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
+ prop->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
// Error if we don't set a term match type
EXPECT_THAT(SchemaUtil::Validate(schema),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
// Passes once we set a term match type
- prop->mutable_indexing_config()->set_term_match_type(
+ prop->mutable_string_indexing_config()->set_term_match_type(
TermMatchType::EXACT_ONLY);
EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
}
@@ -605,7 +605,7 @@ TEST_F(SchemaUtilTest, ValidateStringIndexingConfigShouldHaveTokenizer) {
prop->set_property_name("Foo");
prop->set_data_type(PropertyConfigProto::DataType::STRING);
prop->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- prop->mutable_indexing_config()->set_term_match_type(
+ prop->mutable_string_indexing_config()->set_term_match_type(
TermMatchType::EXACT_ONLY);
// Error if we don't set a tokenizer type
@@ -613,190 +613,11 @@ TEST_F(SchemaUtilTest, ValidateStringIndexingConfigShouldHaveTokenizer) {
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
// Passes once we set a tokenizer type
- prop->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
+ prop->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
}
-TEST_F(SchemaUtilTest, ValidateIntPropertyShouldntHaveIndexingConfig) {
- SchemaProto schema;
- auto* type = schema.add_types();
- type->set_schema_type("MyType");
-
- auto* prop = type->add_properties();
- prop->set_property_name("IntProperty");
- prop->set_data_type(PropertyConfigProto::DataType::INT64);
- prop->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
-
- // Passes if it doesn't have indexing config
- EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
-
- // Fails if we try to set an indexing_config.term_match_type
- prop->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- EXPECT_THAT(SchemaUtil::Validate(schema),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
-
- // Fails if we try to set an indexing_config.tokenizer_type
- prop->mutable_indexing_config()->clear_term_match_type();
- prop->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
- EXPECT_THAT(SchemaUtil::Validate(schema),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
-
- // Fails if we try to set an indexing config
- prop->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- prop->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
- EXPECT_THAT(SchemaUtil::Validate(schema),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
-}
-
-TEST_F(SchemaUtilTest, ValidateDoublePropertyShouldntHaveIndexingConfig) {
- SchemaProto schema;
- auto* type = schema.add_types();
- type->set_schema_type("MyType");
-
- auto* prop = type->add_properties();
- prop->set_property_name("DoubleProperty");
- prop->set_data_type(PropertyConfigProto::DataType::DOUBLE);
- prop->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
-
- // Passes if it doesn't have indexing config
- EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
-
- // Fails if we try to set an indexing_config.term_match_type
- prop->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- EXPECT_THAT(SchemaUtil::Validate(schema),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
-
- // Fails if we try to set an indexing_config.tokenizer_type
- prop->mutable_indexing_config()->clear_term_match_type();
- prop->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
- EXPECT_THAT(SchemaUtil::Validate(schema),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
-
- // Fails if we try to set an indexing config
- prop->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- prop->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
- EXPECT_THAT(SchemaUtil::Validate(schema),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
-}
-
-TEST_F(SchemaUtilTest, ValidateBooleanPropertyShouldntHaveIndexingConfig) {
- SchemaProto schema;
- auto* type = schema.add_types();
- type->set_schema_type("MyType");
-
- auto* prop = type->add_properties();
- prop->set_property_name("BooleanProperty");
- prop->set_data_type(PropertyConfigProto::DataType::BOOLEAN);
- prop->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
-
- // Passes if it doesn't have indexing config
- EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
-
- // Fails if we try to set an indexing_config.term_match_type
- prop->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- EXPECT_THAT(SchemaUtil::Validate(schema),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
-
- // Fails if we try to set an indexing_config.tokenizer_type
- prop->mutable_indexing_config()->clear_term_match_type();
- prop->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
- EXPECT_THAT(SchemaUtil::Validate(schema),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
-
- // Fails if we try to set an indexing config
- prop->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- prop->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
- EXPECT_THAT(SchemaUtil::Validate(schema),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
-}
-
-TEST_F(SchemaUtilTest, ValidateBytesPropertyShouldntHaveIndexingConfig) {
- SchemaProto schema;
- auto* type = schema.add_types();
- type->set_schema_type("MyType");
-
- auto* prop = type->add_properties();
- prop->set_property_name("BytesProperty");
- prop->set_data_type(PropertyConfigProto::DataType::BYTES);
- prop->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
-
- // Passes if it doesn't have indexing config
- EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
-
- // Fails if we try to set an indexing_config.term_match_type
- prop->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- EXPECT_THAT(SchemaUtil::Validate(schema),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
-
- // Fails if we try to set an indexing_config.tokenizer_type
- prop->mutable_indexing_config()->clear_term_match_type();
- prop->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
- EXPECT_THAT(SchemaUtil::Validate(schema),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
-
- // Fails if we try to set an indexing config
- prop->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- prop->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
- EXPECT_THAT(SchemaUtil::Validate(schema),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
-}
-
-TEST_F(SchemaUtilTest, ValidateDocumentPropertyShouldntHaveIndexingConfig) {
- SchemaProto schema;
- auto* type = schema.add_types();
- type->set_schema_type("OtherType");
-
- type = schema.add_types();
- type->set_schema_type("MyType");
-
- auto* prop = type->add_properties();
- prop->set_property_name("SubType");
- prop->set_schema_type("OtherType");
- prop->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
- prop->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
-
- // Passes if it doesn't have indexing config
- EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
-
- // Fails if we try to set an indexing_config.term_match_type
- prop->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- EXPECT_THAT(SchemaUtil::Validate(schema),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
-
- // Fails if we try to set an indexing_config.tokenizer_type
- prop->mutable_indexing_config()->clear_term_match_type();
- prop->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
- EXPECT_THAT(SchemaUtil::Validate(schema),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
-
- // Fails if we try to set an indexing config
- prop->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- prop->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
- EXPECT_THAT(SchemaUtil::Validate(schema),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
-}
-
} // namespace
} // namespace lib
diff --git a/icing/schema/section-manager.cc b/icing/schema/section-manager.cc
index 0eed2fe..73aa947 100644
--- a/icing/schema/section-manager.cc
+++ b/icing/schema/section-manager.cc
@@ -119,24 +119,33 @@ libtextclassifier3::Status AssignSections(
for (const auto& property_config : sorted_properties) {
if (property_config.data_type() ==
PropertyConfigProto::DataType::DOCUMENT) {
- // Tries to find sections recursively
auto nested_type_config_iter =
type_config_map.find(property_config.schema_type());
if (nested_type_config_iter == type_config_map.end()) {
+ // This should never happen because our schema should already be
+ // validated by this point.
return absl_ports::NotFoundError(absl_ports::StrCat(
- "type config not found: ", property_config.schema_type()));
+ "Type config not found: ", property_config.schema_type()));
+ }
+
+ if (property_config.document_indexing_config()
+ .index_nested_properties()) {
+ // Assign any indexed sections recursively
+ const SchemaTypeConfigProto& nested_type_config =
+ nested_type_config_iter->second;
+ ICING_RETURN_IF_ERROR(
+ AssignSections(nested_type_config,
+ ConcatenatePath(current_section_path,
+ property_config.property_name()),
+ type_config_map, visited_states, metadata_list));
}
- const SchemaTypeConfigProto& nested_type_config =
- nested_type_config_iter->second;
- ICING_RETURN_IF_ERROR(
- AssignSections(nested_type_config,
- ConcatenatePath(current_section_path,
- property_config.property_name()),
- type_config_map, visited_states, metadata_list));
}
- if (property_config.indexing_config().term_match_type() ==
- TermMatchType::UNKNOWN) {
+ // Only index strings currently.
+ if (property_config.has_data_type() !=
+ PropertyConfigProto::DataType::STRING ||
+ property_config.string_indexing_config().term_match_type() ==
+ TermMatchType::UNKNOWN) {
// No need to create section for current property
continue;
}
@@ -155,8 +164,9 @@ libtextclassifier3::Status AssignSections(
}
// Creates section metadata from property config
metadata_list->emplace_back(
- new_section_id, property_config.indexing_config().term_match_type(),
- property_config.indexing_config().tokenizer_type(),
+ new_section_id,
+ property_config.string_indexing_config().term_match_type(),
+ property_config.string_indexing_config().tokenizer_type(),
ConcatenatePath(current_section_path, property_config.property_name()));
}
return libtextclassifier3::Status::OK;
@@ -199,16 +209,6 @@ std::vector<std::string> GetPropertyContent(const PropertyProto& property) {
if (!property.string_values().empty()) {
std::copy(property.string_values().begin(), property.string_values().end(),
std::back_inserter(values));
- } else if (!property.int64_values().empty()) {
- std::transform(
- property.int64_values().begin(), property.int64_values().end(),
- std::back_inserter(values),
- [](int64_t i) { return IcingStringUtil::StringPrintf("%" PRId64, i); });
- } else {
- std::transform(
- property.double_values().begin(), property.double_values().end(),
- std::back_inserter(values),
- [](double d) { return IcingStringUtil::StringPrintf("%f", d); });
}
return values;
}
@@ -264,9 +264,8 @@ SectionManager::GetSectionContent(const DocumentProto& document,
// Property name not found, it could be one of the following 2 cases:
// 1. The property is optional and it's not in the document
// 2. The property name is invalid
- return absl_ports::NotFoundError(
- absl_ports::StrCat("Section path ", section_path,
- " not found in type config ", document.schema()));
+ return absl_ports::NotFoundError(absl_ports::StrCat(
+ "Section path '", section_path, "' not found in document."));
}
if (separator_position == std::string::npos) {
@@ -275,9 +274,8 @@ SectionManager::GetSectionContent(const DocumentProto& document,
if (content.empty()) {
// The content of property is explicitly set to empty, we'll treat it as
// NOT_FOUND because the index doesn't care about empty strings.
- return absl_ports::NotFoundError(
- absl_ports::StrCat("Section path ", section_path,
- " not found in type config ", document.schema()));
+ return absl_ports::NotFoundError(absl_ports::StrCat(
+ "Section path '", section_path, "' content was empty"));
}
return content;
}
diff --git a/icing/schema/section-manager_test.cc b/icing/schema/section-manager_test.cc
index ad9d07d..1a4d324 100644
--- a/icing/schema/section-manager_test.cc
+++ b/icing/schema/section-manager_test.cc
@@ -20,6 +20,7 @@
#include "gtest/gtest.h"
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
+#include "icing/proto/schema.proto.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/term.pb.h"
#include "icing/schema/schema-util.h"
@@ -29,9 +30,12 @@
namespace icing {
namespace lib {
+
using ::testing::ElementsAre;
using ::testing::Eq;
using ::testing::HasSubstr;
+using ::testing::IsEmpty;
+using ::testing::SizeIs;
// type and property names of EmailMessage
constexpr char kTypeEmail[] = "EmailMessage";
@@ -93,16 +97,16 @@ class SectionManagerTest : public ::testing::Test {
subject->set_property_name(kPropertySubject);
subject->set_data_type(PropertyConfigProto::DataType::STRING);
subject->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- subject->mutable_indexing_config()->set_term_match_type(
+ subject->mutable_string_indexing_config()->set_term_match_type(
TermMatchType::EXACT_ONLY);
- subject->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
+ subject->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
auto text = type.add_properties();
text->set_property_name(kPropertyText);
text->set_data_type(PropertyConfigProto::DataType::STRING);
text->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- text->mutable_indexing_config()->set_term_match_type(
+ text->mutable_string_indexing_config()->set_term_match_type(
TermMatchType::UNKNOWN);
auto attachment = type.add_properties();
@@ -114,10 +118,10 @@ class SectionManagerTest : public ::testing::Test {
recipients->set_property_name(kPropertyRecipients);
recipients->set_data_type(PropertyConfigProto::DataType::STRING);
recipients->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
- recipients->mutable_indexing_config()->set_term_match_type(
+ recipients->mutable_string_indexing_config()->set_term_match_type(
TermMatchType::EXACT_ONLY);
- recipients->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
+ recipients->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
return type;
}
@@ -130,7 +134,7 @@ class SectionManagerTest : public ::testing::Test {
name->set_property_name(kPropertyName);
name->set_data_type(PropertyConfigProto::DataType::STRING);
name->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- name->mutable_indexing_config()->set_term_match_type(
+ name->mutable_string_indexing_config()->set_term_match_type(
TermMatchType::EXACT_ONLY);
auto emails = type.add_properties();
@@ -138,6 +142,8 @@ class SectionManagerTest : public ::testing::Test {
emails->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
emails->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
emails->set_schema_type(kTypeEmail);
+ emails->mutable_document_indexing_config()->set_index_nested_properties(
+ true);
return type;
}
@@ -166,6 +172,8 @@ TEST_F(SectionManagerTest, CreationWithSchemaInfiniteLoopShouldFail) {
property1->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
property1->set_schema_type("type2"); // Here we reference type2
property1->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ property1->mutable_document_indexing_config()->set_index_nested_properties(
+ true);
SchemaTypeConfigProto type_config2;
type_config2.set_schema_type("type2");
@@ -175,6 +183,8 @@ TEST_F(SectionManagerTest, CreationWithSchemaInfiniteLoopShouldFail) {
// Here we reference type1, which references type2 causing the infinite loop
property2->set_schema_type("type1");
property2->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ property2->mutable_document_indexing_config()->set_index_nested_properties(
+ true);
SchemaUtil::TypeConfigMap type_config_map;
type_config_map.emplace("type1", type_config1);
@@ -194,11 +204,13 @@ TEST_F(SectionManagerTest, CreationWithSchemaSelfReferenceShouldFail) {
property1->set_property_name("property1");
property1->set_data_type(PropertyConfigProto::DataType::STRING);
property1->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- property1->mutable_indexing_config()->set_term_match_type(
+ property1->mutable_string_indexing_config()->set_term_match_type(
TermMatchType::EXACT_ONLY);
auto property2 = type_config.add_properties();
property2->set_property_name("property2");
property2->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+ property2->mutable_document_indexing_config()->set_index_nested_properties(
+ true);
// Here we're referencing our own type, causing an infinite loop
property2->set_schema_type("type");
property2->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
@@ -222,7 +234,7 @@ TEST_F(SectionManagerTest, CreationWithTooManyPropertiesShouldFail) {
property->set_property_name("property" + std::to_string(i));
property->set_data_type(PropertyConfigProto::DataType::STRING);
property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- property->mutable_indexing_config()->set_term_match_type(
+ property->mutable_string_indexing_config()->set_term_match_type(
TermMatchType::EXACT_ONLY);
}
@@ -235,24 +247,6 @@ TEST_F(SectionManagerTest, CreationWithTooManyPropertiesShouldFail) {
HasSubstr("Too many properties")));
}
-TEST_F(SectionManagerTest, CreationWithUnknownSchemaTypeNameShouldFail) {
- SchemaTypeConfigProto type_config;
- type_config.set_schema_type("type");
- auto property = type_config.add_properties();
- property->set_property_name("property");
- property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
- property->set_schema_type("unknown_name");
- property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
-
- SchemaUtil::TypeConfigMap type_config_map;
- type_config_map.emplace("type", type_config);
-
- EXPECT_THAT(
- SectionManager::Create(type_config_map, schema_type_mapper_.get()),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND,
- HasSubstr("type config not found")));
-}
-
TEST_F(SectionManagerTest, GetSectionContent) {
ICING_ASSERT_OK_AND_ASSIGN(
auto section_manager,
@@ -393,5 +387,261 @@ TEST_F(SectionManagerTest, ExtractSections) {
EXPECT_THAT(sections[1].content, ElementsAre("the subject", "the subject"));
}
+TEST_F(SectionManagerTest,
+ NonStringFieldsWithStringIndexingConfigDontCreateSections) {
+ // Create a schema for an empty document.
+ SchemaTypeConfigProto empty_type;
+ empty_type.set_schema_type("EmptySchema");
+
+ // Create a schema with all the non-string fields
+ SchemaTypeConfigProto type_with_non_string_properties;
+ type_with_non_string_properties.set_schema_type("Schema");
+
+ // Create an int property with a string_indexing_config
+ auto int_property = type_with_non_string_properties.add_properties();
+ int_property->set_property_name("int");
+ int_property->set_data_type(PropertyConfigProto::DataType::INT64);
+ int_property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ int_property->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+ int_property->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+
+ // Create a double property with a string_indexing_config
+ auto double_property = type_with_non_string_properties.add_properties();
+ double_property->set_property_name("double");
+ double_property->set_data_type(PropertyConfigProto::DataType::DOUBLE);
+ double_property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ double_property->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+ double_property->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+
+ // Create a boolean property with a string_indexing_config
+ auto boolean_property = type_with_non_string_properties.add_properties();
+ boolean_property->set_property_name("boolean");
+ boolean_property->set_data_type(PropertyConfigProto::DataType::BOOLEAN);
+ boolean_property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ boolean_property->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+ boolean_property->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+
+ // Create a bytes property with a string_indexing_config
+ auto bytes_property = type_with_non_string_properties.add_properties();
+ bytes_property->set_property_name("bytes");
+ bytes_property->set_data_type(PropertyConfigProto::DataType::BYTES);
+ bytes_property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ bytes_property->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+ bytes_property->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+
+ // Create a document property with a string_indexing_config
+ auto document_property = type_with_non_string_properties.add_properties();
+ document_property->set_property_name("document");
+ document_property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+ document_property->set_schema_type(empty_type.schema_type());
+ document_property->set_cardinality(
+ PropertyConfigProto::Cardinality::REQUIRED);
+ document_property->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+ document_property->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+
+ // Setup classes to create the section manager
+ SchemaUtil::TypeConfigMap type_config_map;
+ type_config_map.emplace(type_with_non_string_properties.schema_type(),
+ type_with_non_string_properties);
+ type_config_map.emplace(empty_type.schema_type(), empty_type);
+
+ // KeyMapper uses 3 internal arrays for bookkeeping. Give each one 128KiB so
+ // the total KeyMapper should get 384KiB
+ int key_mapper_size = 3 * 128 * 1024;
+ std::string dir = GetTestTempDir() + "/non_string_fields";
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ KeyMapper<SchemaTypeId>::Create(filesystem_, dir, key_mapper_size));
+ ICING_ASSERT_OK(schema_type_mapper->Put(
+ type_with_non_string_properties.schema_type(), /*schema_type_id=*/0));
+ ICING_ASSERT_OK(schema_type_mapper->Put(empty_type.schema_type(),
+ /*schema_type_id=*/1));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto section_manager,
+ SectionManager::Create(type_config_map, schema_type_mapper.get()));
+
+ // Create an empty document to be nested
+ DocumentProto empty_document = DocumentBuilder()
+ .SetKey("icing", "uri1")
+ .SetSchema(empty_type.schema_type())
+ .Build();
+
+ // Create a document that follows "Schema"
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "uri2")
+ .SetSchema(type_with_non_string_properties.schema_type())
+ .AddInt64Property("int", 1)
+ .AddDoubleProperty("double", 0.2)
+ .AddBooleanProperty("boolean", true)
+ .AddBytesProperty("bytes", "attachment bytes")
+ .AddDocumentProperty("document", empty_document)
+ .Build();
+
+ // Extracts sections from 'Schema' document
+ ICING_ASSERT_OK_AND_ASSIGN(auto sections,
+ section_manager->ExtractSections(document));
+ EXPECT_THAT(sections.size(), Eq(0));
+}
+
+TEST_F(SectionManagerTest, AssignSectionsRecursivelyForDocumentFields) {
+ // Create the inner schema that the document property is.
+ SchemaTypeConfigProto document_type;
+ document_type.set_schema_type("DocumentSchema");
+
+ auto string_property = document_type.add_properties();
+ string_property->set_property_name("string");
+ string_property->set_data_type(PropertyConfigProto::DataType::STRING);
+ string_property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ string_property->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+ string_property->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+
+ // Create the outer schema which has the document property.
+ SchemaTypeConfigProto type;
+ type.set_schema_type("Schema");
+
+ auto document_property = type.add_properties();
+ document_property->set_property_name("document");
+ document_property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+ document_property->set_schema_type(document_type.schema_type());
+ document_property->set_cardinality(
+ PropertyConfigProto::Cardinality::REQUIRED);
+
+ // Opt into recursing into the document fields.
+ document_property->mutable_document_indexing_config()
+ ->set_index_nested_properties(true);
+
+ // Create the inner document.
+ DocumentProto inner_document = DocumentBuilder()
+ .SetKey("icing", "uri1")
+ .SetSchema(document_type.schema_type())
+ .AddStringProperty("string", "foo")
+ .Build();
+
+ // Create the outer document that holds the inner document
+ DocumentProto outer_document =
+ DocumentBuilder()
+ .SetKey("icing", "uri2")
+ .SetSchema(type.schema_type())
+ .AddDocumentProperty("document", inner_document)
+ .Build();
+
+ // Setup classes to create the section manager
+ SchemaUtil::TypeConfigMap type_config_map;
+ type_config_map.emplace(type.schema_type(), type);
+ type_config_map.emplace(document_type.schema_type(), document_type);
+
+ // KeyMapper uses 3 internal arrays for bookkeeping. Give each one 128KiB so
+ // the total KeyMapper should get 384KiB
+ int key_mapper_size = 3 * 128 * 1024;
+ std::string dir = GetTestTempDir() + "/recurse_into_document";
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ KeyMapper<SchemaTypeId>::Create(filesystem_, dir, key_mapper_size));
+ int type_schema_type_id = 0;
+ int document_type_schema_type_id = 1;
+ ICING_ASSERT_OK(
+ schema_type_mapper->Put(type.schema_type(), type_schema_type_id));
+ ICING_ASSERT_OK(schema_type_mapper->Put(document_type.schema_type(),
+ document_type_schema_type_id));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto section_manager,
+ SectionManager::Create(type_config_map, schema_type_mapper.get()));
+
+ // Extracts sections from 'Schema' document; there should be the 1 string
+ // property inside the document.
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Section> sections,
+ section_manager->ExtractSections(outer_document));
+ EXPECT_THAT(sections, SizeIs(1));
+}
+
+TEST_F(SectionManagerTest, DontAssignSectionsRecursivelyForDocumentFields) {
+ // Create the inner schema that the document property is.
+ SchemaTypeConfigProto document_type;
+ document_type.set_schema_type("DocumentSchema");
+
+ auto string_property = document_type.add_properties();
+ string_property->set_property_name("string");
+ string_property->set_data_type(PropertyConfigProto::DataType::STRING);
+ string_property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ string_property->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+ string_property->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+
+ // Create the outer schema which has the document property.
+ SchemaTypeConfigProto type;
+ type.set_schema_type("Schema");
+
+ auto document_property = type.add_properties();
+ document_property->set_property_name("document");
+ document_property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+ document_property->set_schema_type(document_type.schema_type());
+ document_property->set_cardinality(
+ PropertyConfigProto::Cardinality::REQUIRED);
+
+ // Opt out of recursing into the document fields.
+ document_property->mutable_document_indexing_config()
+ ->set_index_nested_properties(false);
+
+ // Create the inner document.
+ DocumentProto inner_document = DocumentBuilder()
+ .SetKey("icing", "uri1")
+ .SetSchema(document_type.schema_type())
+ .AddStringProperty("string", "foo")
+ .Build();
+
+ // Create the outer document that holds the inner document
+ DocumentProto outer_document =
+ DocumentBuilder()
+ .SetKey("icing", "uri2")
+ .SetSchema(type.schema_type())
+ .AddDocumentProperty("document", inner_document)
+ .Build();
+
+ // Setup classes to create the section manager
+ SchemaUtil::TypeConfigMap type_config_map;
+ type_config_map.emplace(type.schema_type(), type);
+ type_config_map.emplace(document_type.schema_type(), document_type);
+
+ // KeyMapper uses 3 internal arrays for bookkeeping. Give each one 128KiB so
+ // the total KeyMapper should get 384KiB
+ int key_mapper_size = 3 * 128 * 1024;
+ std::string dir = GetTestTempDir() + "/recurse_into_document";
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper,
+ KeyMapper<SchemaTypeId>::Create(filesystem_, dir, key_mapper_size));
+ int type_schema_type_id = 0;
+ int document_type_schema_type_id = 1;
+ ICING_ASSERT_OK(
+ schema_type_mapper->Put(type.schema_type(), type_schema_type_id));
+ ICING_ASSERT_OK(schema_type_mapper->Put(document_type.schema_type(),
+ document_type_schema_type_id));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto section_manager,
+ SectionManager::Create(type_config_map, schema_type_mapper.get()));
+
+ // Extracts sections from 'Schema' document; there won't be any since we
+ // didn't recurse into the document to see the inner string property
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Section> sections,
+ section_manager->ExtractSections(outer_document));
+ EXPECT_THAT(sections, IsEmpty());
+}
+
} // namespace lib
} // namespace icing
diff --git a/icing/schema/section.h b/icing/schema/section.h
index daf4fd0..7669c97 100644
--- a/icing/schema/section.h
+++ b/icing/schema/section.h
@@ -54,9 +54,9 @@ struct SectionMetadata {
// A unique id of property within a type config
SectionId id;
- // How content in this section should be tokenized. It is invalid for a
- // section to have tokenizer == 'NONE'.
- IndexingConfig::TokenizerType::Code tokenizer;
+ // How strings should be tokenized. It is invalid for a section to have
+ // tokenizer == 'NONE'.
+ StringIndexingConfig::TokenizerType::Code tokenizer;
// How tokens in this section should be matched.
//
@@ -71,7 +71,7 @@ struct SectionMetadata {
TermMatchType::Code term_match_type = TermMatchType::UNKNOWN;
SectionMetadata(SectionId id_in, TermMatchType::Code term_match_type_in,
- IndexingConfig::TokenizerType::Code tokenizer,
+ StringIndexingConfig::TokenizerType::Code tokenizer,
std::string&& path_in)
: path(std::move(path_in)),
id(id_in),
diff --git a/icing/store/document-store.cc b/icing/store/document-store.cc
index 1e47d59..8ddde14 100644
--- a/icing/store/document-store.cc
+++ b/icing/store/document-store.cc
@@ -35,6 +35,7 @@
#include "icing/legacy/core/icing-string-util.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/document_wrapper.pb.h"
+#include "icing/proto/logging.pb.h"
#include "icing/schema/schema-store.h"
#include "icing/store/document-associated-score-data.h"
#include "icing/store/document-filter-data.h"
@@ -45,6 +46,7 @@
#include "icing/util/crc32.h"
#include "icing/util/logging.h"
#include "icing/util/status-macros.h"
+#include "icing/util/timer.h"
namespace icing {
namespace lib {
@@ -185,8 +187,8 @@ DocumentStore::DocumentStore(const Filesystem* filesystem,
document_validator_(schema_store) {}
libtextclassifier3::StatusOr<DocumentId> DocumentStore::Put(
- const DocumentProto& document) {
- return Put(DocumentProto(document));
+ const DocumentProto& document, NativePutDocumentStats* put_document_stats) {
+ return Put(DocumentProto(document), put_document_stats);
}
DocumentStore::~DocumentStore() {
@@ -200,18 +202,20 @@ DocumentStore::~DocumentStore() {
libtextclassifier3::StatusOr<std::unique_ptr<DocumentStore>>
DocumentStore::Create(const Filesystem* filesystem, const std::string& base_dir,
- const Clock* clock, const SchemaStore* schema_store) {
+ const Clock* clock, const SchemaStore* schema_store,
+ NativeInitializeStats* initialize_stats) {
ICING_RETURN_ERROR_IF_NULL(filesystem);
ICING_RETURN_ERROR_IF_NULL(clock);
ICING_RETURN_ERROR_IF_NULL(schema_store);
auto document_store = std::unique_ptr<DocumentStore>(
new DocumentStore(filesystem, base_dir, clock, schema_store));
- ICING_RETURN_IF_ERROR(document_store->Initialize());
+ ICING_RETURN_IF_ERROR(document_store->Initialize(initialize_stats));
return document_store;
}
-libtextclassifier3::Status DocumentStore::Initialize() {
+libtextclassifier3::Status DocumentStore::Initialize(
+ NativeInitializeStats* initialize_stats) {
auto create_result_or = FileBackedProtoLog<DocumentWrapper>::Create(
filesystem_, MakeDocumentLogFilename(base_dir_),
FileBackedProtoLog<DocumentWrapper>::Options(
@@ -227,10 +231,30 @@ libtextclassifier3::Status DocumentStore::Initialize() {
std::move(create_result_or).ValueOrDie();
document_log_ = std::move(create_result.proto_log);
- if (create_result.data_loss) {
+ if (create_result.has_data_loss()) {
ICING_LOG(WARNING)
<< "Data loss in document log, regenerating derived files.";
+ if (initialize_stats != nullptr) {
+ initialize_stats->set_document_store_recovery_cause(
+ NativeInitializeStats::DATA_LOSS);
+
+ if (create_result.data_status ==
+ FileBackedProtoLog<DocumentWrapper>::CreateResult::PARTIAL_LOSS) {
+ // Ground truth is partially lost.
+ initialize_stats->set_document_store_data_status(
+ NativeInitializeStats::PARTIAL_LOSS);
+ } else {
+ // Ground truth is completely lost.
+ initialize_stats->set_document_store_data_status(
+ NativeInitializeStats::COMPLETE_LOSS);
+ }
+ }
+ Timer document_recovery_timer;
libtextclassifier3::Status status = RegenerateDerivedFiles();
+ if (initialize_stats != nullptr) {
+ initialize_stats->set_document_store_recovery_latency_ms(
+ document_recovery_timer.GetElapsedMilliseconds());
+ }
if (!status.ok()) {
ICING_LOG(ERROR)
<< "Failed to regenerate derived files for DocumentStore";
@@ -241,7 +265,16 @@ libtextclassifier3::Status DocumentStore::Initialize() {
ICING_VLOG(1)
<< "Couldn't find derived files or failed to initialize them, "
"regenerating derived files for DocumentStore.";
+ if (initialize_stats != nullptr) {
+ initialize_stats->set_document_store_recovery_cause(
+ NativeInitializeStats::IO_ERROR);
+ }
+ Timer document_recovery_timer;
libtextclassifier3::Status status = RegenerateDerivedFiles();
+ if (initialize_stats != nullptr) {
+ initialize_stats->set_document_store_recovery_latency_ms(
+ document_recovery_timer.GetElapsedMilliseconds());
+ }
if (!status.ok()) {
ICING_LOG(ERROR)
<< "Failed to regenerate derived files for DocumentStore";
@@ -251,6 +284,9 @@ libtextclassifier3::Status DocumentStore::Initialize() {
}
initialized_ = true;
+ if (initialize_stats != nullptr) {
+ initialize_stats->set_num_documents(document_id_mapper_->num_elements());
+ }
return libtextclassifier3::Status::OK;
}
@@ -689,9 +725,14 @@ libtextclassifier3::Status DocumentStore::UpdateHeader(const Crc32& checksum) {
}
libtextclassifier3::StatusOr<DocumentId> DocumentStore::Put(
- DocumentProto&& document) {
+ DocumentProto&& document, NativePutDocumentStats* put_document_stats) {
+ Timer put_timer;
ICING_RETURN_IF_ERROR(document_validator_.Validate(document));
+ if (put_document_stats != nullptr) {
+ put_document_stats->set_document_size(document.ByteSizeLong());
+ }
+
// Copy fields needed before they are moved
std::string name_space = document.namespace_();
std::string uri = document.uri();
@@ -765,6 +806,11 @@ libtextclassifier3::StatusOr<DocumentId> DocumentStore::Put(
}
}
+ if (put_document_stats != nullptr) {
+ put_document_stats->set_document_store_latency_ms(
+ put_timer.GetElapsedMilliseconds());
+ }
+
return new_document_id;
}
diff --git a/icing/store/document-store.h b/icing/store/document-store.h
index 5c1b902..d6ffbaa 100644
--- a/icing/store/document-store.h
+++ b/icing/store/document-store.h
@@ -28,6 +28,7 @@
#include "icing/file/filesystem.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/document_wrapper.pb.h"
+#include "icing/proto/logging.pb.h"
#include "icing/schema/schema-store.h"
#include "icing/store/document-associated-score-data.h"
#include "icing/store/document-filter-data.h"
@@ -81,8 +82,11 @@ class DocumentStore {
// previously initialized with this directory, it will reload the files saved
// by the last instance.
//
- // Does not take any ownership, and all pointers must refer to valid objects
- // that outlive the one constructed.
+ // If initialize_stats is present, the fields related to DocumentStore will be
+ // populated.
+ //
+ // Does not take any ownership, and all pointers except initialize_stats must
+ // refer to valid objects that outlive the one constructed.
//
// TODO(cassiewang): Consider returning a status indicating that derived files
// were regenerated. This may be helpful in logs.
@@ -93,29 +97,41 @@ class DocumentStore {
// INTERNAL_ERROR on IO error
static libtextclassifier3::StatusOr<std::unique_ptr<DocumentStore>> Create(
const Filesystem* filesystem, const std::string& base_dir,
- const Clock* clock, const SchemaStore* schema_store);
+ const Clock* clock, const SchemaStore* schema_store,
+ NativeInitializeStats* initialize_stats = nullptr);
// Returns the maximum DocumentId that the DocumentStore has assigned. If
// there has not been any DocumentIds assigned, i.e. the DocumentStore is
// empty, then kInvalidDocumentId is returned. This does not filter out
- // DocumentIds of deleted documents.
- const DocumentId last_added_document_id() const {
+ // DocumentIds of deleted or expired documents.
+ DocumentId last_added_document_id() const {
if (document_id_mapper_->num_elements() == 0) {
return kInvalidDocumentId;
}
return document_id_mapper_->num_elements() - 1;
}
+ // Returns the number of documents. The result does not filter out DocumentIds
+ // of deleted or expired documents.
+ int num_documents() const { return document_id_mapper_->num_elements(); }
+
// Puts the document into document store.
//
+ // If put_document_stats is present, the fields related to DocumentStore will
+ // be populated.
+ //
// Returns:
// A newly generated document id on success
// FAILED_PRECONDITION if schema hasn't been set yet
// NOT_FOUND if the schema_type or a property config of the document doesn't
// exist in schema
// INTERNAL_ERROR on IO error
- libtextclassifier3::StatusOr<DocumentId> Put(const DocumentProto& document);
- libtextclassifier3::StatusOr<DocumentId> Put(DocumentProto&& document);
+ libtextclassifier3::StatusOr<DocumentId> Put(
+ const DocumentProto& document,
+ NativePutDocumentStats* put_document_stats = nullptr);
+ libtextclassifier3::StatusOr<DocumentId> Put(
+ DocumentProto&& document,
+ NativePutDocumentStats* put_document_stats = nullptr);
// Finds and returns the document identified by the given key (namespace +
// uri)
@@ -422,7 +438,8 @@ class DocumentStore {
// worry about this field.
bool initialized_ = false;
- libtextclassifier3::Status Initialize();
+ libtextclassifier3::Status Initialize(
+ NativeInitializeStats* initialize_stats);
// Creates sub-components and verifies the integrity of each sub-component.
//
diff --git a/icing/store/document-store_test.cc b/icing/store/document-store_test.cc
index 301dbdd..d97ec46 100644
--- a/icing/store/document-store_test.cc
+++ b/icing/store/document-store_test.cc
@@ -111,19 +111,19 @@ class DocumentStoreTest : public ::testing::Test {
subject->set_property_name("subject");
subject->set_data_type(PropertyConfigProto::DataType::STRING);
subject->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- subject->mutable_indexing_config()->set_term_match_type(
+ subject->mutable_string_indexing_config()->set_term_match_type(
TermMatchType::EXACT_ONLY);
- subject->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
+ subject->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
auto body = type_config->add_properties();
body->set_property_name("body");
body->set_data_type(PropertyConfigProto::DataType::STRING);
body->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- body->mutable_indexing_config()->set_term_match_type(
+ body->mutable_string_indexing_config()->set_term_match_type(
TermMatchType::EXACT_ONLY);
- body->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
+ body->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_, SchemaStore::Create(&filesystem_, schema_store_dir_));
@@ -1941,10 +1941,10 @@ TEST_F(DocumentStoreTest, UpdateSchemaStoreDeletesInvalidDocuments) {
property_config->set_property_name("subject");
property_config->set_data_type(PropertyConfigProto::DataType::STRING);
property_config->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- property_config->mutable_indexing_config()->set_term_match_type(
+ property_config->mutable_string_indexing_config()->set_term_match_type(
TermMatchType::EXACT_ONLY);
- property_config->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
+ property_config->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
@@ -2168,10 +2168,10 @@ TEST_F(DocumentStoreTest, OptimizedUpdateSchemaStoreDeletesInvalidDocuments) {
property_config->set_property_name("subject");
property_config->set_data_type(PropertyConfigProto::DataType::STRING);
property_config->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- property_config->mutable_indexing_config()->set_term_match_type(
+ property_config->mutable_string_indexing_config()->set_term_match_type(
TermMatchType::EXACT_ONLY);
- property_config->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
+ property_config->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
diff --git a/icing/testing/common-matchers.h b/icing/testing/common-matchers.h
index 7e14d0a..31d41fc 100644
--- a/icing/testing/common-matchers.h
+++ b/icing/testing/common-matchers.h
@@ -32,10 +32,11 @@ namespace lib {
// Used to match Token(Token::Type type, std::string_view text)
MATCHER_P2(EqualsToken, type, text, "") {
+ std::string arg_string(arg.text.data(), arg.text.length());
if (arg.type != type || arg.text != text) {
*result_listener << IcingStringUtil::StringPrintf(
"(Expected: type=%d, text=\"%s\". Actual: type=%d, text=\"%s\")", type,
- &text[0], arg.type, arg.text.data());
+ text, arg.type, arg_string.c_str());
return false;
}
return true;
diff --git a/icing/testing/schema-generator.h b/icing/testing/schema-generator.h
index e733612..863f43f 100644
--- a/icing/testing/schema-generator.h
+++ b/icing/testing/schema-generator.h
@@ -31,9 +31,11 @@ class ExactStringPropertyGenerator {
prop.set_property_name(name.data(), name.length());
prop.set_data_type(PropertyConfigProto::DataType::STRING);
prop.set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- IndexingConfig* indexing_config = prop.mutable_indexing_config();
- indexing_config->set_term_match_type(TermMatchType::EXACT_ONLY);
- indexing_config->set_tokenizer_type(IndexingConfig::TokenizerType::PLAIN);
+ StringIndexingConfig* string_indexing_config =
+ prop.mutable_string_indexing_config();
+ string_indexing_config->set_term_match_type(TermMatchType::EXACT_ONLY);
+ string_indexing_config->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
return prop;
}
};
diff --git a/icing/tokenization/icu/icu-language-segmenter.cc b/icing/tokenization/icu/icu-language-segmenter.cc
index d43a78d..74d22cd 100644
--- a/icing/tokenization/icu/icu-language-segmenter.cc
+++ b/icing/tokenization/icu/icu-language-segmenter.cc
@@ -169,7 +169,7 @@ class IcuLanguageSegmenterIterator : public LanguageSegmenter::Iterator {
// Returns true on success
bool Initialize() {
UErrorCode status = U_ZERO_ERROR;
- utext_openUTF8(&u_text_, text_.data(), /*length=*/-1, &status);
+ utext_openUTF8(&u_text_, text_.data(), text_.length(), &status);
break_iterator_ = ubrk_open(UBRK_WORD, locale_.data(), /*text=*/nullptr,
/*textLength=*/0, &status);
ubrk_setUText(break_iterator_, &u_text_, &status);
diff --git a/icing/tokenization/plain-tokenizer_test.cc b/icing/tokenization/plain-tokenizer_test.cc
index d9db75a..df0981b 100644
--- a/icing/tokenization/plain-tokenizer_test.cc
+++ b/icing/tokenization/plain-tokenizer_test.cc
@@ -43,10 +43,10 @@ class PlainTokenizerTest : public ::testing::Test {
};
TEST_F(PlainTokenizerTest, CreationWithNullPointerShouldFail) {
- EXPECT_THAT(
- tokenizer_factory::CreateIndexingTokenizer(
- IndexingConfig::TokenizerType::PLAIN, /*lang_segmenter=*/nullptr),
- StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(tokenizer_factory::CreateIndexingTokenizer(
+ StringIndexingConfig::TokenizerType::PLAIN,
+ /*lang_segmenter=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
TEST_F(PlainTokenizerTest, Simple) {
@@ -54,10 +54,10 @@ TEST_F(PlainTokenizerTest, Simple) {
ICING_ASSERT_OK_AND_ASSIGN(
auto language_segmenter,
language_segmenter_factory::Create(std::move(options)));
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<Tokenizer> plain_tokenizer,
- tokenizer_factory::CreateIndexingTokenizer(
- IndexingConfig::TokenizerType::PLAIN, language_segmenter.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Tokenizer> plain_tokenizer,
+ tokenizer_factory::CreateIndexingTokenizer(
+ StringIndexingConfig::TokenizerType::PLAIN,
+ language_segmenter.get()));
EXPECT_THAT(plain_tokenizer->TokenizeAll(""), IsOkAndHolds(IsEmpty()));
@@ -88,10 +88,10 @@ TEST_F(PlainTokenizerTest, Whitespace) {
ICING_ASSERT_OK_AND_ASSIGN(
auto language_segmenter,
language_segmenter_factory::Create(std::move(options)));
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<Tokenizer> plain_tokenizer,
- tokenizer_factory::CreateIndexingTokenizer(
- IndexingConfig::TokenizerType::PLAIN, language_segmenter.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Tokenizer> plain_tokenizer,
+ tokenizer_factory::CreateIndexingTokenizer(
+ StringIndexingConfig::TokenizerType::PLAIN,
+ language_segmenter.get()));
// There're many unicode characters that are whitespaces, here we choose tabs
// to represent others.
@@ -116,10 +116,10 @@ TEST_F(PlainTokenizerTest, Punctuation) {
ICING_ASSERT_OK_AND_ASSIGN(
auto language_segmenter,
language_segmenter_factory::Create(std::move(options)));
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<Tokenizer> plain_tokenizer,
- tokenizer_factory::CreateIndexingTokenizer(
- IndexingConfig::TokenizerType::PLAIN, language_segmenter.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Tokenizer> plain_tokenizer,
+ tokenizer_factory::CreateIndexingTokenizer(
+ StringIndexingConfig::TokenizerType::PLAIN,
+ language_segmenter.get()));
// Half-width punctuation marks are filtered out.
EXPECT_THAT(plain_tokenizer->TokenizeAll(
@@ -147,10 +147,10 @@ TEST_F(PlainTokenizerTest, SpecialCharacters) {
ICING_ASSERT_OK_AND_ASSIGN(
auto language_segmenter,
language_segmenter_factory::Create(std::move(options)));
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<Tokenizer> plain_tokenizer,
- tokenizer_factory::CreateIndexingTokenizer(
- IndexingConfig::TokenizerType::PLAIN, language_segmenter.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Tokenizer> plain_tokenizer,
+ tokenizer_factory::CreateIndexingTokenizer(
+ StringIndexingConfig::TokenizerType::PLAIN,
+ language_segmenter.get()));
// Right now we don't have special logic for these characters, just output
// them as tokens.
@@ -170,10 +170,10 @@ TEST_F(PlainTokenizerTest, CJKT) {
ICING_ASSERT_OK_AND_ASSIGN(
auto language_segmenter,
language_segmenter_factory::Create(std::move(options)));
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<Tokenizer> plain_tokenizer,
- tokenizer_factory::CreateIndexingTokenizer(
- IndexingConfig::TokenizerType::PLAIN, language_segmenter.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Tokenizer> plain_tokenizer,
+ tokenizer_factory::CreateIndexingTokenizer(
+ StringIndexingConfig::TokenizerType::PLAIN,
+ language_segmenter.get()));
// In plain tokenizer, CJKT characters are handled the same way as non-CJKT
// characters, just add these tests as sanity checks.
@@ -224,10 +224,10 @@ TEST_F(PlainTokenizerTest, ResetToTokenAfterSimple) {
ICING_ASSERT_OK_AND_ASSIGN(
auto language_segmenter,
language_segmenter_factory::Create(std::move(options)));
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<Tokenizer> plain_tokenizer,
- tokenizer_factory::CreateIndexingTokenizer(
- IndexingConfig::TokenizerType::PLAIN, language_segmenter.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Tokenizer> plain_tokenizer,
+ tokenizer_factory::CreateIndexingTokenizer(
+ StringIndexingConfig::TokenizerType::PLAIN,
+ language_segmenter.get()));
constexpr std::string_view kText = "f b";
auto iterator = plain_tokenizer->Tokenize(kText).ValueOrDie();
@@ -243,10 +243,10 @@ TEST_F(PlainTokenizerTest, ResetToTokenBeforeSimple) {
ICING_ASSERT_OK_AND_ASSIGN(
auto language_segmenter,
language_segmenter_factory::Create(std::move(options)));
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<Tokenizer> plain_tokenizer,
- tokenizer_factory::CreateIndexingTokenizer(
- IndexingConfig::TokenizerType::PLAIN, language_segmenter.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Tokenizer> plain_tokenizer,
+ tokenizer_factory::CreateIndexingTokenizer(
+ StringIndexingConfig::TokenizerType::PLAIN,
+ language_segmenter.get()));
constexpr std::string_view kText = "f b";
auto iterator = plain_tokenizer->Tokenize(kText).ValueOrDie();
@@ -262,10 +262,10 @@ TEST_F(PlainTokenizerTest, ResetToTokenAfter) {
ICING_ASSERT_OK_AND_ASSIGN(
auto language_segmenter,
language_segmenter_factory::Create(std::move(options)));
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<Tokenizer> plain_tokenizer,
- tokenizer_factory::CreateIndexingTokenizer(
- IndexingConfig::TokenizerType::PLAIN, language_segmenter.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Tokenizer> plain_tokenizer,
+ tokenizer_factory::CreateIndexingTokenizer(
+ StringIndexingConfig::TokenizerType::PLAIN,
+ language_segmenter.get()));
constexpr std::string_view kText = " foo . bar baz.. bat ";
EXPECT_THAT(plain_tokenizer->TokenizeAll(kText),
@@ -312,10 +312,10 @@ TEST_F(PlainTokenizerTest, ResetToTokenBefore) {
ICING_ASSERT_OK_AND_ASSIGN(
auto language_segmenter,
language_segmenter_factory::Create(std::move(options)));
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<Tokenizer> plain_tokenizer,
- tokenizer_factory::CreateIndexingTokenizer(
- IndexingConfig::TokenizerType::PLAIN, language_segmenter.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Tokenizer> plain_tokenizer,
+ tokenizer_factory::CreateIndexingTokenizer(
+ StringIndexingConfig::TokenizerType::PLAIN,
+ language_segmenter.get()));
constexpr std::string_view kText = " foo . bar baz.. bat ";
EXPECT_THAT(plain_tokenizer->TokenizeAll(kText),
diff --git a/icing/tokenization/raw-query-tokenizer.cc b/icing/tokenization/raw-query-tokenizer.cc
index 8b2edc9..50b25c5 100644
--- a/icing/tokenization/raw-query-tokenizer.cc
+++ b/icing/tokenization/raw-query-tokenizer.cc
@@ -26,6 +26,9 @@
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_join.h"
+#include "icing/schema/schema-util.h"
+#include "icing/schema/section-manager.h"
#include "icing/tokenization/language-segmenter.h"
#include "icing/tokenization/token.h"
#include "icing/tokenization/tokenizer.h"
@@ -70,7 +73,7 @@ constexpr char kColon = ':';
constexpr char kLeftParentheses = '(';
constexpr char kRightParentheses = ')';
constexpr char kExclusion = '-';
-constexpr char kOrOperator[] = "OR";
+constexpr std::string_view kOrOperator = "OR";
enum State {
// Ready to process any terms
@@ -100,10 +103,14 @@ enum State {
// When seeing right parentheses
CLOSING_PARENTHESES = 8,
+ PROCESSING_NON_ASCII_ALPHABETIC_TERM = 9,
+
+ PROCESSING_PROPERTY_TERM_APPENDING = 10,
+
// Valid state count
- STATE_COUNT = 9,
+ STATE_COUNT = 11,
- INVALID = 10
+ INVALID = 12
};
enum TermType {
@@ -111,27 +118,29 @@ enum TermType {
WHITESPACE = 0,
// A term that consists of unicode alphabetic and numeric characters
- ALPHANUMERIC_TERM = 1,
+ ASCII_ALPHANUMERIC_TERM = 1,
+
+ NON_ASCII_ALPHABETIC_TERM = 2,
// "("
- LEFT_PARENTHESES = 2,
+ LEFT_PARENTHESES = 3,
// ")"
- RIGHT_PARENTHESES = 3,
+ RIGHT_PARENTHESES = 4,
// "-"
- EXCLUSION_OPERATOR = 4,
+ EXCLUSION_OPERATOR = 5,
// "OR"
- OR_OPERATOR = 5,
+ OR_OPERATOR = 6,
// ":"
- COLON = 6,
+ COLON = 7,
// All the other characters seen that are not the types above
- OTHER = 7,
+ OTHER = 8,
- TYPE_COUNT = 8
+ TYPE_COUNT = 9
};
enum ActionOrError {
@@ -145,6 +154,9 @@ enum ActionOrError {
// Ignore / throw away the current term
IGNORE = 2,
+ // Concatenate with next term
+ CONCATENATE = 3,
+
// Errors
ERROR_UNKNOWN = 100,
ERROR_NO_WHITESPACE_AROUND_OR = 101,
@@ -154,6 +166,7 @@ enum ActionOrError {
ERROR_EXCLUSION_PROPERTY_TOGETHER = 105,
ERROR_EXCLUSION_OR_TOGETHER = 106,
ERROR_PROPERTY_OR_TOGETHER = 107,
+ ERROR_NON_ASCII_AS_PROPERTY_NAME = 108,
};
std::string_view GetErrorMessage(ActionOrError maybe_error) {
@@ -175,6 +188,8 @@ std::string_view GetErrorMessage(ActionOrError maybe_error) {
return "Exclusion and OR operators can't be used together";
case ERROR_PROPERTY_OR_TOGETHER:
return "Property restriction and OR operators can't be used together";
+ case ERROR_NON_ASCII_AS_PROPERTY_NAME:
+ return "Characters in property name must all be ASCII.";
default:
return "";
}
@@ -186,7 +201,7 @@ std::string_view GetErrorMessage(ActionOrError maybe_error) {
// States:
//
// READY = 0
-// PROCESSING_ALPHANUMERIC_TERM = 1
+// PROCESSING_ASCII_ALPHANUMERIC_TERM = 1
// PROCESSING_EXCLUSION = 2
// PROCESSING_EXCLUSION_TERM = 3
// PROCESSING_PROPERTY_RESTRICT = 4
@@ -194,24 +209,28 @@ std::string_view GetErrorMessage(ActionOrError maybe_error) {
// PROCESSING_OR = 6
// OPENING_PARENTHESES = 7
// CLOSING_PARENTHESES = 8
+// PROCESSING_NON_ASCII_ALPHABETIC_TERM = 9
+// PROCESSING_PROPERTY_TERM_APPENDING = 10
//
// Actions:
//
// OUTPUT = a
// KEEP = b
// IGNORE = c
+// CONCAT = d, concatenate the current term and the new term.
//
-// ========================================================
-// Transition Table || 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
-// ===========================================================================
-// WHITESPACE || 0,c | 0,a | 0,c | 0,a | 0,a | 0,a | 0,a | 0,a | 0,a |
-// ALPHANUMERIC_TERM || 1,c | 1,a | 3,a | 1,a | 5,a | 1,a |ERROR| 1,a | 1,a |
-// LEFT_PARENTHESES || 7,c | 7,a |ERROR| 7,a |ERROR| 7,a | 7,a | 7,a | 7,a |
-// RIGHT_PARENTHESES || 8,c | 8,a | 8,c | 8,a | 8,a | 8,a | 8,c | 8,a | 8,a |
-// EXCLUSION_OPERATOR || 2,c | 0,a | 2,c | 0,a |ERROR| 0,a |ERROR| 2,a | 2,a |
-// OR_OPERATOR || 6,c |ERROR|ERROR|ERROR|ERROR|ERROR|ERROR| 7,b | 6,a |
-// COLON || 0,c | 4,b |ERROR|ERROR| 4,b | 0,a |ERROR| 0,a |ERROR|
-// OTHER || 0,c | 0,a | 0,c | 0,a | 0,a | 0,a | 0,a | 0,a | 0,a |
+// =============================================================================
+// Transition || 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |
+// =============================================================================
+// WHITESPACE || 0,c| 0,a| 0,c| 0,a| 0,a| 0,a| 0,a| 0,a| 0,a| 0,a| 0,a|
+// ASCII_ALPHA || 1,c| 1,d| 3,a| 1,a| 5,a| 1,a|ERR | 1,a| 1,a| 1,a|10,d|
+// NONASCII_ALPHA || 9,c| 9,a| 3,a| 9,a| 5,a| 9,a|ERR | 9,a| 9,a| 9,a|10,d|
+// LEFT_PAREN || 7,c| 7,a|ERR | 7,a|ERR | 7,a| 7,a| 7,a| 7,a| 7,a| 7,a|
+// RIGHT_PAREN || 8,c| 8,a| 8,c| 8,a| 8,a| 8,a| 8,c| 8,a| 8,a| 8,a| 8,a|
+// EXCLUSION_OP || 2,c| 0,a| 2,c| 0,a|ERR | 0,a|ERR | 2,a| 2,a| 0,a| 0,a|
+// OR_OPERATOR || 6,c|ERR |ERR |ERR |ERR |ERR |ERR | 7,b| 6,a|ERR |ERR |
+// COLON || 0,c| 4,b|ERR |ERR | 4,b|10,d|ERR | 0,a|ERR |ERR |10,d|
+// OTHER || 0,c| 0,a| 0,c| 0,a| 0,a| 0,a| 0,a| 0,a| 0,a| 0,a| 0,a|
//
// Each cell is a rule that consists of 4 things:
// [current state] + [next term type] -> [new state] + [action]
@@ -234,33 +253,46 @@ std::string_view GetErrorMessage(ActionOrError maybe_error) {
// like "+", "&", "@", "#" in indexing and query tokenizers.
constexpr State state_transition_rules[STATE_COUNT][TYPE_COUNT] = {
/*State: Ready*/
- {READY, PROCESSING_ALPHANUMERIC_TERM, OPENING_PARENTHESES,
- CLOSING_PARENTHESES, PROCESSING_EXCLUSION, PROCESSING_OR, READY, READY},
+ {READY, PROCESSING_ALPHANUMERIC_TERM, PROCESSING_NON_ASCII_ALPHABETIC_TERM,
+ OPENING_PARENTHESES, CLOSING_PARENTHESES, PROCESSING_EXCLUSION,
+ PROCESSING_OR, READY, READY},
/*State: PROCESSING_ALPHANUMERIC_TERM*/
- {READY, PROCESSING_ALPHANUMERIC_TERM, OPENING_PARENTHESES,
- CLOSING_PARENTHESES, READY, INVALID, PROCESSING_PROPERTY_RESTRICT, READY},
+ {READY, PROCESSING_ALPHANUMERIC_TERM, PROCESSING_NON_ASCII_ALPHABETIC_TERM,
+ OPENING_PARENTHESES, CLOSING_PARENTHESES, READY, INVALID,
+ PROCESSING_PROPERTY_RESTRICT, READY},
/*State: PROCESSING_EXCLUSION*/
- {READY, PROCESSING_EXCLUSION_TERM, INVALID, CLOSING_PARENTHESES,
- PROCESSING_EXCLUSION, INVALID, INVALID, READY},
+ {READY, PROCESSING_EXCLUSION_TERM, PROCESSING_EXCLUSION_TERM, INVALID,
+ CLOSING_PARENTHESES, PROCESSING_EXCLUSION, INVALID, INVALID, READY},
/*State: PROCESSING_EXCLUSION_TERM*/
- {READY, PROCESSING_ALPHANUMERIC_TERM, OPENING_PARENTHESES,
- CLOSING_PARENTHESES, READY, INVALID, INVALID, READY},
+ {READY, PROCESSING_ALPHANUMERIC_TERM, PROCESSING_NON_ASCII_ALPHABETIC_TERM,
+ OPENING_PARENTHESES, CLOSING_PARENTHESES, READY, INVALID, INVALID, READY},
/*State: PROCESSING_PROPERTY_RESTRICT*/
- {READY, PROCESSING_PROPERTY_TERM, INVALID, CLOSING_PARENTHESES, INVALID,
- INVALID, PROCESSING_PROPERTY_RESTRICT, READY},
+ {READY, PROCESSING_PROPERTY_TERM, PROCESSING_PROPERTY_TERM, INVALID,
+ CLOSING_PARENTHESES, INVALID, INVALID, PROCESSING_PROPERTY_RESTRICT,
+ READY},
/*State: PROCESSING_PROPERTY_TERM*/
- {READY, PROCESSING_ALPHANUMERIC_TERM, OPENING_PARENTHESES,
- CLOSING_PARENTHESES, READY, INVALID, READY, READY},
+ {READY, PROCESSING_ALPHANUMERIC_TERM, PROCESSING_NON_ASCII_ALPHABETIC_TERM,
+ OPENING_PARENTHESES, CLOSING_PARENTHESES, READY, INVALID,
+ PROCESSING_PROPERTY_TERM_APPENDING, READY},
/*State: PROCESSING_OR*/
- {READY, INVALID, OPENING_PARENTHESES, CLOSING_PARENTHESES, INVALID, INVALID,
- INVALID, READY},
+ {READY, INVALID, INVALID, OPENING_PARENTHESES, CLOSING_PARENTHESES, INVALID,
+ INVALID, INVALID, READY},
/*State: OPENING_PARENTHESES*/
- {READY, PROCESSING_ALPHANUMERIC_TERM, OPENING_PARENTHESES,
- CLOSING_PARENTHESES, PROCESSING_EXCLUSION, OPENING_PARENTHESES, READY,
- READY},
+ {READY, PROCESSING_ALPHANUMERIC_TERM, PROCESSING_NON_ASCII_ALPHABETIC_TERM,
+ OPENING_PARENTHESES, CLOSING_PARENTHESES, PROCESSING_EXCLUSION,
+ OPENING_PARENTHESES, READY, READY},
/*State: CLOSING_PARENTHESES*/
- {READY, PROCESSING_ALPHANUMERIC_TERM, OPENING_PARENTHESES,
- CLOSING_PARENTHESES, PROCESSING_EXCLUSION, PROCESSING_OR, INVALID, READY}};
+ {READY, PROCESSING_ALPHANUMERIC_TERM, PROCESSING_NON_ASCII_ALPHABETIC_TERM,
+ OPENING_PARENTHESES, CLOSING_PARENTHESES, PROCESSING_EXCLUSION,
+ PROCESSING_OR, INVALID, READY},
+ /*State: PROCESSING_NON_ASCII_ALPHABETIC_TERM*/
+ {READY, PROCESSING_ALPHANUMERIC_TERM, PROCESSING_NON_ASCII_ALPHABETIC_TERM,
+ OPENING_PARENTHESES, CLOSING_PARENTHESES, READY, INVALID, INVALID, READY},
+ /*State: PROCESSING_PROPERTY_TERM_APPENDING*/
+ {READY, PROCESSING_PROPERTY_TERM_APPENDING,
+ PROCESSING_PROPERTY_TERM_APPENDING, OPENING_PARENTHESES,
+ CLOSING_PARENTHESES, READY, INVALID, PROCESSING_PROPERTY_TERM_APPENDING,
+ READY}};
// We use a 2D array to encode the action rules,
// The value of action_rules[state1][term_type1] means "what action we need to
@@ -269,62 +301,121 @@ constexpr State state_transition_rules[STATE_COUNT][TYPE_COUNT] = {
// NOTE: Please update the state transition table above if this is updated.
constexpr ActionOrError action_rules[STATE_COUNT][TYPE_COUNT] = {
/*State: Ready*/
- {IGNORE, IGNORE, IGNORE, IGNORE, IGNORE, IGNORE, IGNORE, IGNORE},
+ {IGNORE, IGNORE, IGNORE, IGNORE, IGNORE, IGNORE, IGNORE, IGNORE, IGNORE},
/*State: PROCESSING_ALPHANUMERIC_TERM*/
- {OUTPUT, OUTPUT, OUTPUT, OUTPUT, OUTPUT, ERROR_NO_WHITESPACE_AROUND_OR,
- KEEP, OUTPUT},
+ {OUTPUT, OUTPUT, OUTPUT, OUTPUT, OUTPUT, OUTPUT,
+ ERROR_NO_WHITESPACE_AROUND_OR, KEEP, OUTPUT},
/*State: PROCESSING_EXCLUSION*/
- {IGNORE, OUTPUT, ERROR_GROUP_AFTER_EXCLUSION, IGNORE, IGNORE,
+ {IGNORE, OUTPUT, OUTPUT, ERROR_GROUP_AFTER_EXCLUSION, IGNORE, IGNORE,
ERROR_EXCLUSION_OR_TOGETHER, ERROR_EXCLUSION_PROPERTY_TOGETHER, IGNORE},
/*State: PROCESSING_EXCLUSION_TERM*/
- {OUTPUT, OUTPUT, OUTPUT, OUTPUT, OUTPUT, ERROR_NO_WHITESPACE_AROUND_OR,
- ERROR_EXCLUSION_PROPERTY_TOGETHER, OUTPUT},
+ {OUTPUT, OUTPUT, OUTPUT, OUTPUT, OUTPUT, OUTPUT,
+ ERROR_NO_WHITESPACE_AROUND_OR, ERROR_EXCLUSION_PROPERTY_TOGETHER, OUTPUT},
/*State: PROCESSING_PROPERTY_RESTRICT*/
- {OUTPUT, OUTPUT, ERROR_GROUP_AFTER_PROPERTY_RESTRICTION, OUTPUT,
+ {OUTPUT, OUTPUT, OUTPUT, ERROR_GROUP_AFTER_PROPERTY_RESTRICTION, OUTPUT,
ERROR_EXCLUSION_PROPERTY_TOGETHER, ERROR_PROPERTY_OR_TOGETHER, KEEP,
OUTPUT},
/*State: PROCESSING_PROPERTY_TERM*/
- {OUTPUT, OUTPUT, OUTPUT, OUTPUT, OUTPUT, ERROR_NO_WHITESPACE_AROUND_OR,
- OUTPUT, OUTPUT},
+ {OUTPUT, OUTPUT, OUTPUT, OUTPUT, OUTPUT, OUTPUT,
+ ERROR_NO_WHITESPACE_AROUND_OR, CONCATENATE, OUTPUT},
/*State: PROCESSING_OR*/
- {OUTPUT, ERROR_NO_WHITESPACE_AROUND_OR, OUTPUT, IGNORE,
- ERROR_NO_WHITESPACE_AROUND_OR, ERROR_NO_WHITESPACE_AROUND_OR,
- ERROR_NO_WHITESPACE_AROUND_OR, OUTPUT},
+ {OUTPUT, ERROR_NO_WHITESPACE_AROUND_OR, ERROR_NO_WHITESPACE_AROUND_OR,
+ OUTPUT, IGNORE, ERROR_NO_WHITESPACE_AROUND_OR,
+ ERROR_NO_WHITESPACE_AROUND_OR, ERROR_NO_WHITESPACE_AROUND_OR, OUTPUT},
/*State: OPENING_PARENTHESES*/
- {OUTPUT, OUTPUT, OUTPUT, OUTPUT, OUTPUT, KEEP, OUTPUT, OUTPUT},
+ {OUTPUT, OUTPUT, OUTPUT, OUTPUT, OUTPUT, OUTPUT, KEEP, OUTPUT, OUTPUT},
/*State: CLOSING_PARENTHESES*/
+ {OUTPUT, OUTPUT, OUTPUT, OUTPUT, OUTPUT, OUTPUT, OUTPUT,
+ ERROR_GROUP_AS_PROPERTY_NAME, OUTPUT},
+ /*State: PROCESSING_NON_ASCII_ALPHABETIC_TERM*/
{OUTPUT, OUTPUT, OUTPUT, OUTPUT, OUTPUT, OUTPUT,
- ERROR_GROUP_AS_PROPERTY_NAME, OUTPUT}};
-
-// Helper function to get the TermType of the input term.
-TermType GetTermType(std::string_view term) {
- if (term.length() == 1) {
- // Must be an ASCII char
- const char& first_term_char = term[0];
- if (first_term_char == kWhitespace) {
- return WHITESPACE;
- } else if (first_term_char == kColon) {
- return COLON;
- } else if (first_term_char == kLeftParentheses) {
- return LEFT_PARENTHESES;
- } else if (first_term_char == kRightParentheses) {
- return RIGHT_PARENTHESES;
- } else if (first_term_char == kExclusion) {
- return EXCLUSION_OPERATOR;
- }
- } else if (term.length() == 2 && term == kOrOperator) {
- return OR_OPERATOR;
+ ERROR_NO_WHITESPACE_AROUND_OR, ERROR_NON_ASCII_AS_PROPERTY_NAME, OUTPUT},
+ /*State: PROCESSING_PROPERTY_TERM_APPENDING*/
+ {OUTPUT, CONCATENATE, CONCATENATE, OUTPUT, OUTPUT, OUTPUT,
+ ERROR_NO_WHITESPACE_AROUND_OR, CONCATENATE, OUTPUT}};
+
+// Determines the length of the whitespace term beginning at text[pos] and
+// returns a pair with the WHITESPACE TermType and a string_view of the
+// whitespace term.
+std::pair<TermType, std::string_view> GetWhitespaceTerm(std::string_view text,
+ size_t pos) {
+ size_t cur = pos;
+ while (cur < text.length() && text[cur] == kWhitespace) {
+ ++cur;
}
+ return std::make_pair(WHITESPACE, text.substr(pos, cur - pos));
+}
+
+// Determines the length of the potential content term beginning at text[pos]
+// and returns a pair with the appropriate TermType and a string_view of the
+// content term.
+//
+// NOTE: The potential content term could multiple content terms (segmentation
+// is needed to determine this), a property restrict (depending on other
+// neighboring tokens). It could also be multiple content terms surrounding an
+// OR operator (segmentation is also needed to determine this).
+std::pair<TermType, std::string_view> GetContentTerm(std::string_view text,
+ size_t pos) {
+ size_t len = 0;
// Checks the first char to see if it's an ASCII term
- if (i18n_utils::IsAscii(term[0])) {
- if (std::isalnum(term[0])) {
- return ALPHANUMERIC_TERM;
+ TermType type = ASCII_ALPHANUMERIC_TERM;
+ if (!i18n_utils::IsAscii(text[pos])) {
+ type = NON_ASCII_ALPHABETIC_TERM;
+ } else if (std::isalnum(text[pos])) {
+ type = OTHER;
+ }
+ for (size_t cur = pos; cur < text.length() && len == 0; ++cur) {
+ switch (text[cur]) {
+ case kLeftParentheses:
+ [[fallthrough]];
+ case kRightParentheses:
+ [[fallthrough]];
+ case kExclusion:
+ [[fallthrough]];
+ case kWhitespace:
+ [[fallthrough]];
+ case kColon:
+ // If we reach any of our special characters (colon, exclusion or
+ // parentheses), then we've reached the end of the content term. Set len
+ // and exit the loop.
+ len = cur - pos;
+ break;
+ default:
+ break;
}
- return OTHER;
}
- // All non-ASCII terms are alphabetic since language segmenter already
- // filters out non-ASCII and non-alphabetic terms
- return ALPHANUMERIC_TERM;
+ if (len == 0) {
+ // If len isn't set, then we must have reached the end of the string.
+ len = text.length() - pos;
+ }
+ return std::make_pair(type, text.substr(pos, len));
+}
+
+// Determines the type and length of the term beginning at text[pos].
+std::pair<TermType, std::string_view> GetTerm(std::string_view text,
+ size_t pos) {
+ switch (text[pos]) {
+ case kLeftParentheses:
+ return std::make_pair(LEFT_PARENTHESES, text.substr(pos, 1));
+ case kRightParentheses:
+ return std::make_pair(RIGHT_PARENTHESES, text.substr(pos, 1));
+ case kExclusion:
+ return std::make_pair(EXCLUSION_OPERATOR, text.substr(pos, 1));
+ case kWhitespace:
+ // Get length of whitespace
+ return GetWhitespaceTerm(text, pos);
+ case kColon:
+ return std::make_pair(COLON, text.substr(pos, 1));
+ case kOrOperator[0]:
+ if (text.length() >= pos + kOrOperator.length() &&
+ text.substr(pos, kOrOperator.length()) == kOrOperator) {
+ return std::make_pair(OR_OPERATOR,
+ text.substr(pos, kOrOperator.length()));
+ }
+ [[fallthrough]];
+ default:
+ return GetContentTerm(text, pos);
+ }
}
// Helper function to remove the last token if it's OR operator. This is used to
@@ -378,12 +469,18 @@ libtextclassifier3::Status OutputToken(State new_state,
TermType current_term_type,
std::vector<Token>* tokens) {
switch (current_term_type) {
- case ALPHANUMERIC_TERM:
+ case ASCII_ALPHANUMERIC_TERM:
+ [[fallthrough]];
+ case NON_ASCII_ALPHABETIC_TERM:
if (new_state == PROCESSING_PROPERTY_TERM) {
- // Asserts extra rule 1: property name must be in ASCII
- if (!i18n_utils::IsAscii(current_term[0])) {
- return absl_ports::InvalidArgumentError(
- "Characters in property name must all be ASCII.");
+ // Asserts extra rule 1: each property name in the property path is a
+ // valid term.
+ for (std::string_view property :
+ absl_ports::StrSplit(current_term, kPropertySeparator)) {
+ if (!SchemaUtil::ValidatePropertyName(property).ok()) {
+ return absl_ports::InvalidArgumentError(
+ GetErrorMessage(ERROR_NON_ASCII_AS_PROPERTY_NAME));
+ }
}
tokens->emplace_back(Token::QUERY_PROPERTY, current_term);
} else {
@@ -416,13 +513,11 @@ libtextclassifier3::Status OutputToken(State new_state,
// Returns:
// OK on success
// INVALID_ARGUMENT with error message on invalid query syntax
-libtextclassifier3::Status ProcessTerm(State* current_state,
- std::string_view* current_term,
- TermType* current_term_type,
- int* unclosed_parentheses_count,
- const std::string_view next_term,
- TermType next_term_type,
- std::vector<Token>* tokens) {
+libtextclassifier3::Status ProcessTerm(
+ State* current_state, std::string_view* current_term,
+ TermType* current_term_type, int* unclosed_parentheses_count,
+ const std::string_view next_term, TermType next_term_type,
+ const LanguageSegmenter* language_segmenter, std::vector<Token>* tokens) {
// Asserts extra rule 4: parentheses must appear in pairs.
if (next_term_type == LEFT_PARENTHESES) {
++(*unclosed_parentheses_count);
@@ -440,8 +535,25 @@ libtextclassifier3::Status ProcessTerm(State* current_state,
}
switch (action_or_error) {
case OUTPUT:
- ICING_RETURN_IF_ERROR(
- OutputToken(new_state, *current_term, *current_term_type, tokens));
+ if (*current_state == PROCESSING_PROPERTY_TERM_APPENDING) {
+ // We appended multiple terms together in case they actually should have
+ // been connected by a colon connector.
+ ICING_ASSIGN_OR_RETURN(std::vector<std::string_view> content_terms,
+ language_segmenter->GetAllTerms(*current_term));
+ for (std::string_view term : content_terms) {
+ TermType type = ASCII_ALPHANUMERIC_TERM;
+ if (!i18n_utils::IsAscii(term[0])) {
+ type = NON_ASCII_ALPHABETIC_TERM;
+ } else if (!std::isalnum(term[0])) {
+ // Skip OTHER tokens here.
+ continue;
+ }
+ ICING_RETURN_IF_ERROR(OutputToken(new_state, term, type, tokens));
+ }
+ } else {
+ ICING_RETURN_IF_ERROR(
+ OutputToken(new_state, *current_term, *current_term_type, tokens));
+ }
[[fallthrough]];
case IGNORE:
*current_term = next_term;
@@ -449,6 +561,11 @@ libtextclassifier3::Status ProcessTerm(State* current_state,
break;
case KEEP:
break;
+ case CONCATENATE:
+ *current_term = std::string_view(
+ current_term->data(),
+ next_term.data() - current_term->data() + next_term.length());
+ break;
default:
return absl_ports::InvalidArgumentError(GetErrorMessage(ERROR_UNKNOWN));
}
@@ -463,56 +580,55 @@ libtextclassifier3::Status ProcessTerm(State* current_state,
// A list of tokens on success
// INVALID_ARGUMENT with error message on invalid query syntax
libtextclassifier3::StatusOr<std::vector<Token>> ProcessTerms(
- std::unique_ptr<LanguageSegmenter::Iterator> base_iterator) {
+ const LanguageSegmenter* language_segmenter,
+ std::vector<std::pair<TermType, std::string_view>> prescanned_terms) {
std::vector<Token> tokens;
State current_state = READY;
std::string_view current_term;
TermType current_term_type;
int unclosed_parentheses_count = 0;
- while (base_iterator->Advance()) {
- const std::string_view next_term = base_iterator->GetTerm();
- size_t colon_position = next_term.find(kColon);
- // Since colon ":" is a word connector per ICU's rule
- // (https://unicode.org/reports/tr29/#Word_Boundaries), strings like
- // "foo:bar" are returned by LanguageSegmenter as one term. Here we're
- // trying to find the first colon as it represents property restriction in
- // raw query.
- if (colon_position == std::string_view::npos) {
- // No colon found
- ICING_RETURN_IF_ERROR(ProcessTerm(&current_state, &current_term,
- &current_term_type,
- &unclosed_parentheses_count, next_term,
- GetTermType(next_term), &tokens));
- } else if (next_term.size() == 1 && next_term[0] == kColon) {
- // The whole term is a colon
+ for (int i = 0; i < prescanned_terms.size(); ++i) {
+ const std::pair<TermType, std::string_view>& prescanned_term =
+ prescanned_terms.at(i);
+ if (prescanned_term.first != ASCII_ALPHANUMERIC_TERM &&
+ prescanned_term.first != NON_ASCII_ALPHABETIC_TERM &&
+ prescanned_term.first != OTHER) {
+ // This can't be a property restrict. Just pass it in.
ICING_RETURN_IF_ERROR(
ProcessTerm(&current_state, &current_term, &current_term_type,
- &unclosed_parentheses_count, next_term, COLON, &tokens));
+ &unclosed_parentheses_count, prescanned_term.second,
+ prescanned_term.first, language_segmenter, &tokens));
} else {
- // String before the colon is the property name
- std::string_view property_name = next_term.substr(0, colon_position);
- ICING_RETURN_IF_ERROR(
- ProcessTerm(&current_state, &current_term, &current_term_type,
- &unclosed_parentheses_count, property_name,
- GetTermType(property_name), &tokens));
- ICING_RETURN_IF_ERROR(
- ProcessTerm(&current_state, &current_term, &current_term_type,
- &unclosed_parentheses_count, std::string_view(&kColon, 1),
- COLON, &tokens));
- // String after the colon is the term that property restriction is applied
- // on.
- std::string_view property_term = next_term.substr(colon_position + 1);
- ICING_RETURN_IF_ERROR(
- ProcessTerm(&current_state, &current_term, &current_term_type,
- &unclosed_parentheses_count, property_term,
- GetTermType(property_term), &tokens));
+ // There's no colon after this term. Now, we need to segment this.
+ ICING_ASSIGN_OR_RETURN(
+ std::vector<std::string_view> content_terms,
+ language_segmenter->GetAllTerms(prescanned_term.second));
+ for (std::string_view term : content_terms) {
+ TermType type = ASCII_ALPHANUMERIC_TERM;
+ if (term == kOrOperator) {
+ // TODO(tjbarron) Decide whether we should revise this and other
+ // handled syntax. This is used to allow queries like "term1,OR,term2"
+ // to succeed. It's not clear if we should allow this or require
+ // clients to ensure that OR operators are always surrounded by
+ // whitespace.
+ type = OR_OPERATOR;
+ } else if (!i18n_utils::IsAscii(term[0])) {
+ type = NON_ASCII_ALPHABETIC_TERM;
+ } else if (!std::isalnum(term[0])) {
+ type = OTHER;
+ }
+ ICING_RETURN_IF_ERROR(ProcessTerm(&current_state, &current_term,
+ &current_term_type,
+ &unclosed_parentheses_count, term,
+ type, language_segmenter, &tokens));
+ }
}
}
// Adds a fake whitespace at the end to flush the last term.
- ICING_RETURN_IF_ERROR(
- ProcessTerm(&current_state, &current_term, &current_term_type,
- &unclosed_parentheses_count,
- std::string_view(&kWhitespace, 1), WHITESPACE, &tokens));
+ ICING_RETURN_IF_ERROR(ProcessTerm(
+ &current_state, &current_term, &current_term_type,
+ &unclosed_parentheses_count, std::string_view(&kWhitespace, 1),
+ WHITESPACE, language_segmenter, &tokens));
if (unclosed_parentheses_count > 0) {
return absl_ports::InvalidArgumentError("Unclosed left parentheses.");
}
@@ -553,10 +669,16 @@ RawQueryTokenizer::Tokenize(std::string_view text) const {
libtextclassifier3::StatusOr<std::vector<Token>> RawQueryTokenizer::TokenizeAll(
std::string_view text) const {
- ICING_ASSIGN_OR_RETURN(
- std::unique_ptr<LanguageSegmenter::Iterator> base_iterator,
- language_segmenter_.Segment(text));
- return ProcessTerms(std::move(base_iterator));
+ // 1. Prescan all terms in the text, to determine which ones are potentially
+ // content and which ones are not.
+ std::vector<std::pair<TermType, std::string_view>> prescanned_terms;
+ for (size_t pos = 0; pos < text.length();) {
+ std::pair<TermType, std::string_view> term_pair = GetTerm(text, pos);
+ pos += term_pair.second.length();
+ prescanned_terms.push_back(term_pair);
+ }
+ // 2. Process the prescanned terms, segmenting content terms as needed.
+ return ProcessTerms(&language_segmenter_, std::move(prescanned_terms));
}
} // namespace lib
diff --git a/icing/tokenization/raw-query-tokenizer_test.cc b/icing/tokenization/raw-query-tokenizer_test.cc
index 9b71e8a..d4af9ed 100644
--- a/icing/tokenization/raw-query-tokenizer_test.cc
+++ b/icing/tokenization/raw-query-tokenizer_test.cc
@@ -59,6 +59,10 @@ TEST_F(RawQueryTokenizerTest, Simple) {
EXPECT_THAT(raw_query_tokenizer->TokenizeAll("Hello World!"),
IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "Hello"),
EqualsToken(Token::REGULAR, "World"))));
+
+ EXPECT_THAT(raw_query_tokenizer->TokenizeAll("hElLo WORLD"),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "hElLo"),
+ EqualsToken(Token::REGULAR, "WORLD"))));
}
TEST_F(RawQueryTokenizerTest, Parentheses) {
@@ -293,6 +297,12 @@ TEST_F(RawQueryTokenizerTest, PropertyRestriction) {
EqualsToken(Token::REGULAR, "term2"))));
EXPECT_THAT(
+ raw_query_tokenizer->TokenizeAll("property1:今天:天气"),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_PROPERTY, "property1"),
+ EqualsToken(Token::REGULAR, "今天"),
+ EqualsToken(Token::REGULAR, "天气"))));
+
+ EXPECT_THAT(
raw_query_tokenizer->TokenizeAll("property1:term1-"),
IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_PROPERTY, "property1"),
EqualsToken(Token::REGULAR, "term1"))));
diff --git a/icing/tokenization/tokenizer-factory.cc b/icing/tokenization/tokenizer-factory.cc
index 9ebbce5..9b59acf 100644
--- a/icing/tokenization/tokenizer-factory.cc
+++ b/icing/tokenization/tokenizer-factory.cc
@@ -31,14 +31,14 @@ namespace lib {
namespace tokenizer_factory {
libtextclassifier3::StatusOr<std::unique_ptr<Tokenizer>>
-CreateIndexingTokenizer(IndexingConfig::TokenizerType::Code type,
+CreateIndexingTokenizer(StringIndexingConfig::TokenizerType::Code type,
const LanguageSegmenter* lang_segmenter) {
ICING_RETURN_ERROR_IF_NULL(lang_segmenter);
switch (type) {
- case IndexingConfig::TokenizerType::PLAIN:
+ case StringIndexingConfig::TokenizerType::PLAIN:
return std::make_unique<PlainTokenizer>(lang_segmenter);
- case IndexingConfig::TokenizerType::NONE:
+ case StringIndexingConfig::TokenizerType::NONE:
[[fallthrough]];
default:
// This should never happen.
diff --git a/icing/tokenization/tokenizer-factory.h b/icing/tokenization/tokenizer-factory.h
index f81fd96..8b9226d 100644
--- a/icing/tokenization/tokenizer-factory.h
+++ b/icing/tokenization/tokenizer-factory.h
@@ -37,7 +37,7 @@ namespace tokenizer_factory {
// FAILED_PRECONDITION on any null pointer input
// INVALID_ARGUMENT if tokenizer type is invalid
libtextclassifier3::StatusOr<std::unique_ptr<Tokenizer>>
-CreateIndexingTokenizer(IndexingConfig::TokenizerType::Code type,
+CreateIndexingTokenizer(StringIndexingConfig::TokenizerType::Code type,
const LanguageSegmenter* lang_segmenter);
// All the supported query tokenizer types