aboutsummaryrefslogtreecommitdiff
path: root/icing/store
diff options
context:
space:
mode:
Diffstat (limited to 'icing/store')
-rw-r--r--icing/store/document-store.cc63
-rw-r--r--icing/store/document-store.h23
-rw-r--r--icing/store/document-store_test.cc27
3 files changed, 63 insertions, 50 deletions
diff --git a/icing/store/document-store.cc b/icing/store/document-store.cc
index 35ee172..710ff58 100644
--- a/icing/store/document-store.cc
+++ b/icing/store/document-store.cc
@@ -89,17 +89,6 @@ constexpr int32_t kUriMapperMaxSize = 36 * 1024 * 1024; // 36 MiB
constexpr int32_t kNamespaceMapperMaxSize = 3 * 128 * 1024; // 384 KiB
constexpr int32_t kCorpusMapperMaxSize = 3 * 128 * 1024; // 384 KiB
-// Whether to use namespace id or namespace name to build up fingerprint for
-// document_key_mapper_ and corpus_mapper_.
-// Note: Changing this flag will require a reconstruction of the internal
-// mappers in the document store. A easy way to trigger a rebuild is to change
-// the kMagic value.
-//
-// TODO(b/259969017) Flip this flag to true at the time when we switch to use
-// persistent hash map for document_key_mapper_ so that we just need one
-// reconstruction of the internal mappers.
-constexpr bool kNamespaceIdFingerprint = false;
-
DocumentWrapper CreateDocumentWrapper(DocumentProto&& document) {
DocumentWrapper document_wrapper;
*document_wrapper.mutable_document() = std::move(document);
@@ -157,23 +146,6 @@ std::string EncodeNamespaceId(NamespaceId namespace_id) {
return encoding;
}
-std::string MakeFingerprint(NamespaceId namespace_id,
- std::string_view namespace_,
- std::string_view uri_or_schema) {
- if (!kNamespaceIdFingerprint) {
- // Using a 64-bit fingerprint to represent the key could lead to collisions.
- // But, even with 200K unique keys, the probability of collision is about
- // one-in-a-billion (https://en.wikipedia.org/wiki/Birthday_attack).
- uint64_t fprint = tc3farmhash::Fingerprint64(
- absl_ports::StrCat(namespace_, uri_or_schema));
- return fingerprint_util::GetFingerprintString(fprint);
- } else {
- return absl_ports::StrCat(EncodeNamespaceId(namespace_id),
- encode_util::EncodeIntToCString(
- tc3farmhash::Fingerprint64(uri_or_schema)));
- }
-}
-
int64_t CalculateExpirationTimestampMs(int64_t creation_timestamp_ms,
int64_t ttl_ms) {
if (ttl_ms == 0) {
@@ -236,15 +208,34 @@ std::unordered_map<NamespaceId, std::string> GetNamespaceIdsToNamespaces(
} // namespace
+std::string DocumentStore::MakeFingerprint(
+ NamespaceId namespace_id, std::string_view namespace_,
+ std::string_view uri_or_schema) const {
+ if (!namespace_id_fingerprint_) {
+ // Using a 64-bit fingerprint to represent the key could lead to collisions.
+ // But, even with 200K unique keys, the probability of collision is about
+ // one-in-a-billion (https://en.wikipedia.org/wiki/Birthday_attack).
+ uint64_t fprint = tc3farmhash::Fingerprint64(
+ absl_ports::StrCat(namespace_, uri_or_schema));
+ return fingerprint_util::GetFingerprintString(fprint);
+ } else {
+ return absl_ports::StrCat(EncodeNamespaceId(namespace_id),
+ encode_util::EncodeIntToCString(
+ tc3farmhash::Fingerprint64(uri_or_schema)));
+ }
+}
+
DocumentStore::DocumentStore(const Filesystem* filesystem,
const std::string_view base_dir,
const Clock* clock,
- const SchemaStore* schema_store)
+ const SchemaStore* schema_store,
+ bool namespace_id_fingerprint)
: filesystem_(filesystem),
base_dir_(base_dir),
clock_(*clock),
schema_store_(schema_store),
- document_validator_(schema_store) {}
+ document_validator_(schema_store),
+ namespace_id_fingerprint_(namespace_id_fingerprint) {}
libtextclassifier3::StatusOr<DocumentId> DocumentStore::Put(
const DocumentProto& document, int32_t num_tokens,
@@ -271,14 +262,14 @@ DocumentStore::~DocumentStore() {
libtextclassifier3::StatusOr<DocumentStore::CreateResult> DocumentStore::Create(
const Filesystem* filesystem, const std::string& base_dir,
const Clock* clock, const SchemaStore* schema_store,
- bool force_recovery_and_revalidate_documents,
+ bool force_recovery_and_revalidate_documents, bool namespace_id_fingerprint,
InitializeStatsProto* initialize_stats) {
ICING_RETURN_ERROR_IF_NULL(filesystem);
ICING_RETURN_ERROR_IF_NULL(clock);
ICING_RETURN_ERROR_IF_NULL(schema_store);
- auto document_store = std::unique_ptr<DocumentStore>(
- new DocumentStore(filesystem, base_dir, clock, schema_store));
+ auto document_store = std::unique_ptr<DocumentStore>(new DocumentStore(
+ filesystem, base_dir, clock, schema_store, namespace_id_fingerprint));
ICING_ASSIGN_OR_RETURN(
DataLoss data_loss,
document_store->Initialize(force_recovery_and_revalidate_documents,
@@ -386,7 +377,8 @@ libtextclassifier3::Status DocumentStore::InitializeExistingDerivedFiles() {
absl_ports::StrCat("Couldn't read: ", MakeHeaderFilename(base_dir_)));
}
- if (header.magic != DocumentStore::Header::kMagic) {
+ if (header.magic !=
+ DocumentStore::Header::GetCurrentMagic(namespace_id_fingerprint_)) {
return absl_ports::InternalError(absl_ports::StrCat(
"Invalid header kMagic for file: ", MakeHeaderFilename(base_dir_)));
}
@@ -859,7 +851,8 @@ bool DocumentStore::HeaderExists() {
libtextclassifier3::Status DocumentStore::UpdateHeader(const Crc32& checksum) {
// Write the header
DocumentStore::Header header;
- header.magic = DocumentStore::Header::kMagic;
+ header.magic =
+ DocumentStore::Header::GetCurrentMagic(namespace_id_fingerprint_);
header.checksum = checksum.Get();
// This should overwrite the header.
diff --git a/icing/store/document-store.h b/icing/store/document-store.h
index 3e02636..7c414d7 100644
--- a/icing/store/document-store.h
+++ b/icing/store/document-store.h
@@ -59,13 +59,19 @@ namespace lib {
class DocumentStore {
public:
struct Header {
- static constexpr int32_t kMagic = 0x746f7265;
+ static int32_t GetCurrentMagic(bool namespace_id_fingerprint) {
+ return namespace_id_fingerprint ? kNewMagic : kOldMagic;
+ }
// Holds the magic as a quick sanity check against file corruption.
int32_t magic;
// Checksum of the DocumentStore's sub-component's checksums.
uint32_t checksum;
+
+ private:
+ static constexpr int32_t kOldMagic = 0x746f7265;
+ static constexpr int32_t kNewMagic = 0x1b99c8b0;
};
struct OptimizeInfo {
@@ -136,6 +142,7 @@ class DocumentStore {
const Filesystem* filesystem, const std::string& base_dir,
const Clock* clock, const SchemaStore* schema_store,
bool force_recovery_and_revalidate_documents = false,
+ bool namespace_id_fingerprint = false,
InitializeStatsProto* initialize_stats = nullptr);
// Returns the maximum DocumentId that the DocumentStore has assigned. If
@@ -472,7 +479,8 @@ class DocumentStore {
private:
// Use DocumentStore::Create() to instantiate.
DocumentStore(const Filesystem* filesystem, std::string_view base_dir,
- const Clock* clock, const SchemaStore* schema_store);
+ const Clock* clock, const SchemaStore* schema_store,
+ bool namespace_id_fingerprint);
const Filesystem* const filesystem_;
const std::string base_dir_;
@@ -485,6 +493,10 @@ class DocumentStore {
// Used to validate incoming documents
DocumentValidator document_validator_;
+ // Whether to use namespace id or namespace name to build up fingerprint for
+ // document_key_mapper_ and corpus_mapper_.
+ bool namespace_id_fingerprint_;
+
// A log used to store all documents, it serves as a ground truth of doc
// store. key_mapper_ and document_id_mapper_ can be regenerated from it.
std::unique_ptr<PortableFileBackedProtoLog<DocumentWrapper>> document_log_;
@@ -733,6 +745,13 @@ class DocumentStore {
libtextclassifier3::StatusOr<
google::protobuf::RepeatedPtrField<DocumentDebugInfoProto::CorpusInfo>>
CollectCorpusInfo() const;
+
+ // Build fingerprint for the keys of document_key_mapper_ and corpus_mapper_.
+ // Note that namespace_id_fingerprint_ controls the way that a fingerprint is
+ // built.
+ std::string MakeFingerprint(NamespaceId namespace_id,
+ std::string_view namespace_,
+ std::string_view uri_or_schema) const;
};
} // namespace lib
diff --git a/icing/store/document-store_test.cc b/icing/store/document-store_test.cc
index a115e11..81da191 100644
--- a/icing/store/document-store_test.cc
+++ b/icing/store/document-store_test.cc
@@ -207,7 +207,8 @@ class DocumentStoreTest : public ::testing::Test {
const std::string header_file =
absl_ports::StrCat(document_store_dir_, "/document_store_header");
DocumentStore::Header header;
- header.magic = DocumentStore::Header::kMagic;
+ header.magic = DocumentStore::Header::GetCurrentMagic(
+ /*namespace_id_fingerprint=*/false);
header.checksum = 10; // Arbitrary garbage checksum
filesystem_.DeleteFile(header_file.c_str());
filesystem_.Write(header_file.c_str(), &header, sizeof(header));
@@ -3285,10 +3286,10 @@ TEST_F(DocumentStoreTest, LoadScoreCacheAndInitializeSuccessfully) {
InitializeStatsProto initialize_stats;
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get(),
- /*force_recovery_and_revalidate_documents=*/false,
- &initialize_stats));
+ DocumentStore::Create(
+ &filesystem_, document_store_dir_, &fake_clock_, schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, &initialize_stats));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
// The document log is using the legacy v0 format so that a migration is
@@ -3489,10 +3490,10 @@ TEST_F(DocumentStoreTest, InitializeForceRecoveryUpdatesTypeIds) {
InitializeStatsProto initialize_stats;
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get(),
- /*force_recovery_and_revalidate_documents=*/true,
- &initialize_stats));
+ DocumentStore::Create(
+ &filesystem_, document_store_dir_, &fake_clock_, schema_store.get(),
+ /*force_recovery_and_revalidate_documents=*/true,
+ /*namespace_id_fingerprint=*/false, &initialize_stats));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
@@ -3875,10 +3876,10 @@ TEST_F(DocumentStoreTest, MigrateToPortableFileBackedProtoLog) {
InitializeStatsProto initialize_stats;
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir, &fake_clock_,
- schema_store.get(),
- /*force_recovery_and_revalidate_documents=*/false,
- &initialize_stats));
+ DocumentStore::Create(
+ &filesystem_, document_store_dir, &fake_clock_, schema_store.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ /*namespace_id_fingerprint=*/false, &initialize_stats));
std::unique_ptr<DocumentStore> document_store =
std::move(create_result.document_store);