diff options
author | Alex Saveliev <alexsav@google.com> | 2023-02-07 20:02:36 -0800 |
---|---|---|
committer | Alex Saveliev <alexsav@google.com> | 2023-02-07 20:25:19 -0800 |
commit | 5a41ca990be33387b0d5d15836a465bbe5ff5a28 (patch) | |
tree | 10e0e3d043aa1ba8effa3964ccf3287d83961cb0 /icing/file/persistent-storage.h | |
parent | cccafab8dfcae94d7072eb49ea971e3c688bdfc4 (diff) | |
download | icing-5a41ca990be33387b0d5d15836a465bbe5ff5a28.tar.gz |
Update icing from upstream
======================================================================
Adds a proto change for the delete propagation option
======================================================================
[ez] Change version to magic for PersistentHashMap
======================================================================
[iOS][testing][nitro] Disabling ICU language segmenter_test.
======================================================================
1. Add support for segmentation in the QueryVisitor.
======================================================================
Support the new double list type in ScoreExpression
======================================================================
Pass JoinChildrenFetcher from IcingSearchEngine all the way down to ScoringVisitor
======================================================================
Refactor the logic of Icing Joins so that nested search and scoring will be performed before the parent
======================================================================
Add lite-index thread-safety tests.
======================================================================
Split IcingSearchEngineTest into separate tests to cover specific apis:
======================================================================
Fix index tests TearDown method.
======================================================================
Improve query concurrency by providing a finer-grained lock around the LiteIndex.
======================================================================
Fix Icing normalization bug
======================================================================
[ez] Fix integer overflow error for IntegerIndexStorage
======================================================================
[NumericSearch][Storage][11/x] Implement Reset and destructor for IntegerIndex
======================================================================
[NumericSearch][Storage][10/x] Add class IntegerIndex
======================================================================
Refactor NumericIndex based on PersistentStorage
======================================================================
Refactor IntegerIndexStorage based on PersistentStorage
======================================================================
Add "working_path" into PersistentStorage
======================================================================
Refactor PersistentHashMap based on PersistentStorage
======================================================================
Create virtual class PersistentStorage for refactoring
======================================================================
Avoids returning reference to local temporary object.
======================================================================
LSC: Clean up references to the legacy protobuf compat library
======================================================================
Fix time complexity regression for snippet retriever
Bug: 256022027
Bug: 193919210
Bug: 266132035
Bug: 208654892
Bug: 261474063
Bug: 266103594
Bug: 146008613
Bug: 253182853
Bug: 266204868
Bug: 249829533
Bug: 266665956
Bug: 265258364
Change-Id: Ib2398c5097b6a2a57900e2ad4e3737502aa13820
Diffstat (limited to 'icing/file/persistent-storage.h')
-rw-r--r-- | icing/file/persistent-storage.h | 338 |
1 files changed, 338 insertions, 0 deletions
diff --git a/icing/file/persistent-storage.h b/icing/file/persistent-storage.h new file mode 100644 index 0000000..a70c9e9 --- /dev/null +++ b/icing/file/persistent-storage.h @@ -0,0 +1,338 @@ +// Copyright (C) 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ICING_FILE_PERSISTENT_STORAGE_H_ +#define ICING_FILE_PERSISTENT_STORAGE_H_ + +#include <cstdint> +#include <string> +#include <string_view> + +#include "icing/text_classifier/lib3/utils/base/status.h" +#include "icing/text_classifier/lib3/utils/base/statusor.h" +#include "icing/absl_ports/canonical_errors.h" +#include "icing/absl_ports/str_cat.h" +#include "icing/file/filesystem.h" +#include "icing/util/crc32.h" +#include "icing/util/status-macros.h" + +namespace icing { +namespace lib { + +// PersistentStorage: an abstract class for all persistent data structures. +// - It provides some common persistent file methods, e.g. PersistToDisk. +// - It encapsulates most of the checksum handling logics (including update and +// validation). +// +// Terminology: +// - Crcs: checksum section +// - Info: (custom) information for derived class +// - Metadata: Crcs + Info +// +// Usually a persistent data structure will have its own custom Info and +// storages (single or composite storages) definition. To create a new +// persistent data structure via PersistentStorage: +// - Decide what type the working path is (single file or directory). See +// working_path_ and WorkingPathType for more details. +// - Create a new class that inherits PersistentStorage: +// - Declare custom Info and design the metadata section layout. +// Usually the layout is <Crcs><Info>, and there are 2 common ways to +// manage metadata section: +// - Have a separate file for metadata. In this case, the new persistent +// data structure contains multiple files, so working path should be used +// as directory path and multiple files will be stored under it. Example: +// PersistentHashMap. +// - Have a single file for both metadata and storage data. In this case, +// the file layout should be <Crcs><Info><Storage Data>, and +// working path should be used as file path. Example: FileBackedVector. +// - Handle working path file/directory creation and deletion. +// PersistentStorage only provides static Discard() method to use. The +// derived class should implement other logics, e.g. working path (file +// /directory) creation, check condition to discard working path and start +// over new file(s). +// - Implement all pure virtual methods: +// - PersistStoragesToDisk: persist all (composite) storages. In general, +// the implementation will be calling PersistToDisk for all composite +// storages. +// - PersistMetadataToDisk: persist metadata, including Crcs and Info. +// - If the derived class maintains a concrete Crc and (custom) Info +// instance, then it should perform write/pwrite into the metadata +// section. +// - If the derived class uses memory-mapped region directly for metadata, +// then it should call MemoryMappedFile::PersistToDisk. +// - See crcs() for more details. +// - ComputeInfoChecksum: compute the checksum for custom Info. +// - ComputeStoragesChecksum: compute the (combined) checksum for all +// (composite) storages. In general, the implementation will be calling +// UpdateChecksums for all composite storages and XOR all checksums. +// - crcs(): provide the reference for PersistentStorage to write checksums. +// The derived class can either maintain a concrete Crcs instance, or +// reinterpret_cast the memory-mapped region to Crcs reference. Either +// choice is fine as long as PersistMetadataToDisk flushes it to disk +// correctly. +// - Call either InitializeNewStorage or InitializeExistingStorage when creating +// and initializing an instance, depending on initializing new storage or from +// existing file(s). +class PersistentStorage { + public: + enum class WorkingPathType { + kSingleFile, + kDirectory, + kDummy, + }; + + // Crcs and Info will be written into the metadata section. Info is defined by + // the actual implementation of each persistent storage. Usually the Metadata + // layout is: <Crcs><Info> + struct Crcs { + struct ComponentCrcs { + uint32_t info_crc; + uint32_t storages_crc; + + bool operator==(const ComponentCrcs& other) const { + return info_crc == other.info_crc && storages_crc == other.storages_crc; + } + + Crc32 ComputeChecksum() const { + return Crc32(std::string_view(reinterpret_cast<const char*>(this), + sizeof(ComponentCrcs))); + } + } __attribute__((packed)); + + bool operator==(const Crcs& other) const { + return all_crc == other.all_crc && component_crcs == other.component_crcs; + } + + uint32_t all_crc; + ComponentCrcs component_crcs; + } __attribute__((packed)); + static_assert(sizeof(Crcs) == 12, ""); + + // Deletes working_path according to its type. + // + // Returns: + // - OK on success + // - INTERNAL_ERROR on I/O error + // - INVALID_ARGUMENT_ERROR if working_path_type is unknown type + static libtextclassifier3::Status Discard(const Filesystem& filesystem, + const std::string& working_path, + WorkingPathType working_path_type); + + virtual ~PersistentStorage() = default; + + // Initializes new persistent storage. It computes the initial checksums and + // writes into the metadata file. + // + // Note: either InitializeNewStorage or InitializeExistingStorage should be + // invoked after creating a PersistentStorage instance before using, otherwise + // an uninitialized instance will fail to use persistent storage features, + // e.g. PersistToDisk, UpdateChecksums. + // + // Returns: + // - OK on success or already initialized + // - Any errors from ComputeInfoChecksum, ComputeStoragesChecksum, depending + // on actual implementation + libtextclassifier3::Status InitializeNewStorage() { + if (is_initialized_) { + return libtextclassifier3::Status::OK; + } + + ICING_RETURN_IF_ERROR(UpdateChecksumsInternal()); + ICING_RETURN_IF_ERROR(PersistMetadataToDisk()); + + is_initialized_ = true; + return libtextclassifier3::Status::OK; + } + + // Initializes persistent storage from existing file(s). + // + // It enforces the following check(s): + // - Validate checksums. + // + // Note: either InitializeNewStorage or InitializeExistingStorage should be + // invoked after creating a PersistentStorage instance before using. + // + // Returns: + // - OK on success or already initialized + // - FAILED_PRECONDITION_ERROR if checksum validation fails. + // - Any errors from ComputeInfoChecksum, ComputeStoragesChecksum, depending + // on actual implementation + libtextclassifier3::Status InitializeExistingStorage() { + if (is_initialized_) { + return libtextclassifier3::Status::OK; + } + + ICING_RETURN_IF_ERROR(ValidateChecksums()); + + is_initialized_ = true; + return libtextclassifier3::Status::OK; + } + + // Flushes contents to underlying files. + // 1) Flushes storages. + // 2) Updates all checksums by new data. + // 3) Flushes metadata. + // + // Returns: + // - OK on success + // - FAILED_PRECONDITION_ERROR if PersistentStorage is uninitialized + // - Any errors from PersistStoragesToDisk, UpdateChecksums, + // PersistMetadataToDisk, depending on actual implementation + libtextclassifier3::Status PersistToDisk() { + if (!is_initialized_) { + return absl_ports::FailedPreconditionError(absl_ports::StrCat( + "PersistentStorage ", working_path_, " not initialized")); + } + + ICING_RETURN_IF_ERROR(PersistStoragesToDisk()); + ICING_RETURN_IF_ERROR(UpdateChecksums()); + ICING_RETURN_IF_ERROR(PersistMetadataToDisk()); + return libtextclassifier3::Status::OK; + } + + // Updates checksums of all components and returns the overall crc (all_crc) + // of the persistent storage. + // + // Returns: + // - Overall crc of the persistent storage on success + // - FAILED_PRECONDITION_ERROR if PersistentStorage is uninitialized + // - Any errors from ComputeInfoChecksum, ComputeStoragesChecksum, depending + // on actual implementation + libtextclassifier3::StatusOr<Crc32> UpdateChecksums() { + if (!is_initialized_) { + return absl_ports::FailedPreconditionError(absl_ports::StrCat( + "PersistentStorage ", working_path_, " not initialized")); + } + + return UpdateChecksumsInternal(); + } + + protected: + explicit PersistentStorage(const Filesystem& filesystem, + std::string working_path, + WorkingPathType working_path_type) + : filesystem_(filesystem), + working_path_(std::move(working_path)), + working_path_type_(working_path_type), + is_initialized_(false) {} + + // Flushes contents of metadata. The implementation should flush Crcs and Info + // correctly, depending on whether they're using memory-mapped regions or + // concrete instances in the derived class. + // + // Returns: + // - OK on success + // - Any other errors, depending on actual implementation + virtual libtextclassifier3::Status PersistMetadataToDisk() = 0; + + // Flushes contents of all storages to underlying files. + // + // Returns: + // - OK on success + // - Any other errors, depending on actual implementation + virtual libtextclassifier3::Status PersistStoragesToDisk() = 0; + + // Computes and returns Info checksum. + // + // This function will be mainly called by UpdateChecksums. + // + // Returns: + // - Crc of the Info on success + // - Any other errors, depending on actual implementation + virtual libtextclassifier3::StatusOr<Crc32> ComputeInfoChecksum() = 0; + + // Computes and returns all storages checksum. If there are multiple storages, + // usually we XOR their checksums together to a single checksum. + // + // This function will be mainly called by UpdateChecksums. + // + // Returns: + // - Crc of all storages on success + // - Any other errors from depending on actual implementation + virtual libtextclassifier3::StatusOr<Crc32> ComputeStoragesChecksum() = 0; + + // Returns the Crcs instance reference. The derived class can either own a + // concrete Crcs instance, or reinterpret_cast the memory-mapped region to + // Crcs reference. PersistMetadataToDisk should flush it to disk correctly. + virtual Crcs& crcs() = 0; + virtual const Crcs& crcs() const = 0; + + const Filesystem& filesystem_; + // Path to the storage. It can be a single file path or a directory path + // depending on the implementation of the derived class. + // + // Note that the derived storage class will take full ownership and of + // working_path_, including creation/deletion. It is the caller's + // responsibility to specify correct working path and avoid mixing different + // persistent storages together under the same path. Also the caller has the + // ownership for the parent directory of working_path_, and it is responsible + // for parent directory creation/deletion. + std::string working_path_; + WorkingPathType working_path_type_; + + bool is_initialized_; + + private: + // Updates checksums of all components and returns the overall crc (all_crc) + // of the persistent storage. Different from UpdateChecksums, it won't check + // if PersistentStorage is initialized or not. + // + // Returns: + // - Overall crc of the persistent storage on success + // - Any errors from ComputeInfoChecksum, ComputeStoragesChecksum, depending + // on actual implementation + libtextclassifier3::StatusOr<Crc32> UpdateChecksumsInternal() { + Crcs& crcs_ref = crcs(); + // Compute and update storages + info checksums. + ICING_ASSIGN_OR_RETURN(Crc32 info_crc, ComputeInfoChecksum()); + ICING_ASSIGN_OR_RETURN(Crc32 storages_crc, ComputeStoragesChecksum()); + crcs_ref.component_crcs.info_crc = info_crc.Get(); + crcs_ref.component_crcs.storages_crc = storages_crc.Get(); + + // Finally compute and update overall checksum. + crcs_ref.all_crc = crcs_ref.component_crcs.ComputeChecksum().Get(); + return Crc32(crcs_ref.all_crc); + } + + // Validates all checksums of the persistent storage. + // + // Returns: + // - OK on success + // - FAILED_PRECONDITION_ERROR if any checksum is incorrect. + // - Any errors from ComputeInfoChecksum, ComputeStoragesChecksum, depending + // on actual implementation + libtextclassifier3::Status ValidateChecksums() { + const Crcs& crcs_ref = crcs(); + if (crcs_ref.all_crc != crcs_ref.component_crcs.ComputeChecksum().Get()) { + return absl_ports::FailedPreconditionError("Invalid all crc"); + } + + ICING_ASSIGN_OR_RETURN(Crc32 info_crc, ComputeInfoChecksum()); + if (crcs_ref.component_crcs.info_crc != info_crc.Get()) { + return absl_ports::FailedPreconditionError("Invalid info crc"); + } + + ICING_ASSIGN_OR_RETURN(Crc32 storages_crc, ComputeStoragesChecksum()); + if (crcs_ref.component_crcs.storages_crc != storages_crc.Get()) { + return absl_ports::FailedPreconditionError("Invalid storages crc"); + } + + return libtextclassifier3::Status::OK; + } +}; + +} // namespace lib +} // namespace icing + +#endif // ICING_FILE_PERSISTENT_STORAGE_H_ |