diff options
author | Grace Zhao <gracezrx@google.com> | 2022-10-27 13:52:32 -0700 |
---|---|---|
committer | Grace Zhao <gracezrx@google.com> | 2022-10-27 21:06:49 +0000 |
commit | d0795655ecf1aac89bb1802f4e1d3f5fb7dcb2b0 (patch) | |
tree | ff367f5a9c0f94a1da09638e5d986d5e9bcb62ff /icing/file | |
parent | 4af97ca767a9f2e88dc75e05784dd010e7541d13 (diff) | |
download | icing-d0795655ecf1aac89bb1802f4e1d3f5fb7dcb2b0.tar.gz |
Sync from upstream.
Descriptions:
======================================================================
Fix the bug in PostingListAccessor found by the Icing Monkey test
======================================================================
Add the logic to handle fatal errors from IcingDynamicTrie to avoid crashing
======================================================================
Clear out the dead code IcingDynamicTrie::Compact
======================================================================
[MemoryMappedFile][RemapV2][1/x] Add factory method for
MemoryMappedFile
======================================================================
[MemoryMappedFile][RemapV2][2/x] Create GrowAndRemapIfNecessary and change factory method
======================================================================
[MemoryMappedFile][RemapV2][3/x] Migrate FileBackedVector to use GrowAndRemapIfNecessary
======================================================================
Add JNI latency for query latency stats breakdown
======================================================================
Bug: 247671531
Bug: 247929909
Bug: 253282365
Change-Id: Ic3b88d0f044edacfe2dfeb08fa381b2186c731cb
Diffstat (limited to 'icing/file')
-rw-r--r-- | icing/file/file-backed-bitmap.cc | 13 | ||||
-rw-r--r-- | icing/file/file-backed-bitmap.h | 5 | ||||
-rw-r--r-- | icing/file/file-backed-proto-log.h | 33 | ||||
-rw-r--r-- | icing/file/file-backed-proto_test.cc | 16 | ||||
-rw-r--r-- | icing/file/file-backed-vector.h | 193 | ||||
-rw-r--r-- | icing/file/file-backed-vector_test.cc | 113 | ||||
-rw-r--r-- | icing/file/memory-mapped-file-leak_test.cc | 14 | ||||
-rw-r--r-- | icing/file/memory-mapped-file.cc | 337 | ||||
-rw-r--r-- | icing/file/memory-mapped-file.h | 272 | ||||
-rw-r--r-- | icing/file/memory-mapped-file_test.cc | 668 | ||||
-rw-r--r-- | icing/file/persistent-hash-map.cc | 25 | ||||
-rw-r--r-- | icing/file/persistent-hash-map.h | 5 | ||||
-rw-r--r-- | icing/file/portable-file-backed-proto-log.h | 6 | ||||
-rw-r--r-- | icing/file/portable-file-backed-proto-log_test.cc | 6 |
14 files changed, 1377 insertions, 329 deletions
diff --git a/icing/file/file-backed-bitmap.cc b/icing/file/file-backed-bitmap.cc index a8231e3..bdcfc79 100644 --- a/icing/file/file-backed-bitmap.cc +++ b/icing/file/file-backed-bitmap.cc @@ -47,8 +47,12 @@ FileBackedBitmap::Create(const Filesystem* filesystem, "mmap strategy."); } + ICING_ASSIGN_OR_RETURN( + MemoryMappedFile mmapper, + MemoryMappedFile::Create(*filesystem, file_path, mmap_strategy)); + auto bitmap = std::unique_ptr<FileBackedBitmap>( - new FileBackedBitmap(filesystem, file_path, mmap_strategy)); + new FileBackedBitmap(filesystem, file_path, std::move(mmapper))); // TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR // that can support error logging. @@ -62,10 +66,10 @@ FileBackedBitmap::Create(const Filesystem* filesystem, FileBackedBitmap::FileBackedBitmap(const Filesystem* filesystem, std::string_view file_path, - MemoryMappedFile::Strategy mmap_strategy) + MemoryMappedFile&& mmapper) : filesystem_(filesystem), file_path_(file_path), - mmapper_(new MemoryMappedFile(*filesystem, file_path, mmap_strategy)) {} + mmapper_(std::make_unique<MemoryMappedFile>(std::move(mmapper))) {} FileBackedBitmap::~FileBackedBitmap() { // Only update if we have auto_sync setup, otherwise the checksum will be @@ -269,7 +273,8 @@ libtextclassifier3::Status FileBackedBitmap::GrowTo(int new_num_bits) { return status; } - ICING_VLOG(1) << "Grew file " << file_path_ << " to new size " << new_file_size; + ICING_VLOG(1) << "Grew file " << file_path_ << " to new size " + << new_file_size; mutable_header()->state = Header::ChecksumState::kStale; return libtextclassifier3::Status::OK; } diff --git a/icing/file/file-backed-bitmap.h b/icing/file/file-backed-bitmap.h index e3d98ad..beba14e 100644 --- a/icing/file/file-backed-bitmap.h +++ b/icing/file/file-backed-bitmap.h @@ -175,8 +175,9 @@ class FileBackedBitmap { Header* mutable_header(); // Use FileBackedBitmap::Create() to instantiate. - FileBackedBitmap(const Filesystem* filesystem, std::string_view file_path, - MemoryMappedFile::Strategy mmap_strategy); + explicit FileBackedBitmap(const Filesystem* filesystem, + std::string_view file_path, + MemoryMappedFile&& mmapper); // Verify the contents of the bitmap and get ready for read/write operations. // diff --git a/icing/file/file-backed-proto-log.h b/icing/file/file-backed-proto-log.h index ad7fae9..78236ba 100644 --- a/icing/file/file-backed-proto-log.h +++ b/icing/file/file-backed-proto-log.h @@ -186,8 +186,9 @@ class FileBackedProtoLog { // } class Iterator { public: - Iterator(const Filesystem& filesystem, const std::string& file_path, - int64_t initial_offset); + explicit Iterator(const Filesystem& filesystem, + const std::string& file_path, int64_t initial_offset, + MemoryMappedFile&& mmapped_file); // Advances to the position of next proto whether it has been erased or not. // @@ -213,7 +214,7 @@ class FileBackedProtoLog { // Returns an iterator of current proto log. The caller needs to keep the // proto log unchanged while using the iterator, otherwise unexpected // behaviors could happen. - Iterator GetIterator(); + libtextclassifier3::StatusOr<Iterator> GetIterator(); private: // Object can only be instantiated via the ::Create factory. @@ -472,8 +473,10 @@ template <typename ProtoT> libtextclassifier3::StatusOr<Crc32> FileBackedProtoLog<ProtoT>::ComputeChecksum( const Filesystem* filesystem, const std::string& file_path, Crc32 initial_crc, int64_t start, int64_t end) { - auto mmapped_file = MemoryMappedFile(*filesystem, file_path, - MemoryMappedFile::Strategy::READ_ONLY); + ICING_ASSIGN_OR_RETURN( + MemoryMappedFile mmapped_file, + MemoryMappedFile::Create(*filesystem, file_path, + MemoryMappedFile::Strategy::READ_ONLY)); Crc32 new_crc(initial_crc.Get()); if (start < 0) { @@ -544,8 +547,10 @@ template <typename ProtoT> libtextclassifier3::StatusOr<ProtoT> FileBackedProtoLog<ProtoT>::ReadProto( int64_t file_offset) const { int64_t file_size = filesystem_->GetFileSize(fd_.get()); - MemoryMappedFile mmapped_file(*filesystem_, file_path_, - MemoryMappedFile::Strategy::READ_ONLY); + ICING_ASSIGN_OR_RETURN( + MemoryMappedFile mmapped_file, + MemoryMappedFile::Create(*filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_ONLY)); if (file_offset >= file_size) { // file_size points to the next byte to write at, so subtract one to get // the inclusive, actual size of file. @@ -588,9 +593,9 @@ libtextclassifier3::StatusOr<ProtoT> FileBackedProtoLog<ProtoT>::ReadProto( template <typename ProtoT> FileBackedProtoLog<ProtoT>::Iterator::Iterator(const Filesystem& filesystem, const std::string& file_path, - int64_t initial_offset) - : mmapped_file_(filesystem, file_path, - MemoryMappedFile::Strategy::READ_ONLY), + int64_t initial_offset, + MemoryMappedFile&& mmapped_file) + : mmapped_file_(std::move(mmapped_file)), initial_offset_(initial_offset), current_offset_(kInvalidOffset), file_size_(filesystem.GetFileSize(file_path.c_str())) { @@ -629,10 +634,14 @@ int64_t FileBackedProtoLog<ProtoT>::Iterator::GetOffset() { } template <typename ProtoT> -typename FileBackedProtoLog<ProtoT>::Iterator +libtextclassifier3::StatusOr<typename FileBackedProtoLog<ProtoT>::Iterator> FileBackedProtoLog<ProtoT>::GetIterator() { + ICING_ASSIGN_OR_RETURN( + MemoryMappedFile mmapped_file, + MemoryMappedFile::Create(*filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_ONLY)); return Iterator(*filesystem_, file_path_, - /*initial_offset=*/sizeof(Header)); + /*initial_offset=*/sizeof(Header), std::move(mmapped_file)); } template <typename ProtoT> diff --git a/icing/file/file-backed-proto_test.cc b/icing/file/file-backed-proto_test.cc index 7f994fb..009af52 100644 --- a/icing/file/file-backed-proto_test.cc +++ b/icing/file/file-backed-proto_test.cc @@ -45,7 +45,7 @@ TEST_F(FileBackedProtoTest, SimpleReadWriteTest) { DocumentBuilder().SetKey("namespace", "google.com").Build(); FileBackedProto<DocumentProto> file_proto(filesystem_, filename_); - ICING_ASSERT_OK(file_proto.Write(absl::make_unique<DocumentProto>(document))); + ICING_ASSERT_OK(file_proto.Write(std::make_unique<DocumentProto>(document))); EXPECT_THAT(file_proto.Read(), IsOkAndHolds(Pointee(EqualsProto(document)))); // Multiple reads work. EXPECT_THAT(file_proto.Read(), IsOkAndHolds(Pointee(EqualsProto(document)))); @@ -61,7 +61,7 @@ TEST_F(FileBackedProtoTest, DataPersistsAcrossMultipleInstancesTest) { EXPECT_THAT(file_proto.Read(), Not(IsOk())); // Nothing to read. ICING_ASSERT_OK( - file_proto.Write(absl::make_unique<DocumentProto>(document))); + file_proto.Write(std::make_unique<DocumentProto>(document))); EXPECT_THAT(file_proto.Read(), IsOkAndHolds(Pointee(EqualsProto(document)))); } @@ -84,12 +84,12 @@ TEST_F(FileBackedProtoTest, MultipleUpdatesToProtoTest) { { FileBackedProto<DocumentProto> file_proto(filesystem_, filename_); ICING_ASSERT_OK( - file_proto.Write(absl::make_unique<DocumentProto>(googleProto))); + file_proto.Write(std::make_unique<DocumentProto>(googleProto))); EXPECT_THAT(file_proto.Read(), IsOkAndHolds(Pointee(EqualsProto(googleProto)))); ICING_ASSERT_OK( - file_proto.Write(absl::make_unique<DocumentProto>(youtubeProto))); + file_proto.Write(std::make_unique<DocumentProto>(youtubeProto))); EXPECT_THAT(file_proto.Read(), IsOkAndHolds(Pointee(EqualsProto(youtubeProto)))); } @@ -100,12 +100,12 @@ TEST_F(FileBackedProtoTest, MultipleUpdatesToProtoTest) { IsOkAndHolds(Pointee(EqualsProto(youtubeProto)))); ICING_ASSERT_OK( - file_proto.Write(absl::make_unique<DocumentProto>(wazeProto))); + file_proto.Write(std::make_unique<DocumentProto>(wazeProto))); EXPECT_THAT(file_proto.Read(), IsOkAndHolds(Pointee(EqualsProto(wazeProto)))); ICING_ASSERT_OK( - file_proto.Write(absl::make_unique<DocumentProto>(googleProto))); + file_proto.Write(std::make_unique<DocumentProto>(googleProto))); EXPECT_THAT(file_proto.Read(), IsOkAndHolds(Pointee(EqualsProto(googleProto)))); } @@ -117,7 +117,7 @@ TEST_F(FileBackedProtoTest, InvalidFilenameTest) { FileBackedProto<DocumentProto> file_proto(filesystem_, ""); EXPECT_THAT(file_proto.Read(), Not(IsOk())); - EXPECT_THAT(file_proto.Write(absl::make_unique<DocumentProto>(document)), + EXPECT_THAT(file_proto.Write(std::make_unique<DocumentProto>(document)), Not(IsOk())); } @@ -128,7 +128,7 @@ TEST_F(FileBackedProtoTest, FileCorruptionTest) { { FileBackedProto<DocumentProto> file_proto(filesystem_, filename_); ICING_ASSERT_OK( - file_proto.Write(absl::make_unique<DocumentProto>(document))); + file_proto.Write(std::make_unique<DocumentProto>(document))); EXPECT_THAT(file_proto.Read(), IsOkAndHolds(Pointee(EqualsProto(document)))); } diff --git a/icing/file/file-backed-vector.h b/icing/file/file-backed-vector.h index 165597d..183c091 100644 --- a/icing/file/file-backed-vector.h +++ b/icing/file/file-backed-vector.h @@ -166,8 +166,31 @@ class FileBackedVector { // mmap_strategy : Strategy/optimizations to access the content in the vector, // see MemoryMappedFile::Strategy for more details // max_file_size: Maximum file size for FileBackedVector, default - // kMaxFileSize. See max_file_size_ and kMaxFileSize for more - // details. + // kMaxFileSize. Note that this value won't be written into the + // header, so maximum file size will always be specified in + // runtime and the caller should make sure the value is correct + // and reasonable. Also it will be cached in MemoryMappedFile + // member, so we can always call mmapped_file_->max_file_size() + // to get it. + // The range should be in + // [Header::kHeaderSize + kElementTypeSize, kMaxFileSize], and + // (max_file_size - Header::kHeaderSize) / kElementTypeSize is + // max # of elements that can be stored. + // pre_mapping_mmap_size: pre-mapping size of MemoryMappedFile, default 0. + // Pre-mapping a large memory region to the file and + // grow the underlying file later, so we can avoid + // remapping too frequently and reduce the cost of + // system call and memory paging after remap. The user + // should specify reasonable size to save remapping + // cost and avoid exhausting the memory at once in the + // beginning. + // Note: if the file exists and pre_mapping_mmap_size + // is smaller than file_size - Header::kHeaderSize, + // then it still pre-maps file_size - + // Header::kHeaderSize to make all existing elements + // available. + // TODO(b/247671531): figure out pre_mapping_mmap_size for each + // FileBackedVector use case. // // Return: // FAILED_PRECONDITION_ERROR if the file checksum doesn't match the stored @@ -178,7 +201,8 @@ class FileBackedVector { static libtextclassifier3::StatusOr<std::unique_ptr<FileBackedVector<T>>> Create(const Filesystem& filesystem, const std::string& file_path, MemoryMappedFile::Strategy mmap_strategy, - int32_t max_file_size = kMaxFileSize); + int32_t max_file_size = kMaxFileSize, + int32_t pre_mapping_mmap_size = 0); // Deletes the FileBackedVector // @@ -282,7 +306,8 @@ class FileBackedVector { // returned from Get/GetMutable/Allocate may be invalidated. // // Returns: - // OUT_OF_RANGE_ERROR if file cannot be grown (i.e. reach max_file_size_) + // OUT_OF_RANGE_ERROR if file cannot be grown (i.e. reach + // mmapped_file_->max_file_size()) libtextclassifier3::Status Append(const T& value) { return Set(header_->num_elements, value); } @@ -306,7 +331,7 @@ class FileBackedVector { // // Returns: // OUT_OF_RANGE_ERROR if len <= 0 or file cannot be grown (i.e. reach - // max_file_size_) + // mmapped_file_->max_file_size()) libtextclassifier3::StatusOr<MutableArrayView> Allocate(int32_t len); // Resizes to first len elements. The crc is cleared on truncation and will be @@ -447,21 +472,20 @@ class FileBackedVector { explicit FileBackedVector(const Filesystem& filesystem, const std::string& file_path, std::unique_ptr<Header> header, - std::unique_ptr<MemoryMappedFile> mmapped_file, - int32_t max_file_size); + MemoryMappedFile&& mmapped_file); // Initialize a new FileBackedVector, and create the file. static libtextclassifier3::StatusOr<std::unique_ptr<FileBackedVector<T>>> InitializeNewFile(const Filesystem& filesystem, const std::string& file_path, ScopedFd fd, MemoryMappedFile::Strategy mmap_strategy, - int32_t max_file_size); + int32_t max_file_size, int32_t pre_mapping_mmap_size); // Initialize a FileBackedVector from an existing file. static libtextclassifier3::StatusOr<std::unique_ptr<FileBackedVector<T>>> InitializeExistingFile(const Filesystem& filesystem, const std::string& file_path, ScopedFd fd, MemoryMappedFile::Strategy mmap_strategy, - int32_t max_file_size); + int32_t max_file_size, int32_t pre_mapping_mmap_size); // Grows the underlying file to hold at least num_elements // @@ -490,17 +514,6 @@ class FileBackedVector { // Buffer of the original elements that have been changed since the last crc // update. Will be cleared if the size grows too big. std::string saved_original_buffer_; - - // Max file size for FileBackedVector, default kMaxFileSize. Note that this - // value won't be written into the header, so maximum file size will always be - // specified in runtime and the caller should make sure its value is correct - // and reasonable. Note that file size includes size of header + elements. - // - // The range should be in - // [Header::kHeaderSize + kElementTypeSize, kMaxFileSize], and - // (max_file_size_ - Header::kHeaderSize) / kElementTypeSize is max # of - // elements that can be stored. - int32_t max_file_size_; }; template <typename T> @@ -526,7 +539,8 @@ libtextclassifier3::StatusOr<std::unique_ptr<FileBackedVector<T>>> FileBackedVector<T>::Create(const Filesystem& filesystem, const std::string& file_path, MemoryMappedFile::Strategy mmap_strategy, - int32_t max_file_size) { + int32_t max_file_size, + int32_t pre_mapping_mmap_size) { if (mmap_strategy == MemoryMappedFile::Strategy::READ_WRITE_MANUAL_SYNC) { // FileBackedVector's behavior of growing the file underneath the mmap is // inherently broken with MAP_PRIVATE. Growing the vector requires extending @@ -567,10 +581,12 @@ FileBackedVector<T>::Create(const Filesystem& filesystem, const bool new_file = file_size == 0; if (new_file) { return InitializeNewFile(filesystem, file_path, std::move(fd), - mmap_strategy, max_file_size); + mmap_strategy, max_file_size, + pre_mapping_mmap_size); } return InitializeExistingFile(filesystem, file_path, std::move(fd), - mmap_strategy, max_file_size); + mmap_strategy, max_file_size, + pre_mapping_mmap_size); } template <typename T> @@ -579,7 +595,8 @@ FileBackedVector<T>::InitializeNewFile(const Filesystem& filesystem, const std::string& file_path, ScopedFd fd, MemoryMappedFile::Strategy mmap_strategy, - int32_t max_file_size) { + int32_t max_file_size, + int32_t pre_mapping_mmap_size) { // Create header. auto header = std::make_unique<Header>(); header->magic = FileBackedVector<T>::Header::kMagic; @@ -594,17 +611,21 @@ FileBackedVector<T>::InitializeNewFile(const Filesystem& filesystem, return absl_ports::InternalError("Failed to write header"); } - // Constructor of MemoryMappedFile doesn't actually call mmap(), mmap() - // happens on MemoryMappedFile::Remap(). So having a potentially unflushed fd - // at this point shouldn't run into issues with a mmap of the same file. But - // we'll close the fd just in case. + // Close the fd since constructor of MemoryMappedFile calls mmap() and we need + // to flush fd before mmap(). fd.reset(); - auto mmapped_file = - std::make_unique<MemoryMappedFile>(filesystem, file_path, mmap_strategy); - return std::unique_ptr<FileBackedVector<T>>( - new FileBackedVector<T>(filesystem, file_path, std::move(header), - std::move(mmapped_file), max_file_size)); + ICING_ASSIGN_OR_RETURN( + MemoryMappedFile mmapped_file, + MemoryMappedFile::Create(filesystem, file_path, mmap_strategy, + max_file_size, + /*pre_mapping_file_offset=*/Header::kHeaderSize, + /*pre_mapping_mmap_size=*/ + std::min(max_file_size - Header::kHeaderSize, + pre_mapping_mmap_size))); + + return std::unique_ptr<FileBackedVector<T>>(new FileBackedVector<T>( + filesystem, file_path, std::move(header), std::move(mmapped_file))); } template <typename T> @@ -612,7 +633,7 @@ libtextclassifier3::StatusOr<std::unique_ptr<FileBackedVector<T>>> FileBackedVector<T>::InitializeExistingFile( const Filesystem& filesystem, const std::string& file_path, const ScopedFd fd, MemoryMappedFile::Strategy mmap_strategy, - int32_t max_file_size) { + int32_t max_file_size, int32_t pre_mapping_mmap_size) { int64_t file_size = filesystem.GetFileSize(file_path.c_str()); if (file_size == Filesystem::kBadFileSize) { return absl_ports::InternalError( @@ -662,14 +683,23 @@ FileBackedVector<T>::InitializeExistingFile( // Mmap the content of the vector, excluding the header so its easier to // access elements from the mmapped region - auto mmapped_file = - std::make_unique<MemoryMappedFile>(filesystem, file_path, mmap_strategy); - ICING_RETURN_IF_ERROR(mmapped_file->Remap(Header::kHeaderSize, - file_size - Header::kHeaderSize)); + // Although users can specify their own pre_mapping_mmap_size, we should make + // sure that the pre-map size is at least file_size - Header::kHeaderSize to + // make all existing elements available. + ICING_ASSIGN_OR_RETURN( + MemoryMappedFile mmapped_file, + MemoryMappedFile::Create( + filesystem, file_path, mmap_strategy, max_file_size, + /*pre_mapping_file_offset=*/Header::kHeaderSize, + /*pre_mapping_mmap_size=*/ + std::max( + file_size - Header::kHeaderSize, + static_cast<int64_t>(std::min(max_file_size - Header::kHeaderSize, + pre_mapping_mmap_size))))); // Check vector contents Crc32 vector_checksum( - std::string_view(reinterpret_cast<const char*>(mmapped_file->region()), + std::string_view(reinterpret_cast<const char*>(mmapped_file.region()), header->num_elements * kElementTypeSize)); if (vector_checksum.Get() != header->vector_checksum) { @@ -677,9 +707,8 @@ FileBackedVector<T>::InitializeExistingFile( absl_ports::StrCat("Invalid vector contents for ", file_path)); } - return std::unique_ptr<FileBackedVector<T>>( - new FileBackedVector<T>(filesystem, file_path, std::move(header), - std::move(mmapped_file), max_file_size)); + return std::unique_ptr<FileBackedVector<T>>(new FileBackedVector<T>( + filesystem, file_path, std::move(header), std::move(mmapped_file))); } template <typename T> @@ -693,16 +722,16 @@ libtextclassifier3::Status FileBackedVector<T>::Delete( } template <typename T> -FileBackedVector<T>::FileBackedVector( - const Filesystem& filesystem, const std::string& file_path, - std::unique_ptr<Header> header, - std::unique_ptr<MemoryMappedFile> mmapped_file, int32_t max_file_size) +FileBackedVector<T>::FileBackedVector(const Filesystem& filesystem, + const std::string& file_path, + std::unique_ptr<Header> header, + MemoryMappedFile&& mmapped_file) : filesystem_(&filesystem), file_path_(file_path), header_(std::move(header)), - mmapped_file_(std::move(mmapped_file)), - changes_end_(header_->num_elements), - max_file_size_(max_file_size) {} + mmapped_file_( + std::make_unique<MemoryMappedFile>(std::move(mmapped_file))), + changes_end_(header_->num_elements) {} template <typename T> FileBackedVector<T>::~FileBackedVector() { @@ -831,8 +860,9 @@ FileBackedVector<T>::Allocate(int32_t len) { } // Although header_->num_elements + len doesn't exceed kMaxNumElements, the - // actual max # of elements are determined by max_file_size, kElementTypeSize, - // and kHeaderSize. Thus, it is still possible to fail to grow the file. + // actual max # of elements are determined by mmapped_file_->max_file_size(), + // kElementTypeSize, and kHeaderSize. Thus, it is still possible to fail to + // grow the file. ICING_RETURN_IF_ERROR(GrowIfNecessary(header_->num_elements + len)); int32_t start_idx = header_->num_elements; @@ -853,62 +883,35 @@ libtextclassifier3::Status FileBackedVector<T>::GrowIfNecessary( return libtextclassifier3::Status::OK; } - if (num_elements > - (max_file_size_ - Header::kHeaderSize) / kElementTypeSize) { + if (num_elements > (mmapped_file_->max_file_size() - Header::kHeaderSize) / + kElementTypeSize) { return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf( "%d elements total size exceed maximum bytes of elements allowed, " "%d bytes", - num_elements, max_file_size_ - Header::kHeaderSize)); + num_elements, mmapped_file_->max_file_size() - Header::kHeaderSize)); } - int32_t least_element_file_size_needed = - num_elements * kElementTypeSize; // Won't overflow - if (least_element_file_size_needed <= mmapped_file_->region_size()) { - // Our mmapped region can hold the target num_elements cause we've grown - // before + int32_t least_file_size_needed = + Header::kHeaderSize + num_elements * kElementTypeSize; // Won't overflow + if (least_file_size_needed <= mmapped_file_->available_size()) { return libtextclassifier3::Status::OK; } - int32_t least_file_size_needed = - Header::kHeaderSize + least_element_file_size_needed; - // Otherwise, we need to grow. Grow to kGrowElements boundary. - // Note that we need to use int64_t here, since int32_t might overflow after - // round up. int64_t round_up_file_size_needed = math_util::RoundUpTo( int64_t{least_file_size_needed}, int64_t{FileBackedVector<T>::kGrowElements} * kElementTypeSize); - least_file_size_needed = - std::min(round_up_file_size_needed, int64_t{max_file_size_}); - - // Get the actual file size here. - int64_t current_file_size = filesystem_->GetFileSize(file_path_.c_str()); - if (current_file_size == Filesystem::kBadFileSize) { - return absl_ports::InternalError("Unable to retrieve file size."); - } - - // We use PWrite here rather than Grow because Grow doesn't actually allocate - // an underlying disk block. This can lead to problems with mmap because mmap - // has no effective way to signal that it was impossible to allocate the disk - // block and ends up crashing instead. PWrite will force the allocation of - // these blocks, which will ensure that any failure to grow will surface here. - int64_t page_size = getpagesize(); - auto buf = std::make_unique<uint8_t[]>(page_size); - int64_t size_to_write = std::min(page_size - (current_file_size % page_size), - max_file_size_ - current_file_size); - ScopedFd sfd(filesystem_->OpenForWrite(file_path_.c_str())); - while (size_to_write > 0 && current_file_size < least_file_size_needed) { - if (!filesystem_->PWrite(sfd.get(), current_file_size, buf.get(), - size_to_write)) { - return absl_ports::InternalError( - absl_ports::StrCat("Couldn't grow file ", file_path_)); - } - current_file_size += size_to_write; - size_to_write = std::min(page_size - (current_file_size % page_size), - max_file_size_ - current_file_size); - } - ICING_RETURN_IF_ERROR(mmapped_file_->Remap( - Header::kHeaderSize, least_file_size_needed - Header::kHeaderSize)); + // Call GrowAndRemapIfNecessary. It handles file growth internally and remaps + // intelligently. + // We've ensured that least_file_size_needed (for num_elements) doesn't exceed + // mmapped_file_->max_file_size(), but it is still possible that + // round_up_file_size_needed exceeds it, so use the smaller value of them as + // new_mmap_size. + ICING_RETURN_IF_ERROR(mmapped_file_->GrowAndRemapIfNecessary( + /*new_file_offset=*/Header::kHeaderSize, + /*new_mmap_size=*/std::min(round_up_file_size_needed, + mmapped_file_->max_file_size()) - + Header::kHeaderSize)); return libtextclassifier3::Status::OK; } diff --git a/icing/file/file-backed-vector_test.cc b/icing/file/file-backed-vector_test.cc index 74e4132..c526dec 100644 --- a/icing/file/file-backed-vector_test.cc +++ b/icing/file/file-backed-vector_test.cc @@ -36,14 +36,12 @@ #include "icing/util/crc32.h" #include "icing/util/logging.h" -using ::testing::DoDefault; using ::testing::ElementsAre; using ::testing::Eq; using ::testing::IsTrue; using ::testing::Lt; using ::testing::Not; using ::testing::Pointee; -using ::testing::Return; using ::testing::SizeIs; namespace icing { @@ -1231,92 +1229,35 @@ TEST_F(FileBackedVectorTest, InitNormalSucceeds) { } } -TEST_F(FileBackedVectorTest, RemapFailureStillValidInstance) { - auto mock_filesystem = std::make_unique<MockFilesystem>(); - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<FileBackedVector<int>> vector, - FileBackedVector<int>::Create( - *mock_filesystem, file_path_, - MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC)); - - // 1. Write data to just before the first block resize. Running the test - // locally has determined that we'll first resize at 65531st entry. - constexpr int kResizingIndex = 16378; - for (int i = 0; i < kResizingIndex; ++i) { - ICING_ASSERT_OK(vector->Set(i, 7)); +TEST_F(FileBackedVectorTest, InitFromExistingFileShouldPreMapAtLeastFileSize) { + { + // 1. Create a vector with a few elements. + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<FileBackedVector<char>> vector, + FileBackedVector<char>::Create( + filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, + FileBackedVector<char>::kMaxFileSize)); + Insert(vector.get(), 10000, "A"); + Insert(vector.get(), 10001, "Z"); + ASSERT_THAT(vector->PersistToDisk(), IsOk()); } - // 2. The next Set call should cause a resize and a remap. Make that remap - // fail. - int num_calls = 0; - auto open_lambda = [this, &num_calls](const char* file_name) { - if (++num_calls == 2) { - return -1; - } - return this->filesystem().OpenForWrite(file_name); - }; - ON_CALL(*mock_filesystem, OpenForWrite(_)).WillByDefault(open_lambda); - EXPECT_THAT(vector->Set(kResizingIndex, 7), - StatusIs(libtextclassifier3::StatusCode::INTERNAL)); - - // 3. We should still be able to call set correctly for earlier regions. - ICING_EXPECT_OK(vector->Set(kResizingIndex / 2, 9)); - EXPECT_THAT(vector->Get(kResizingIndex / 2), IsOkAndHolds(Pointee(Eq(9)))); -} - -TEST_F(FileBackedVectorTest, BadFileSizeDuringGrowReturnsError) { - auto mock_filesystem = std::make_unique<MockFilesystem>(); - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<FileBackedVector<int>> vector, - FileBackedVector<int>::Create( - *mock_filesystem, file_path_, - MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC)); - - // At first, the vector is empty and has no mapping established. The first Set - // call will cause a Grow. - // During Grow, we will attempt to check the underlying file size to see if - // growing is actually necessary. Return an error on the call to GetFileSize. - ON_CALL(*mock_filesystem, GetFileSize(A<const char*>())) - .WillByDefault(Return(Filesystem::kBadFileSize)); - - // We should fail gracefully and return an INTERNAL error to indicate that - // there was an issue retrieving the file size. - EXPECT_THAT(vector->Set(0, 7), - StatusIs(libtextclassifier3::StatusCode::INTERNAL)); -} - -TEST_F(FileBackedVectorTest, PWriteFailsInTheSecondRound) { - auto mock_filesystem = std::make_unique<MockFilesystem>(); - ICING_ASSERT_OK_AND_ASSIGN( - std::unique_ptr<FileBackedVector<int>> vector, - FileBackedVector<int>::Create( - *mock_filesystem, file_path_, - MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC)); - - // At first, the vector is empty and has no mapping established. The first Set - // call will cause a Grow. - // During Grow, we call PWrite for several rounds to grow the file. Mock - // PWrite to succeed in the first round, fail in the second round, and succeed - // in the rest rounds. - - // This unit test checks if we check file size and Remap properly. If the - // first PWrite succeeds but the second PWrite fails, then the file size has - // been grown, but there will be no Remap for the MemoryMappedFile. Then, - // the next several Append() won't require file growth since the file size has - // been grown, but it causes memory error because we haven't remapped. - EXPECT_CALL(*mock_filesystem, - PWrite(A<int>(), A<off_t>(), A<const void*>(), A<size_t>())) - .WillOnce(DoDefault()) - .WillOnce(Return(false)) - .WillRepeatedly(DoDefault()); - - EXPECT_THAT(vector->Append(7), - StatusIs(libtextclassifier3::StatusCode::INTERNAL)); - EXPECT_THAT(vector->Get(/*idx=*/0), - StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE)); - - EXPECT_THAT(vector->Append(7), IsOk()); - EXPECT_THAT(vector->Get(/*idx=*/0), IsOkAndHolds(Pointee(Eq(7)))); + { + // 2. Attempt to create the file with pre_mapping_mmap_size < file_size. It + // should still pre-map file_size, so we can pass the checksum + // verification when initializing and get the correct contents. + int64_t file_size = filesystem_.GetFileSize(file_path_.c_str()); + int pre_mapping_mmap_size = 10; + ASSERT_THAT(pre_mapping_mmap_size, Lt(file_size)); + ICING_ASSERT_OK_AND_ASSIGN( + std::unique_ptr<FileBackedVector<char>> vector, + FileBackedVector<char>::Create( + filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, + FileBackedVector<char>::kMaxFileSize, pre_mapping_mmap_size)); + EXPECT_THAT(Get(vector.get(), /*idx=*/10000, /*expected_len=*/2), Eq("AZ")); + } } } // namespace diff --git a/icing/file/memory-mapped-file-leak_test.cc b/icing/file/memory-mapped-file-leak_test.cc index 598fb61..ff031df 100644 --- a/icing/file/memory-mapped-file-leak_test.cc +++ b/icing/file/memory-mapped-file-leak_test.cc @@ -12,12 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "icing/file/memory-mapped-file.h" - #include "perftools/profiles/collector/heap/alloc_recorder.h" #include "gmock/gmock.h" #include "gtest/gtest.h" #include "icing/file/filesystem.h" +#include "icing/file/memory-mapped-file.h" #include "icing/testing/common-matchers.h" #include "icing/testing/recorder-test-utils.h" #include "icing/testing/tmp-directory.h" @@ -40,8 +39,15 @@ TEST(MemoryMappedFileTest, MMapMemoryLeak) { { std::string mmfile_dir = test_dir + "/file"; ASSERT_TRUE(filesystem.CreateDirectoryRecursively(mmfile_dir.c_str())); - MemoryMappedFile mmfile(filesystem, mmfile_dir + "/mmfile", - MemoryMappedFile::READ_WRITE_AUTO_SYNC); + + // Don't use ICING_ASSERT_OK_AND_ASSIGN or matcher IsOk to prevent + // unnecessary implicit heap memory allocation in these macros. + libtextclassifier3::StatusOr<MemoryMappedFile> mmfile_or = + MemoryMappedFile::Create(filesystem, mmfile_dir + "/mmfile", + MemoryMappedFile::READ_WRITE_AUTO_SYNC); + ASSERT_TRUE(mmfile_or.ok()); + MemoryMappedFile mmfile = std::move(mmfile_or).ValueOrDie(); + // How this works: // We request a 500-byte mapping starting at the 101st byte of the file. // But(!), mmap only accepts offsets that are multiples of page size. So diff --git a/icing/file/memory-mapped-file.cc b/icing/file/memory-mapped-file.cc index fc13a79..43ed030 100644 --- a/icing/file/memory-mapped-file.cc +++ b/icing/file/memory-mapped-file.cc @@ -12,30 +12,96 @@ // See the License for the specific language governing permissions and // limitations under the License. -// TODO(cassiewang) Add unit-tests to this class. - #include "icing/file/memory-mapped-file.h" #include <sys/mman.h> #include <cerrno> +#include <cinttypes> +#include <memory> #include "icing/text_classifier/lib3/utils/base/status.h" +#include "icing/text_classifier/lib3/utils/base/statusor.h" #include "icing/absl_ports/canonical_errors.h" #include "icing/absl_ports/str_cat.h" #include "icing/file/filesystem.h" #include "icing/legacy/core/icing-string-util.h" #include "icing/util/math-util.h" +#include "icing/util/status-macros.h" namespace icing { namespace lib { +/* static */ libtextclassifier3::StatusOr<MemoryMappedFile> +MemoryMappedFile::Create(const Filesystem& filesystem, + std::string_view file_path, Strategy mmap_strategy, + int64_t max_file_size) { + if (max_file_size <= 0 || max_file_size > kMaxFileSize) { + return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf( + "Invalid max file size %" PRId64 " for MemoryMappedFile", + max_file_size)); + } + + const std::string file_path_str(file_path); + int64_t file_size = filesystem.FileExists(file_path_str.c_str()) + ? filesystem.GetFileSize(file_path_str.c_str()) + : 0; + if (file_size == Filesystem::kBadFileSize) { + return absl_ports::InternalError( + absl_ports::StrCat("Bad file size for file ", file_path)); + } + + return MemoryMappedFile(filesystem, file_path, mmap_strategy, max_file_size, + file_size); +} + +/* static */ libtextclassifier3::StatusOr<MemoryMappedFile> +MemoryMappedFile::Create(const Filesystem& filesystem, + std::string_view file_path, Strategy mmap_strategy, + int64_t max_file_size, int64_t pre_mapping_file_offset, + int64_t pre_mapping_mmap_size) { + if (max_file_size <= 0 || max_file_size > kMaxFileSize) { + return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf( + "Invalid max file size %" PRId64 " for MemoryMappedFile", + max_file_size)); + } + + // We need at least pre_mapping_file_offset + pre_mapping_mmap_size bytes for + // the underlying file size, so max_file_size should be at least + // pre_mapping_file_offset + pre_mapping_mmap_size. Safe integer check. + if (pre_mapping_file_offset < 0 || pre_mapping_mmap_size < 0 || + pre_mapping_file_offset > max_file_size - pre_mapping_mmap_size) { + return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf( + "Invalid pre-mapping file offset %" PRId64 " and mmap size %" PRId64 + " with max file size %" PRId64 "for MemoryMappedFile", + pre_mapping_file_offset, pre_mapping_mmap_size, max_file_size)); + } + + ICING_ASSIGN_OR_RETURN( + MemoryMappedFile mmapped_file, + Create(filesystem, file_path, mmap_strategy, max_file_size)); + + if (pre_mapping_mmap_size > 0) { + ICING_RETURN_IF_ERROR( + mmapped_file.RemapImpl(pre_mapping_file_offset, pre_mapping_mmap_size)); + } + + return std::move(mmapped_file); +} + MemoryMappedFile::MemoryMappedFile(const Filesystem& filesystem, - const std::string_view file_path, - Strategy mmap_strategy) + std::string_view file_path, + Strategy mmap_strategy, + int64_t max_file_size, int64_t file_size) : filesystem_(&filesystem), file_path_(file_path), - strategy_(mmap_strategy) {} + strategy_(mmap_strategy), + max_file_size_(max_file_size), + file_size_(file_size), + mmap_result_(nullptr), + file_offset_(0), + mmap_size_(0), + alignment_adjustment_(0) {} MemoryMappedFile::MemoryMappedFile(MemoryMappedFile&& other) // Make sure that mmap_result_ is a nullptr before we call Swap. We don't @@ -58,82 +124,44 @@ MemoryMappedFile::~MemoryMappedFile() { Unmap(); } void MemoryMappedFile::MemoryMappedFile::Unmap() { if (mmap_result_ != nullptr) { - munmap(mmap_result_, adjusted_mmap_size_); + munmap(mmap_result_, adjusted_mmap_size()); mmap_result_ = nullptr; } file_offset_ = 0; - region_ = nullptr; - region_size_ = 0; - adjusted_mmap_size_ = 0; + mmap_size_ = 0; + alignment_adjustment_ = 0; } -libtextclassifier3::Status MemoryMappedFile::Remap(size_t file_offset, - size_t mmap_size) { - if (mmap_size == 0) { - // First unmap any previously mmapped region. - Unmap(); - return libtextclassifier3::Status::OK; - } - - size_t aligned_offset = - math_util::RoundDownTo(file_offset, system_page_size()); - size_t alignment_adjustment = file_offset - aligned_offset; - size_t adjusted_mmap_size = alignment_adjustment + mmap_size; +libtextclassifier3::Status MemoryMappedFile::Remap(int64_t file_offset, + int64_t mmap_size) { + return RemapImpl(file_offset, mmap_size); +} - int mmap_flags = 0; - // Determines if the mapped region should just be readable or also writable. - int protection_flags = 0; - ScopedFd fd; - switch (strategy_) { - case Strategy::READ_ONLY: { - mmap_flags = MAP_PRIVATE; - protection_flags = PROT_READ; - fd.reset(filesystem_->OpenForRead(file_path_.c_str())); - break; - } - case Strategy::READ_WRITE_AUTO_SYNC: { - mmap_flags = MAP_SHARED; - protection_flags = PROT_READ | PROT_WRITE; - fd.reset(filesystem_->OpenForWrite(file_path_.c_str())); - break; - } - case Strategy::READ_WRITE_MANUAL_SYNC: { - mmap_flags = MAP_PRIVATE; - protection_flags = PROT_READ | PROT_WRITE; - // TODO(cassiewang) MAP_PRIVATE effectively makes it a read-only file. - // figure out if we can open this file in read-only mode. - fd.reset(filesystem_->OpenForWrite(file_path_.c_str())); - break; - } - default: - return absl_ports::UnknownError(IcingStringUtil::StringPrintf( - "Invalid value in switch statement: %d", strategy_)); +libtextclassifier3::Status MemoryMappedFile::GrowAndRemapIfNecessary( + int64_t new_file_offset, int64_t new_mmap_size) { + // We need at least new_file_offset + new_mmap_size bytes for the underlying + // file size, and it should not exceed max_file_size_. Safe integer check. + if (new_file_offset < 0 || new_mmap_size < 0 || + new_file_offset > max_file_size_ - new_mmap_size) { + return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf( + "Invalid new file offset %" PRId64 " and new mmap size %" PRId64 + " with max file size %" PRId64 "for MemoryMappedFile", + new_file_offset, new_mmap_size, max_file_size_)); } - if (!fd.is_valid()) { - return absl_ports::InternalError(absl_ports::StrCat( - "Unable to open file meant to be mmapped: ", file_path_)); + if (new_mmap_size == 0) { + // Unmap any previously mmapped region. + Unmap(); + return libtextclassifier3::Status::OK; } - void* mmap_result = mmap(nullptr, adjusted_mmap_size, protection_flags, - mmap_flags, fd.get(), aligned_offset); + ICING_RETURN_IF_ERROR(GrowFileSize(new_file_offset + new_mmap_size)); - if (mmap_result == MAP_FAILED) { - mmap_result = nullptr; - return absl_ports::InternalError(absl_ports::StrCat( - "Failed to mmap region due to error: ", strerror(errno))); + if (new_file_offset != file_offset_ || new_mmap_size > mmap_size_) { + ICING_RETURN_IF_ERROR(RemapImpl(new_file_offset, new_mmap_size)); } - // Now we know that we have successfully created a new mapping. We can free - // the old one and switch to the new one. - Unmap(); - - mmap_result_ = mmap_result; - file_offset_ = file_offset; - region_ = reinterpret_cast<char*>(mmap_result_) + alignment_adjustment; - region_size_ = mmap_size; - adjusted_mmap_size_ = adjusted_mmap_size; return libtextclassifier3::Status::OK; } @@ -143,13 +171,27 @@ libtextclassifier3::Status MemoryMappedFile::PersistToDisk() { "Attempting to PersistToDisk on a read-only file: ", file_path_)); } - if (region_ == nullptr) { + if (mmap_result_ == nullptr) { // Nothing mapped to sync. return libtextclassifier3::Status::OK; } + // Sync actual file size via system call. + int64_t actual_file_size = filesystem_->GetFileSize(file_path_.c_str()); + if (actual_file_size == Filesystem::kBadFileSize) { + return absl_ports::InternalError("Unable to retrieve file size"); + } + file_size_ = actual_file_size; + if (strategy_ == Strategy::READ_WRITE_AUTO_SYNC && - msync(mmap_result_, adjusted_mmap_size_, MS_SYNC) != 0) { + // adjusted_mmap_size(), which is the mmap size after alignment + // adjustment, may be larger than the actual underlying file size since we + // can pre-mmap a large memory region before growing the file. Therefore, + // we should std::min with file_size_ - adjusted_offset() as the msync + // size. + msync(mmap_result_, + std::min(file_size_ - adjusted_offset(), adjusted_mmap_size()), + MS_SYNC) != 0) { return absl_ports::InternalError( absl_ports::StrCat("Unable to sync file using msync(): ", file_path_)); } @@ -159,7 +201,13 @@ libtextclassifier3::Status MemoryMappedFile::PersistToDisk() { // can't be synced using msync(). So, we have to directly write to the // underlying file to update it. if (strategy_ == Strategy::READ_WRITE_MANUAL_SYNC && - !filesystem_->PWrite(file_path_.c_str(), 0, region(), region_size())) { + // Contents before file_offset_ won't be modified by the caller, so we + // only need to PWrite contents starting at file_offset_. mmap_size_ may + // be larger than the actual underlying file size since we can pre-mmap a + // large memory before growing the file. Therefore, we should std::min + // with file_size_ - file_offset_ as the PWrite size. + !filesystem_->PWrite(file_path_.c_str(), file_offset_, region(), + std::min(mmap_size_, file_size_ - file_offset_))) { return absl_ports::InternalError( absl_ports::StrCat("Unable to sync file using PWrite(): ", file_path_)); } @@ -180,22 +228,161 @@ libtextclassifier3::Status MemoryMappedFile::OptimizeFor( madvise_flag = MADV_SEQUENTIAL; } - if (madvise(mmap_result_, adjusted_mmap_size_, madvise_flag) != 0) { + if (madvise(mmap_result_, adjusted_mmap_size(), madvise_flag) != 0) { return absl_ports::InternalError(absl_ports::StrCat( "Unable to madvise file ", file_path_, "; Error: ", strerror(errno))); } return libtextclassifier3::Status::OK; } +libtextclassifier3::Status MemoryMappedFile::GrowFileSize( + int64_t new_file_size) { + // Early return if new_file_size doesn't exceed the cached file size + // (file_size_). It saves a system call for getting the actual file size and + // reduces latency significantly. + if (new_file_size <= file_size_) { + return libtextclassifier3::Status::OK; + } + + if (new_file_size > max_file_size_) { + return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf( + "new file size %" PRId64 " exceeds maximum file size allowed, %" PRId64 + " bytes", + new_file_size, max_file_size_)); + } + + // Sync actual file size via system call. + int64_t actual_file_size = filesystem_->GetFileSize(file_path_.c_str()); + if (actual_file_size == Filesystem::kBadFileSize) { + return absl_ports::InternalError("Unable to retrieve file size"); + } + file_size_ = actual_file_size; + + // Early return again if new_file_size doesn't exceed actual_file_size. It + // saves system calls for opening and closing file descriptor. + if (new_file_size <= actual_file_size) { + return libtextclassifier3::Status::OK; + } + + if (strategy_ == Strategy::READ_ONLY) { + return absl_ports::FailedPreconditionError(absl_ports::StrCat( + "Attempting to grow a read-only file: ", file_path_)); + } + + // We use Write here rather than Grow because Grow doesn't actually allocate + // an underlying disk block. This can lead to problems with mmap because mmap + // has no effective way to signal that it was impossible to allocate the disk + // block and ends up crashing instead. Write will force the allocation of + // these blocks, which will ensure that any failure to grow will surface here. + int64_t page_size = system_page_size(); + auto buf = std::make_unique<uint8_t[]>(page_size); + int64_t size_to_write = std::min(page_size - (file_size_ % page_size), + new_file_size - file_size_); + ScopedFd sfd(filesystem_->OpenForAppend(file_path_.c_str())); + if (!sfd.is_valid()) { + return absl_ports::InternalError( + absl_ports::StrCat("Couldn't open file ", file_path_)); + } + while (size_to_write > 0 && file_size_ < new_file_size) { + if (!filesystem_->Write(sfd.get(), buf.get(), size_to_write)) { + return absl_ports::InternalError( + absl_ports::StrCat("Couldn't grow file ", file_path_)); + } + file_size_ += size_to_write; + size_to_write = std::min(page_size - (file_size_ % page_size), + new_file_size - file_size_); + } + + return libtextclassifier3::Status::OK; +} + +libtextclassifier3::Status MemoryMappedFile::RemapImpl(int64_t new_file_offset, + int64_t new_mmap_size) { + if (new_file_offset < 0) { + return absl_ports::OutOfRangeError("Invalid file offset"); + } + + if (new_mmap_size < 0) { + return absl_ports::OutOfRangeError("Invalid mmap size"); + } + + if (new_mmap_size == 0) { + // First unmap any previously mmapped region. + Unmap(); + return libtextclassifier3::Status::OK; + } + + int64_t new_aligned_offset = + math_util::RoundDownTo(new_file_offset, system_page_size()); + int64_t new_alignment_adjustment = new_file_offset - new_aligned_offset; + int64_t new_adjusted_mmap_size = new_alignment_adjustment + new_mmap_size; + + int mmap_flags = 0; + // Determines if the mapped region should just be readable or also writable. + int protection_flags = 0; + ScopedFd fd; + switch (strategy_) { + case Strategy::READ_ONLY: { + mmap_flags = MAP_PRIVATE; + protection_flags = PROT_READ; + fd.reset(filesystem_->OpenForRead(file_path_.c_str())); + break; + } + case Strategy::READ_WRITE_AUTO_SYNC: { + mmap_flags = MAP_SHARED; + protection_flags = PROT_READ | PROT_WRITE; + fd.reset(filesystem_->OpenForWrite(file_path_.c_str())); + break; + } + case Strategy::READ_WRITE_MANUAL_SYNC: { + mmap_flags = MAP_PRIVATE; + protection_flags = PROT_READ | PROT_WRITE; + // TODO(cassiewang) MAP_PRIVATE effectively makes it a read-only file. + // figure out if we can open this file in read-only mode. + fd.reset(filesystem_->OpenForWrite(file_path_.c_str())); + break; + } + default: + return absl_ports::UnknownError(IcingStringUtil::StringPrintf( + "Invalid value in switch statement: %d", strategy_)); + } + + if (!fd.is_valid()) { + return absl_ports::InternalError(absl_ports::StrCat( + "Unable to open file meant to be mmapped: ", file_path_)); + } + + void* new_mmap_result = + mmap(nullptr, new_adjusted_mmap_size, protection_flags, mmap_flags, + fd.get(), new_aligned_offset); + + if (new_mmap_result == MAP_FAILED) { + new_mmap_result = nullptr; + return absl_ports::InternalError(absl_ports::StrCat( + "Failed to mmap region due to error: ", strerror(errno))); + } + + // Now we know that we have successfully created a new mapping. We can free + // the old one and switch to the new one. + Unmap(); + + mmap_result_ = new_mmap_result; + file_offset_ = new_file_offset; + mmap_size_ = new_mmap_size; + alignment_adjustment_ = new_alignment_adjustment; + return libtextclassifier3::Status::OK; +} + void MemoryMappedFile::Swap(MemoryMappedFile* other) { std::swap(filesystem_, other->filesystem_); std::swap(file_path_, other->file_path_); std::swap(strategy_, other->strategy_); - std::swap(file_offset_, other->file_offset_); - std::swap(region_, other->region_); - std::swap(region_size_, other->region_size_); - std::swap(adjusted_mmap_size_, other->adjusted_mmap_size_); + std::swap(max_file_size_, other->max_file_size_); + std::swap(file_size_, other->file_size_); std::swap(mmap_result_, other->mmap_result_); + std::swap(file_offset_, other->file_offset_); + std::swap(mmap_size_, other->mmap_size_); + std::swap(alignment_adjustment_, other->alignment_adjustment_); } } // namespace lib diff --git a/icing/file/memory-mapped-file.h b/icing/file/memory-mapped-file.h index 1d31a42..54507af 100644 --- a/icing/file/memory-mapped-file.h +++ b/icing/file/memory-mapped-file.h @@ -21,18 +21,54 @@ // faster reads as well as background-sync vs manual-sync of changes to disk. // For more details, see comments at MemoryMappedFile::Strategy. // -// Usage: +// ** Usage 1: pre-mmap large memory and grow the underlying file internally ** // -// MemoryMappedFile mmapped_file(filesystem, "/file.pb", READ_WRITE_AUTO_SYNC)); -// mmapped_file->Remap(0, 16* 1024); // load the first 16K of the file. +// // Create MemoryMappedFile instance. +// ICING_ASSIGN_OR_RETURN( +// std::unique_ptr<MemoryMappedFile> mmapped_file, +// MemoryMappedFile::Create(filesystem, "/file.pb", +// READ_WRITE_AUTO_SYNC, +// max_file_size, +// /*pre_mapping_file_offset=*/0, +// /*pre_mapping_mmap_size=*/1024 * 1024)); // +// // Found that we need 4K bytes for the file and mmapped region. +// mmapped_file->GrowAndRemapIfNecessary( +// /*new_file_offset=*/0, /*new_mmap_size=*/4 * 1024); +// char read_byte = mmapped_file->region()[4000]; +// mmapped_file->mutable_region()[4001] = write_byte; +// +// mmapped_file->PersistToDisk(); // Optional; immediately writes changes to +// disk. +// +// // Found that we need 2048 * 1024 bytes for the file and mmapped region. +// mmapped_file->GrowAndRemapIfNecessary( +// /*new_file_offset=*/0, /*new_mmap_size=*/2048 * 1024); +// mmapped_file->mutable_region()[2000 * 1024] = write_byte; +// mmapped_file.reset(); +// +// ** Usage 2: load by segments ** +// +// ICING_ASSIGN_OR_RETURN( +// std::unique_ptr<MemoryMappedFile> mmapped_file, +// MemoryMappedFile::Create(filesystem, "/file.pb", +// READ_WRITE_AUTO_SYNC, +// max_file_size, +// /*pre_mapping_file_offset=*/0, +// /*pre_mapping_mmap_size=*/16 * 1024)); +// +// // load the first 16K. +// mmapped_file->GrowAndRemapIfNecessary( +// /*new_file_offset=*/0, /*new_mmap_size=*/16 * 1024); // char read_byte = mmapped_file->region()[100]; // mmapped_file->mutable_region()[10] = write_byte; // // mmapped_file->PersistToDisk(); // Optional; immediately writes changes to // disk. // -// mmapped_file->Remap(16*1024, 16* 1024); // load the next 16K. +// // load the next 16K. +// mmapped_file->GrowAndRemapIfNecessary( +// /*new_file_offset=*/16 * 1024, /*new_mmap_size=*/16 * 1024); // mmapped_file->mutable_region()[10] = write_byte; // mmapped_file.reset(); @@ -41,12 +77,14 @@ #include <unistd.h> +#include <algorithm> #include <cstdint> #include <memory> #include <string> #include <string_view> #include "icing/text_classifier/lib3/utils/base/status.h" +#include "icing/text_classifier/lib3/utils/base/statusor.h" #include "icing/file/filesystem.h" namespace icing { @@ -54,8 +92,9 @@ namespace lib { class MemoryMappedFile { public: - static size_t __attribute__((const)) system_page_size() { - static const size_t page_size = sysconf(_SC_PAGE_SIZE); + static int64_t __attribute__((const)) system_page_size() { + static const int64_t page_size = + static_cast<int64_t>(sysconf(_SC_PAGE_SIZE)); return page_size; } @@ -71,26 +110,95 @@ class MemoryMappedFile { // Memory map a read-write file into a writable memory region. Changes made // to this region will never be auto-synced to the underlying file. Unless // the caller explicitly calls PersistToDisk(), all changes will be lost - // when the - // MemoryMappedFile is destroyed. + // when the MemoryMappedFile is destroyed. READ_WRITE_MANUAL_SYNC, }; - // file_path : Full path of the file that needs to be memory-mapped. - MemoryMappedFile(const Filesystem& filesystem, std::string_view file_path, - Strategy mmap_strategy); + // Absolute max file size, 16 GiB. + static constexpr int64_t kMaxFileSize = INT64_C(1) << 34; + + // Default max file size, 1 MiB. + static constexpr int64_t kDefaultMaxFileSize = INT64_C(1) << 20; + + // Creates a new MemoryMappedFile to read/write content to. + // + // filesystem : Object to make system level calls + // file_path : Full path of the file that needs to be memory-mapped. + // mmap_strategy : Strategy/optimizations to access the content. + // max_file_size : Maximum file size for MemoryMappedFile, default + // kDefaultMaxFileSize. + // + // Returns: + // A MemoryMappedFile instance on success + // OUT_OF_RANGE_ERROR if max_file_size is invalid + // INTERNAL_ERROR on I/O error + static libtextclassifier3::StatusOr<MemoryMappedFile> Create( + const Filesystem& filesystem, std::string_view file_path, + Strategy mmap_strategy, int64_t max_file_size = kDefaultMaxFileSize); + + // Creates a new MemoryMappedFile to read/write content to. It remaps when + // creating the instance, but doesn't check or grow the actual file size, so + // the caller should call GrowAndRemapIfNecessary before accessing region. + // + // filesystem : Object to make system level calls + // file_path : Full path of the file that needs to be memory-mapped. + // mmap_strategy : Strategy/optimizations to access the content. + // max_file_size : Maximum file size for MemoryMappedFile. + // pre_mapping_file_offset : The offset of the file to be memory mapped. + // pre_mapping_mmap_size : mmap size for pre-mapping. + // + // Returns: + // A MemoryMappedFile instance on success + // OUT_OF_RANGE_ERROR if max_file_size, file_offset, or mmap_size is invalid + // INTERNAL_ERROR on I/O error + static libtextclassifier3::StatusOr<MemoryMappedFile> Create( + const Filesystem& filesystem, std::string_view file_path, + Strategy mmap_strategy, int64_t max_file_size, + int64_t pre_mapping_file_offset, int64_t pre_mapping_mmap_size); + + // Delete copy constructor and assignment operator. MemoryMappedFile(const MemoryMappedFile& other) = delete; - MemoryMappedFile(MemoryMappedFile&& other); MemoryMappedFile& operator=(const MemoryMappedFile& other) = delete; + + MemoryMappedFile(MemoryMappedFile&& other); MemoryMappedFile& operator=(MemoryMappedFile&& other); + // Frees any region that is still memory-mapped region. ~MemoryMappedFile(); + // TODO(b/247671531): migrate all callers to use GrowAndRemapIfNecessary and + // deprecate this API. + // // Memory-map the newly specified region within the file specified by // file_offset and mmap_size. Unmaps any previously mmapped region. + // It doesn't handle the underlying file growth. // // Returns any encountered IO error. - libtextclassifier3::Status Remap(size_t file_offset, size_t mmap_size); + libtextclassifier3::Status Remap(int64_t file_offset, int64_t mmap_size); + + // Attempt to memory-map the newly specified region within the file specified + // by new_file_offset and new_mmap_size. It handles mmap and file growth + // intelligently. + // - Compute least file size needed according to new_file_offset and + // new_mmap_size, and compare with the current file size. If requiring file + // growth, then grow the underlying file (Write) or return error if + // strategy_ is READ_ONLY. + // - If new_file_offset is different from the current file_offset_ or + // new_mmap_size is greater than the current mmap_size_, then memory-map + // the newly specified region and unmap any previously mmapped region. + // + // This API is useful for file growth since it grows the underlying file + // internally and handles remapping intelligently. By pre-mmapping a large + // memory, we only need to grow the underlying file (Write) without remapping + // in each round of growth, which significantly reduces the cost of system + // call and memory paging after remap. + // + // Returns: + // OK on success + // OUT_OF_RANGE_ERROR if new_file_offset and new_mmap_size is invalid + // Any error from GrowFileSize() and RemapImpl() + libtextclassifier3::Status GrowAndRemapIfNecessary(int64_t new_file_offset, + int64_t new_mmap_size); // unmap and free-up the region that has currently been memory mapped. void Unmap(); @@ -129,35 +237,147 @@ class MemoryMappedFile { }; libtextclassifier3::Status OptimizeFor(AccessPattern access_pattern); + Strategy strategy() const { return strategy_; } + + int64_t max_file_size() const { return max_file_size_; } + // Accessors to the memory-mapped region. Returns null if nothing is mapped. - const char* region() const { return region_; } - char* mutable_region() { return region_; } + const char* region() const { + return reinterpret_cast<const char*>(mmap_result_) + alignment_adjustment_; + } + char* mutable_region() { + return reinterpret_cast<char*>(mmap_result_) + alignment_adjustment_; + } - size_t region_size() const { return region_size_; } - Strategy strategy() const { return strategy_; } + int64_t file_offset() const { return file_offset_; } + + // TODO(b/247671531): remove this API after migrating all callers to use + // GrowAndRemapIfNecessary. + int64_t region_size() const { return mmap_size_; } + + // The size that is safe for the client to read/write. This is only valid for + // callers that use GrowAndRemapIfNecessary. + int64_t available_size() const { + return std::min(mmap_size_, + std::max(INT64_C(0), file_size_ - file_offset_)); + } private: + explicit MemoryMappedFile(const Filesystem& filesystem, + std::string_view file_path, Strategy mmap_strategy, + int64_t max_file_size, int64_t file_size); + + // Grow the underlying file to new_file_size. + // Note: it is possible that Write() (implemented in the file system call + // library) grows the underlying file partially and returns error due to + // failures, so the cached file_size_ may contain out-of-date value, but it is + // still guaranteed that file_size_ is always smaller or equal to the actual + // file size. In the next round of growing: + // - If new_file_size is not greater than file_size_, then we're still + // confident that the actual file size is large enough and therefore skip + // the grow process. + // - If new_file_size is greater than file_size_, then we will invoke the + // system call to sync the actual file size. At this moment, file_size_ is + // the actual file size and therefore we can grow the underlying file size + // correctly. + // + // Returns: + // OK on success + // FAILED_PRECONDITION_ERROR if requiring file growth and strategy_ is + // READ_ONLY + // OUT_OF_RANGE_ERROR if new_mmap_size exceeds max_file_size_ + // INTERNAL_ERROR on I/O error + libtextclassifier3::Status GrowFileSize(int64_t new_file_size); + + // Memory-map the newly specified region within the file specified by + // new_file_offset and new_mmap_size. Unmaps any previously mmapped region. + // It doesn't handle the underlying file growth. + // + // Returns: + // OK on success + // OUT_OF_RANGE_ERROR if new_file_offset and new_mmap_size is invalid + // INTERNAL_ERROR on I/O error + libtextclassifier3::Status RemapImpl(int64_t new_file_offset, + int64_t new_mmap_size); + // Swaps the contents of this with other. void Swap(MemoryMappedFile* other); + int64_t adjusted_offset() const { + return file_offset_ - alignment_adjustment_; + } + + int64_t adjusted_mmap_size() const { + return alignment_adjustment_ + mmap_size_; + } + // Cached constructor params. const Filesystem* filesystem_; std::string file_path_; Strategy strategy_; - // Offset within the file at which the current memory-mapped region starts. - size_t file_offset_ = 0; + // Raw file related fields: + // - max_file_size_ + // - file_size_ + + // Max file size for MemoryMappedFile. It should not exceed the absolute max + // size of memory mapped file (kMaxFileSize). It is only used in + // GrowAndRemapIfNecessary(), the new API that handles underlying file growth + // internally and remaps intelligently. + // + // Note: max_file_size_ will be specified in runtime and the caller should + // make sure its value is correct and reasonable. + int64_t max_file_size_; - // Region that is currently memory-mapped. - char* region_ = nullptr; - size_t region_size_ = 0; + // Cached file size to avoid calling system call too frequently. It is only + // used in GrowAndRemapIfNecessary(), the new API that handles underlying file + // growth internally and remaps intelligently. + // + // Note: it is guaranteed that file_size_ is smaller or equal to the actual + // file size as long as the underlying file hasn't been truncated or deleted + // externally. See GrowFileSize() for more details. + int64_t file_size_; - // The actual size of the region we mmapped. As the requested region might not - // align with system pages, we often mmap more bytes than requested. - size_t adjusted_mmap_size_ = 0; + // Memory mapped related fields: + // - mmap_result_ + // - file_offset_ + // - alignment_adjustment_ + // - mmap_size_ // Raw pointer (or error) returned by calls to mmap(). - void* mmap_result_ = nullptr; + void* mmap_result_; + + // Offset within the file at which the current memory-mapped region starts. + int64_t file_offset_; + + // Size that is currently memory-mapped. + // Note that the mmapped size can be larger than the underlying file size. We + // can reduce remapping by pre-mmapping a large memory and grow the file size + // later. See GrowAndRemapIfNecessary(). + int64_t mmap_size_; + + // The difference between file_offset_ and the actual adjusted (aligned) + // offset. + // Since mmap requires the offset to be a multiple of system page size, we + // have to align file_offset_ to the last multiple of system page size. + int64_t alignment_adjustment_; + + // E.g. system_page_size = 5, RemapImpl(/*new_file_offset=*/8, mmap_size) + // + // File layout: xxxxx xxxxx xxxxx xxxxx xxxxx xx + // file_offset_: 8 + // adjusted_offset(): 5 + // region()/mutable_region(): | + // mmap_result_: | + // + // alignment_adjustment_: file_offset_ - adjusted_offset() + // mmap_size_: mmap_size + // region_size(): mmap_size_ + // available_size(): std::min(mmap_size_, + // std::max(0, file_size_ - file_offset_)) + // region_range: [file_offset_, file_offset + mmap_size) + // adjusted_mmap_size(): alignment_adjustment_ + mmap_size_ + // adjusted_mmap_range: [alignment_offset, file_offset + mmap_size) }; } // namespace lib diff --git a/icing/file/memory-mapped-file_test.cc b/icing/file/memory-mapped-file_test.cc new file mode 100644 index 0000000..16f76e6 --- /dev/null +++ b/icing/file/memory-mapped-file_test.cc @@ -0,0 +1,668 @@ +// Copyright (C) 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "icing/file/memory-mapped-file.h" + +#include <cstdint> +#include <limits> +#include <string> + +#include "icing/text_classifier/lib3/utils/base/status.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "icing/file/filesystem.h" +#include "icing/file/mock-filesystem.h" +#include "icing/testing/common-matchers.h" +#include "icing/testing/tmp-directory.h" + +using ::testing::DoDefault; +using ::testing::Eq; +using ::testing::Gt; +using ::testing::IsNull; +using ::testing::Le; +using ::testing::Not; +using ::testing::NotNull; +using ::testing::Return; + +namespace icing { +namespace lib { + +namespace { + +class MemoryMappedFileTest : public ::testing::Test { + protected: + void SetUp() override { file_path_ = GetTestTempDir() + "/mmap_test_file"; } + + void TearDown() override { filesystem_.DeleteFile(file_path_.c_str()); } + + const Filesystem& filesystem() const { return filesystem_; } + + Filesystem filesystem_; + std::string file_path_; +}; + +TEST_F(MemoryMappedFileTest, Create) { + constexpr int max_file_size = 8192; + MemoryMappedFile::Strategy stragegy = + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC; + // Create MemoryMappedFile + ICING_ASSERT_OK_AND_ASSIGN(MemoryMappedFile mmapped_file, + MemoryMappedFile::Create(filesystem_, file_path_, + stragegy, max_file_size)); + + EXPECT_THAT(mmapped_file.strategy(), Eq(stragegy)); + EXPECT_THAT(mmapped_file.max_file_size(), Eq(max_file_size)); + EXPECT_THAT(mmapped_file.region(), IsNull()); + EXPECT_THAT(mmapped_file.mutable_region(), IsNull()); + EXPECT_THAT(mmapped_file.file_offset(), Eq(0)); + EXPECT_THAT(mmapped_file.region_size(), Eq(0)); + EXPECT_THAT(mmapped_file.available_size(), Eq(0)); +} + +TEST_F(MemoryMappedFileTest, CreateFromExistingFile) { + int init_file_size = 100; + { + // Initialize file + ScopedFd sfd(filesystem_.OpenForWrite(file_path_.c_str())); + ASSERT_TRUE(sfd.is_valid()); + auto buf = std::make_unique<char[]>(init_file_size); + ASSERT_TRUE(filesystem_.Write(sfd.get(), buf.get(), init_file_size)); + } + + constexpr int max_file_size = 8192; + MemoryMappedFile::Strategy stragegy = + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC; + // Create MemoryMappedFile from an existing file + ICING_ASSERT_OK_AND_ASSIGN(MemoryMappedFile mmapped_file, + MemoryMappedFile::Create(filesystem_, file_path_, + stragegy, max_file_size)); + + EXPECT_THAT(mmapped_file.strategy(), Eq(stragegy)); + EXPECT_THAT(mmapped_file.max_file_size(), Eq(max_file_size)); + EXPECT_THAT(mmapped_file.region(), IsNull()); + EXPECT_THAT(mmapped_file.mutable_region(), IsNull()); + EXPECT_THAT(mmapped_file.file_offset(), Eq(0)); + EXPECT_THAT(mmapped_file.region_size(), Eq(0)); + EXPECT_THAT(mmapped_file.available_size(), Eq(0)); +} + +TEST_F(MemoryMappedFileTest, CreateWithInvalidMaxFileSize) { + EXPECT_THAT( + MemoryMappedFile::Create(filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, + /*max_file_size=*/-1), + StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE)); + EXPECT_THAT(MemoryMappedFile::Create( + filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, + /*max_file_size=*/MemoryMappedFile::kMaxFileSize + 1), + StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE)); + + EXPECT_THAT(MemoryMappedFile::Create( + filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, + /*max_file_size=*/-1, /*pre_mapping_file_offset=*/0, + /*pre_mapping_mmap_size=*/8192), + StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE)); + EXPECT_THAT( + MemoryMappedFile::Create( + filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, + /*max_file_size=*/MemoryMappedFile::kMaxFileSize + 1, + /*pre_mapping_file_offset=*/0, /*pre_mapping_mmap_size=*/8192), + StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE)); +} + +TEST_F(MemoryMappedFileTest, CreateWithPreMappingInfo) { + constexpr int max_file_size = 8192; + constexpr int pre_mapping_file_offset = 99; + constexpr int pre_mapping_mmap_size = 2000; + MemoryMappedFile::Strategy stragegy = + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC; + // Create MemoryMappedFile with pre-mapping file_offset and mmap_size + ICING_ASSERT_OK_AND_ASSIGN( + MemoryMappedFile mmapped_file, + MemoryMappedFile::Create(filesystem_, file_path_, stragegy, max_file_size, + pre_mapping_file_offset, pre_mapping_mmap_size)); + + EXPECT_THAT(mmapped_file.strategy(), Eq(stragegy)); + EXPECT_THAT(mmapped_file.max_file_size(), Eq(max_file_size)); + EXPECT_THAT(mmapped_file.region(), NotNull()); + EXPECT_THAT(mmapped_file.mutable_region(), NotNull()); + EXPECT_THAT(mmapped_file.file_offset(), Eq(pre_mapping_file_offset)); + EXPECT_THAT(mmapped_file.region_size(), Eq(pre_mapping_mmap_size)); + EXPECT_THAT(mmapped_file.available_size(), Eq(0)); + + // Manually grow the file externally and mutate region. There should be no + // memory error. + { + ScopedFd sfd(filesystem_.OpenForAppend(file_path_.c_str())); + ASSERT_TRUE(sfd.is_valid()); + int grow_size = 4096; + auto buf = std::make_unique<char[]>(grow_size); + ASSERT_TRUE(filesystem_.Write(sfd.get(), buf.get(), grow_size)); + } + mmapped_file.mutable_region()[0] = 'a'; + ICING_EXPECT_OK(mmapped_file.PersistToDisk()); + + { + ScopedFd sfd(filesystem_.OpenForRead(file_path_.c_str())); + ASSERT_TRUE(sfd.is_valid()); + int buf_size = 10; + auto buf = std::make_unique<char[]>(buf_size); + ASSERT_TRUE(filesystem_.PRead(sfd.get(), buf.get(), buf_size, + pre_mapping_file_offset)); + EXPECT_THAT(buf.get()[0], Eq('a')); + } +} + +TEST_F(MemoryMappedFileTest, CreateWithInvalidPreMappingInfo) { + int page_size = MemoryMappedFile::system_page_size(); + int max_file_size = page_size * 2; + + // Negative file_offset + EXPECT_THAT( + MemoryMappedFile::Create(filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, + max_file_size, + /*pre_mapping_file_offset=*/-1, + /*pre_mapping_mmap_size=*/page_size), + StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE)); + + // Negative mmap_size + EXPECT_THAT( + MemoryMappedFile::Create(filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, + max_file_size, /*pre_mapping_file_offset=*/0, + /*pre_mapping_mmap_size=*/-1), + StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE)); + + // pre_mapping_file_offset + pre_mapping_mmap_size > max_file_size. + int pre_mapping_file_offset = 99; + int pre_mapping_mmap_size = max_file_size - pre_mapping_file_offset + 1; + EXPECT_THAT( + MemoryMappedFile::Create(filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, + max_file_size, pre_mapping_file_offset, + pre_mapping_mmap_size), + StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE)); + + // Edge cases to make sure the implementation of range check won't have + // integer overflow bug. + EXPECT_THAT( + MemoryMappedFile::Create( + filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size, + /*pre_mapping_file_offset=*/99, + /*pre_mapping_mmap_size=*/std::numeric_limits<int64_t>::max()), + StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE)); + EXPECT_THAT(MemoryMappedFile::Create( + filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, + max_file_size, /*pre_mapping_file_offset=*/0, + /*pre_mapping_mmap_size=*/INT64_C(-1) * + (std::numeric_limits<int64_t>::max() - max_file_size)), + StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE)); + EXPECT_THAT( + MemoryMappedFile::Create( + filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size, + /*pre_mapping_file_offset=*/INT64_C(-1) * + (std::numeric_limits<int64_t>::max() - max_file_size), + /*pre_mapping_mmap_size=*/page_size), + StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE)); +} + +// TODO(b/247671531): remove this test after deprecating Remap +TEST_F(MemoryMappedFileTest, RemapZeroMmapSizeShouldUnmap) { + // Create MemoryMappedFile + ICING_ASSERT_OK_AND_ASSIGN( + MemoryMappedFile mmapped_file, + MemoryMappedFile::Create(filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, + MemoryMappedFile::kDefaultMaxFileSize)); + + int page_size = MemoryMappedFile::system_page_size(); + int file_offset = 99; + int mmap_size = page_size * 2 - file_offset; + ICING_ASSERT_OK(mmapped_file.Remap(file_offset, mmap_size)); + ASSERT_THAT(mmapped_file.region(), NotNull()); + + // Call GrowAndRemapIfNecessary with any file_offset and new_mmap_size = 0. + // The original mmapped region should be unmapped. + ICING_EXPECT_OK(mmapped_file.Remap(file_offset, /*mmap_size=*/0)); + EXPECT_THAT(mmapped_file.region(), IsNull()); +} + +TEST_F(MemoryMappedFileTest, GrowAndRemapIfNecessary) { + int page_size = MemoryMappedFile::system_page_size(); + int pre_mapping_file_offset = 99; + int pre_mapping_mmap_size = page_size * 2 - pre_mapping_file_offset; + { + // Create MemoryMappedFile with pre-mapping file_offset and mmap_size + // without growing the file. + ICING_ASSERT_OK_AND_ASSIGN( + MemoryMappedFile mmapped_file, + MemoryMappedFile::Create( + filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, + MemoryMappedFile::kDefaultMaxFileSize, pre_mapping_file_offset, + pre_mapping_mmap_size)); + ASSERT_THAT(filesystem_.GetFileSize(file_path_.c_str()), Eq(0)); + const char* original_region = mmapped_file.region(); + + // Call GrowAndRemapIfNecessary with same file_offset and new mmap_size that + // doesn't exceed pre_mapping_mmap_size. The underlying file size should + // grow correctly, but there should be no remap. + int new_mmap_size1 = page_size - pre_mapping_file_offset; + ICING_EXPECT_OK(mmapped_file.GrowAndRemapIfNecessary( + pre_mapping_file_offset, new_mmap_size1)); + + EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()), + Eq(pre_mapping_file_offset + new_mmap_size1)); + EXPECT_THAT(mmapped_file.region(), Eq(original_region)); + EXPECT_THAT(mmapped_file.mutable_region(), Eq(original_region)); + EXPECT_THAT(mmapped_file.file_offset(), Eq(pre_mapping_file_offset)); + EXPECT_THAT(mmapped_file.region_size(), Eq(pre_mapping_mmap_size)); + EXPECT_THAT(mmapped_file.available_size(), Eq(new_mmap_size1)); + + // Test it with new_mmap_size2 = pre_mapping_mmap_size + int new_mmap_size2 = pre_mapping_mmap_size; + ICING_EXPECT_OK(mmapped_file.GrowAndRemapIfNecessary( + pre_mapping_file_offset, new_mmap_size2)); + + EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()), + Eq(pre_mapping_file_offset + new_mmap_size2)); + EXPECT_THAT(mmapped_file.region(), Eq(original_region)); + EXPECT_THAT(mmapped_file.mutable_region(), Eq(original_region)); + EXPECT_THAT(mmapped_file.file_offset(), Eq(pre_mapping_file_offset)); + EXPECT_THAT(mmapped_file.region_size(), Eq(pre_mapping_mmap_size)); + EXPECT_THAT(mmapped_file.available_size(), Eq(new_mmap_size2)); + + // Write some bytes to region()[0]. It should write the underlying file at + // file_offset. + mmapped_file.mutable_region()[0] = 'a'; + ICING_ASSERT_OK(mmapped_file.PersistToDisk()); + } + + ScopedFd sfd(filesystem_.OpenForRead(file_path_.c_str())); + ASSERT_TRUE(sfd.is_valid()); + int buf_size = 1; + auto buf = std::make_unique<char[]>(buf_size); + ASSERT_TRUE(filesystem_.PRead(sfd.get(), buf.get(), buf_size, + pre_mapping_file_offset)); + EXPECT_THAT(buf.get()[0], Eq('a')); +} + +TEST_F(MemoryMappedFileTest, + GrowAndRemapIfNecessaryExceedingPreMappingMmapSize) { + int page_size = MemoryMappedFile::system_page_size(); + int pre_mapping_file_offset = 99; + int pre_mapping_mmap_size = page_size * 2 - pre_mapping_file_offset; + // Create MemoryMappedFile with pre-mapping file_offset and mmap_size without + // growing the file. + ICING_ASSERT_OK_AND_ASSIGN( + MemoryMappedFile mmapped_file, + MemoryMappedFile::Create(filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, + MemoryMappedFile::kDefaultMaxFileSize, + pre_mapping_file_offset, pre_mapping_mmap_size)); + const char* original_region = mmapped_file.region(); + + // Call GrowAndRemapIfNecessary with same file offset and new mmap_size that + // exceeds pre_mapping_mmap_size (but still below max_file_size). The + // underlying file size should grow correctly and the region should be + // remapped. + int new_mmap_size = page_size * 3 - pre_mapping_file_offset; + ICING_EXPECT_OK(mmapped_file.GrowAndRemapIfNecessary(pre_mapping_file_offset, + new_mmap_size)); + + EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()), + Eq(pre_mapping_file_offset + new_mmap_size)); + EXPECT_THAT(mmapped_file.region(), Not(Eq(original_region))); + EXPECT_THAT(mmapped_file.file_offset(), Eq(pre_mapping_file_offset)); + EXPECT_THAT(mmapped_file.region_size(), Eq(new_mmap_size)); + EXPECT_THAT(mmapped_file.available_size(), Eq(new_mmap_size)); +} + +TEST_F(MemoryMappedFileTest, GrowAndRemapIfNecessaryDecreasingMmapSize) { + int page_size = MemoryMappedFile::system_page_size(); + int pre_mapping_file_offset = 99; + int pre_mapping_mmap_size = page_size * 2 - pre_mapping_file_offset; + // Create MemoryMappedFile with pre-mapping file_offset and mmap_size, and + // call GrowAndRemapIfNecessary to grow the underlying file. + ICING_ASSERT_OK_AND_ASSIGN( + MemoryMappedFile mmapped_file, + MemoryMappedFile::Create(filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, + MemoryMappedFile::kDefaultMaxFileSize, + pre_mapping_file_offset, pre_mapping_mmap_size)); + ICING_ASSERT_OK(mmapped_file.GrowAndRemapIfNecessary(pre_mapping_file_offset, + pre_mapping_mmap_size)); + + const char* original_region = mmapped_file.region(); + int original_file_size = filesystem_.GetFileSize(file_path_.c_str()); + ASSERT_THAT(original_file_size, + Eq(pre_mapping_file_offset + pre_mapping_mmap_size)); + ASSERT_THAT(mmapped_file.region_size(), Eq(pre_mapping_mmap_size)); + ASSERT_THAT(mmapped_file.available_size(), Eq(pre_mapping_mmap_size)); + + // Call GrowAndRemapIfNecessary with same file offset and new mmap_size + // smaller than pre_mapping_mmap_size. There should be no file growth/truncate + // or remap. + int new_mmap_size = page_size - pre_mapping_file_offset; + ICING_EXPECT_OK(mmapped_file.GrowAndRemapIfNecessary(pre_mapping_file_offset, + new_mmap_size)); + + EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()), + Eq(original_file_size)); + EXPECT_THAT(mmapped_file.region(), Eq(original_region)); + EXPECT_THAT(mmapped_file.file_offset(), Eq(pre_mapping_file_offset)); + EXPECT_THAT(mmapped_file.region_size(), Eq(pre_mapping_mmap_size)); + EXPECT_THAT(mmapped_file.available_size(), Eq(pre_mapping_mmap_size)); +} + +TEST_F(MemoryMappedFileTest, GrowAndRemapIfNecessaryZeroMmapSizeShouldUnmap) { + int page_size = MemoryMappedFile::system_page_size(); + int pre_mapping_file_offset = 99; + int pre_mapping_mmap_size = page_size * 2 - pre_mapping_file_offset; + // Create MemoryMappedFile with pre-mapping file_offset and mmap_size, and + // call GrowAndRemapIfNecessary to grow the underlying file. + ICING_ASSERT_OK_AND_ASSIGN( + MemoryMappedFile mmapped_file, + MemoryMappedFile::Create(filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, + MemoryMappedFile::kDefaultMaxFileSize, + pre_mapping_file_offset, pre_mapping_mmap_size)); + ICING_ASSERT_OK(mmapped_file.GrowAndRemapIfNecessary(pre_mapping_file_offset, + pre_mapping_mmap_size)); + + int original_file_size = filesystem_.GetFileSize(file_path_.c_str()); + ASSERT_THAT(original_file_size, + Eq(pre_mapping_file_offset + pre_mapping_mmap_size)); + ASSERT_THAT(mmapped_file.region(), NotNull()); + ASSERT_THAT(mmapped_file.region_size(), Eq(pre_mapping_mmap_size)); + ASSERT_THAT(mmapped_file.available_size(), Eq(pre_mapping_mmap_size)); + + // Call GrowAndRemapIfNecessary with any file_offset and new_mmap_size = 0. + // There should be no file growth/truncate, but the original mmapped region + // should be unmapped. + ICING_EXPECT_OK(mmapped_file.GrowAndRemapIfNecessary(pre_mapping_file_offset, + /*new_mmap_size=*/0)); + + EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()), + Eq(original_file_size)); + EXPECT_THAT(mmapped_file.region(), IsNull()); + EXPECT_THAT(mmapped_file.file_offset(), Eq(0)); + EXPECT_THAT(mmapped_file.region_size(), Eq(0)); + EXPECT_THAT(mmapped_file.available_size(), Eq(0)); +} + +TEST_F(MemoryMappedFileTest, GrowAndRemapIfNecessaryChangeOffset) { + int page_size = MemoryMappedFile::system_page_size(); + int pre_mapping_file_offset = 99; + int pre_mapping_mmap_size = page_size * 2 - pre_mapping_file_offset; + // Create MemoryMappedFile with pre-mapping file_offset and mmap_size, and + // call GrowAndRemapIfNecessary to grow the underlying file. + ICING_ASSERT_OK_AND_ASSIGN( + MemoryMappedFile mmapped_file, + MemoryMappedFile::Create(filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, + MemoryMappedFile::kDefaultMaxFileSize, + pre_mapping_file_offset, pre_mapping_mmap_size)); + ICING_ASSERT_OK(mmapped_file.GrowAndRemapIfNecessary(pre_mapping_file_offset, + pre_mapping_mmap_size)); + + const char* original_region = mmapped_file.region(); + int original_file_size = filesystem_.GetFileSize(file_path_.c_str()); + ASSERT_THAT(original_file_size, + Eq(pre_mapping_file_offset + pre_mapping_mmap_size)); + ASSERT_THAT(mmapped_file.region_size(), Eq(pre_mapping_mmap_size)); + ASSERT_THAT(mmapped_file.available_size(), Eq(pre_mapping_mmap_size)); + + // Call GrowAndRemapIfNecessary with different file_offset and new mmap_size + // that doesn't require to grow the underlying file. The region should still + // be remapped since offset has been changed. + int new_file_offset = pre_mapping_file_offset + page_size; + int new_mmap_size = page_size * 2 - new_file_offset; + ASSERT_THAT(new_file_offset + new_mmap_size, Le(original_file_size)); + ICING_EXPECT_OK( + mmapped_file.GrowAndRemapIfNecessary(new_file_offset, new_mmap_size)); + + EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()), + Eq(original_file_size)); + EXPECT_THAT(mmapped_file.region(), Not(Eq(original_region))); + EXPECT_THAT(mmapped_file.file_offset(), Eq(new_file_offset)); + EXPECT_THAT(mmapped_file.region_size(), Eq(new_mmap_size)); + EXPECT_THAT(mmapped_file.available_size(), Eq(new_mmap_size)); +} + +TEST_F(MemoryMappedFileTest, GrowAndRemapIfNecessaryInvalidMmapRegionInfo) { + int page_size = MemoryMappedFile::system_page_size(); + int max_file_size = page_size * 2; + // Create MemoryMappedFile with pre-mapping file_offset and mmap_size, and + // call GrowAndRemapIfNecessary to grow the underlying file. + ICING_ASSERT_OK_AND_ASSIGN( + MemoryMappedFile mmapped_file, + MemoryMappedFile::Create(filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, + max_file_size, + /*pre_mapping_file_offset=*/0, + /*pre_mapping_mmap_size=*/page_size * 2)); + + // Negative new_file_offset. + EXPECT_THAT(mmapped_file.GrowAndRemapIfNecessary( + /*new_file_offset=*/-1, + /*new_mmap_size=*/page_size), + StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE)); + + // Negative new_mmap_size + EXPECT_THAT(mmapped_file.GrowAndRemapIfNecessary( + /*new_file_offset=*/0, + /*new_mmap_size=*/-1), + StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE)); + + // new_file_offset + new_mmap_size > max_file_size. + int new_file_offset = 99; + int new_mmap_size = max_file_size - new_file_offset + 1; + EXPECT_THAT( + mmapped_file.GrowAndRemapIfNecessary(new_file_offset, new_mmap_size), + StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE)); + + // Edge cases to make sure the implementation of range check won't have + // integer overflow bug. + EXPECT_THAT(mmapped_file.GrowAndRemapIfNecessary( + /*new_file_offset=*/99, + /*new_mmap_size=*/std::numeric_limits<int64_t>::max()), + StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE)); + EXPECT_THAT(mmapped_file.GrowAndRemapIfNecessary( + /*new_file_offset=*/0, + /*new_mmap_size=*/INT64_C(-1) * + (std::numeric_limits<int64_t>::max() - max_file_size)), + StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE)); + EXPECT_THAT(mmapped_file.GrowAndRemapIfNecessary( + /*new_file_offset=*/INT64_C(-1) * + (std::numeric_limits<int64_t>::max() - max_file_size), + /*new_mmap_size=*/page_size), + StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE)); +} + +TEST_F(MemoryMappedFileTest, RemapFailureStillValidInstance) { + auto mock_filesystem = std::make_unique<MockFilesystem>(); + int page_size = MemoryMappedFile::system_page_size(); + int max_file_size = page_size * 10; + + // 1. Create MemoryMappedFile with pre-mapping offset=0 and + // mmap_size=page_size. Also call GrowAndRemapIfNecessary to grow the file + // size to page_size. + ICING_ASSERT_OK_AND_ASSIGN( + MemoryMappedFile mmapped_file, + MemoryMappedFile::Create(*mock_filesystem, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, + max_file_size, + /*pre_mapping_file_offset=*/0, + /*pre_mapping_mmap_size=*/page_size)); + ICING_ASSERT_OK( + mmapped_file.GrowAndRemapIfNecessary(/*new_file_offset=*/0, + /*new_mmap_size=*/page_size)); + ASSERT_THAT(filesystem_.GetFileSize(file_path_.c_str()), Eq(page_size)); + ASSERT_THAT(mmapped_file.region(), NotNull()); + ASSERT_THAT(mmapped_file.mutable_region(), NotNull()); + ASSERT_THAT(mmapped_file.file_offset(), Eq(0)); + ASSERT_THAT(mmapped_file.region_size(), Eq(page_size)); + ASSERT_THAT(mmapped_file.available_size(), Eq(page_size)); + mmapped_file.mutable_region()[page_size - 1] = 'a'; + + const char* original_region = mmapped_file.region(); + + // 2. Call GrowAndRemapIfNecessary with different offset and greater + // mmap_size. Here we're testing the case when file growth succeeds but + // remap (RemapImpl) fails. + // To make RemapImpl fail, mock OpenForWrite to fail. Note that we use + // OpenForAppend when growing the file, so it is ok to make OpenForWrite + // fail without affecting file growth. + ON_CALL(*mock_filesystem, OpenForWrite(_)).WillByDefault(Return(-1)); + EXPECT_THAT( + mmapped_file.GrowAndRemapIfNecessary(/*new_file_offset=*/1, + /*new_mmap_size=*/page_size * 2 - 1), + StatusIs(libtextclassifier3::StatusCode::INTERNAL)); + + // 3. Verify the result. The file size should be grown, but since remap fails, + // mmap related fields should remain unchanged. + EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()), Eq(page_size * 2)); + EXPECT_THAT(mmapped_file.region(), Eq(original_region)); + EXPECT_THAT(mmapped_file.mutable_region(), Eq(original_region)); + EXPECT_THAT(mmapped_file.file_offset(), Eq(0)); + EXPECT_THAT(mmapped_file.region_size(), Eq(page_size)); + EXPECT_THAT(mmapped_file.available_size(), Eq(page_size)); + // We should still be able to get the correct content via region. + EXPECT_THAT(mmapped_file.region()[page_size - 1], Eq('a')); +} + +TEST_F(MemoryMappedFileTest, BadFileSizeDuringGrowReturnsError) { + auto mock_filesystem = std::make_unique<MockFilesystem>(); + int page_size = MemoryMappedFile::system_page_size(); + int max_file_size = page_size * 10; + ICING_ASSERT_OK_AND_ASSIGN( + MemoryMappedFile mmapped_file, + MemoryMappedFile::Create(*mock_filesystem, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, + max_file_size, + /*pre_mapping_file_offset=*/0, + /*pre_mapping_mmap_size=*/page_size)); + ICING_ASSERT_OK( + mmapped_file.GrowAndRemapIfNecessary(/*new_file_offset=*/0, + /*new_mmap_size=*/page_size)); + ASSERT_THAT(filesystem_.GetFileSize(file_path_.c_str()), Eq(page_size)); + ASSERT_THAT(mmapped_file.region(), NotNull()); + ASSERT_THAT(mmapped_file.mutable_region(), NotNull()); + ASSERT_THAT(mmapped_file.file_offset(), Eq(0)); + ASSERT_THAT(mmapped_file.region_size(), Eq(page_size)); + ASSERT_THAT(mmapped_file.available_size(), Eq(page_size)); + mmapped_file.mutable_region()[page_size - 1] = 'a'; + + const char* original_region = mmapped_file.region(); + + // Calling GrowAndRemapIfNecessary with larger size will cause file growth. + // During file growth, we will attempt to sync the underlying file size via + // GetFileSize to see if growing is actually necessary. Mock GetFileSize to + // return an error. + ON_CALL(*mock_filesystem, GetFileSize(A<const char*>())) + .WillByDefault(Return(Filesystem::kBadFileSize)); + + // We should fail gracefully and return an INTERNAL error to indicate that + // there was an issue retrieving the file size. The underlying file size and + // mmap info should remain unchanged. + EXPECT_THAT( + mmapped_file.GrowAndRemapIfNecessary(/*new_file_offset=*/0, + /*new_mmap_size=*/page_size * 2), + StatusIs(libtextclassifier3::StatusCode::INTERNAL)); + EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()), Eq(page_size)); + EXPECT_THAT(mmapped_file.region(), Eq(original_region)); + EXPECT_THAT(mmapped_file.mutable_region(), Eq(original_region)); + EXPECT_THAT(mmapped_file.file_offset(), Eq(0)); + EXPECT_THAT(mmapped_file.region_size(), Eq(page_size)); + EXPECT_THAT(mmapped_file.available_size(), Eq(page_size)); + // We should still be able to get the correct content via region. + EXPECT_THAT(mmapped_file.region()[page_size - 1], Eq('a')); +} + +TEST_F(MemoryMappedFileTest, WriteSucceedsPartiallyAndFailsDuringGrow) { + auto mock_filesystem = std::make_unique<MockFilesystem>(); + int page_size = MemoryMappedFile::system_page_size(); + int max_file_size = page_size * 10; + ICING_ASSERT_OK_AND_ASSIGN( + MemoryMappedFile mmapped_file, + MemoryMappedFile::Create(*mock_filesystem, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, + max_file_size, + /*pre_mapping_file_offset=*/0, + /*pre_mapping_mmap_size=*/max_file_size)); + + // 1. Initially the underlying file size is 0. When calling + // GrowAndRemapIfNecessary first time with new_mmap_size = page_size * 2, + // Write() should be called 2 times and each time should grow the + // underlying file by page_size bytes. + // Mock the 2nd Write() to write partially (1 byte) and fail, so the file + // will only be grown by page_size + 1 bytes in total. + auto open_lambda = [this](int fd, const void* data, + size_t data_size) -> bool { + EXPECT_THAT(data_size, Gt(1)); + EXPECT_THAT(this->filesystem_.Write(fd, data, 1), Eq(1)); + return false; + }; + EXPECT_CALL(*mock_filesystem, Write(A<int>(), A<const void*>(), A<size_t>())) + .WillOnce(DoDefault()) + .WillOnce(open_lambda); + + // 2. Call GrowAndRemapIfNecessary and expect to fail. The actual file size + // should be page_size + 1, but the (cached) file_size_ should be page_size + // since it fails to update that partially written byte of the 2nd Write(). + EXPECT_THAT( + mmapped_file.GrowAndRemapIfNecessary(/*new_file_offset=*/0, + /*new_mmap_size=*/page_size * 2), + StatusIs(libtextclassifier3::StatusCode::INTERNAL)); + EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()), Eq(page_size + 1)); + EXPECT_THAT(mmapped_file.available_size(), Eq(page_size)); + + // 3. Call GrowAndRemapIfNecessary again with new_mmap_size = page_size + 1. + // Even though file_size_ only caches page_size and excludes the partially + // written byte(s) due to failure of the previous round of grow, the next + // round should sync the actual file size to file_size_ via system call and + // skip Write() since the actual file size is large enough for the new + // mmap_size. + // Note: WillOnce() above will ensure that Write() won't be called for + // another time. + ICING_EXPECT_OK( + mmapped_file.GrowAndRemapIfNecessary(/*new_file_offset=*/0, + /*new_mmap_size=*/page_size + 1)); + EXPECT_THAT(mmapped_file.available_size(), Eq(page_size + 1)); + + // 4. Call GrowAndRemapIfNecessary again with new_mmap_size = page_size * 2. + // Even though the current file size is page_size + 1, the next round of + // grow should automatically calibrate the file size back to a multiple of + // page_size instead of just simply appending page_size bytes to the file. + EXPECT_CALL(*mock_filesystem, Write(A<int>(), A<const void*>(), A<size_t>())) + .WillOnce(DoDefault()); + ICING_EXPECT_OK( + mmapped_file.GrowAndRemapIfNecessary(/*new_file_offset=*/0, + /*new_mmap_size=*/page_size * 2)); + EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()), Eq(page_size * 2)); + EXPECT_THAT(mmapped_file.available_size(), Eq(page_size * 2)); +} + +} // namespace + +} // namespace lib +} // namespace icing diff --git a/icing/file/persistent-hash-map.cc b/icing/file/persistent-hash-map.cc index 8101c89..0c9fd7f 100644 --- a/icing/file/persistent-hash-map.cc +++ b/icing/file/persistent-hash-map.cc @@ -450,10 +450,11 @@ PersistentHashMap::InitializeNewFiles(const Filesystem& filesystem, &new_crcs, &new_info)); // Mmap the content of the crcs and info. - auto metadata_mmapped_file = std::make_unique<MemoryMappedFile>( - filesystem, metadata_file_path, - MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC); - ICING_RETURN_IF_ERROR(metadata_mmapped_file->Remap( + ICING_ASSIGN_OR_RETURN(MemoryMappedFile metadata_mmapped_file, + MemoryMappedFile::Create( + filesystem, metadata_file_path, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC)); + ICING_RETURN_IF_ERROR(metadata_mmapped_file.Remap( /*file_offset=*/0, /*mmap_size=*/sizeof(Crcs) + sizeof(Info))); return std::unique_ptr<PersistentHashMap>(new PersistentHashMap( @@ -468,10 +469,12 @@ PersistentHashMap::InitializeExistingFiles(const Filesystem& filesystem, int32_t value_type_size, int32_t max_load_factor_percent) { // Mmap the content of the crcs and info. - auto metadata_mmapped_file = std::make_unique<MemoryMappedFile>( - filesystem, GetMetadataFilePath(base_dir, kSubDirectory), - MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC); - ICING_RETURN_IF_ERROR(metadata_mmapped_file->Remap( + ICING_ASSIGN_OR_RETURN( + MemoryMappedFile metadata_mmapped_file, + MemoryMappedFile::Create( + filesystem, GetMetadataFilePath(base_dir, kSubDirectory), + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC)); + ICING_RETURN_IF_ERROR(metadata_mmapped_file.Remap( /*file_offset=*/0, /*mmap_size=*/sizeof(Crcs) + sizeof(Info))); // Initialize 3 storages @@ -491,9 +494,9 @@ PersistentHashMap::InitializeExistingFiles(const Filesystem& filesystem, MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC)); Crcs* crcs_ptr = reinterpret_cast<Crcs*>( - metadata_mmapped_file->mutable_region() + Crcs::kFileOffset); + metadata_mmapped_file.mutable_region() + Crcs::kFileOffset); Info* info_ptr = reinterpret_cast<Info*>( - metadata_mmapped_file->mutable_region() + Info::kFileOffset); + metadata_mmapped_file.mutable_region() + Info::kFileOffset); // Value type size should be consistent. if (value_type_size != info_ptr->value_type_size) { @@ -514,7 +517,7 @@ PersistentHashMap::InitializeExistingFiles(const Filesystem& filesystem, info_ptr->max_load_factor_percent = max_load_factor_percent; crcs_ptr->component_crcs.info_crc = info_ptr->ComputeChecksum().Get(); crcs_ptr->all_crc = crcs_ptr->component_crcs.ComputeChecksum().Get(); - ICING_RETURN_IF_ERROR(metadata_mmapped_file->PersistToDisk()); + ICING_RETURN_IF_ERROR(metadata_mmapped_file.PersistToDisk()); } auto persistent_hash_map = diff --git a/icing/file/persistent-hash-map.h b/icing/file/persistent-hash-map.h index 9a9ca8d..ef3995c 100644 --- a/icing/file/persistent-hash-map.h +++ b/icing/file/persistent-hash-map.h @@ -373,13 +373,14 @@ class PersistentHashMap { explicit PersistentHashMap( const Filesystem& filesystem, std::string_view base_dir, - std::unique_ptr<MemoryMappedFile> metadata_mmapped_file, + MemoryMappedFile&& metadata_mmapped_file, std::unique_ptr<FileBackedVector<Bucket>> bucket_storage, std::unique_ptr<FileBackedVector<Entry>> entry_storage, std::unique_ptr<FileBackedVector<char>> kv_storage) : filesystem_(&filesystem), base_dir_(base_dir), - metadata_mmapped_file_(std::move(metadata_mmapped_file)), + metadata_mmapped_file_(std::make_unique<MemoryMappedFile>( + std::move(metadata_mmapped_file))), bucket_storage_(std::move(bucket_storage)), entry_storage_(std::move(entry_storage)), kv_storage_(std::move(kv_storage)) {} diff --git a/icing/file/portable-file-backed-proto-log.h b/icing/file/portable-file-backed-proto-log.h index 409ab96..e48e6e0 100644 --- a/icing/file/portable-file-backed-proto-log.h +++ b/icing/file/portable-file-backed-proto-log.h @@ -799,8 +799,10 @@ libtextclassifier3::StatusOr<Crc32> PortableFileBackedProtoLog<ProtoT>::ComputeChecksum( const Filesystem* filesystem, const std::string& file_path, Crc32 initial_crc, int64_t start, int64_t end) { - auto mmapped_file = MemoryMappedFile(*filesystem, file_path, - MemoryMappedFile::Strategy::READ_ONLY); + ICING_ASSIGN_OR_RETURN( + MemoryMappedFile mmapped_file, + MemoryMappedFile::Create(*filesystem, file_path, + MemoryMappedFile::Strategy::READ_ONLY)); Crc32 new_crc(initial_crc.Get()); if (start < 0) { diff --git a/icing/file/portable-file-backed-proto-log_test.cc b/icing/file/portable-file-backed-proto-log_test.cc index 795271a..af09d18 100644 --- a/icing/file/portable-file-backed-proto-log_test.cc +++ b/icing/file/portable-file-backed-proto-log_test.cc @@ -927,8 +927,10 @@ TEST_F(PortableFileBackedProtoLogTest, EraseProtoShouldSetZero) { // Checks if the erased area is set to 0. int64_t file_size = filesystem_.GetFileSize(file_path_.c_str()); - MemoryMappedFile mmapped_file(filesystem_, file_path_, - MemoryMappedFile::Strategy::READ_ONLY); + ICING_ASSERT_OK_AND_ASSIGN( + MemoryMappedFile mmapped_file, + MemoryMappedFile::Create(filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_ONLY)); // document1_offset + sizeof(int) is the start byte of the proto where // sizeof(int) is the size of the proto metadata. |