aboutsummaryrefslogtreecommitdiff
path: root/icing/file
diff options
context:
space:
mode:
authorGrace Zhao <gracezrx@google.com>2022-10-27 13:52:32 -0700
committerGrace Zhao <gracezrx@google.com>2022-10-27 21:06:49 +0000
commitd0795655ecf1aac89bb1802f4e1d3f5fb7dcb2b0 (patch)
treeff367f5a9c0f94a1da09638e5d986d5e9bcb62ff /icing/file
parent4af97ca767a9f2e88dc75e05784dd010e7541d13 (diff)
downloadicing-d0795655ecf1aac89bb1802f4e1d3f5fb7dcb2b0.tar.gz
Sync from upstream.
Descriptions: ====================================================================== Fix the bug in PostingListAccessor found by the Icing Monkey test ====================================================================== Add the logic to handle fatal errors from IcingDynamicTrie to avoid crashing ====================================================================== Clear out the dead code IcingDynamicTrie::Compact ====================================================================== [MemoryMappedFile][RemapV2][1/x] Add factory method for MemoryMappedFile ====================================================================== [MemoryMappedFile][RemapV2][2/x] Create GrowAndRemapIfNecessary and change factory method ====================================================================== [MemoryMappedFile][RemapV2][3/x] Migrate FileBackedVector to use GrowAndRemapIfNecessary ====================================================================== Add JNI latency for query latency stats breakdown ====================================================================== Bug: 247671531 Bug: 247929909 Bug: 253282365 Change-Id: Ic3b88d0f044edacfe2dfeb08fa381b2186c731cb
Diffstat (limited to 'icing/file')
-rw-r--r--icing/file/file-backed-bitmap.cc13
-rw-r--r--icing/file/file-backed-bitmap.h5
-rw-r--r--icing/file/file-backed-proto-log.h33
-rw-r--r--icing/file/file-backed-proto_test.cc16
-rw-r--r--icing/file/file-backed-vector.h193
-rw-r--r--icing/file/file-backed-vector_test.cc113
-rw-r--r--icing/file/memory-mapped-file-leak_test.cc14
-rw-r--r--icing/file/memory-mapped-file.cc337
-rw-r--r--icing/file/memory-mapped-file.h272
-rw-r--r--icing/file/memory-mapped-file_test.cc668
-rw-r--r--icing/file/persistent-hash-map.cc25
-rw-r--r--icing/file/persistent-hash-map.h5
-rw-r--r--icing/file/portable-file-backed-proto-log.h6
-rw-r--r--icing/file/portable-file-backed-proto-log_test.cc6
14 files changed, 1377 insertions, 329 deletions
diff --git a/icing/file/file-backed-bitmap.cc b/icing/file/file-backed-bitmap.cc
index a8231e3..bdcfc79 100644
--- a/icing/file/file-backed-bitmap.cc
+++ b/icing/file/file-backed-bitmap.cc
@@ -47,8 +47,12 @@ FileBackedBitmap::Create(const Filesystem* filesystem,
"mmap strategy.");
}
+ ICING_ASSIGN_OR_RETURN(
+ MemoryMappedFile mmapper,
+ MemoryMappedFile::Create(*filesystem, file_path, mmap_strategy));
+
auto bitmap = std::unique_ptr<FileBackedBitmap>(
- new FileBackedBitmap(filesystem, file_path, mmap_strategy));
+ new FileBackedBitmap(filesystem, file_path, std::move(mmapper)));
// TODO(b/216487496): Implement a more robust version of TC_RETURN_IF_ERROR
// that can support error logging.
@@ -62,10 +66,10 @@ FileBackedBitmap::Create(const Filesystem* filesystem,
FileBackedBitmap::FileBackedBitmap(const Filesystem* filesystem,
std::string_view file_path,
- MemoryMappedFile::Strategy mmap_strategy)
+ MemoryMappedFile&& mmapper)
: filesystem_(filesystem),
file_path_(file_path),
- mmapper_(new MemoryMappedFile(*filesystem, file_path, mmap_strategy)) {}
+ mmapper_(std::make_unique<MemoryMappedFile>(std::move(mmapper))) {}
FileBackedBitmap::~FileBackedBitmap() {
// Only update if we have auto_sync setup, otherwise the checksum will be
@@ -269,7 +273,8 @@ libtextclassifier3::Status FileBackedBitmap::GrowTo(int new_num_bits) {
return status;
}
- ICING_VLOG(1) << "Grew file " << file_path_ << " to new size " << new_file_size;
+ ICING_VLOG(1) << "Grew file " << file_path_ << " to new size "
+ << new_file_size;
mutable_header()->state = Header::ChecksumState::kStale;
return libtextclassifier3::Status::OK;
}
diff --git a/icing/file/file-backed-bitmap.h b/icing/file/file-backed-bitmap.h
index e3d98ad..beba14e 100644
--- a/icing/file/file-backed-bitmap.h
+++ b/icing/file/file-backed-bitmap.h
@@ -175,8 +175,9 @@ class FileBackedBitmap {
Header* mutable_header();
// Use FileBackedBitmap::Create() to instantiate.
- FileBackedBitmap(const Filesystem* filesystem, std::string_view file_path,
- MemoryMappedFile::Strategy mmap_strategy);
+ explicit FileBackedBitmap(const Filesystem* filesystem,
+ std::string_view file_path,
+ MemoryMappedFile&& mmapper);
// Verify the contents of the bitmap and get ready for read/write operations.
//
diff --git a/icing/file/file-backed-proto-log.h b/icing/file/file-backed-proto-log.h
index ad7fae9..78236ba 100644
--- a/icing/file/file-backed-proto-log.h
+++ b/icing/file/file-backed-proto-log.h
@@ -186,8 +186,9 @@ class FileBackedProtoLog {
// }
class Iterator {
public:
- Iterator(const Filesystem& filesystem, const std::string& file_path,
- int64_t initial_offset);
+ explicit Iterator(const Filesystem& filesystem,
+ const std::string& file_path, int64_t initial_offset,
+ MemoryMappedFile&& mmapped_file);
// Advances to the position of next proto whether it has been erased or not.
//
@@ -213,7 +214,7 @@ class FileBackedProtoLog {
// Returns an iterator of current proto log. The caller needs to keep the
// proto log unchanged while using the iterator, otherwise unexpected
// behaviors could happen.
- Iterator GetIterator();
+ libtextclassifier3::StatusOr<Iterator> GetIterator();
private:
// Object can only be instantiated via the ::Create factory.
@@ -472,8 +473,10 @@ template <typename ProtoT>
libtextclassifier3::StatusOr<Crc32> FileBackedProtoLog<ProtoT>::ComputeChecksum(
const Filesystem* filesystem, const std::string& file_path,
Crc32 initial_crc, int64_t start, int64_t end) {
- auto mmapped_file = MemoryMappedFile(*filesystem, file_path,
- MemoryMappedFile::Strategy::READ_ONLY);
+ ICING_ASSIGN_OR_RETURN(
+ MemoryMappedFile mmapped_file,
+ MemoryMappedFile::Create(*filesystem, file_path,
+ MemoryMappedFile::Strategy::READ_ONLY));
Crc32 new_crc(initial_crc.Get());
if (start < 0) {
@@ -544,8 +547,10 @@ template <typename ProtoT>
libtextclassifier3::StatusOr<ProtoT> FileBackedProtoLog<ProtoT>::ReadProto(
int64_t file_offset) const {
int64_t file_size = filesystem_->GetFileSize(fd_.get());
- MemoryMappedFile mmapped_file(*filesystem_, file_path_,
- MemoryMappedFile::Strategy::READ_ONLY);
+ ICING_ASSIGN_OR_RETURN(
+ MemoryMappedFile mmapped_file,
+ MemoryMappedFile::Create(*filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_ONLY));
if (file_offset >= file_size) {
// file_size points to the next byte to write at, so subtract one to get
// the inclusive, actual size of file.
@@ -588,9 +593,9 @@ libtextclassifier3::StatusOr<ProtoT> FileBackedProtoLog<ProtoT>::ReadProto(
template <typename ProtoT>
FileBackedProtoLog<ProtoT>::Iterator::Iterator(const Filesystem& filesystem,
const std::string& file_path,
- int64_t initial_offset)
- : mmapped_file_(filesystem, file_path,
- MemoryMappedFile::Strategy::READ_ONLY),
+ int64_t initial_offset,
+ MemoryMappedFile&& mmapped_file)
+ : mmapped_file_(std::move(mmapped_file)),
initial_offset_(initial_offset),
current_offset_(kInvalidOffset),
file_size_(filesystem.GetFileSize(file_path.c_str())) {
@@ -629,10 +634,14 @@ int64_t FileBackedProtoLog<ProtoT>::Iterator::GetOffset() {
}
template <typename ProtoT>
-typename FileBackedProtoLog<ProtoT>::Iterator
+libtextclassifier3::StatusOr<typename FileBackedProtoLog<ProtoT>::Iterator>
FileBackedProtoLog<ProtoT>::GetIterator() {
+ ICING_ASSIGN_OR_RETURN(
+ MemoryMappedFile mmapped_file,
+ MemoryMappedFile::Create(*filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_ONLY));
return Iterator(*filesystem_, file_path_,
- /*initial_offset=*/sizeof(Header));
+ /*initial_offset=*/sizeof(Header), std::move(mmapped_file));
}
template <typename ProtoT>
diff --git a/icing/file/file-backed-proto_test.cc b/icing/file/file-backed-proto_test.cc
index 7f994fb..009af52 100644
--- a/icing/file/file-backed-proto_test.cc
+++ b/icing/file/file-backed-proto_test.cc
@@ -45,7 +45,7 @@ TEST_F(FileBackedProtoTest, SimpleReadWriteTest) {
DocumentBuilder().SetKey("namespace", "google.com").Build();
FileBackedProto<DocumentProto> file_proto(filesystem_, filename_);
- ICING_ASSERT_OK(file_proto.Write(absl::make_unique<DocumentProto>(document)));
+ ICING_ASSERT_OK(file_proto.Write(std::make_unique<DocumentProto>(document)));
EXPECT_THAT(file_proto.Read(), IsOkAndHolds(Pointee(EqualsProto(document))));
// Multiple reads work.
EXPECT_THAT(file_proto.Read(), IsOkAndHolds(Pointee(EqualsProto(document))));
@@ -61,7 +61,7 @@ TEST_F(FileBackedProtoTest, DataPersistsAcrossMultipleInstancesTest) {
EXPECT_THAT(file_proto.Read(), Not(IsOk())); // Nothing to read.
ICING_ASSERT_OK(
- file_proto.Write(absl::make_unique<DocumentProto>(document)));
+ file_proto.Write(std::make_unique<DocumentProto>(document)));
EXPECT_THAT(file_proto.Read(),
IsOkAndHolds(Pointee(EqualsProto(document))));
}
@@ -84,12 +84,12 @@ TEST_F(FileBackedProtoTest, MultipleUpdatesToProtoTest) {
{
FileBackedProto<DocumentProto> file_proto(filesystem_, filename_);
ICING_ASSERT_OK(
- file_proto.Write(absl::make_unique<DocumentProto>(googleProto)));
+ file_proto.Write(std::make_unique<DocumentProto>(googleProto)));
EXPECT_THAT(file_proto.Read(),
IsOkAndHolds(Pointee(EqualsProto(googleProto))));
ICING_ASSERT_OK(
- file_proto.Write(absl::make_unique<DocumentProto>(youtubeProto)));
+ file_proto.Write(std::make_unique<DocumentProto>(youtubeProto)));
EXPECT_THAT(file_proto.Read(),
IsOkAndHolds(Pointee(EqualsProto(youtubeProto))));
}
@@ -100,12 +100,12 @@ TEST_F(FileBackedProtoTest, MultipleUpdatesToProtoTest) {
IsOkAndHolds(Pointee(EqualsProto(youtubeProto))));
ICING_ASSERT_OK(
- file_proto.Write(absl::make_unique<DocumentProto>(wazeProto)));
+ file_proto.Write(std::make_unique<DocumentProto>(wazeProto)));
EXPECT_THAT(file_proto.Read(),
IsOkAndHolds(Pointee(EqualsProto(wazeProto))));
ICING_ASSERT_OK(
- file_proto.Write(absl::make_unique<DocumentProto>(googleProto)));
+ file_proto.Write(std::make_unique<DocumentProto>(googleProto)));
EXPECT_THAT(file_proto.Read(),
IsOkAndHolds(Pointee(EqualsProto(googleProto))));
}
@@ -117,7 +117,7 @@ TEST_F(FileBackedProtoTest, InvalidFilenameTest) {
FileBackedProto<DocumentProto> file_proto(filesystem_, "");
EXPECT_THAT(file_proto.Read(), Not(IsOk()));
- EXPECT_THAT(file_proto.Write(absl::make_unique<DocumentProto>(document)),
+ EXPECT_THAT(file_proto.Write(std::make_unique<DocumentProto>(document)),
Not(IsOk()));
}
@@ -128,7 +128,7 @@ TEST_F(FileBackedProtoTest, FileCorruptionTest) {
{
FileBackedProto<DocumentProto> file_proto(filesystem_, filename_);
ICING_ASSERT_OK(
- file_proto.Write(absl::make_unique<DocumentProto>(document)));
+ file_proto.Write(std::make_unique<DocumentProto>(document)));
EXPECT_THAT(file_proto.Read(),
IsOkAndHolds(Pointee(EqualsProto(document))));
}
diff --git a/icing/file/file-backed-vector.h b/icing/file/file-backed-vector.h
index 165597d..183c091 100644
--- a/icing/file/file-backed-vector.h
+++ b/icing/file/file-backed-vector.h
@@ -166,8 +166,31 @@ class FileBackedVector {
// mmap_strategy : Strategy/optimizations to access the content in the vector,
// see MemoryMappedFile::Strategy for more details
// max_file_size: Maximum file size for FileBackedVector, default
- // kMaxFileSize. See max_file_size_ and kMaxFileSize for more
- // details.
+ // kMaxFileSize. Note that this value won't be written into the
+ // header, so maximum file size will always be specified in
+ // runtime and the caller should make sure the value is correct
+ // and reasonable. Also it will be cached in MemoryMappedFile
+ // member, so we can always call mmapped_file_->max_file_size()
+ // to get it.
+ // The range should be in
+ // [Header::kHeaderSize + kElementTypeSize, kMaxFileSize], and
+ // (max_file_size - Header::kHeaderSize) / kElementTypeSize is
+ // max # of elements that can be stored.
+ // pre_mapping_mmap_size: pre-mapping size of MemoryMappedFile, default 0.
+ // Pre-mapping a large memory region to the file and
+ // grow the underlying file later, so we can avoid
+ // remapping too frequently and reduce the cost of
+ // system call and memory paging after remap. The user
+ // should specify reasonable size to save remapping
+ // cost and avoid exhausting the memory at once in the
+ // beginning.
+ // Note: if the file exists and pre_mapping_mmap_size
+ // is smaller than file_size - Header::kHeaderSize,
+ // then it still pre-maps file_size -
+ // Header::kHeaderSize to make all existing elements
+ // available.
+ // TODO(b/247671531): figure out pre_mapping_mmap_size for each
+ // FileBackedVector use case.
//
// Return:
// FAILED_PRECONDITION_ERROR if the file checksum doesn't match the stored
@@ -178,7 +201,8 @@ class FileBackedVector {
static libtextclassifier3::StatusOr<std::unique_ptr<FileBackedVector<T>>>
Create(const Filesystem& filesystem, const std::string& file_path,
MemoryMappedFile::Strategy mmap_strategy,
- int32_t max_file_size = kMaxFileSize);
+ int32_t max_file_size = kMaxFileSize,
+ int32_t pre_mapping_mmap_size = 0);
// Deletes the FileBackedVector
//
@@ -282,7 +306,8 @@ class FileBackedVector {
// returned from Get/GetMutable/Allocate may be invalidated.
//
// Returns:
- // OUT_OF_RANGE_ERROR if file cannot be grown (i.e. reach max_file_size_)
+ // OUT_OF_RANGE_ERROR if file cannot be grown (i.e. reach
+ // mmapped_file_->max_file_size())
libtextclassifier3::Status Append(const T& value) {
return Set(header_->num_elements, value);
}
@@ -306,7 +331,7 @@ class FileBackedVector {
//
// Returns:
// OUT_OF_RANGE_ERROR if len <= 0 or file cannot be grown (i.e. reach
- // max_file_size_)
+ // mmapped_file_->max_file_size())
libtextclassifier3::StatusOr<MutableArrayView> Allocate(int32_t len);
// Resizes to first len elements. The crc is cleared on truncation and will be
@@ -447,21 +472,20 @@ class FileBackedVector {
explicit FileBackedVector(const Filesystem& filesystem,
const std::string& file_path,
std::unique_ptr<Header> header,
- std::unique_ptr<MemoryMappedFile> mmapped_file,
- int32_t max_file_size);
+ MemoryMappedFile&& mmapped_file);
// Initialize a new FileBackedVector, and create the file.
static libtextclassifier3::StatusOr<std::unique_ptr<FileBackedVector<T>>>
InitializeNewFile(const Filesystem& filesystem, const std::string& file_path,
ScopedFd fd, MemoryMappedFile::Strategy mmap_strategy,
- int32_t max_file_size);
+ int32_t max_file_size, int32_t pre_mapping_mmap_size);
// Initialize a FileBackedVector from an existing file.
static libtextclassifier3::StatusOr<std::unique_ptr<FileBackedVector<T>>>
InitializeExistingFile(const Filesystem& filesystem,
const std::string& file_path, ScopedFd fd,
MemoryMappedFile::Strategy mmap_strategy,
- int32_t max_file_size);
+ int32_t max_file_size, int32_t pre_mapping_mmap_size);
// Grows the underlying file to hold at least num_elements
//
@@ -490,17 +514,6 @@ class FileBackedVector {
// Buffer of the original elements that have been changed since the last crc
// update. Will be cleared if the size grows too big.
std::string saved_original_buffer_;
-
- // Max file size for FileBackedVector, default kMaxFileSize. Note that this
- // value won't be written into the header, so maximum file size will always be
- // specified in runtime and the caller should make sure its value is correct
- // and reasonable. Note that file size includes size of header + elements.
- //
- // The range should be in
- // [Header::kHeaderSize + kElementTypeSize, kMaxFileSize], and
- // (max_file_size_ - Header::kHeaderSize) / kElementTypeSize is max # of
- // elements that can be stored.
- int32_t max_file_size_;
};
template <typename T>
@@ -526,7 +539,8 @@ libtextclassifier3::StatusOr<std::unique_ptr<FileBackedVector<T>>>
FileBackedVector<T>::Create(const Filesystem& filesystem,
const std::string& file_path,
MemoryMappedFile::Strategy mmap_strategy,
- int32_t max_file_size) {
+ int32_t max_file_size,
+ int32_t pre_mapping_mmap_size) {
if (mmap_strategy == MemoryMappedFile::Strategy::READ_WRITE_MANUAL_SYNC) {
// FileBackedVector's behavior of growing the file underneath the mmap is
// inherently broken with MAP_PRIVATE. Growing the vector requires extending
@@ -567,10 +581,12 @@ FileBackedVector<T>::Create(const Filesystem& filesystem,
const bool new_file = file_size == 0;
if (new_file) {
return InitializeNewFile(filesystem, file_path, std::move(fd),
- mmap_strategy, max_file_size);
+ mmap_strategy, max_file_size,
+ pre_mapping_mmap_size);
}
return InitializeExistingFile(filesystem, file_path, std::move(fd),
- mmap_strategy, max_file_size);
+ mmap_strategy, max_file_size,
+ pre_mapping_mmap_size);
}
template <typename T>
@@ -579,7 +595,8 @@ FileBackedVector<T>::InitializeNewFile(const Filesystem& filesystem,
const std::string& file_path,
ScopedFd fd,
MemoryMappedFile::Strategy mmap_strategy,
- int32_t max_file_size) {
+ int32_t max_file_size,
+ int32_t pre_mapping_mmap_size) {
// Create header.
auto header = std::make_unique<Header>();
header->magic = FileBackedVector<T>::Header::kMagic;
@@ -594,17 +611,21 @@ FileBackedVector<T>::InitializeNewFile(const Filesystem& filesystem,
return absl_ports::InternalError("Failed to write header");
}
- // Constructor of MemoryMappedFile doesn't actually call mmap(), mmap()
- // happens on MemoryMappedFile::Remap(). So having a potentially unflushed fd
- // at this point shouldn't run into issues with a mmap of the same file. But
- // we'll close the fd just in case.
+ // Close the fd since constructor of MemoryMappedFile calls mmap() and we need
+ // to flush fd before mmap().
fd.reset();
- auto mmapped_file =
- std::make_unique<MemoryMappedFile>(filesystem, file_path, mmap_strategy);
- return std::unique_ptr<FileBackedVector<T>>(
- new FileBackedVector<T>(filesystem, file_path, std::move(header),
- std::move(mmapped_file), max_file_size));
+ ICING_ASSIGN_OR_RETURN(
+ MemoryMappedFile mmapped_file,
+ MemoryMappedFile::Create(filesystem, file_path, mmap_strategy,
+ max_file_size,
+ /*pre_mapping_file_offset=*/Header::kHeaderSize,
+ /*pre_mapping_mmap_size=*/
+ std::min(max_file_size - Header::kHeaderSize,
+ pre_mapping_mmap_size)));
+
+ return std::unique_ptr<FileBackedVector<T>>(new FileBackedVector<T>(
+ filesystem, file_path, std::move(header), std::move(mmapped_file)));
}
template <typename T>
@@ -612,7 +633,7 @@ libtextclassifier3::StatusOr<std::unique_ptr<FileBackedVector<T>>>
FileBackedVector<T>::InitializeExistingFile(
const Filesystem& filesystem, const std::string& file_path,
const ScopedFd fd, MemoryMappedFile::Strategy mmap_strategy,
- int32_t max_file_size) {
+ int32_t max_file_size, int32_t pre_mapping_mmap_size) {
int64_t file_size = filesystem.GetFileSize(file_path.c_str());
if (file_size == Filesystem::kBadFileSize) {
return absl_ports::InternalError(
@@ -662,14 +683,23 @@ FileBackedVector<T>::InitializeExistingFile(
// Mmap the content of the vector, excluding the header so its easier to
// access elements from the mmapped region
- auto mmapped_file =
- std::make_unique<MemoryMappedFile>(filesystem, file_path, mmap_strategy);
- ICING_RETURN_IF_ERROR(mmapped_file->Remap(Header::kHeaderSize,
- file_size - Header::kHeaderSize));
+ // Although users can specify their own pre_mapping_mmap_size, we should make
+ // sure that the pre-map size is at least file_size - Header::kHeaderSize to
+ // make all existing elements available.
+ ICING_ASSIGN_OR_RETURN(
+ MemoryMappedFile mmapped_file,
+ MemoryMappedFile::Create(
+ filesystem, file_path, mmap_strategy, max_file_size,
+ /*pre_mapping_file_offset=*/Header::kHeaderSize,
+ /*pre_mapping_mmap_size=*/
+ std::max(
+ file_size - Header::kHeaderSize,
+ static_cast<int64_t>(std::min(max_file_size - Header::kHeaderSize,
+ pre_mapping_mmap_size)))));
// Check vector contents
Crc32 vector_checksum(
- std::string_view(reinterpret_cast<const char*>(mmapped_file->region()),
+ std::string_view(reinterpret_cast<const char*>(mmapped_file.region()),
header->num_elements * kElementTypeSize));
if (vector_checksum.Get() != header->vector_checksum) {
@@ -677,9 +707,8 @@ FileBackedVector<T>::InitializeExistingFile(
absl_ports::StrCat("Invalid vector contents for ", file_path));
}
- return std::unique_ptr<FileBackedVector<T>>(
- new FileBackedVector<T>(filesystem, file_path, std::move(header),
- std::move(mmapped_file), max_file_size));
+ return std::unique_ptr<FileBackedVector<T>>(new FileBackedVector<T>(
+ filesystem, file_path, std::move(header), std::move(mmapped_file)));
}
template <typename T>
@@ -693,16 +722,16 @@ libtextclassifier3::Status FileBackedVector<T>::Delete(
}
template <typename T>
-FileBackedVector<T>::FileBackedVector(
- const Filesystem& filesystem, const std::string& file_path,
- std::unique_ptr<Header> header,
- std::unique_ptr<MemoryMappedFile> mmapped_file, int32_t max_file_size)
+FileBackedVector<T>::FileBackedVector(const Filesystem& filesystem,
+ const std::string& file_path,
+ std::unique_ptr<Header> header,
+ MemoryMappedFile&& mmapped_file)
: filesystem_(&filesystem),
file_path_(file_path),
header_(std::move(header)),
- mmapped_file_(std::move(mmapped_file)),
- changes_end_(header_->num_elements),
- max_file_size_(max_file_size) {}
+ mmapped_file_(
+ std::make_unique<MemoryMappedFile>(std::move(mmapped_file))),
+ changes_end_(header_->num_elements) {}
template <typename T>
FileBackedVector<T>::~FileBackedVector() {
@@ -831,8 +860,9 @@ FileBackedVector<T>::Allocate(int32_t len) {
}
// Although header_->num_elements + len doesn't exceed kMaxNumElements, the
- // actual max # of elements are determined by max_file_size, kElementTypeSize,
- // and kHeaderSize. Thus, it is still possible to fail to grow the file.
+ // actual max # of elements are determined by mmapped_file_->max_file_size(),
+ // kElementTypeSize, and kHeaderSize. Thus, it is still possible to fail to
+ // grow the file.
ICING_RETURN_IF_ERROR(GrowIfNecessary(header_->num_elements + len));
int32_t start_idx = header_->num_elements;
@@ -853,62 +883,35 @@ libtextclassifier3::Status FileBackedVector<T>::GrowIfNecessary(
return libtextclassifier3::Status::OK;
}
- if (num_elements >
- (max_file_size_ - Header::kHeaderSize) / kElementTypeSize) {
+ if (num_elements > (mmapped_file_->max_file_size() - Header::kHeaderSize) /
+ kElementTypeSize) {
return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
"%d elements total size exceed maximum bytes of elements allowed, "
"%d bytes",
- num_elements, max_file_size_ - Header::kHeaderSize));
+ num_elements, mmapped_file_->max_file_size() - Header::kHeaderSize));
}
- int32_t least_element_file_size_needed =
- num_elements * kElementTypeSize; // Won't overflow
- if (least_element_file_size_needed <= mmapped_file_->region_size()) {
- // Our mmapped region can hold the target num_elements cause we've grown
- // before
+ int32_t least_file_size_needed =
+ Header::kHeaderSize + num_elements * kElementTypeSize; // Won't overflow
+ if (least_file_size_needed <= mmapped_file_->available_size()) {
return libtextclassifier3::Status::OK;
}
- int32_t least_file_size_needed =
- Header::kHeaderSize + least_element_file_size_needed;
- // Otherwise, we need to grow. Grow to kGrowElements boundary.
- // Note that we need to use int64_t here, since int32_t might overflow after
- // round up.
int64_t round_up_file_size_needed = math_util::RoundUpTo(
int64_t{least_file_size_needed},
int64_t{FileBackedVector<T>::kGrowElements} * kElementTypeSize);
- least_file_size_needed =
- std::min(round_up_file_size_needed, int64_t{max_file_size_});
-
- // Get the actual file size here.
- int64_t current_file_size = filesystem_->GetFileSize(file_path_.c_str());
- if (current_file_size == Filesystem::kBadFileSize) {
- return absl_ports::InternalError("Unable to retrieve file size.");
- }
-
- // We use PWrite here rather than Grow because Grow doesn't actually allocate
- // an underlying disk block. This can lead to problems with mmap because mmap
- // has no effective way to signal that it was impossible to allocate the disk
- // block and ends up crashing instead. PWrite will force the allocation of
- // these blocks, which will ensure that any failure to grow will surface here.
- int64_t page_size = getpagesize();
- auto buf = std::make_unique<uint8_t[]>(page_size);
- int64_t size_to_write = std::min(page_size - (current_file_size % page_size),
- max_file_size_ - current_file_size);
- ScopedFd sfd(filesystem_->OpenForWrite(file_path_.c_str()));
- while (size_to_write > 0 && current_file_size < least_file_size_needed) {
- if (!filesystem_->PWrite(sfd.get(), current_file_size, buf.get(),
- size_to_write)) {
- return absl_ports::InternalError(
- absl_ports::StrCat("Couldn't grow file ", file_path_));
- }
- current_file_size += size_to_write;
- size_to_write = std::min(page_size - (current_file_size % page_size),
- max_file_size_ - current_file_size);
- }
- ICING_RETURN_IF_ERROR(mmapped_file_->Remap(
- Header::kHeaderSize, least_file_size_needed - Header::kHeaderSize));
+ // Call GrowAndRemapIfNecessary. It handles file growth internally and remaps
+ // intelligently.
+ // We've ensured that least_file_size_needed (for num_elements) doesn't exceed
+ // mmapped_file_->max_file_size(), but it is still possible that
+ // round_up_file_size_needed exceeds it, so use the smaller value of them as
+ // new_mmap_size.
+ ICING_RETURN_IF_ERROR(mmapped_file_->GrowAndRemapIfNecessary(
+ /*new_file_offset=*/Header::kHeaderSize,
+ /*new_mmap_size=*/std::min(round_up_file_size_needed,
+ mmapped_file_->max_file_size()) -
+ Header::kHeaderSize));
return libtextclassifier3::Status::OK;
}
diff --git a/icing/file/file-backed-vector_test.cc b/icing/file/file-backed-vector_test.cc
index 74e4132..c526dec 100644
--- a/icing/file/file-backed-vector_test.cc
+++ b/icing/file/file-backed-vector_test.cc
@@ -36,14 +36,12 @@
#include "icing/util/crc32.h"
#include "icing/util/logging.h"
-using ::testing::DoDefault;
using ::testing::ElementsAre;
using ::testing::Eq;
using ::testing::IsTrue;
using ::testing::Lt;
using ::testing::Not;
using ::testing::Pointee;
-using ::testing::Return;
using ::testing::SizeIs;
namespace icing {
@@ -1231,92 +1229,35 @@ TEST_F(FileBackedVectorTest, InitNormalSucceeds) {
}
}
-TEST_F(FileBackedVectorTest, RemapFailureStillValidInstance) {
- auto mock_filesystem = std::make_unique<MockFilesystem>();
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<FileBackedVector<int>> vector,
- FileBackedVector<int>::Create(
- *mock_filesystem, file_path_,
- MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
-
- // 1. Write data to just before the first block resize. Running the test
- // locally has determined that we'll first resize at 65531st entry.
- constexpr int kResizingIndex = 16378;
- for (int i = 0; i < kResizingIndex; ++i) {
- ICING_ASSERT_OK(vector->Set(i, 7));
+TEST_F(FileBackedVectorTest, InitFromExistingFileShouldPreMapAtLeastFileSize) {
+ {
+ // 1. Create a vector with a few elements.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> vector,
+ FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ FileBackedVector<char>::kMaxFileSize));
+ Insert(vector.get(), 10000, "A");
+ Insert(vector.get(), 10001, "Z");
+ ASSERT_THAT(vector->PersistToDisk(), IsOk());
}
- // 2. The next Set call should cause a resize and a remap. Make that remap
- // fail.
- int num_calls = 0;
- auto open_lambda = [this, &num_calls](const char* file_name) {
- if (++num_calls == 2) {
- return -1;
- }
- return this->filesystem().OpenForWrite(file_name);
- };
- ON_CALL(*mock_filesystem, OpenForWrite(_)).WillByDefault(open_lambda);
- EXPECT_THAT(vector->Set(kResizingIndex, 7),
- StatusIs(libtextclassifier3::StatusCode::INTERNAL));
-
- // 3. We should still be able to call set correctly for earlier regions.
- ICING_EXPECT_OK(vector->Set(kResizingIndex / 2, 9));
- EXPECT_THAT(vector->Get(kResizingIndex / 2), IsOkAndHolds(Pointee(Eq(9))));
-}
-
-TEST_F(FileBackedVectorTest, BadFileSizeDuringGrowReturnsError) {
- auto mock_filesystem = std::make_unique<MockFilesystem>();
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<FileBackedVector<int>> vector,
- FileBackedVector<int>::Create(
- *mock_filesystem, file_path_,
- MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
-
- // At first, the vector is empty and has no mapping established. The first Set
- // call will cause a Grow.
- // During Grow, we will attempt to check the underlying file size to see if
- // growing is actually necessary. Return an error on the call to GetFileSize.
- ON_CALL(*mock_filesystem, GetFileSize(A<const char*>()))
- .WillByDefault(Return(Filesystem::kBadFileSize));
-
- // We should fail gracefully and return an INTERNAL error to indicate that
- // there was an issue retrieving the file size.
- EXPECT_THAT(vector->Set(0, 7),
- StatusIs(libtextclassifier3::StatusCode::INTERNAL));
-}
-
-TEST_F(FileBackedVectorTest, PWriteFailsInTheSecondRound) {
- auto mock_filesystem = std::make_unique<MockFilesystem>();
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<FileBackedVector<int>> vector,
- FileBackedVector<int>::Create(
- *mock_filesystem, file_path_,
- MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
-
- // At first, the vector is empty and has no mapping established. The first Set
- // call will cause a Grow.
- // During Grow, we call PWrite for several rounds to grow the file. Mock
- // PWrite to succeed in the first round, fail in the second round, and succeed
- // in the rest rounds.
-
- // This unit test checks if we check file size and Remap properly. If the
- // first PWrite succeeds but the second PWrite fails, then the file size has
- // been grown, but there will be no Remap for the MemoryMappedFile. Then,
- // the next several Append() won't require file growth since the file size has
- // been grown, but it causes memory error because we haven't remapped.
- EXPECT_CALL(*mock_filesystem,
- PWrite(A<int>(), A<off_t>(), A<const void*>(), A<size_t>()))
- .WillOnce(DoDefault())
- .WillOnce(Return(false))
- .WillRepeatedly(DoDefault());
-
- EXPECT_THAT(vector->Append(7),
- StatusIs(libtextclassifier3::StatusCode::INTERNAL));
- EXPECT_THAT(vector->Get(/*idx=*/0),
- StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
-
- EXPECT_THAT(vector->Append(7), IsOk());
- EXPECT_THAT(vector->Get(/*idx=*/0), IsOkAndHolds(Pointee(Eq(7))));
+ {
+ // 2. Attempt to create the file with pre_mapping_mmap_size < file_size. It
+ // should still pre-map file_size, so we can pass the checksum
+ // verification when initializing and get the correct contents.
+ int64_t file_size = filesystem_.GetFileSize(file_path_.c_str());
+ int pre_mapping_mmap_size = 10;
+ ASSERT_THAT(pre_mapping_mmap_size, Lt(file_size));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> vector,
+ FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ FileBackedVector<char>::kMaxFileSize, pre_mapping_mmap_size));
+ EXPECT_THAT(Get(vector.get(), /*idx=*/10000, /*expected_len=*/2), Eq("AZ"));
+ }
}
} // namespace
diff --git a/icing/file/memory-mapped-file-leak_test.cc b/icing/file/memory-mapped-file-leak_test.cc
index 598fb61..ff031df 100644
--- a/icing/file/memory-mapped-file-leak_test.cc
+++ b/icing/file/memory-mapped-file-leak_test.cc
@@ -12,12 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "icing/file/memory-mapped-file.h"
-
#include "perftools/profiles/collector/heap/alloc_recorder.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "icing/file/filesystem.h"
+#include "icing/file/memory-mapped-file.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/recorder-test-utils.h"
#include "icing/testing/tmp-directory.h"
@@ -40,8 +39,15 @@ TEST(MemoryMappedFileTest, MMapMemoryLeak) {
{
std::string mmfile_dir = test_dir + "/file";
ASSERT_TRUE(filesystem.CreateDirectoryRecursively(mmfile_dir.c_str()));
- MemoryMappedFile mmfile(filesystem, mmfile_dir + "/mmfile",
- MemoryMappedFile::READ_WRITE_AUTO_SYNC);
+
+ // Don't use ICING_ASSERT_OK_AND_ASSIGN or matcher IsOk to prevent
+ // unnecessary implicit heap memory allocation in these macros.
+ libtextclassifier3::StatusOr<MemoryMappedFile> mmfile_or =
+ MemoryMappedFile::Create(filesystem, mmfile_dir + "/mmfile",
+ MemoryMappedFile::READ_WRITE_AUTO_SYNC);
+ ASSERT_TRUE(mmfile_or.ok());
+ MemoryMappedFile mmfile = std::move(mmfile_or).ValueOrDie();
+
// How this works:
// We request a 500-byte mapping starting at the 101st byte of the file.
// But(!), mmap only accepts offsets that are multiples of page size. So
diff --git a/icing/file/memory-mapped-file.cc b/icing/file/memory-mapped-file.cc
index fc13a79..43ed030 100644
--- a/icing/file/memory-mapped-file.cc
+++ b/icing/file/memory-mapped-file.cc
@@ -12,30 +12,96 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-// TODO(cassiewang) Add unit-tests to this class.
-
#include "icing/file/memory-mapped-file.h"
#include <sys/mman.h>
#include <cerrno>
+#include <cinttypes>
+#include <memory>
#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
#include "icing/file/filesystem.h"
#include "icing/legacy/core/icing-string-util.h"
#include "icing/util/math-util.h"
+#include "icing/util/status-macros.h"
namespace icing {
namespace lib {
+/* static */ libtextclassifier3::StatusOr<MemoryMappedFile>
+MemoryMappedFile::Create(const Filesystem& filesystem,
+ std::string_view file_path, Strategy mmap_strategy,
+ int64_t max_file_size) {
+ if (max_file_size <= 0 || max_file_size > kMaxFileSize) {
+ return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
+ "Invalid max file size %" PRId64 " for MemoryMappedFile",
+ max_file_size));
+ }
+
+ const std::string file_path_str(file_path);
+ int64_t file_size = filesystem.FileExists(file_path_str.c_str())
+ ? filesystem.GetFileSize(file_path_str.c_str())
+ : 0;
+ if (file_size == Filesystem::kBadFileSize) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Bad file size for file ", file_path));
+ }
+
+ return MemoryMappedFile(filesystem, file_path, mmap_strategy, max_file_size,
+ file_size);
+}
+
+/* static */ libtextclassifier3::StatusOr<MemoryMappedFile>
+MemoryMappedFile::Create(const Filesystem& filesystem,
+ std::string_view file_path, Strategy mmap_strategy,
+ int64_t max_file_size, int64_t pre_mapping_file_offset,
+ int64_t pre_mapping_mmap_size) {
+ if (max_file_size <= 0 || max_file_size > kMaxFileSize) {
+ return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
+ "Invalid max file size %" PRId64 " for MemoryMappedFile",
+ max_file_size));
+ }
+
+ // We need at least pre_mapping_file_offset + pre_mapping_mmap_size bytes for
+ // the underlying file size, so max_file_size should be at least
+ // pre_mapping_file_offset + pre_mapping_mmap_size. Safe integer check.
+ if (pre_mapping_file_offset < 0 || pre_mapping_mmap_size < 0 ||
+ pre_mapping_file_offset > max_file_size - pre_mapping_mmap_size) {
+ return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
+ "Invalid pre-mapping file offset %" PRId64 " and mmap size %" PRId64
+ " with max file size %" PRId64 "for MemoryMappedFile",
+ pre_mapping_file_offset, pre_mapping_mmap_size, max_file_size));
+ }
+
+ ICING_ASSIGN_OR_RETURN(
+ MemoryMappedFile mmapped_file,
+ Create(filesystem, file_path, mmap_strategy, max_file_size));
+
+ if (pre_mapping_mmap_size > 0) {
+ ICING_RETURN_IF_ERROR(
+ mmapped_file.RemapImpl(pre_mapping_file_offset, pre_mapping_mmap_size));
+ }
+
+ return std::move(mmapped_file);
+}
+
MemoryMappedFile::MemoryMappedFile(const Filesystem& filesystem,
- const std::string_view file_path,
- Strategy mmap_strategy)
+ std::string_view file_path,
+ Strategy mmap_strategy,
+ int64_t max_file_size, int64_t file_size)
: filesystem_(&filesystem),
file_path_(file_path),
- strategy_(mmap_strategy) {}
+ strategy_(mmap_strategy),
+ max_file_size_(max_file_size),
+ file_size_(file_size),
+ mmap_result_(nullptr),
+ file_offset_(0),
+ mmap_size_(0),
+ alignment_adjustment_(0) {}
MemoryMappedFile::MemoryMappedFile(MemoryMappedFile&& other)
// Make sure that mmap_result_ is a nullptr before we call Swap. We don't
@@ -58,82 +124,44 @@ MemoryMappedFile::~MemoryMappedFile() { Unmap(); }
void MemoryMappedFile::MemoryMappedFile::Unmap() {
if (mmap_result_ != nullptr) {
- munmap(mmap_result_, adjusted_mmap_size_);
+ munmap(mmap_result_, adjusted_mmap_size());
mmap_result_ = nullptr;
}
file_offset_ = 0;
- region_ = nullptr;
- region_size_ = 0;
- adjusted_mmap_size_ = 0;
+ mmap_size_ = 0;
+ alignment_adjustment_ = 0;
}
-libtextclassifier3::Status MemoryMappedFile::Remap(size_t file_offset,
- size_t mmap_size) {
- if (mmap_size == 0) {
- // First unmap any previously mmapped region.
- Unmap();
- return libtextclassifier3::Status::OK;
- }
-
- size_t aligned_offset =
- math_util::RoundDownTo(file_offset, system_page_size());
- size_t alignment_adjustment = file_offset - aligned_offset;
- size_t adjusted_mmap_size = alignment_adjustment + mmap_size;
+libtextclassifier3::Status MemoryMappedFile::Remap(int64_t file_offset,
+ int64_t mmap_size) {
+ return RemapImpl(file_offset, mmap_size);
+}
- int mmap_flags = 0;
- // Determines if the mapped region should just be readable or also writable.
- int protection_flags = 0;
- ScopedFd fd;
- switch (strategy_) {
- case Strategy::READ_ONLY: {
- mmap_flags = MAP_PRIVATE;
- protection_flags = PROT_READ;
- fd.reset(filesystem_->OpenForRead(file_path_.c_str()));
- break;
- }
- case Strategy::READ_WRITE_AUTO_SYNC: {
- mmap_flags = MAP_SHARED;
- protection_flags = PROT_READ | PROT_WRITE;
- fd.reset(filesystem_->OpenForWrite(file_path_.c_str()));
- break;
- }
- case Strategy::READ_WRITE_MANUAL_SYNC: {
- mmap_flags = MAP_PRIVATE;
- protection_flags = PROT_READ | PROT_WRITE;
- // TODO(cassiewang) MAP_PRIVATE effectively makes it a read-only file.
- // figure out if we can open this file in read-only mode.
- fd.reset(filesystem_->OpenForWrite(file_path_.c_str()));
- break;
- }
- default:
- return absl_ports::UnknownError(IcingStringUtil::StringPrintf(
- "Invalid value in switch statement: %d", strategy_));
+libtextclassifier3::Status MemoryMappedFile::GrowAndRemapIfNecessary(
+ int64_t new_file_offset, int64_t new_mmap_size) {
+ // We need at least new_file_offset + new_mmap_size bytes for the underlying
+ // file size, and it should not exceed max_file_size_. Safe integer check.
+ if (new_file_offset < 0 || new_mmap_size < 0 ||
+ new_file_offset > max_file_size_ - new_mmap_size) {
+ return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
+ "Invalid new file offset %" PRId64 " and new mmap size %" PRId64
+ " with max file size %" PRId64 "for MemoryMappedFile",
+ new_file_offset, new_mmap_size, max_file_size_));
}
- if (!fd.is_valid()) {
- return absl_ports::InternalError(absl_ports::StrCat(
- "Unable to open file meant to be mmapped: ", file_path_));
+ if (new_mmap_size == 0) {
+ // Unmap any previously mmapped region.
+ Unmap();
+ return libtextclassifier3::Status::OK;
}
- void* mmap_result = mmap(nullptr, adjusted_mmap_size, protection_flags,
- mmap_flags, fd.get(), aligned_offset);
+ ICING_RETURN_IF_ERROR(GrowFileSize(new_file_offset + new_mmap_size));
- if (mmap_result == MAP_FAILED) {
- mmap_result = nullptr;
- return absl_ports::InternalError(absl_ports::StrCat(
- "Failed to mmap region due to error: ", strerror(errno)));
+ if (new_file_offset != file_offset_ || new_mmap_size > mmap_size_) {
+ ICING_RETURN_IF_ERROR(RemapImpl(new_file_offset, new_mmap_size));
}
- // Now we know that we have successfully created a new mapping. We can free
- // the old one and switch to the new one.
- Unmap();
-
- mmap_result_ = mmap_result;
- file_offset_ = file_offset;
- region_ = reinterpret_cast<char*>(mmap_result_) + alignment_adjustment;
- region_size_ = mmap_size;
- adjusted_mmap_size_ = adjusted_mmap_size;
return libtextclassifier3::Status::OK;
}
@@ -143,13 +171,27 @@ libtextclassifier3::Status MemoryMappedFile::PersistToDisk() {
"Attempting to PersistToDisk on a read-only file: ", file_path_));
}
- if (region_ == nullptr) {
+ if (mmap_result_ == nullptr) {
// Nothing mapped to sync.
return libtextclassifier3::Status::OK;
}
+ // Sync actual file size via system call.
+ int64_t actual_file_size = filesystem_->GetFileSize(file_path_.c_str());
+ if (actual_file_size == Filesystem::kBadFileSize) {
+ return absl_ports::InternalError("Unable to retrieve file size");
+ }
+ file_size_ = actual_file_size;
+
if (strategy_ == Strategy::READ_WRITE_AUTO_SYNC &&
- msync(mmap_result_, adjusted_mmap_size_, MS_SYNC) != 0) {
+ // adjusted_mmap_size(), which is the mmap size after alignment
+ // adjustment, may be larger than the actual underlying file size since we
+ // can pre-mmap a large memory region before growing the file. Therefore,
+ // we should std::min with file_size_ - adjusted_offset() as the msync
+ // size.
+ msync(mmap_result_,
+ std::min(file_size_ - adjusted_offset(), adjusted_mmap_size()),
+ MS_SYNC) != 0) {
return absl_ports::InternalError(
absl_ports::StrCat("Unable to sync file using msync(): ", file_path_));
}
@@ -159,7 +201,13 @@ libtextclassifier3::Status MemoryMappedFile::PersistToDisk() {
// can't be synced using msync(). So, we have to directly write to the
// underlying file to update it.
if (strategy_ == Strategy::READ_WRITE_MANUAL_SYNC &&
- !filesystem_->PWrite(file_path_.c_str(), 0, region(), region_size())) {
+ // Contents before file_offset_ won't be modified by the caller, so we
+ // only need to PWrite contents starting at file_offset_. mmap_size_ may
+ // be larger than the actual underlying file size since we can pre-mmap a
+ // large memory before growing the file. Therefore, we should std::min
+ // with file_size_ - file_offset_ as the PWrite size.
+ !filesystem_->PWrite(file_path_.c_str(), file_offset_, region(),
+ std::min(mmap_size_, file_size_ - file_offset_))) {
return absl_ports::InternalError(
absl_ports::StrCat("Unable to sync file using PWrite(): ", file_path_));
}
@@ -180,22 +228,161 @@ libtextclassifier3::Status MemoryMappedFile::OptimizeFor(
madvise_flag = MADV_SEQUENTIAL;
}
- if (madvise(mmap_result_, adjusted_mmap_size_, madvise_flag) != 0) {
+ if (madvise(mmap_result_, adjusted_mmap_size(), madvise_flag) != 0) {
return absl_ports::InternalError(absl_ports::StrCat(
"Unable to madvise file ", file_path_, "; Error: ", strerror(errno)));
}
return libtextclassifier3::Status::OK;
}
+libtextclassifier3::Status MemoryMappedFile::GrowFileSize(
+ int64_t new_file_size) {
+ // Early return if new_file_size doesn't exceed the cached file size
+ // (file_size_). It saves a system call for getting the actual file size and
+ // reduces latency significantly.
+ if (new_file_size <= file_size_) {
+ return libtextclassifier3::Status::OK;
+ }
+
+ if (new_file_size > max_file_size_) {
+ return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
+ "new file size %" PRId64 " exceeds maximum file size allowed, %" PRId64
+ " bytes",
+ new_file_size, max_file_size_));
+ }
+
+ // Sync actual file size via system call.
+ int64_t actual_file_size = filesystem_->GetFileSize(file_path_.c_str());
+ if (actual_file_size == Filesystem::kBadFileSize) {
+ return absl_ports::InternalError("Unable to retrieve file size");
+ }
+ file_size_ = actual_file_size;
+
+ // Early return again if new_file_size doesn't exceed actual_file_size. It
+ // saves system calls for opening and closing file descriptor.
+ if (new_file_size <= actual_file_size) {
+ return libtextclassifier3::Status::OK;
+ }
+
+ if (strategy_ == Strategy::READ_ONLY) {
+ return absl_ports::FailedPreconditionError(absl_ports::StrCat(
+ "Attempting to grow a read-only file: ", file_path_));
+ }
+
+ // We use Write here rather than Grow because Grow doesn't actually allocate
+ // an underlying disk block. This can lead to problems with mmap because mmap
+ // has no effective way to signal that it was impossible to allocate the disk
+ // block and ends up crashing instead. Write will force the allocation of
+ // these blocks, which will ensure that any failure to grow will surface here.
+ int64_t page_size = system_page_size();
+ auto buf = std::make_unique<uint8_t[]>(page_size);
+ int64_t size_to_write = std::min(page_size - (file_size_ % page_size),
+ new_file_size - file_size_);
+ ScopedFd sfd(filesystem_->OpenForAppend(file_path_.c_str()));
+ if (!sfd.is_valid()) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Couldn't open file ", file_path_));
+ }
+ while (size_to_write > 0 && file_size_ < new_file_size) {
+ if (!filesystem_->Write(sfd.get(), buf.get(), size_to_write)) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Couldn't grow file ", file_path_));
+ }
+ file_size_ += size_to_write;
+ size_to_write = std::min(page_size - (file_size_ % page_size),
+ new_file_size - file_size_);
+ }
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status MemoryMappedFile::RemapImpl(int64_t new_file_offset,
+ int64_t new_mmap_size) {
+ if (new_file_offset < 0) {
+ return absl_ports::OutOfRangeError("Invalid file offset");
+ }
+
+ if (new_mmap_size < 0) {
+ return absl_ports::OutOfRangeError("Invalid mmap size");
+ }
+
+ if (new_mmap_size == 0) {
+ // First unmap any previously mmapped region.
+ Unmap();
+ return libtextclassifier3::Status::OK;
+ }
+
+ int64_t new_aligned_offset =
+ math_util::RoundDownTo(new_file_offset, system_page_size());
+ int64_t new_alignment_adjustment = new_file_offset - new_aligned_offset;
+ int64_t new_adjusted_mmap_size = new_alignment_adjustment + new_mmap_size;
+
+ int mmap_flags = 0;
+ // Determines if the mapped region should just be readable or also writable.
+ int protection_flags = 0;
+ ScopedFd fd;
+ switch (strategy_) {
+ case Strategy::READ_ONLY: {
+ mmap_flags = MAP_PRIVATE;
+ protection_flags = PROT_READ;
+ fd.reset(filesystem_->OpenForRead(file_path_.c_str()));
+ break;
+ }
+ case Strategy::READ_WRITE_AUTO_SYNC: {
+ mmap_flags = MAP_SHARED;
+ protection_flags = PROT_READ | PROT_WRITE;
+ fd.reset(filesystem_->OpenForWrite(file_path_.c_str()));
+ break;
+ }
+ case Strategy::READ_WRITE_MANUAL_SYNC: {
+ mmap_flags = MAP_PRIVATE;
+ protection_flags = PROT_READ | PROT_WRITE;
+ // TODO(cassiewang) MAP_PRIVATE effectively makes it a read-only file.
+ // figure out if we can open this file in read-only mode.
+ fd.reset(filesystem_->OpenForWrite(file_path_.c_str()));
+ break;
+ }
+ default:
+ return absl_ports::UnknownError(IcingStringUtil::StringPrintf(
+ "Invalid value in switch statement: %d", strategy_));
+ }
+
+ if (!fd.is_valid()) {
+ return absl_ports::InternalError(absl_ports::StrCat(
+ "Unable to open file meant to be mmapped: ", file_path_));
+ }
+
+ void* new_mmap_result =
+ mmap(nullptr, new_adjusted_mmap_size, protection_flags, mmap_flags,
+ fd.get(), new_aligned_offset);
+
+ if (new_mmap_result == MAP_FAILED) {
+ new_mmap_result = nullptr;
+ return absl_ports::InternalError(absl_ports::StrCat(
+ "Failed to mmap region due to error: ", strerror(errno)));
+ }
+
+ // Now we know that we have successfully created a new mapping. We can free
+ // the old one and switch to the new one.
+ Unmap();
+
+ mmap_result_ = new_mmap_result;
+ file_offset_ = new_file_offset;
+ mmap_size_ = new_mmap_size;
+ alignment_adjustment_ = new_alignment_adjustment;
+ return libtextclassifier3::Status::OK;
+}
+
void MemoryMappedFile::Swap(MemoryMappedFile* other) {
std::swap(filesystem_, other->filesystem_);
std::swap(file_path_, other->file_path_);
std::swap(strategy_, other->strategy_);
- std::swap(file_offset_, other->file_offset_);
- std::swap(region_, other->region_);
- std::swap(region_size_, other->region_size_);
- std::swap(adjusted_mmap_size_, other->adjusted_mmap_size_);
+ std::swap(max_file_size_, other->max_file_size_);
+ std::swap(file_size_, other->file_size_);
std::swap(mmap_result_, other->mmap_result_);
+ std::swap(file_offset_, other->file_offset_);
+ std::swap(mmap_size_, other->mmap_size_);
+ std::swap(alignment_adjustment_, other->alignment_adjustment_);
}
} // namespace lib
diff --git a/icing/file/memory-mapped-file.h b/icing/file/memory-mapped-file.h
index 1d31a42..54507af 100644
--- a/icing/file/memory-mapped-file.h
+++ b/icing/file/memory-mapped-file.h
@@ -21,18 +21,54 @@
// faster reads as well as background-sync vs manual-sync of changes to disk.
// For more details, see comments at MemoryMappedFile::Strategy.
//
-// Usage:
+// ** Usage 1: pre-mmap large memory and grow the underlying file internally **
//
-// MemoryMappedFile mmapped_file(filesystem, "/file.pb", READ_WRITE_AUTO_SYNC));
-// mmapped_file->Remap(0, 16* 1024); // load the first 16K of the file.
+// // Create MemoryMappedFile instance.
+// ICING_ASSIGN_OR_RETURN(
+// std::unique_ptr<MemoryMappedFile> mmapped_file,
+// MemoryMappedFile::Create(filesystem, "/file.pb",
+// READ_WRITE_AUTO_SYNC,
+// max_file_size,
+// /*pre_mapping_file_offset=*/0,
+// /*pre_mapping_mmap_size=*/1024 * 1024));
//
+// // Found that we need 4K bytes for the file and mmapped region.
+// mmapped_file->GrowAndRemapIfNecessary(
+// /*new_file_offset=*/0, /*new_mmap_size=*/4 * 1024);
+// char read_byte = mmapped_file->region()[4000];
+// mmapped_file->mutable_region()[4001] = write_byte;
+//
+// mmapped_file->PersistToDisk(); // Optional; immediately writes changes to
+// disk.
+//
+// // Found that we need 2048 * 1024 bytes for the file and mmapped region.
+// mmapped_file->GrowAndRemapIfNecessary(
+// /*new_file_offset=*/0, /*new_mmap_size=*/2048 * 1024);
+// mmapped_file->mutable_region()[2000 * 1024] = write_byte;
+// mmapped_file.reset();
+//
+// ** Usage 2: load by segments **
+//
+// ICING_ASSIGN_OR_RETURN(
+// std::unique_ptr<MemoryMappedFile> mmapped_file,
+// MemoryMappedFile::Create(filesystem, "/file.pb",
+// READ_WRITE_AUTO_SYNC,
+// max_file_size,
+// /*pre_mapping_file_offset=*/0,
+// /*pre_mapping_mmap_size=*/16 * 1024));
+//
+// // load the first 16K.
+// mmapped_file->GrowAndRemapIfNecessary(
+// /*new_file_offset=*/0, /*new_mmap_size=*/16 * 1024);
// char read_byte = mmapped_file->region()[100];
// mmapped_file->mutable_region()[10] = write_byte;
//
// mmapped_file->PersistToDisk(); // Optional; immediately writes changes to
// disk.
//
-// mmapped_file->Remap(16*1024, 16* 1024); // load the next 16K.
+// // load the next 16K.
+// mmapped_file->GrowAndRemapIfNecessary(
+// /*new_file_offset=*/16 * 1024, /*new_mmap_size=*/16 * 1024);
// mmapped_file->mutable_region()[10] = write_byte;
// mmapped_file.reset();
@@ -41,12 +77,14 @@
#include <unistd.h>
+#include <algorithm>
#include <cstdint>
#include <memory>
#include <string>
#include <string_view>
#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/file/filesystem.h"
namespace icing {
@@ -54,8 +92,9 @@ namespace lib {
class MemoryMappedFile {
public:
- static size_t __attribute__((const)) system_page_size() {
- static const size_t page_size = sysconf(_SC_PAGE_SIZE);
+ static int64_t __attribute__((const)) system_page_size() {
+ static const int64_t page_size =
+ static_cast<int64_t>(sysconf(_SC_PAGE_SIZE));
return page_size;
}
@@ -71,26 +110,95 @@ class MemoryMappedFile {
// Memory map a read-write file into a writable memory region. Changes made
// to this region will never be auto-synced to the underlying file. Unless
// the caller explicitly calls PersistToDisk(), all changes will be lost
- // when the
- // MemoryMappedFile is destroyed.
+ // when the MemoryMappedFile is destroyed.
READ_WRITE_MANUAL_SYNC,
};
- // file_path : Full path of the file that needs to be memory-mapped.
- MemoryMappedFile(const Filesystem& filesystem, std::string_view file_path,
- Strategy mmap_strategy);
+ // Absolute max file size, 16 GiB.
+ static constexpr int64_t kMaxFileSize = INT64_C(1) << 34;
+
+ // Default max file size, 1 MiB.
+ static constexpr int64_t kDefaultMaxFileSize = INT64_C(1) << 20;
+
+ // Creates a new MemoryMappedFile to read/write content to.
+ //
+ // filesystem : Object to make system level calls
+ // file_path : Full path of the file that needs to be memory-mapped.
+ // mmap_strategy : Strategy/optimizations to access the content.
+ // max_file_size : Maximum file size for MemoryMappedFile, default
+ // kDefaultMaxFileSize.
+ //
+ // Returns:
+ // A MemoryMappedFile instance on success
+ // OUT_OF_RANGE_ERROR if max_file_size is invalid
+ // INTERNAL_ERROR on I/O error
+ static libtextclassifier3::StatusOr<MemoryMappedFile> Create(
+ const Filesystem& filesystem, std::string_view file_path,
+ Strategy mmap_strategy, int64_t max_file_size = kDefaultMaxFileSize);
+
+ // Creates a new MemoryMappedFile to read/write content to. It remaps when
+ // creating the instance, but doesn't check or grow the actual file size, so
+ // the caller should call GrowAndRemapIfNecessary before accessing region.
+ //
+ // filesystem : Object to make system level calls
+ // file_path : Full path of the file that needs to be memory-mapped.
+ // mmap_strategy : Strategy/optimizations to access the content.
+ // max_file_size : Maximum file size for MemoryMappedFile.
+ // pre_mapping_file_offset : The offset of the file to be memory mapped.
+ // pre_mapping_mmap_size : mmap size for pre-mapping.
+ //
+ // Returns:
+ // A MemoryMappedFile instance on success
+ // OUT_OF_RANGE_ERROR if max_file_size, file_offset, or mmap_size is invalid
+ // INTERNAL_ERROR on I/O error
+ static libtextclassifier3::StatusOr<MemoryMappedFile> Create(
+ const Filesystem& filesystem, std::string_view file_path,
+ Strategy mmap_strategy, int64_t max_file_size,
+ int64_t pre_mapping_file_offset, int64_t pre_mapping_mmap_size);
+
+ // Delete copy constructor and assignment operator.
MemoryMappedFile(const MemoryMappedFile& other) = delete;
- MemoryMappedFile(MemoryMappedFile&& other);
MemoryMappedFile& operator=(const MemoryMappedFile& other) = delete;
+
+ MemoryMappedFile(MemoryMappedFile&& other);
MemoryMappedFile& operator=(MemoryMappedFile&& other);
+
// Frees any region that is still memory-mapped region.
~MemoryMappedFile();
+ // TODO(b/247671531): migrate all callers to use GrowAndRemapIfNecessary and
+ // deprecate this API.
+ //
// Memory-map the newly specified region within the file specified by
// file_offset and mmap_size. Unmaps any previously mmapped region.
+ // It doesn't handle the underlying file growth.
//
// Returns any encountered IO error.
- libtextclassifier3::Status Remap(size_t file_offset, size_t mmap_size);
+ libtextclassifier3::Status Remap(int64_t file_offset, int64_t mmap_size);
+
+ // Attempt to memory-map the newly specified region within the file specified
+ // by new_file_offset and new_mmap_size. It handles mmap and file growth
+ // intelligently.
+ // - Compute least file size needed according to new_file_offset and
+ // new_mmap_size, and compare with the current file size. If requiring file
+ // growth, then grow the underlying file (Write) or return error if
+ // strategy_ is READ_ONLY.
+ // - If new_file_offset is different from the current file_offset_ or
+ // new_mmap_size is greater than the current mmap_size_, then memory-map
+ // the newly specified region and unmap any previously mmapped region.
+ //
+ // This API is useful for file growth since it grows the underlying file
+ // internally and handles remapping intelligently. By pre-mmapping a large
+ // memory, we only need to grow the underlying file (Write) without remapping
+ // in each round of growth, which significantly reduces the cost of system
+ // call and memory paging after remap.
+ //
+ // Returns:
+ // OK on success
+ // OUT_OF_RANGE_ERROR if new_file_offset and new_mmap_size is invalid
+ // Any error from GrowFileSize() and RemapImpl()
+ libtextclassifier3::Status GrowAndRemapIfNecessary(int64_t new_file_offset,
+ int64_t new_mmap_size);
// unmap and free-up the region that has currently been memory mapped.
void Unmap();
@@ -129,35 +237,147 @@ class MemoryMappedFile {
};
libtextclassifier3::Status OptimizeFor(AccessPattern access_pattern);
+ Strategy strategy() const { return strategy_; }
+
+ int64_t max_file_size() const { return max_file_size_; }
+
// Accessors to the memory-mapped region. Returns null if nothing is mapped.
- const char* region() const { return region_; }
- char* mutable_region() { return region_; }
+ const char* region() const {
+ return reinterpret_cast<const char*>(mmap_result_) + alignment_adjustment_;
+ }
+ char* mutable_region() {
+ return reinterpret_cast<char*>(mmap_result_) + alignment_adjustment_;
+ }
- size_t region_size() const { return region_size_; }
- Strategy strategy() const { return strategy_; }
+ int64_t file_offset() const { return file_offset_; }
+
+ // TODO(b/247671531): remove this API after migrating all callers to use
+ // GrowAndRemapIfNecessary.
+ int64_t region_size() const { return mmap_size_; }
+
+ // The size that is safe for the client to read/write. This is only valid for
+ // callers that use GrowAndRemapIfNecessary.
+ int64_t available_size() const {
+ return std::min(mmap_size_,
+ std::max(INT64_C(0), file_size_ - file_offset_));
+ }
private:
+ explicit MemoryMappedFile(const Filesystem& filesystem,
+ std::string_view file_path, Strategy mmap_strategy,
+ int64_t max_file_size, int64_t file_size);
+
+ // Grow the underlying file to new_file_size.
+ // Note: it is possible that Write() (implemented in the file system call
+ // library) grows the underlying file partially and returns error due to
+ // failures, so the cached file_size_ may contain out-of-date value, but it is
+ // still guaranteed that file_size_ is always smaller or equal to the actual
+ // file size. In the next round of growing:
+ // - If new_file_size is not greater than file_size_, then we're still
+ // confident that the actual file size is large enough and therefore skip
+ // the grow process.
+ // - If new_file_size is greater than file_size_, then we will invoke the
+ // system call to sync the actual file size. At this moment, file_size_ is
+ // the actual file size and therefore we can grow the underlying file size
+ // correctly.
+ //
+ // Returns:
+ // OK on success
+ // FAILED_PRECONDITION_ERROR if requiring file growth and strategy_ is
+ // READ_ONLY
+ // OUT_OF_RANGE_ERROR if new_mmap_size exceeds max_file_size_
+ // INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status GrowFileSize(int64_t new_file_size);
+
+ // Memory-map the newly specified region within the file specified by
+ // new_file_offset and new_mmap_size. Unmaps any previously mmapped region.
+ // It doesn't handle the underlying file growth.
+ //
+ // Returns:
+ // OK on success
+ // OUT_OF_RANGE_ERROR if new_file_offset and new_mmap_size is invalid
+ // INTERNAL_ERROR on I/O error
+ libtextclassifier3::Status RemapImpl(int64_t new_file_offset,
+ int64_t new_mmap_size);
+
// Swaps the contents of this with other.
void Swap(MemoryMappedFile* other);
+ int64_t adjusted_offset() const {
+ return file_offset_ - alignment_adjustment_;
+ }
+
+ int64_t adjusted_mmap_size() const {
+ return alignment_adjustment_ + mmap_size_;
+ }
+
// Cached constructor params.
const Filesystem* filesystem_;
std::string file_path_;
Strategy strategy_;
- // Offset within the file at which the current memory-mapped region starts.
- size_t file_offset_ = 0;
+ // Raw file related fields:
+ // - max_file_size_
+ // - file_size_
+
+ // Max file size for MemoryMappedFile. It should not exceed the absolute max
+ // size of memory mapped file (kMaxFileSize). It is only used in
+ // GrowAndRemapIfNecessary(), the new API that handles underlying file growth
+ // internally and remaps intelligently.
+ //
+ // Note: max_file_size_ will be specified in runtime and the caller should
+ // make sure its value is correct and reasonable.
+ int64_t max_file_size_;
- // Region that is currently memory-mapped.
- char* region_ = nullptr;
- size_t region_size_ = 0;
+ // Cached file size to avoid calling system call too frequently. It is only
+ // used in GrowAndRemapIfNecessary(), the new API that handles underlying file
+ // growth internally and remaps intelligently.
+ //
+ // Note: it is guaranteed that file_size_ is smaller or equal to the actual
+ // file size as long as the underlying file hasn't been truncated or deleted
+ // externally. See GrowFileSize() for more details.
+ int64_t file_size_;
- // The actual size of the region we mmapped. As the requested region might not
- // align with system pages, we often mmap more bytes than requested.
- size_t adjusted_mmap_size_ = 0;
+ // Memory mapped related fields:
+ // - mmap_result_
+ // - file_offset_
+ // - alignment_adjustment_
+ // - mmap_size_
// Raw pointer (or error) returned by calls to mmap().
- void* mmap_result_ = nullptr;
+ void* mmap_result_;
+
+ // Offset within the file at which the current memory-mapped region starts.
+ int64_t file_offset_;
+
+ // Size that is currently memory-mapped.
+ // Note that the mmapped size can be larger than the underlying file size. We
+ // can reduce remapping by pre-mmapping a large memory and grow the file size
+ // later. See GrowAndRemapIfNecessary().
+ int64_t mmap_size_;
+
+ // The difference between file_offset_ and the actual adjusted (aligned)
+ // offset.
+ // Since mmap requires the offset to be a multiple of system page size, we
+ // have to align file_offset_ to the last multiple of system page size.
+ int64_t alignment_adjustment_;
+
+ // E.g. system_page_size = 5, RemapImpl(/*new_file_offset=*/8, mmap_size)
+ //
+ // File layout: xxxxx xxxxx xxxxx xxxxx xxxxx xx
+ // file_offset_: 8
+ // adjusted_offset(): 5
+ // region()/mutable_region(): |
+ // mmap_result_: |
+ //
+ // alignment_adjustment_: file_offset_ - adjusted_offset()
+ // mmap_size_: mmap_size
+ // region_size(): mmap_size_
+ // available_size(): std::min(mmap_size_,
+ // std::max(0, file_size_ - file_offset_))
+ // region_range: [file_offset_, file_offset + mmap_size)
+ // adjusted_mmap_size(): alignment_adjustment_ + mmap_size_
+ // adjusted_mmap_range: [alignment_offset, file_offset + mmap_size)
};
} // namespace lib
diff --git a/icing/file/memory-mapped-file_test.cc b/icing/file/memory-mapped-file_test.cc
new file mode 100644
index 0000000..16f76e6
--- /dev/null
+++ b/icing/file/memory-mapped-file_test.cc
@@ -0,0 +1,668 @@
+// Copyright (C) 2022 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/file/memory-mapped-file.h"
+
+#include <cstdint>
+#include <limits>
+#include <string>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/mock-filesystem.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+using ::testing::DoDefault;
+using ::testing::Eq;
+using ::testing::Gt;
+using ::testing::IsNull;
+using ::testing::Le;
+using ::testing::Not;
+using ::testing::NotNull;
+using ::testing::Return;
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+class MemoryMappedFileTest : public ::testing::Test {
+ protected:
+ void SetUp() override { file_path_ = GetTestTempDir() + "/mmap_test_file"; }
+
+ void TearDown() override { filesystem_.DeleteFile(file_path_.c_str()); }
+
+ const Filesystem& filesystem() const { return filesystem_; }
+
+ Filesystem filesystem_;
+ std::string file_path_;
+};
+
+TEST_F(MemoryMappedFileTest, Create) {
+ constexpr int max_file_size = 8192;
+ MemoryMappedFile::Strategy stragegy =
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC;
+ // Create MemoryMappedFile
+ ICING_ASSERT_OK_AND_ASSIGN(MemoryMappedFile mmapped_file,
+ MemoryMappedFile::Create(filesystem_, file_path_,
+ stragegy, max_file_size));
+
+ EXPECT_THAT(mmapped_file.strategy(), Eq(stragegy));
+ EXPECT_THAT(mmapped_file.max_file_size(), Eq(max_file_size));
+ EXPECT_THAT(mmapped_file.region(), IsNull());
+ EXPECT_THAT(mmapped_file.mutable_region(), IsNull());
+ EXPECT_THAT(mmapped_file.file_offset(), Eq(0));
+ EXPECT_THAT(mmapped_file.region_size(), Eq(0));
+ EXPECT_THAT(mmapped_file.available_size(), Eq(0));
+}
+
+TEST_F(MemoryMappedFileTest, CreateFromExistingFile) {
+ int init_file_size = 100;
+ {
+ // Initialize file
+ ScopedFd sfd(filesystem_.OpenForWrite(file_path_.c_str()));
+ ASSERT_TRUE(sfd.is_valid());
+ auto buf = std::make_unique<char[]>(init_file_size);
+ ASSERT_TRUE(filesystem_.Write(sfd.get(), buf.get(), init_file_size));
+ }
+
+ constexpr int max_file_size = 8192;
+ MemoryMappedFile::Strategy stragegy =
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC;
+ // Create MemoryMappedFile from an existing file
+ ICING_ASSERT_OK_AND_ASSIGN(MemoryMappedFile mmapped_file,
+ MemoryMappedFile::Create(filesystem_, file_path_,
+ stragegy, max_file_size));
+
+ EXPECT_THAT(mmapped_file.strategy(), Eq(stragegy));
+ EXPECT_THAT(mmapped_file.max_file_size(), Eq(max_file_size));
+ EXPECT_THAT(mmapped_file.region(), IsNull());
+ EXPECT_THAT(mmapped_file.mutable_region(), IsNull());
+ EXPECT_THAT(mmapped_file.file_offset(), Eq(0));
+ EXPECT_THAT(mmapped_file.region_size(), Eq(0));
+ EXPECT_THAT(mmapped_file.available_size(), Eq(0));
+}
+
+TEST_F(MemoryMappedFileTest, CreateWithInvalidMaxFileSize) {
+ EXPECT_THAT(
+ MemoryMappedFile::Create(filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ /*max_file_size=*/-1),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+ EXPECT_THAT(MemoryMappedFile::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ /*max_file_size=*/MemoryMappedFile::kMaxFileSize + 1),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ EXPECT_THAT(MemoryMappedFile::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ /*max_file_size=*/-1, /*pre_mapping_file_offset=*/0,
+ /*pre_mapping_mmap_size=*/8192),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+ EXPECT_THAT(
+ MemoryMappedFile::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ /*max_file_size=*/MemoryMappedFile::kMaxFileSize + 1,
+ /*pre_mapping_file_offset=*/0, /*pre_mapping_mmap_size=*/8192),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+}
+
+TEST_F(MemoryMappedFileTest, CreateWithPreMappingInfo) {
+ constexpr int max_file_size = 8192;
+ constexpr int pre_mapping_file_offset = 99;
+ constexpr int pre_mapping_mmap_size = 2000;
+ MemoryMappedFile::Strategy stragegy =
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC;
+ // Create MemoryMappedFile with pre-mapping file_offset and mmap_size
+ ICING_ASSERT_OK_AND_ASSIGN(
+ MemoryMappedFile mmapped_file,
+ MemoryMappedFile::Create(filesystem_, file_path_, stragegy, max_file_size,
+ pre_mapping_file_offset, pre_mapping_mmap_size));
+
+ EXPECT_THAT(mmapped_file.strategy(), Eq(stragegy));
+ EXPECT_THAT(mmapped_file.max_file_size(), Eq(max_file_size));
+ EXPECT_THAT(mmapped_file.region(), NotNull());
+ EXPECT_THAT(mmapped_file.mutable_region(), NotNull());
+ EXPECT_THAT(mmapped_file.file_offset(), Eq(pre_mapping_file_offset));
+ EXPECT_THAT(mmapped_file.region_size(), Eq(pre_mapping_mmap_size));
+ EXPECT_THAT(mmapped_file.available_size(), Eq(0));
+
+ // Manually grow the file externally and mutate region. There should be no
+ // memory error.
+ {
+ ScopedFd sfd(filesystem_.OpenForAppend(file_path_.c_str()));
+ ASSERT_TRUE(sfd.is_valid());
+ int grow_size = 4096;
+ auto buf = std::make_unique<char[]>(grow_size);
+ ASSERT_TRUE(filesystem_.Write(sfd.get(), buf.get(), grow_size));
+ }
+ mmapped_file.mutable_region()[0] = 'a';
+ ICING_EXPECT_OK(mmapped_file.PersistToDisk());
+
+ {
+ ScopedFd sfd(filesystem_.OpenForRead(file_path_.c_str()));
+ ASSERT_TRUE(sfd.is_valid());
+ int buf_size = 10;
+ auto buf = std::make_unique<char[]>(buf_size);
+ ASSERT_TRUE(filesystem_.PRead(sfd.get(), buf.get(), buf_size,
+ pre_mapping_file_offset));
+ EXPECT_THAT(buf.get()[0], Eq('a'));
+ }
+}
+
+TEST_F(MemoryMappedFileTest, CreateWithInvalidPreMappingInfo) {
+ int page_size = MemoryMappedFile::system_page_size();
+ int max_file_size = page_size * 2;
+
+ // Negative file_offset
+ EXPECT_THAT(
+ MemoryMappedFile::Create(filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ max_file_size,
+ /*pre_mapping_file_offset=*/-1,
+ /*pre_mapping_mmap_size=*/page_size),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ // Negative mmap_size
+ EXPECT_THAT(
+ MemoryMappedFile::Create(filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ max_file_size, /*pre_mapping_file_offset=*/0,
+ /*pre_mapping_mmap_size=*/-1),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ // pre_mapping_file_offset + pre_mapping_mmap_size > max_file_size.
+ int pre_mapping_file_offset = 99;
+ int pre_mapping_mmap_size = max_file_size - pre_mapping_file_offset + 1;
+ EXPECT_THAT(
+ MemoryMappedFile::Create(filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ max_file_size, pre_mapping_file_offset,
+ pre_mapping_mmap_size),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ // Edge cases to make sure the implementation of range check won't have
+ // integer overflow bug.
+ EXPECT_THAT(
+ MemoryMappedFile::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size,
+ /*pre_mapping_file_offset=*/99,
+ /*pre_mapping_mmap_size=*/std::numeric_limits<int64_t>::max()),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+ EXPECT_THAT(MemoryMappedFile::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ max_file_size, /*pre_mapping_file_offset=*/0,
+ /*pre_mapping_mmap_size=*/INT64_C(-1) *
+ (std::numeric_limits<int64_t>::max() - max_file_size)),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+ EXPECT_THAT(
+ MemoryMappedFile::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, max_file_size,
+ /*pre_mapping_file_offset=*/INT64_C(-1) *
+ (std::numeric_limits<int64_t>::max() - max_file_size),
+ /*pre_mapping_mmap_size=*/page_size),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+}
+
+// TODO(b/247671531): remove this test after deprecating Remap
+TEST_F(MemoryMappedFileTest, RemapZeroMmapSizeShouldUnmap) {
+ // Create MemoryMappedFile
+ ICING_ASSERT_OK_AND_ASSIGN(
+ MemoryMappedFile mmapped_file,
+ MemoryMappedFile::Create(filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ MemoryMappedFile::kDefaultMaxFileSize));
+
+ int page_size = MemoryMappedFile::system_page_size();
+ int file_offset = 99;
+ int mmap_size = page_size * 2 - file_offset;
+ ICING_ASSERT_OK(mmapped_file.Remap(file_offset, mmap_size));
+ ASSERT_THAT(mmapped_file.region(), NotNull());
+
+ // Call GrowAndRemapIfNecessary with any file_offset and new_mmap_size = 0.
+ // The original mmapped region should be unmapped.
+ ICING_EXPECT_OK(mmapped_file.Remap(file_offset, /*mmap_size=*/0));
+ EXPECT_THAT(mmapped_file.region(), IsNull());
+}
+
+TEST_F(MemoryMappedFileTest, GrowAndRemapIfNecessary) {
+ int page_size = MemoryMappedFile::system_page_size();
+ int pre_mapping_file_offset = 99;
+ int pre_mapping_mmap_size = page_size * 2 - pre_mapping_file_offset;
+ {
+ // Create MemoryMappedFile with pre-mapping file_offset and mmap_size
+ // without growing the file.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ MemoryMappedFile mmapped_file,
+ MemoryMappedFile::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ MemoryMappedFile::kDefaultMaxFileSize, pre_mapping_file_offset,
+ pre_mapping_mmap_size));
+ ASSERT_THAT(filesystem_.GetFileSize(file_path_.c_str()), Eq(0));
+ const char* original_region = mmapped_file.region();
+
+ // Call GrowAndRemapIfNecessary with same file_offset and new mmap_size that
+ // doesn't exceed pre_mapping_mmap_size. The underlying file size should
+ // grow correctly, but there should be no remap.
+ int new_mmap_size1 = page_size - pre_mapping_file_offset;
+ ICING_EXPECT_OK(mmapped_file.GrowAndRemapIfNecessary(
+ pre_mapping_file_offset, new_mmap_size1));
+
+ EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()),
+ Eq(pre_mapping_file_offset + new_mmap_size1));
+ EXPECT_THAT(mmapped_file.region(), Eq(original_region));
+ EXPECT_THAT(mmapped_file.mutable_region(), Eq(original_region));
+ EXPECT_THAT(mmapped_file.file_offset(), Eq(pre_mapping_file_offset));
+ EXPECT_THAT(mmapped_file.region_size(), Eq(pre_mapping_mmap_size));
+ EXPECT_THAT(mmapped_file.available_size(), Eq(new_mmap_size1));
+
+ // Test it with new_mmap_size2 = pre_mapping_mmap_size
+ int new_mmap_size2 = pre_mapping_mmap_size;
+ ICING_EXPECT_OK(mmapped_file.GrowAndRemapIfNecessary(
+ pre_mapping_file_offset, new_mmap_size2));
+
+ EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()),
+ Eq(pre_mapping_file_offset + new_mmap_size2));
+ EXPECT_THAT(mmapped_file.region(), Eq(original_region));
+ EXPECT_THAT(mmapped_file.mutable_region(), Eq(original_region));
+ EXPECT_THAT(mmapped_file.file_offset(), Eq(pre_mapping_file_offset));
+ EXPECT_THAT(mmapped_file.region_size(), Eq(pre_mapping_mmap_size));
+ EXPECT_THAT(mmapped_file.available_size(), Eq(new_mmap_size2));
+
+ // Write some bytes to region()[0]. It should write the underlying file at
+ // file_offset.
+ mmapped_file.mutable_region()[0] = 'a';
+ ICING_ASSERT_OK(mmapped_file.PersistToDisk());
+ }
+
+ ScopedFd sfd(filesystem_.OpenForRead(file_path_.c_str()));
+ ASSERT_TRUE(sfd.is_valid());
+ int buf_size = 1;
+ auto buf = std::make_unique<char[]>(buf_size);
+ ASSERT_TRUE(filesystem_.PRead(sfd.get(), buf.get(), buf_size,
+ pre_mapping_file_offset));
+ EXPECT_THAT(buf.get()[0], Eq('a'));
+}
+
+TEST_F(MemoryMappedFileTest,
+ GrowAndRemapIfNecessaryExceedingPreMappingMmapSize) {
+ int page_size = MemoryMappedFile::system_page_size();
+ int pre_mapping_file_offset = 99;
+ int pre_mapping_mmap_size = page_size * 2 - pre_mapping_file_offset;
+ // Create MemoryMappedFile with pre-mapping file_offset and mmap_size without
+ // growing the file.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ MemoryMappedFile mmapped_file,
+ MemoryMappedFile::Create(filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ MemoryMappedFile::kDefaultMaxFileSize,
+ pre_mapping_file_offset, pre_mapping_mmap_size));
+ const char* original_region = mmapped_file.region();
+
+ // Call GrowAndRemapIfNecessary with same file offset and new mmap_size that
+ // exceeds pre_mapping_mmap_size (but still below max_file_size). The
+ // underlying file size should grow correctly and the region should be
+ // remapped.
+ int new_mmap_size = page_size * 3 - pre_mapping_file_offset;
+ ICING_EXPECT_OK(mmapped_file.GrowAndRemapIfNecessary(pre_mapping_file_offset,
+ new_mmap_size));
+
+ EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()),
+ Eq(pre_mapping_file_offset + new_mmap_size));
+ EXPECT_THAT(mmapped_file.region(), Not(Eq(original_region)));
+ EXPECT_THAT(mmapped_file.file_offset(), Eq(pre_mapping_file_offset));
+ EXPECT_THAT(mmapped_file.region_size(), Eq(new_mmap_size));
+ EXPECT_THAT(mmapped_file.available_size(), Eq(new_mmap_size));
+}
+
+TEST_F(MemoryMappedFileTest, GrowAndRemapIfNecessaryDecreasingMmapSize) {
+ int page_size = MemoryMappedFile::system_page_size();
+ int pre_mapping_file_offset = 99;
+ int pre_mapping_mmap_size = page_size * 2 - pre_mapping_file_offset;
+ // Create MemoryMappedFile with pre-mapping file_offset and mmap_size, and
+ // call GrowAndRemapIfNecessary to grow the underlying file.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ MemoryMappedFile mmapped_file,
+ MemoryMappedFile::Create(filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ MemoryMappedFile::kDefaultMaxFileSize,
+ pre_mapping_file_offset, pre_mapping_mmap_size));
+ ICING_ASSERT_OK(mmapped_file.GrowAndRemapIfNecessary(pre_mapping_file_offset,
+ pre_mapping_mmap_size));
+
+ const char* original_region = mmapped_file.region();
+ int original_file_size = filesystem_.GetFileSize(file_path_.c_str());
+ ASSERT_THAT(original_file_size,
+ Eq(pre_mapping_file_offset + pre_mapping_mmap_size));
+ ASSERT_THAT(mmapped_file.region_size(), Eq(pre_mapping_mmap_size));
+ ASSERT_THAT(mmapped_file.available_size(), Eq(pre_mapping_mmap_size));
+
+ // Call GrowAndRemapIfNecessary with same file offset and new mmap_size
+ // smaller than pre_mapping_mmap_size. There should be no file growth/truncate
+ // or remap.
+ int new_mmap_size = page_size - pre_mapping_file_offset;
+ ICING_EXPECT_OK(mmapped_file.GrowAndRemapIfNecessary(pre_mapping_file_offset,
+ new_mmap_size));
+
+ EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()),
+ Eq(original_file_size));
+ EXPECT_THAT(mmapped_file.region(), Eq(original_region));
+ EXPECT_THAT(mmapped_file.file_offset(), Eq(pre_mapping_file_offset));
+ EXPECT_THAT(mmapped_file.region_size(), Eq(pre_mapping_mmap_size));
+ EXPECT_THAT(mmapped_file.available_size(), Eq(pre_mapping_mmap_size));
+}
+
+TEST_F(MemoryMappedFileTest, GrowAndRemapIfNecessaryZeroMmapSizeShouldUnmap) {
+ int page_size = MemoryMappedFile::system_page_size();
+ int pre_mapping_file_offset = 99;
+ int pre_mapping_mmap_size = page_size * 2 - pre_mapping_file_offset;
+ // Create MemoryMappedFile with pre-mapping file_offset and mmap_size, and
+ // call GrowAndRemapIfNecessary to grow the underlying file.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ MemoryMappedFile mmapped_file,
+ MemoryMappedFile::Create(filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ MemoryMappedFile::kDefaultMaxFileSize,
+ pre_mapping_file_offset, pre_mapping_mmap_size));
+ ICING_ASSERT_OK(mmapped_file.GrowAndRemapIfNecessary(pre_mapping_file_offset,
+ pre_mapping_mmap_size));
+
+ int original_file_size = filesystem_.GetFileSize(file_path_.c_str());
+ ASSERT_THAT(original_file_size,
+ Eq(pre_mapping_file_offset + pre_mapping_mmap_size));
+ ASSERT_THAT(mmapped_file.region(), NotNull());
+ ASSERT_THAT(mmapped_file.region_size(), Eq(pre_mapping_mmap_size));
+ ASSERT_THAT(mmapped_file.available_size(), Eq(pre_mapping_mmap_size));
+
+ // Call GrowAndRemapIfNecessary with any file_offset and new_mmap_size = 0.
+ // There should be no file growth/truncate, but the original mmapped region
+ // should be unmapped.
+ ICING_EXPECT_OK(mmapped_file.GrowAndRemapIfNecessary(pre_mapping_file_offset,
+ /*new_mmap_size=*/0));
+
+ EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()),
+ Eq(original_file_size));
+ EXPECT_THAT(mmapped_file.region(), IsNull());
+ EXPECT_THAT(mmapped_file.file_offset(), Eq(0));
+ EXPECT_THAT(mmapped_file.region_size(), Eq(0));
+ EXPECT_THAT(mmapped_file.available_size(), Eq(0));
+}
+
+TEST_F(MemoryMappedFileTest, GrowAndRemapIfNecessaryChangeOffset) {
+ int page_size = MemoryMappedFile::system_page_size();
+ int pre_mapping_file_offset = 99;
+ int pre_mapping_mmap_size = page_size * 2 - pre_mapping_file_offset;
+ // Create MemoryMappedFile with pre-mapping file_offset and mmap_size, and
+ // call GrowAndRemapIfNecessary to grow the underlying file.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ MemoryMappedFile mmapped_file,
+ MemoryMappedFile::Create(filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ MemoryMappedFile::kDefaultMaxFileSize,
+ pre_mapping_file_offset, pre_mapping_mmap_size));
+ ICING_ASSERT_OK(mmapped_file.GrowAndRemapIfNecessary(pre_mapping_file_offset,
+ pre_mapping_mmap_size));
+
+ const char* original_region = mmapped_file.region();
+ int original_file_size = filesystem_.GetFileSize(file_path_.c_str());
+ ASSERT_THAT(original_file_size,
+ Eq(pre_mapping_file_offset + pre_mapping_mmap_size));
+ ASSERT_THAT(mmapped_file.region_size(), Eq(pre_mapping_mmap_size));
+ ASSERT_THAT(mmapped_file.available_size(), Eq(pre_mapping_mmap_size));
+
+ // Call GrowAndRemapIfNecessary with different file_offset and new mmap_size
+ // that doesn't require to grow the underlying file. The region should still
+ // be remapped since offset has been changed.
+ int new_file_offset = pre_mapping_file_offset + page_size;
+ int new_mmap_size = page_size * 2 - new_file_offset;
+ ASSERT_THAT(new_file_offset + new_mmap_size, Le(original_file_size));
+ ICING_EXPECT_OK(
+ mmapped_file.GrowAndRemapIfNecessary(new_file_offset, new_mmap_size));
+
+ EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()),
+ Eq(original_file_size));
+ EXPECT_THAT(mmapped_file.region(), Not(Eq(original_region)));
+ EXPECT_THAT(mmapped_file.file_offset(), Eq(new_file_offset));
+ EXPECT_THAT(mmapped_file.region_size(), Eq(new_mmap_size));
+ EXPECT_THAT(mmapped_file.available_size(), Eq(new_mmap_size));
+}
+
+TEST_F(MemoryMappedFileTest, GrowAndRemapIfNecessaryInvalidMmapRegionInfo) {
+ int page_size = MemoryMappedFile::system_page_size();
+ int max_file_size = page_size * 2;
+ // Create MemoryMappedFile with pre-mapping file_offset and mmap_size, and
+ // call GrowAndRemapIfNecessary to grow the underlying file.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ MemoryMappedFile mmapped_file,
+ MemoryMappedFile::Create(filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ max_file_size,
+ /*pre_mapping_file_offset=*/0,
+ /*pre_mapping_mmap_size=*/page_size * 2));
+
+ // Negative new_file_offset.
+ EXPECT_THAT(mmapped_file.GrowAndRemapIfNecessary(
+ /*new_file_offset=*/-1,
+ /*new_mmap_size=*/page_size),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ // Negative new_mmap_size
+ EXPECT_THAT(mmapped_file.GrowAndRemapIfNecessary(
+ /*new_file_offset=*/0,
+ /*new_mmap_size=*/-1),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ // new_file_offset + new_mmap_size > max_file_size.
+ int new_file_offset = 99;
+ int new_mmap_size = max_file_size - new_file_offset + 1;
+ EXPECT_THAT(
+ mmapped_file.GrowAndRemapIfNecessary(new_file_offset, new_mmap_size),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+ // Edge cases to make sure the implementation of range check won't have
+ // integer overflow bug.
+ EXPECT_THAT(mmapped_file.GrowAndRemapIfNecessary(
+ /*new_file_offset=*/99,
+ /*new_mmap_size=*/std::numeric_limits<int64_t>::max()),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+ EXPECT_THAT(mmapped_file.GrowAndRemapIfNecessary(
+ /*new_file_offset=*/0,
+ /*new_mmap_size=*/INT64_C(-1) *
+ (std::numeric_limits<int64_t>::max() - max_file_size)),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+ EXPECT_THAT(mmapped_file.GrowAndRemapIfNecessary(
+ /*new_file_offset=*/INT64_C(-1) *
+ (std::numeric_limits<int64_t>::max() - max_file_size),
+ /*new_mmap_size=*/page_size),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+}
+
+TEST_F(MemoryMappedFileTest, RemapFailureStillValidInstance) {
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ int page_size = MemoryMappedFile::system_page_size();
+ int max_file_size = page_size * 10;
+
+ // 1. Create MemoryMappedFile with pre-mapping offset=0 and
+ // mmap_size=page_size. Also call GrowAndRemapIfNecessary to grow the file
+ // size to page_size.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ MemoryMappedFile mmapped_file,
+ MemoryMappedFile::Create(*mock_filesystem, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ max_file_size,
+ /*pre_mapping_file_offset=*/0,
+ /*pre_mapping_mmap_size=*/page_size));
+ ICING_ASSERT_OK(
+ mmapped_file.GrowAndRemapIfNecessary(/*new_file_offset=*/0,
+ /*new_mmap_size=*/page_size));
+ ASSERT_THAT(filesystem_.GetFileSize(file_path_.c_str()), Eq(page_size));
+ ASSERT_THAT(mmapped_file.region(), NotNull());
+ ASSERT_THAT(mmapped_file.mutable_region(), NotNull());
+ ASSERT_THAT(mmapped_file.file_offset(), Eq(0));
+ ASSERT_THAT(mmapped_file.region_size(), Eq(page_size));
+ ASSERT_THAT(mmapped_file.available_size(), Eq(page_size));
+ mmapped_file.mutable_region()[page_size - 1] = 'a';
+
+ const char* original_region = mmapped_file.region();
+
+ // 2. Call GrowAndRemapIfNecessary with different offset and greater
+ // mmap_size. Here we're testing the case when file growth succeeds but
+ // remap (RemapImpl) fails.
+ // To make RemapImpl fail, mock OpenForWrite to fail. Note that we use
+ // OpenForAppend when growing the file, so it is ok to make OpenForWrite
+ // fail without affecting file growth.
+ ON_CALL(*mock_filesystem, OpenForWrite(_)).WillByDefault(Return(-1));
+ EXPECT_THAT(
+ mmapped_file.GrowAndRemapIfNecessary(/*new_file_offset=*/1,
+ /*new_mmap_size=*/page_size * 2 - 1),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+
+ // 3. Verify the result. The file size should be grown, but since remap fails,
+ // mmap related fields should remain unchanged.
+ EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()), Eq(page_size * 2));
+ EXPECT_THAT(mmapped_file.region(), Eq(original_region));
+ EXPECT_THAT(mmapped_file.mutable_region(), Eq(original_region));
+ EXPECT_THAT(mmapped_file.file_offset(), Eq(0));
+ EXPECT_THAT(mmapped_file.region_size(), Eq(page_size));
+ EXPECT_THAT(mmapped_file.available_size(), Eq(page_size));
+ // We should still be able to get the correct content via region.
+ EXPECT_THAT(mmapped_file.region()[page_size - 1], Eq('a'));
+}
+
+TEST_F(MemoryMappedFileTest, BadFileSizeDuringGrowReturnsError) {
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ int page_size = MemoryMappedFile::system_page_size();
+ int max_file_size = page_size * 10;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ MemoryMappedFile mmapped_file,
+ MemoryMappedFile::Create(*mock_filesystem, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ max_file_size,
+ /*pre_mapping_file_offset=*/0,
+ /*pre_mapping_mmap_size=*/page_size));
+ ICING_ASSERT_OK(
+ mmapped_file.GrowAndRemapIfNecessary(/*new_file_offset=*/0,
+ /*new_mmap_size=*/page_size));
+ ASSERT_THAT(filesystem_.GetFileSize(file_path_.c_str()), Eq(page_size));
+ ASSERT_THAT(mmapped_file.region(), NotNull());
+ ASSERT_THAT(mmapped_file.mutable_region(), NotNull());
+ ASSERT_THAT(mmapped_file.file_offset(), Eq(0));
+ ASSERT_THAT(mmapped_file.region_size(), Eq(page_size));
+ ASSERT_THAT(mmapped_file.available_size(), Eq(page_size));
+ mmapped_file.mutable_region()[page_size - 1] = 'a';
+
+ const char* original_region = mmapped_file.region();
+
+ // Calling GrowAndRemapIfNecessary with larger size will cause file growth.
+ // During file growth, we will attempt to sync the underlying file size via
+ // GetFileSize to see if growing is actually necessary. Mock GetFileSize to
+ // return an error.
+ ON_CALL(*mock_filesystem, GetFileSize(A<const char*>()))
+ .WillByDefault(Return(Filesystem::kBadFileSize));
+
+ // We should fail gracefully and return an INTERNAL error to indicate that
+ // there was an issue retrieving the file size. The underlying file size and
+ // mmap info should remain unchanged.
+ EXPECT_THAT(
+ mmapped_file.GrowAndRemapIfNecessary(/*new_file_offset=*/0,
+ /*new_mmap_size=*/page_size * 2),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+ EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()), Eq(page_size));
+ EXPECT_THAT(mmapped_file.region(), Eq(original_region));
+ EXPECT_THAT(mmapped_file.mutable_region(), Eq(original_region));
+ EXPECT_THAT(mmapped_file.file_offset(), Eq(0));
+ EXPECT_THAT(mmapped_file.region_size(), Eq(page_size));
+ EXPECT_THAT(mmapped_file.available_size(), Eq(page_size));
+ // We should still be able to get the correct content via region.
+ EXPECT_THAT(mmapped_file.region()[page_size - 1], Eq('a'));
+}
+
+TEST_F(MemoryMappedFileTest, WriteSucceedsPartiallyAndFailsDuringGrow) {
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ int page_size = MemoryMappedFile::system_page_size();
+ int max_file_size = page_size * 10;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ MemoryMappedFile mmapped_file,
+ MemoryMappedFile::Create(*mock_filesystem, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC,
+ max_file_size,
+ /*pre_mapping_file_offset=*/0,
+ /*pre_mapping_mmap_size=*/max_file_size));
+
+ // 1. Initially the underlying file size is 0. When calling
+ // GrowAndRemapIfNecessary first time with new_mmap_size = page_size * 2,
+ // Write() should be called 2 times and each time should grow the
+ // underlying file by page_size bytes.
+ // Mock the 2nd Write() to write partially (1 byte) and fail, so the file
+ // will only be grown by page_size + 1 bytes in total.
+ auto open_lambda = [this](int fd, const void* data,
+ size_t data_size) -> bool {
+ EXPECT_THAT(data_size, Gt(1));
+ EXPECT_THAT(this->filesystem_.Write(fd, data, 1), Eq(1));
+ return false;
+ };
+ EXPECT_CALL(*mock_filesystem, Write(A<int>(), A<const void*>(), A<size_t>()))
+ .WillOnce(DoDefault())
+ .WillOnce(open_lambda);
+
+ // 2. Call GrowAndRemapIfNecessary and expect to fail. The actual file size
+ // should be page_size + 1, but the (cached) file_size_ should be page_size
+ // since it fails to update that partially written byte of the 2nd Write().
+ EXPECT_THAT(
+ mmapped_file.GrowAndRemapIfNecessary(/*new_file_offset=*/0,
+ /*new_mmap_size=*/page_size * 2),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+ EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()), Eq(page_size + 1));
+ EXPECT_THAT(mmapped_file.available_size(), Eq(page_size));
+
+ // 3. Call GrowAndRemapIfNecessary again with new_mmap_size = page_size + 1.
+ // Even though file_size_ only caches page_size and excludes the partially
+ // written byte(s) due to failure of the previous round of grow, the next
+ // round should sync the actual file size to file_size_ via system call and
+ // skip Write() since the actual file size is large enough for the new
+ // mmap_size.
+ // Note: WillOnce() above will ensure that Write() won't be called for
+ // another time.
+ ICING_EXPECT_OK(
+ mmapped_file.GrowAndRemapIfNecessary(/*new_file_offset=*/0,
+ /*new_mmap_size=*/page_size + 1));
+ EXPECT_THAT(mmapped_file.available_size(), Eq(page_size + 1));
+
+ // 4. Call GrowAndRemapIfNecessary again with new_mmap_size = page_size * 2.
+ // Even though the current file size is page_size + 1, the next round of
+ // grow should automatically calibrate the file size back to a multiple of
+ // page_size instead of just simply appending page_size bytes to the file.
+ EXPECT_CALL(*mock_filesystem, Write(A<int>(), A<const void*>(), A<size_t>()))
+ .WillOnce(DoDefault());
+ ICING_EXPECT_OK(
+ mmapped_file.GrowAndRemapIfNecessary(/*new_file_offset=*/0,
+ /*new_mmap_size=*/page_size * 2));
+ EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()), Eq(page_size * 2));
+ EXPECT_THAT(mmapped_file.available_size(), Eq(page_size * 2));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/file/persistent-hash-map.cc b/icing/file/persistent-hash-map.cc
index 8101c89..0c9fd7f 100644
--- a/icing/file/persistent-hash-map.cc
+++ b/icing/file/persistent-hash-map.cc
@@ -450,10 +450,11 @@ PersistentHashMap::InitializeNewFiles(const Filesystem& filesystem,
&new_crcs, &new_info));
// Mmap the content of the crcs and info.
- auto metadata_mmapped_file = std::make_unique<MemoryMappedFile>(
- filesystem, metadata_file_path,
- MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC);
- ICING_RETURN_IF_ERROR(metadata_mmapped_file->Remap(
+ ICING_ASSIGN_OR_RETURN(MemoryMappedFile metadata_mmapped_file,
+ MemoryMappedFile::Create(
+ filesystem, metadata_file_path,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ ICING_RETURN_IF_ERROR(metadata_mmapped_file.Remap(
/*file_offset=*/0, /*mmap_size=*/sizeof(Crcs) + sizeof(Info)));
return std::unique_ptr<PersistentHashMap>(new PersistentHashMap(
@@ -468,10 +469,12 @@ PersistentHashMap::InitializeExistingFiles(const Filesystem& filesystem,
int32_t value_type_size,
int32_t max_load_factor_percent) {
// Mmap the content of the crcs and info.
- auto metadata_mmapped_file = std::make_unique<MemoryMappedFile>(
- filesystem, GetMetadataFilePath(base_dir, kSubDirectory),
- MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC);
- ICING_RETURN_IF_ERROR(metadata_mmapped_file->Remap(
+ ICING_ASSIGN_OR_RETURN(
+ MemoryMappedFile metadata_mmapped_file,
+ MemoryMappedFile::Create(
+ filesystem, GetMetadataFilePath(base_dir, kSubDirectory),
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ ICING_RETURN_IF_ERROR(metadata_mmapped_file.Remap(
/*file_offset=*/0, /*mmap_size=*/sizeof(Crcs) + sizeof(Info)));
// Initialize 3 storages
@@ -491,9 +494,9 @@ PersistentHashMap::InitializeExistingFiles(const Filesystem& filesystem,
MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
Crcs* crcs_ptr = reinterpret_cast<Crcs*>(
- metadata_mmapped_file->mutable_region() + Crcs::kFileOffset);
+ metadata_mmapped_file.mutable_region() + Crcs::kFileOffset);
Info* info_ptr = reinterpret_cast<Info*>(
- metadata_mmapped_file->mutable_region() + Info::kFileOffset);
+ metadata_mmapped_file.mutable_region() + Info::kFileOffset);
// Value type size should be consistent.
if (value_type_size != info_ptr->value_type_size) {
@@ -514,7 +517,7 @@ PersistentHashMap::InitializeExistingFiles(const Filesystem& filesystem,
info_ptr->max_load_factor_percent = max_load_factor_percent;
crcs_ptr->component_crcs.info_crc = info_ptr->ComputeChecksum().Get();
crcs_ptr->all_crc = crcs_ptr->component_crcs.ComputeChecksum().Get();
- ICING_RETURN_IF_ERROR(metadata_mmapped_file->PersistToDisk());
+ ICING_RETURN_IF_ERROR(metadata_mmapped_file.PersistToDisk());
}
auto persistent_hash_map =
diff --git a/icing/file/persistent-hash-map.h b/icing/file/persistent-hash-map.h
index 9a9ca8d..ef3995c 100644
--- a/icing/file/persistent-hash-map.h
+++ b/icing/file/persistent-hash-map.h
@@ -373,13 +373,14 @@ class PersistentHashMap {
explicit PersistentHashMap(
const Filesystem& filesystem, std::string_view base_dir,
- std::unique_ptr<MemoryMappedFile> metadata_mmapped_file,
+ MemoryMappedFile&& metadata_mmapped_file,
std::unique_ptr<FileBackedVector<Bucket>> bucket_storage,
std::unique_ptr<FileBackedVector<Entry>> entry_storage,
std::unique_ptr<FileBackedVector<char>> kv_storage)
: filesystem_(&filesystem),
base_dir_(base_dir),
- metadata_mmapped_file_(std::move(metadata_mmapped_file)),
+ metadata_mmapped_file_(std::make_unique<MemoryMappedFile>(
+ std::move(metadata_mmapped_file))),
bucket_storage_(std::move(bucket_storage)),
entry_storage_(std::move(entry_storage)),
kv_storage_(std::move(kv_storage)) {}
diff --git a/icing/file/portable-file-backed-proto-log.h b/icing/file/portable-file-backed-proto-log.h
index 409ab96..e48e6e0 100644
--- a/icing/file/portable-file-backed-proto-log.h
+++ b/icing/file/portable-file-backed-proto-log.h
@@ -799,8 +799,10 @@ libtextclassifier3::StatusOr<Crc32>
PortableFileBackedProtoLog<ProtoT>::ComputeChecksum(
const Filesystem* filesystem, const std::string& file_path,
Crc32 initial_crc, int64_t start, int64_t end) {
- auto mmapped_file = MemoryMappedFile(*filesystem, file_path,
- MemoryMappedFile::Strategy::READ_ONLY);
+ ICING_ASSIGN_OR_RETURN(
+ MemoryMappedFile mmapped_file,
+ MemoryMappedFile::Create(*filesystem, file_path,
+ MemoryMappedFile::Strategy::READ_ONLY));
Crc32 new_crc(initial_crc.Get());
if (start < 0) {
diff --git a/icing/file/portable-file-backed-proto-log_test.cc b/icing/file/portable-file-backed-proto-log_test.cc
index 795271a..af09d18 100644
--- a/icing/file/portable-file-backed-proto-log_test.cc
+++ b/icing/file/portable-file-backed-proto-log_test.cc
@@ -927,8 +927,10 @@ TEST_F(PortableFileBackedProtoLogTest, EraseProtoShouldSetZero) {
// Checks if the erased area is set to 0.
int64_t file_size = filesystem_.GetFileSize(file_path_.c_str());
- MemoryMappedFile mmapped_file(filesystem_, file_path_,
- MemoryMappedFile::Strategy::READ_ONLY);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ MemoryMappedFile mmapped_file,
+ MemoryMappedFile::Create(filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_ONLY));
// document1_offset + sizeof(int) is the start byte of the proto where
// sizeof(int) is the size of the proto metadata.