aboutsummaryrefslogtreecommitdiff
path: root/icing/file
diff options
context:
space:
mode:
authorTerry Wang <tytytyww@google.com>2020-09-24 13:39:23 -0700
committerTerry Wang <tytytyww@google.com>2020-09-24 13:39:23 -0700
commite15b6b66f871a71b73278c34d5c54f648f880c29 (patch)
tree61e187172a8802fae8e39f04ce69d3ae5c939f2e /icing/file
parent9f1b9cf4dc93fa7bfee0a3637c93dc5b557aab30 (diff)
downloadicing-e15b6b66f871a71b73278c34d5c54f648f880c29.tar.gz
Pull upstream changes.
Change-Id: I44831fdadcdb67f2e19570a35cb4c76faf8397f9
Diffstat (limited to 'icing/file')
-rw-r--r--icing/file/file-backed-proto-log.h98
-rw-r--r--icing/file/file-backed-proto-log_test.cc185
-rw-r--r--icing/file/file-backed-vector.h2
3 files changed, 267 insertions, 18 deletions
diff --git a/icing/file/file-backed-proto-log.h b/icing/file/file-backed-proto-log.h
index 62943b8..95511ac 100644
--- a/icing/file/file-backed-proto-log.h
+++ b/icing/file/file-backed-proto-log.h
@@ -78,6 +78,23 @@
namespace icing {
namespace lib {
+namespace {
+
+bool IsEmptyBuffer(const char* buffer, int size) {
+ return std::all_of(buffer, buffer + size,
+ [](const char byte) { return byte == 0; });
+}
+
+// Helper function to get stored proto size from the metadata.
+// Metadata format: 8 bits magic + 24 bits size
+int GetProtoSize(int metadata) { return metadata & 0x00FFFFFF; }
+
+// Helper function to get stored proto magic from the metadata.
+// Metadata format: 8 bits magic + 24 bits size
+uint8_t GetProtoMagic(int metadata) { return metadata >> 24; }
+
+} // namespace
+
template <typename ProtoT>
class FileBackedProtoLog {
public:
@@ -206,10 +223,19 @@ class FileBackedProtoLog {
//
// Returns:
// A proto on success
+ // NOT_FOUND if the proto at the given offset has been erased
// OUT_OF_RANGE_ERROR if file_offset exceeds file size
// INTERNAL_ERROR on IO error
libtextclassifier3::StatusOr<ProtoT> ReadProto(int64_t file_offset) const;
+ // Erases the data of a proto located at file_offset from the file.
+ //
+ // Returns:
+ // OK on success
+ // OUT_OF_RANGE_ERROR if file_offset exceeds file size
+ // INTERNAL_ERROR on IO error
+ libtextclassifier3::Status EraseProto(int64_t file_offset);
+
// Calculates and returns the disk usage in bytes. Rounds up to the nearest
// block size.
//
@@ -239,7 +265,7 @@ class FileBackedProtoLog {
Iterator(const Filesystem& filesystem, const std::string& file_path,
int64_t initial_offset);
- // Advances to the position of next proto.
+ // Advances to the position of next proto whether it has been erased or not.
//
// Returns:
// OK on success
@@ -716,10 +742,15 @@ libtextclassifier3::StatusOr<ProtoT> FileBackedProtoLog<ProtoT>::ReadProto(
int metadata, ReadProtoMetadata(&mmapped_file, file_offset, file_size));
// Copy out however many bytes it says the proto is
- int stored_size = metadata & 0x00FFFFFF;
+ int stored_size = GetProtoSize(metadata);
ICING_RETURN_IF_ERROR(
mmapped_file.Remap(file_offset + sizeof(metadata), stored_size));
+
+ if (IsEmptyBuffer(mmapped_file.region(), mmapped_file.region_size())) {
+ return absl_ports::NotFoundError("The proto data has been erased.");
+ }
+
google::protobuf::io::ArrayInputStream proto_stream(
mmapped_file.mutable_region(), stored_size);
@@ -736,6 +767,62 @@ libtextclassifier3::StatusOr<ProtoT> FileBackedProtoLog<ProtoT>::ReadProto(
}
template <typename ProtoT>
+libtextclassifier3::Status FileBackedProtoLog<ProtoT>::EraseProto(
+ int64_t file_offset) {
+ int64_t file_size = filesystem_->GetFileSize(fd_.get());
+ if (file_offset >= file_size) {
+ // file_size points to the next byte to write at, so subtract one to get the
+ // inclusive, actual size of file.
+ return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
+ "Trying to erase data at a location, %lld, "
+ "out of range of the file size, %lld",
+ static_cast<long long>(file_offset),
+ static_cast<long long>(file_size - 1)));
+ }
+
+ MemoryMappedFile mmapped_file(
+ *filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC);
+
+ // Read out the metadata
+ ICING_ASSIGN_OR_RETURN(
+ int metadata, ReadProtoMetadata(&mmapped_file, file_offset, file_size));
+
+ ICING_RETURN_IF_ERROR(mmapped_file.Remap(file_offset + sizeof(metadata),
+ GetProtoSize(metadata)));
+
+ // We need to update the crc checksum if the erased area is before the rewind
+ // position.
+ if (file_offset + sizeof(metadata) < header_->rewind_offset) {
+ // We need to calculate [original string xor 0s].
+ // The xored string is the same as the original string because 0 xor 0 = 0,
+ // 1 xor 0 = 1.
+ const std::string_view xored_str(mmapped_file.region(),
+ mmapped_file.region_size());
+
+ Crc32 crc(header_->log_checksum);
+ ICING_ASSIGN_OR_RETURN(
+ uint32_t new_crc,
+ crc.UpdateWithXor(
+ xored_str,
+ /*full_data_size=*/header_->rewind_offset - sizeof(Header),
+ /*position=*/file_offset + sizeof(metadata) - sizeof(Header)));
+
+ header_->log_checksum = new_crc;
+ header_->header_checksum = header_->CalculateHeaderChecksum();
+
+ if (!filesystem_->PWrite(fd_.get(), /*offset=*/0, header_.get(),
+ sizeof(Header))) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Failed to update header to: ", file_path_));
+ }
+ }
+
+ memset(mmapped_file.mutable_region(), '\0', mmapped_file.region_size());
+ return libtextclassifier3::Status::OK;
+}
+
+template <typename ProtoT>
libtextclassifier3::StatusOr<int64_t> FileBackedProtoLog<ProtoT>::GetDiskUsage()
const {
int64_t size = filesystem_->GetDiskUsage(file_path_.c_str());
@@ -781,8 +868,7 @@ libtextclassifier3::Status FileBackedProtoLog<ProtoT>::Iterator::Advance() {
ICING_ASSIGN_OR_RETURN(
int metadata,
ReadProtoMetadata(&mmapped_file_, current_offset_, file_size_));
- int proto_size = metadata & 0x00FFFFFF;
- current_offset_ += sizeof(metadata) + proto_size;
+ current_offset_ += sizeof(metadata) + GetProtoSize(metadata);
}
if (current_offset_ < file_size_) {
@@ -829,7 +915,7 @@ libtextclassifier3::StatusOr<int> FileBackedProtoLog<ProtoT>::ReadProtoMetadata(
ICING_RETURN_IF_ERROR(mmapped_file->Remap(file_offset, metadata_size));
memcpy(&metadata, mmapped_file->region(), metadata_size);
// Checks magic number
- uint8_t stored_k_proto_magic = metadata >> 24;
+ uint8_t stored_k_proto_magic = GetProtoMagic(metadata);
if (stored_k_proto_magic != kProtoMagic) {
return absl_ports::InternalError(IcingStringUtil::StringPrintf(
"Failed to read kProtoMagic, expected %d, actual %d", kProtoMagic,
@@ -842,7 +928,7 @@ template <typename ProtoT>
libtextclassifier3::Status FileBackedProtoLog<ProtoT>::PersistToDisk() {
int64_t file_size = filesystem_->GetFileSize(file_path_.c_str());
if (file_size == header_->rewind_offset) {
- // No changes made, don't need to update the checksum.
+ // No new protos appended, don't need to update the checksum.
return libtextclassifier3::Status::OK;
}
diff --git a/icing/file/file-backed-proto-log_test.cc b/icing/file/file-backed-proto-log_test.cc
index 3a9060d..fad5248 100644
--- a/icing/file/file-backed-proto-log_test.cc
+++ b/icing/file/file-backed-proto-log_test.cc
@@ -48,7 +48,10 @@ class FileBackedProtoLogTest : public ::testing::Test {
// https://stackoverflow.com/a/47368753
FileBackedProtoLogTest() {}
- void SetUp() override { file_path_ = GetTestTempDir() + "/proto_log"; }
+ void SetUp() override {
+ file_path_ = GetTestTempDir() + "/proto_log";
+ filesystem_.DeleteFile(file_path_.c_str());
+ }
void TearDown() override { filesystem_.DeleteFile(file_path_.c_str()); }
@@ -93,7 +96,7 @@ TEST_F(FileBackedProtoLogTest, WriteProtoTooLarge) {
FileBackedProtoLog<DocumentProto>::Options(compress_,
max_proto_size)));
auto proto_log = std::move(create_result.proto_log);
- EXPECT_FALSE(create_result.data_loss);
+ ASSERT_FALSE(create_result.data_loss);
DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
@@ -110,7 +113,7 @@ TEST_F(FileBackedProtoLogTest, ReadProtoWrongKProtoMagic) {
FileBackedProtoLog<DocumentProto>::Options(compress_,
max_proto_size_)));
auto proto_log = std::move(create_result.proto_log);
- EXPECT_FALSE(create_result.data_loss);
+ ASSERT_FALSE(create_result.data_loss);
// Write a proto
DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
@@ -144,7 +147,7 @@ TEST_F(FileBackedProtoLogTest, ReadWriteUncompressedProto) {
FileBackedProtoLog<DocumentProto>::Options(
/*compress_in=*/false, max_proto_size_)));
auto proto_log = std::move(create_result.proto_log);
- EXPECT_FALSE(create_result.data_loss);
+ ASSERT_FALSE(create_result.data_loss);
// Write the first proto
DocumentProto document1 =
@@ -191,7 +194,7 @@ TEST_F(FileBackedProtoLogTest, ReadWriteUncompressedProto) {
FileBackedProtoLog<DocumentProto>::Options(
/*compress_in=*/false, max_proto_size_)));
auto recreated_proto_log = std::move(create_result.proto_log);
- EXPECT_FALSE(create_result.data_loss);
+ ASSERT_FALSE(create_result.data_loss);
// Write a third proto
DocumentProto document3 =
@@ -213,7 +216,7 @@ TEST_F(FileBackedProtoLogTest, ReadWriteCompressedProto) {
FileBackedProtoLog<DocumentProto>::Options(
/*compress_in=*/true, max_proto_size_)));
auto proto_log = std::move(create_result.proto_log);
- EXPECT_FALSE(create_result.data_loss);
+ ASSERT_FALSE(create_result.data_loss);
// Write the first proto
DocumentProto document1 =
@@ -260,7 +263,7 @@ TEST_F(FileBackedProtoLogTest, ReadWriteCompressedProto) {
FileBackedProtoLog<DocumentProto>::Options(
/*compress_in=*/true, max_proto_size_)));
auto recreated_proto_log = std::move(create_result.proto_log);
- EXPECT_FALSE(create_result.data_loss);
+ ASSERT_FALSE(create_result.data_loss);
// Write a third proto
DocumentProto document3 =
@@ -360,7 +363,7 @@ TEST_F(FileBackedProtoLogTest, PersistToDisk) {
FileBackedProtoLog<DocumentProto>::Options(compress_,
max_proto_size_)));
auto proto_log = std::move(create_result.proto_log);
- EXPECT_FALSE(create_result.data_loss);
+ ASSERT_FALSE(create_result.data_loss);
// Write and persist the first proto
ICING_ASSERT_OK_AND_ASSIGN(document1_offset,
@@ -430,7 +433,7 @@ TEST_F(FileBackedProtoLogTest, Iterator) {
FileBackedProtoLog<DocumentProto>::Options(compress_,
max_proto_size_)));
auto proto_log = std::move(create_result.proto_log);
- EXPECT_FALSE(create_result.data_loss);
+ ASSERT_FALSE(create_result.data_loss);
{
// Empty iterator
@@ -481,7 +484,7 @@ TEST_F(FileBackedProtoLogTest, ComputeChecksum) {
FileBackedProtoLog<DocumentProto>::Options(compress_,
max_proto_size_)));
auto proto_log = std::move(create_result.proto_log);
- EXPECT_FALSE(create_result.data_loss);
+ ASSERT_FALSE(create_result.data_loss);
ICING_EXPECT_OK(proto_log->WriteProto(document));
@@ -499,7 +502,7 @@ TEST_F(FileBackedProtoLogTest, ComputeChecksum) {
FileBackedProtoLog<DocumentProto>::Options(compress_,
max_proto_size_)));
auto proto_log = std::move(create_result.proto_log);
- EXPECT_FALSE(create_result.data_loss);
+ ASSERT_FALSE(create_result.data_loss);
// Checksum should be consistent across instances
EXPECT_THAT(proto_log->ComputeChecksum(), IsOkAndHolds(Eq(checksum)));
@@ -514,6 +517,166 @@ TEST_F(FileBackedProtoLogTest, ComputeChecksum) {
}
}
+TEST_F(FileBackedProtoLogTest, EraseProtoShouldSetZero) {
+ DocumentProto document1 =
+ DocumentBuilder().SetKey("namespace", "uri1").Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ FileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ FileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ FileBackedProtoLog<DocumentProto>::Options(compress_,
+ max_proto_size_)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.data_loss);
+
+ // Writes and erases proto
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t document1_offset,
+ proto_log->WriteProto(document1));
+ ICING_ASSERT_OK(proto_log->EraseProto(document1_offset));
+
+ // Checks if the erased area is set to 0.
+ int64_t file_size = filesystem_.GetFileSize(file_path_.c_str());
+ MemoryMappedFile mmapped_file(filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_ONLY);
+
+ // document1_offset + sizeof(int) is the start byte of the proto where
+ // sizeof(int) is the size of the proto metadata.
+ mmapped_file.Remap(document1_offset + sizeof(int), file_size - 1);
+ for (size_t i = 0; i < mmapped_file.region_size(); ++i) {
+ ASSERT_THAT(mmapped_file.region()[i], Eq(0));
+ }
+}
+
+TEST_F(FileBackedProtoLogTest, EraseProtoShouldReturnNotFound) {
+ DocumentProto document1 =
+ DocumentBuilder().SetKey("namespace", "uri1").Build();
+ DocumentProto document2 =
+ DocumentBuilder().SetKey("namespace", "uri2").Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ FileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ FileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ FileBackedProtoLog<DocumentProto>::Options(compress_,
+ max_proto_size_)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.data_loss);
+
+ // Writes 2 protos
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t document1_offset,
+ proto_log->WriteProto(document1));
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t document2_offset,
+ proto_log->WriteProto(document2));
+
+ // Erases the first proto
+ ICING_ASSERT_OK(proto_log->EraseProto(document1_offset));
+
+ // The first proto has been erased.
+ ASSERT_THAT(proto_log->ReadProto(document1_offset),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ // The second proto should be returned.
+ ASSERT_THAT(proto_log->ReadProto(document2_offset),
+ IsOkAndHolds(EqualsProto(document2)));
+}
+
+TEST_F(FileBackedProtoLogTest, ChecksumShouldBeCorrectWithErasedProto) {
+ DocumentProto document1 =
+ DocumentBuilder().SetKey("namespace", "uri1").Build();
+ DocumentProto document2 =
+ DocumentBuilder().SetKey("namespace", "uri2").Build();
+ DocumentProto document3 =
+ DocumentBuilder().SetKey("namespace", "uri3").Build();
+ DocumentProto document4 =
+ DocumentBuilder().SetKey("namespace", "uri4").Build();
+
+ int64_t document2_offset;
+ int64_t document3_offset;
+
+ {
+ // Erase data after the rewind position. This won't update the checksum
+ // immediately.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ FileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ FileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ FileBackedProtoLog<DocumentProto>::Options(compress_,
+ max_proto_size_)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.data_loss);
+
+ // Writes 3 protos
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t document1_offset,
+ proto_log->WriteProto(document1));
+ ICING_ASSERT_OK_AND_ASSIGN(document2_offset,
+ proto_log->WriteProto(document2));
+ ICING_ASSERT_OK_AND_ASSIGN(document3_offset,
+ proto_log->WriteProto(document3));
+
+ // Erases the 1st proto, checksum won't be updated immediately because the
+ // rewind position is 0.
+ ICING_ASSERT_OK(proto_log->EraseProto(document1_offset));
+
+ EXPECT_THAT(proto_log->ComputeChecksum(),
+ IsOkAndHolds(Eq(Crc32(2293202502))));
+ } // New checksum is updated in destructor.
+
+ {
+ // Erase data before the rewind position. This will update the checksum
+ // immediately.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ FileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ FileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ FileBackedProtoLog<DocumentProto>::Options(compress_,
+ max_proto_size_)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.data_loss);
+
+ // Erases the 2nd proto that is now before the rewind position. Checksum is
+ // updated.
+ ICING_ASSERT_OK(proto_log->EraseProto(document2_offset));
+
+ EXPECT_THAT(proto_log->ComputeChecksum(),
+ IsOkAndHolds(Eq(Crc32(639634028))));
+ }
+
+ {
+ // Append data and erase data before the rewind position. This will update
+ // the checksum twice: in EraseProto() and destructor.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ FileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ FileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ FileBackedProtoLog<DocumentProto>::Options(compress_,
+ max_proto_size_)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.data_loss);
+
+ // Append a new document which is after the rewind position.
+ ICING_ASSERT_OK(proto_log->WriteProto(document4));
+
+ // Erases the 3rd proto that is now before the rewind position. Checksum is
+ // updated.
+ ICING_ASSERT_OK(proto_log->EraseProto(document3_offset));
+
+ EXPECT_THAT(proto_log->ComputeChecksum(),
+ IsOkAndHolds(Eq(Crc32(1990198693))));
+ } // Checksum is updated with the newly appended document.
+
+ {
+ // A successful creation means that the checksum matches.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ FileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ FileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ FileBackedProtoLog<DocumentProto>::Options(compress_,
+ max_proto_size_)));
+ auto proto_log = std::move(create_result.proto_log);
+ EXPECT_FALSE(create_result.data_loss);
+ }
+}
+
} // namespace
} // namespace lib
} // namespace icing
diff --git a/icing/file/file-backed-vector.h b/icing/file/file-backed-vector.h
index e4ec0cd..eb89db8 100644
--- a/icing/file/file-backed-vector.h
+++ b/icing/file/file-backed-vector.h
@@ -187,7 +187,7 @@ class FileBackedVector {
//
// Returns:
// OUT_OF_RANGE_ERROR if len < 0 or >= num_elements()
- libtextclassifier3::Status TruncateTo(int32_t len);
+ libtextclassifier3::Status TruncateTo(int32_t new_num_elements);
// Flushes content to underlying file.
//