diff options
author | Terry Wang <tytytyww@google.com> | 2020-09-24 13:39:23 -0700 |
---|---|---|
committer | Terry Wang <tytytyww@google.com> | 2020-09-24 13:39:23 -0700 |
commit | e15b6b66f871a71b73278c34d5c54f648f880c29 (patch) | |
tree | 61e187172a8802fae8e39f04ce69d3ae5c939f2e /icing/file | |
parent | 9f1b9cf4dc93fa7bfee0a3637c93dc5b557aab30 (diff) | |
download | icing-e15b6b66f871a71b73278c34d5c54f648f880c29.tar.gz |
Pull upstream changes.
Change-Id: I44831fdadcdb67f2e19570a35cb4c76faf8397f9
Diffstat (limited to 'icing/file')
-rw-r--r-- | icing/file/file-backed-proto-log.h | 98 | ||||
-rw-r--r-- | icing/file/file-backed-proto-log_test.cc | 185 | ||||
-rw-r--r-- | icing/file/file-backed-vector.h | 2 |
3 files changed, 267 insertions, 18 deletions
diff --git a/icing/file/file-backed-proto-log.h b/icing/file/file-backed-proto-log.h index 62943b8..95511ac 100644 --- a/icing/file/file-backed-proto-log.h +++ b/icing/file/file-backed-proto-log.h @@ -78,6 +78,23 @@ namespace icing { namespace lib { +namespace { + +bool IsEmptyBuffer(const char* buffer, int size) { + return std::all_of(buffer, buffer + size, + [](const char byte) { return byte == 0; }); +} + +// Helper function to get stored proto size from the metadata. +// Metadata format: 8 bits magic + 24 bits size +int GetProtoSize(int metadata) { return metadata & 0x00FFFFFF; } + +// Helper function to get stored proto magic from the metadata. +// Metadata format: 8 bits magic + 24 bits size +uint8_t GetProtoMagic(int metadata) { return metadata >> 24; } + +} // namespace + template <typename ProtoT> class FileBackedProtoLog { public: @@ -206,10 +223,19 @@ class FileBackedProtoLog { // // Returns: // A proto on success + // NOT_FOUND if the proto at the given offset has been erased // OUT_OF_RANGE_ERROR if file_offset exceeds file size // INTERNAL_ERROR on IO error libtextclassifier3::StatusOr<ProtoT> ReadProto(int64_t file_offset) const; + // Erases the data of a proto located at file_offset from the file. + // + // Returns: + // OK on success + // OUT_OF_RANGE_ERROR if file_offset exceeds file size + // INTERNAL_ERROR on IO error + libtextclassifier3::Status EraseProto(int64_t file_offset); + // Calculates and returns the disk usage in bytes. Rounds up to the nearest // block size. // @@ -239,7 +265,7 @@ class FileBackedProtoLog { Iterator(const Filesystem& filesystem, const std::string& file_path, int64_t initial_offset); - // Advances to the position of next proto. + // Advances to the position of next proto whether it has been erased or not. // // Returns: // OK on success @@ -716,10 +742,15 @@ libtextclassifier3::StatusOr<ProtoT> FileBackedProtoLog<ProtoT>::ReadProto( int metadata, ReadProtoMetadata(&mmapped_file, file_offset, file_size)); // Copy out however many bytes it says the proto is - int stored_size = metadata & 0x00FFFFFF; + int stored_size = GetProtoSize(metadata); ICING_RETURN_IF_ERROR( mmapped_file.Remap(file_offset + sizeof(metadata), stored_size)); + + if (IsEmptyBuffer(mmapped_file.region(), mmapped_file.region_size())) { + return absl_ports::NotFoundError("The proto data has been erased."); + } + google::protobuf::io::ArrayInputStream proto_stream( mmapped_file.mutable_region(), stored_size); @@ -736,6 +767,62 @@ libtextclassifier3::StatusOr<ProtoT> FileBackedProtoLog<ProtoT>::ReadProto( } template <typename ProtoT> +libtextclassifier3::Status FileBackedProtoLog<ProtoT>::EraseProto( + int64_t file_offset) { + int64_t file_size = filesystem_->GetFileSize(fd_.get()); + if (file_offset >= file_size) { + // file_size points to the next byte to write at, so subtract one to get the + // inclusive, actual size of file. + return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf( + "Trying to erase data at a location, %lld, " + "out of range of the file size, %lld", + static_cast<long long>(file_offset), + static_cast<long long>(file_size - 1))); + } + + MemoryMappedFile mmapped_file( + *filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC); + + // Read out the metadata + ICING_ASSIGN_OR_RETURN( + int metadata, ReadProtoMetadata(&mmapped_file, file_offset, file_size)); + + ICING_RETURN_IF_ERROR(mmapped_file.Remap(file_offset + sizeof(metadata), + GetProtoSize(metadata))); + + // We need to update the crc checksum if the erased area is before the rewind + // position. + if (file_offset + sizeof(metadata) < header_->rewind_offset) { + // We need to calculate [original string xor 0s]. + // The xored string is the same as the original string because 0 xor 0 = 0, + // 1 xor 0 = 1. + const std::string_view xored_str(mmapped_file.region(), + mmapped_file.region_size()); + + Crc32 crc(header_->log_checksum); + ICING_ASSIGN_OR_RETURN( + uint32_t new_crc, + crc.UpdateWithXor( + xored_str, + /*full_data_size=*/header_->rewind_offset - sizeof(Header), + /*position=*/file_offset + sizeof(metadata) - sizeof(Header))); + + header_->log_checksum = new_crc; + header_->header_checksum = header_->CalculateHeaderChecksum(); + + if (!filesystem_->PWrite(fd_.get(), /*offset=*/0, header_.get(), + sizeof(Header))) { + return absl_ports::InternalError( + absl_ports::StrCat("Failed to update header to: ", file_path_)); + } + } + + memset(mmapped_file.mutable_region(), '\0', mmapped_file.region_size()); + return libtextclassifier3::Status::OK; +} + +template <typename ProtoT> libtextclassifier3::StatusOr<int64_t> FileBackedProtoLog<ProtoT>::GetDiskUsage() const { int64_t size = filesystem_->GetDiskUsage(file_path_.c_str()); @@ -781,8 +868,7 @@ libtextclassifier3::Status FileBackedProtoLog<ProtoT>::Iterator::Advance() { ICING_ASSIGN_OR_RETURN( int metadata, ReadProtoMetadata(&mmapped_file_, current_offset_, file_size_)); - int proto_size = metadata & 0x00FFFFFF; - current_offset_ += sizeof(metadata) + proto_size; + current_offset_ += sizeof(metadata) + GetProtoSize(metadata); } if (current_offset_ < file_size_) { @@ -829,7 +915,7 @@ libtextclassifier3::StatusOr<int> FileBackedProtoLog<ProtoT>::ReadProtoMetadata( ICING_RETURN_IF_ERROR(mmapped_file->Remap(file_offset, metadata_size)); memcpy(&metadata, mmapped_file->region(), metadata_size); // Checks magic number - uint8_t stored_k_proto_magic = metadata >> 24; + uint8_t stored_k_proto_magic = GetProtoMagic(metadata); if (stored_k_proto_magic != kProtoMagic) { return absl_ports::InternalError(IcingStringUtil::StringPrintf( "Failed to read kProtoMagic, expected %d, actual %d", kProtoMagic, @@ -842,7 +928,7 @@ template <typename ProtoT> libtextclassifier3::Status FileBackedProtoLog<ProtoT>::PersistToDisk() { int64_t file_size = filesystem_->GetFileSize(file_path_.c_str()); if (file_size == header_->rewind_offset) { - // No changes made, don't need to update the checksum. + // No new protos appended, don't need to update the checksum. return libtextclassifier3::Status::OK; } diff --git a/icing/file/file-backed-proto-log_test.cc b/icing/file/file-backed-proto-log_test.cc index 3a9060d..fad5248 100644 --- a/icing/file/file-backed-proto-log_test.cc +++ b/icing/file/file-backed-proto-log_test.cc @@ -48,7 +48,10 @@ class FileBackedProtoLogTest : public ::testing::Test { // https://stackoverflow.com/a/47368753 FileBackedProtoLogTest() {} - void SetUp() override { file_path_ = GetTestTempDir() + "/proto_log"; } + void SetUp() override { + file_path_ = GetTestTempDir() + "/proto_log"; + filesystem_.DeleteFile(file_path_.c_str()); + } void TearDown() override { filesystem_.DeleteFile(file_path_.c_str()); } @@ -93,7 +96,7 @@ TEST_F(FileBackedProtoLogTest, WriteProtoTooLarge) { FileBackedProtoLog<DocumentProto>::Options(compress_, max_proto_size))); auto proto_log = std::move(create_result.proto_log); - EXPECT_FALSE(create_result.data_loss); + ASSERT_FALSE(create_result.data_loss); DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build(); @@ -110,7 +113,7 @@ TEST_F(FileBackedProtoLogTest, ReadProtoWrongKProtoMagic) { FileBackedProtoLog<DocumentProto>::Options(compress_, max_proto_size_))); auto proto_log = std::move(create_result.proto_log); - EXPECT_FALSE(create_result.data_loss); + ASSERT_FALSE(create_result.data_loss); // Write a proto DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build(); @@ -144,7 +147,7 @@ TEST_F(FileBackedProtoLogTest, ReadWriteUncompressedProto) { FileBackedProtoLog<DocumentProto>::Options( /*compress_in=*/false, max_proto_size_))); auto proto_log = std::move(create_result.proto_log); - EXPECT_FALSE(create_result.data_loss); + ASSERT_FALSE(create_result.data_loss); // Write the first proto DocumentProto document1 = @@ -191,7 +194,7 @@ TEST_F(FileBackedProtoLogTest, ReadWriteUncompressedProto) { FileBackedProtoLog<DocumentProto>::Options( /*compress_in=*/false, max_proto_size_))); auto recreated_proto_log = std::move(create_result.proto_log); - EXPECT_FALSE(create_result.data_loss); + ASSERT_FALSE(create_result.data_loss); // Write a third proto DocumentProto document3 = @@ -213,7 +216,7 @@ TEST_F(FileBackedProtoLogTest, ReadWriteCompressedProto) { FileBackedProtoLog<DocumentProto>::Options( /*compress_in=*/true, max_proto_size_))); auto proto_log = std::move(create_result.proto_log); - EXPECT_FALSE(create_result.data_loss); + ASSERT_FALSE(create_result.data_loss); // Write the first proto DocumentProto document1 = @@ -260,7 +263,7 @@ TEST_F(FileBackedProtoLogTest, ReadWriteCompressedProto) { FileBackedProtoLog<DocumentProto>::Options( /*compress_in=*/true, max_proto_size_))); auto recreated_proto_log = std::move(create_result.proto_log); - EXPECT_FALSE(create_result.data_loss); + ASSERT_FALSE(create_result.data_loss); // Write a third proto DocumentProto document3 = @@ -360,7 +363,7 @@ TEST_F(FileBackedProtoLogTest, PersistToDisk) { FileBackedProtoLog<DocumentProto>::Options(compress_, max_proto_size_))); auto proto_log = std::move(create_result.proto_log); - EXPECT_FALSE(create_result.data_loss); + ASSERT_FALSE(create_result.data_loss); // Write and persist the first proto ICING_ASSERT_OK_AND_ASSIGN(document1_offset, @@ -430,7 +433,7 @@ TEST_F(FileBackedProtoLogTest, Iterator) { FileBackedProtoLog<DocumentProto>::Options(compress_, max_proto_size_))); auto proto_log = std::move(create_result.proto_log); - EXPECT_FALSE(create_result.data_loss); + ASSERT_FALSE(create_result.data_loss); { // Empty iterator @@ -481,7 +484,7 @@ TEST_F(FileBackedProtoLogTest, ComputeChecksum) { FileBackedProtoLog<DocumentProto>::Options(compress_, max_proto_size_))); auto proto_log = std::move(create_result.proto_log); - EXPECT_FALSE(create_result.data_loss); + ASSERT_FALSE(create_result.data_loss); ICING_EXPECT_OK(proto_log->WriteProto(document)); @@ -499,7 +502,7 @@ TEST_F(FileBackedProtoLogTest, ComputeChecksum) { FileBackedProtoLog<DocumentProto>::Options(compress_, max_proto_size_))); auto proto_log = std::move(create_result.proto_log); - EXPECT_FALSE(create_result.data_loss); + ASSERT_FALSE(create_result.data_loss); // Checksum should be consistent across instances EXPECT_THAT(proto_log->ComputeChecksum(), IsOkAndHolds(Eq(checksum))); @@ -514,6 +517,166 @@ TEST_F(FileBackedProtoLogTest, ComputeChecksum) { } } +TEST_F(FileBackedProtoLogTest, EraseProtoShouldSetZero) { + DocumentProto document1 = + DocumentBuilder().SetKey("namespace", "uri1").Build(); + + ICING_ASSERT_OK_AND_ASSIGN( + FileBackedProtoLog<DocumentProto>::CreateResult create_result, + FileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + FileBackedProtoLog<DocumentProto>::Options(compress_, + max_proto_size_))); + auto proto_log = std::move(create_result.proto_log); + ASSERT_FALSE(create_result.data_loss); + + // Writes and erases proto + ICING_ASSERT_OK_AND_ASSIGN(int64_t document1_offset, + proto_log->WriteProto(document1)); + ICING_ASSERT_OK(proto_log->EraseProto(document1_offset)); + + // Checks if the erased area is set to 0. + int64_t file_size = filesystem_.GetFileSize(file_path_.c_str()); + MemoryMappedFile mmapped_file(filesystem_, file_path_, + MemoryMappedFile::Strategy::READ_ONLY); + + // document1_offset + sizeof(int) is the start byte of the proto where + // sizeof(int) is the size of the proto metadata. + mmapped_file.Remap(document1_offset + sizeof(int), file_size - 1); + for (size_t i = 0; i < mmapped_file.region_size(); ++i) { + ASSERT_THAT(mmapped_file.region()[i], Eq(0)); + } +} + +TEST_F(FileBackedProtoLogTest, EraseProtoShouldReturnNotFound) { + DocumentProto document1 = + DocumentBuilder().SetKey("namespace", "uri1").Build(); + DocumentProto document2 = + DocumentBuilder().SetKey("namespace", "uri2").Build(); + + ICING_ASSERT_OK_AND_ASSIGN( + FileBackedProtoLog<DocumentProto>::CreateResult create_result, + FileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + FileBackedProtoLog<DocumentProto>::Options(compress_, + max_proto_size_))); + auto proto_log = std::move(create_result.proto_log); + ASSERT_FALSE(create_result.data_loss); + + // Writes 2 protos + ICING_ASSERT_OK_AND_ASSIGN(int64_t document1_offset, + proto_log->WriteProto(document1)); + ICING_ASSERT_OK_AND_ASSIGN(int64_t document2_offset, + proto_log->WriteProto(document2)); + + // Erases the first proto + ICING_ASSERT_OK(proto_log->EraseProto(document1_offset)); + + // The first proto has been erased. + ASSERT_THAT(proto_log->ReadProto(document1_offset), + StatusIs(libtextclassifier3::StatusCode::NOT_FOUND)); + // The second proto should be returned. + ASSERT_THAT(proto_log->ReadProto(document2_offset), + IsOkAndHolds(EqualsProto(document2))); +} + +TEST_F(FileBackedProtoLogTest, ChecksumShouldBeCorrectWithErasedProto) { + DocumentProto document1 = + DocumentBuilder().SetKey("namespace", "uri1").Build(); + DocumentProto document2 = + DocumentBuilder().SetKey("namespace", "uri2").Build(); + DocumentProto document3 = + DocumentBuilder().SetKey("namespace", "uri3").Build(); + DocumentProto document4 = + DocumentBuilder().SetKey("namespace", "uri4").Build(); + + int64_t document2_offset; + int64_t document3_offset; + + { + // Erase data after the rewind position. This won't update the checksum + // immediately. + ICING_ASSERT_OK_AND_ASSIGN( + FileBackedProtoLog<DocumentProto>::CreateResult create_result, + FileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + FileBackedProtoLog<DocumentProto>::Options(compress_, + max_proto_size_))); + auto proto_log = std::move(create_result.proto_log); + ASSERT_FALSE(create_result.data_loss); + + // Writes 3 protos + ICING_ASSERT_OK_AND_ASSIGN(int64_t document1_offset, + proto_log->WriteProto(document1)); + ICING_ASSERT_OK_AND_ASSIGN(document2_offset, + proto_log->WriteProto(document2)); + ICING_ASSERT_OK_AND_ASSIGN(document3_offset, + proto_log->WriteProto(document3)); + + // Erases the 1st proto, checksum won't be updated immediately because the + // rewind position is 0. + ICING_ASSERT_OK(proto_log->EraseProto(document1_offset)); + + EXPECT_THAT(proto_log->ComputeChecksum(), + IsOkAndHolds(Eq(Crc32(2293202502)))); + } // New checksum is updated in destructor. + + { + // Erase data before the rewind position. This will update the checksum + // immediately. + ICING_ASSERT_OK_AND_ASSIGN( + FileBackedProtoLog<DocumentProto>::CreateResult create_result, + FileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + FileBackedProtoLog<DocumentProto>::Options(compress_, + max_proto_size_))); + auto proto_log = std::move(create_result.proto_log); + ASSERT_FALSE(create_result.data_loss); + + // Erases the 2nd proto that is now before the rewind position. Checksum is + // updated. + ICING_ASSERT_OK(proto_log->EraseProto(document2_offset)); + + EXPECT_THAT(proto_log->ComputeChecksum(), + IsOkAndHolds(Eq(Crc32(639634028)))); + } + + { + // Append data and erase data before the rewind position. This will update + // the checksum twice: in EraseProto() and destructor. + ICING_ASSERT_OK_AND_ASSIGN( + FileBackedProtoLog<DocumentProto>::CreateResult create_result, + FileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + FileBackedProtoLog<DocumentProto>::Options(compress_, + max_proto_size_))); + auto proto_log = std::move(create_result.proto_log); + ASSERT_FALSE(create_result.data_loss); + + // Append a new document which is after the rewind position. + ICING_ASSERT_OK(proto_log->WriteProto(document4)); + + // Erases the 3rd proto that is now before the rewind position. Checksum is + // updated. + ICING_ASSERT_OK(proto_log->EraseProto(document3_offset)); + + EXPECT_THAT(proto_log->ComputeChecksum(), + IsOkAndHolds(Eq(Crc32(1990198693)))); + } // Checksum is updated with the newly appended document. + + { + // A successful creation means that the checksum matches. + ICING_ASSERT_OK_AND_ASSIGN( + FileBackedProtoLog<DocumentProto>::CreateResult create_result, + FileBackedProtoLog<DocumentProto>::Create( + &filesystem_, file_path_, + FileBackedProtoLog<DocumentProto>::Options(compress_, + max_proto_size_))); + auto proto_log = std::move(create_result.proto_log); + EXPECT_FALSE(create_result.data_loss); + } +} + } // namespace } // namespace lib } // namespace icing diff --git a/icing/file/file-backed-vector.h b/icing/file/file-backed-vector.h index e4ec0cd..eb89db8 100644 --- a/icing/file/file-backed-vector.h +++ b/icing/file/file-backed-vector.h @@ -187,7 +187,7 @@ class FileBackedVector { // // Returns: // OUT_OF_RANGE_ERROR if len < 0 or >= num_elements() - libtextclassifier3::Status TruncateTo(int32_t len); + libtextclassifier3::Status TruncateTo(int32_t new_num_elements); // Flushes content to underlying file. // |