aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCassie Wang <cassiewang@google.com>2021-07-14 18:21:35 -0700
committerCassie Wang <cassiewang@google.com>2021-07-14 18:21:35 -0700
commit1b85c23555eab498907ef7527575c4fb1adb4f2b (patch)
tree99284cd6ba12f4f3e3d6774502ea77626a2cad68
parentae70ff235f241a6c2beb469029ac29605f6bd51d (diff)
downloadicing-1b85c23555eab498907ef7527575c4fb1adb4f2b.tar.gz
Sync from upstream.
Descriptions: ============ Lower the memory usage cap from 16MB to 4MB. ============ Modify file-backed-vector to 1) include a check that the size of the file is greater than or equal to the number of elements * sizeof(element) and 2) use PWrite to force the system to allocate disk blocks so that we avoid the risk of trying to mmap a region of the file that the system hasn't allocated a block for (and is unable to do so). ============ Fix unused-error and reorder-ctor issues that surfaced in framework ============ Bug: 191444782 Bug: 193715176 Change-Id: Ic76a46700e770b6740a69c4777f88639c8c4e946
-rw-r--r--icing/file/file-backed-proto-log.h2
-rw-r--r--icing/file/file-backed-vector.h42
-rw-r--r--icing/file/file-backed-vector_test.cc197
-rw-r--r--icing/icing-search-engine_test.cc3
-rw-r--r--icing/index/main/posting-list-free.h2
-rw-r--r--icing/performance-configuration.cc5
-rw-r--r--icing/store/document-store.cc14
-rw-r--r--icing/store/document-store.h22
-rw-r--r--synced_AOSP_CL_number.txt2
9 files changed, 236 insertions, 53 deletions
diff --git a/icing/file/file-backed-proto-log.h b/icing/file/file-backed-proto-log.h
index 4302dce..b2b37e8 100644
--- a/icing/file/file-backed-proto-log.h
+++ b/icing/file/file-backed-proto-log.h
@@ -406,7 +406,6 @@ class FileBackedProtoLog {
// INTERNAL_ERROR if the metadata is invalid or any IO errors happen
static libtextclassifier3::StatusOr<int> ReadProtoMetadata(
MemoryMappedFile* mmapped_file, int64_t file_offset, int64_t file_size);
- std::unique_ptr<Header> header_;
// Magic number added in front of every proto. Used when reading out protos
// as a first check for corruption in each entry in the file. Even if there is
@@ -435,6 +434,7 @@ class FileBackedProtoLog {
ScopedFd fd_;
const Filesystem* const filesystem_;
const std::string file_path_;
+ std::unique_ptr<Header> header_;
};
template <typename ProtoT>
diff --git a/icing/file/file-backed-vector.h b/icing/file/file-backed-vector.h
index 2443cb2..61f6923 100644
--- a/icing/file/file-backed-vector.h
+++ b/icing/file/file-backed-vector.h
@@ -423,13 +423,6 @@ FileBackedVector<T>::InitializeExistingFile(
absl_ports::StrCat("Invalid header kMagic for ", file_path));
}
- // Mmap the content of the vector, excluding the header so its easier to
- // access elements from the mmapped region
- auto mmapped_file =
- std::make_unique<MemoryMappedFile>(filesystem, file_path, mmap_strategy);
- ICING_RETURN_IF_ERROR(
- mmapped_file->Remap(sizeof(Header), file_size - sizeof(Header)));
-
// Check header
if (header->header_checksum != header->CalculateHeaderChecksum()) {
return absl_ports::FailedPreconditionError(
@@ -442,6 +435,20 @@ FileBackedVector<T>::InitializeExistingFile(
header->element_size));
}
+ int64_t min_file_size = header->num_elements * sizeof(T) + sizeof(Header);
+ if (min_file_size > file_size) {
+ return absl_ports::InternalError(IcingStringUtil::StringPrintf(
+ "Inconsistent file size, expected %zd, actual %d", min_file_size,
+ file_size));
+ }
+
+ // Mmap the content of the vector, excluding the header so its easier to
+ // access elements from the mmapped region
+ auto mmapped_file =
+ std::make_unique<MemoryMappedFile>(filesystem, file_path, mmap_strategy);
+ ICING_RETURN_IF_ERROR(
+ mmapped_file->Remap(sizeof(Header), file_size - sizeof(Header)));
+
// Check vector contents
Crc32 vector_checksum;
std::string_view vector_contents(
@@ -591,9 +598,24 @@ libtextclassifier3::Status FileBackedVector<T>::GrowIfNecessary(
least_file_size_needed = math_util::RoundUpTo(
least_file_size_needed,
int64_t{FileBackedVector<T>::kGrowElements * sizeof(T)});
- if (!filesystem_->Grow(file_path_.c_str(), least_file_size_needed)) {
- return absl_ports::InternalError(
- absl_ports::StrCat("Couldn't grow file ", file_path_));
+
+ // We use PWrite here rather than Grow because Grow doesn't actually allocate
+ // an underlying disk block. This can lead to problems with mmap because mmap
+ // has no effective way to signal that it was impossible to allocate the disk
+ // block and ends up crashing instead. PWrite will force the allocation of
+ // these blocks, which will ensure that any failure to grow will surface here.
+ int64_t page_size = getpagesize();
+ auto buf = std::make_unique<uint8_t[]>(page_size);
+ int64_t size_to_write = page_size - (current_file_size % page_size);
+ ScopedFd sfd(filesystem_->OpenForWrite(file_path_.c_str()));
+ while (current_file_size < least_file_size_needed) {
+ if (!filesystem_->PWrite(sfd.get(), current_file_size, buf.get(),
+ size_to_write)) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Couldn't grow file ", file_path_));
+ }
+ current_file_size += size_to_write;
+ size_to_write = page_size - (current_file_size % page_size);
}
ICING_RETURN_IF_ERROR(mmapped_file_->Remap(
diff --git a/icing/file/file-backed-vector_test.cc b/icing/file/file-backed-vector_test.cc
index bc2fef6..b05ce2d 100644
--- a/icing/file/file-backed-vector_test.cc
+++ b/icing/file/file-backed-vector_test.cc
@@ -32,6 +32,7 @@
#include "icing/util/logging.h"
using ::testing::Eq;
+using ::testing::IsTrue;
using ::testing::Pointee;
namespace icing {
@@ -278,7 +279,6 @@ TEST_F(FileBackedVectorTest, Grow) {
filesystem_, file_path_,
MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(0)));
-
EXPECT_THAT(vector->Set(kMaxNumElts + 11, 'a'),
StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
EXPECT_THAT(vector->Set(-1, 'a'),
@@ -318,25 +318,32 @@ TEST_F(FileBackedVectorTest, GrowsInChunks) {
filesystem_, file_path_,
MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
- // Our initial file size should just be the size of the header
- EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()),
- Eq(sizeof(FileBackedVector<char>::Header)));
+ // Our initial file size should just be the size of the header. Disk usage
+ // will indicate that one block has been allocated, which contains the header.
+ int header_size = sizeof(FileBackedVector<char>::Header);
+ int page_size = getpagesize();
+ EXPECT_THAT(filesystem_.GetFileSize(fd_), Eq(header_size));
+ EXPECT_THAT(filesystem_.GetDiskUsage(fd_), Eq(page_size));
- // Once we add something though, we'll grow to kGrowElements big
+ // Once we add something though, we'll grow to be kGrowElements big. From this
+ // point on, file size and disk usage should be the same because Growing will
+ // explicitly allocate the number of blocks needed to accomodate the file.
Insert(vector.get(), 0, "a");
- EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()),
- Eq(kGrowElements * sizeof(int)));
+ int file_size = kGrowElements * sizeof(int);
+ EXPECT_THAT(filesystem_.GetFileSize(fd_), Eq(file_size));
+ EXPECT_THAT(filesystem_.GetDiskUsage(fd_), Eq(file_size));
// Should still be the same size, don't need to grow underlying file
Insert(vector.get(), 1, "b");
- EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()),
- Eq(kGrowElements * sizeof(int)));
+ EXPECT_THAT(filesystem_.GetFileSize(fd_), Eq(file_size));
+ EXPECT_THAT(filesystem_.GetDiskUsage(fd_), Eq(file_size));
// Now we grow by a kGrowElements chunk, so the underlying file is 2
// kGrowElements big
+ file_size *= 2;
Insert(vector.get(), 2, std::string(kGrowElements, 'c'));
- EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()),
- Eq(kGrowElements * 2 * sizeof(int)));
+ EXPECT_THAT(filesystem_.GetFileSize(fd_), Eq(file_size));
+ EXPECT_THAT(filesystem_.GetDiskUsage(fd_), Eq(file_size));
// Destroy/persist the contents.
vector.reset();
@@ -463,6 +470,174 @@ TEST_F(FileBackedVectorTest, TruncateAndReReadFile) {
}
}
+TEST_F(FileBackedVectorTest, InitFileTooSmallForHeaderFails) {
+ {
+ // 1. Create a vector with a few elements.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> vector,
+ FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ Insert(vector.get(), 0, "A");
+ Insert(vector.get(), 1, "Z");
+ ASSERT_THAT(vector->PersistToDisk(), IsOk());
+ }
+
+ // 2. Shrink the file to be smaller than the header.
+ filesystem_.Truncate(fd_, sizeof(FileBackedVector<char>::Header) - 1);
+
+ {
+ // 3. Attempt to create the file and confirm that it fails.
+ EXPECT_THAT(FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+ }
+}
+
+TEST_F(FileBackedVectorTest, InitWrongDataSizeFails) {
+ {
+ // 1. Create a vector with a few elements.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> vector,
+ FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ Insert(vector.get(), 0, "A");
+ Insert(vector.get(), 1, "Z");
+ ASSERT_THAT(vector->PersistToDisk(), IsOk());
+ }
+
+ {
+ // 2. Attempt to create the file with a different element size and confirm
+ // that it fails.
+ EXPECT_THAT(FileBackedVector<int>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+ }
+}
+
+TEST_F(FileBackedVectorTest, InitCorruptHeaderFails) {
+ {
+ // 1. Create a vector with a few elements.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> vector,
+ FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ Insert(vector.get(), 0, "A");
+ Insert(vector.get(), 1, "Z");
+ ASSERT_THAT(vector->PersistToDisk(), IsOk());
+ }
+
+ // 2. Modify the header, but don't update the checksum. This would be similar
+ // to corruption of the header.
+ FileBackedVector<char>::Header header;
+ ASSERT_THAT(filesystem_.PRead(fd_, &header, sizeof(header), /*offset=*/0),
+ IsTrue());
+ header.num_elements = 1;
+ ASSERT_THAT(filesystem_.PWrite(fd_, /*offset=*/0, &header, sizeof(header)),
+ IsTrue());
+
+ {
+ // 3. Attempt to create the file with a header that doesn't match its
+ // checksum and confirm that it fails.
+ EXPECT_THAT(FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ }
+}
+
+TEST_F(FileBackedVectorTest, InitHeaderElementSizeTooBigFails) {
+ {
+ // 1. Create a vector with a few elements.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> vector,
+ FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ Insert(vector.get(), 0, "A");
+ Insert(vector.get(), 1, "Z");
+ ASSERT_THAT(vector->PersistToDisk(), IsOk());
+ }
+
+ // 2. Modify the header so that the number of elements exceeds the actual size
+ // of the underlying file.
+ FileBackedVector<char>::Header header;
+ ASSERT_THAT(filesystem_.PRead(fd_, &header, sizeof(header), /*offset=*/0),
+ IsTrue());
+ int64_t file_size = filesystem_.GetFileSize(fd_);
+ int64_t allocated_elements_size = file_size - sizeof(header);
+ header.num_elements = (allocated_elements_size / sizeof(char)) + 1;
+ header.header_checksum = header.CalculateHeaderChecksum();
+ ASSERT_THAT(filesystem_.PWrite(fd_, /*offset=*/0, &header, sizeof(header)),
+ IsTrue());
+
+ {
+ // 3. Attempt to create the file with num_elements that is larger than the
+ // underlying file and confirm that it fails.
+ EXPECT_THAT(FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+ }
+}
+
+TEST_F(FileBackedVectorTest, InitCorruptElementsFails) {
+ {
+ // 1. Create a vector with a few elements.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> vector,
+ FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ Insert(vector.get(), 0, "A");
+ Insert(vector.get(), 1, "Z");
+ ASSERT_THAT(vector->PersistToDisk(), IsOk());
+ }
+
+ // 2. Overwrite the values of the first two elements.
+ std::string corrupted_content = "BY";
+ ASSERT_THAT(
+ filesystem_.PWrite(fd_, /*offset=*/sizeof(FileBackedVector<char>::Header),
+ corrupted_content.c_str(), corrupted_content.length()),
+ IsTrue());
+
+ {
+ // 3. Attempt to create the file with elements that don't match their
+ // checksum and confirm that it fails.
+ EXPECT_THAT(FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ }
+}
+
+TEST_F(FileBackedVectorTest, InitNormalSucceeds) {
+ {
+ // 1. Create a vector with a few elements.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<FileBackedVector<char>> vector,
+ FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+ Insert(vector.get(), 0, "A");
+ Insert(vector.get(), 1, "Z");
+ ASSERT_THAT(vector->PersistToDisk(), IsOk());
+ }
+
+ {
+ // 2. Attempt to create the file with a completely valid header and elements
+ // region. This should succeed.
+ EXPECT_THAT(FileBackedVector<char>::Create(
+ filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC),
+ IsOk());
+ }
+}
+
} // namespace
} // namespace lib
diff --git a/icing/icing-search-engine_test.cc b/icing/icing-search-engine_test.cc
index 1ee2d63..ef4625a 100644
--- a/icing/icing-search-engine_test.cc
+++ b/icing/icing-search-engine_test.cc
@@ -101,7 +101,10 @@ constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN =
constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_NONE =
StringIndexingConfig_TokenizerType_Code_NONE;
+#ifndef ICING_JNI_TEST
constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY;
+#endif // !ICING_JNI_TEST
+
constexpr TermMatchType_Code MATCH_PREFIX = TermMatchType_Code_PREFIX;
constexpr TermMatchType_Code MATCH_NONE = TermMatchType_Code_UNKNOWN;
diff --git a/icing/index/main/posting-list-free.h b/icing/index/main/posting-list-free.h
index 4b27401..4f06057 100644
--- a/icing/index/main/posting-list-free.h
+++ b/icing/index/main/posting-list-free.h
@@ -115,7 +115,7 @@ class PostingListFree {
// bytes which will store the next posting list index, the rest are unused and
// can be anything.
uint8_t *posting_list_buffer_;
- uint32_t size_in_bytes_;
+ [[maybe_unused]] uint32_t size_in_bytes_;
static_assert(sizeof(PostingListIndex) <=
posting_list_utils::min_posting_list_size(),
diff --git a/icing/performance-configuration.cc b/icing/performance-configuration.cc
index 45b03d3..4020dd0 100644
--- a/icing/performance-configuration.cc
+++ b/icing/performance-configuration.cc
@@ -55,11 +55,12 @@ constexpr int kMaxQueryLength = 23000;
constexpr int kDefaultNumToScore = 30000;
// New Android devices nowadays all allow more than 16 MB memory per app. Using
-// that as a guideline, we set 16 MB as the safe memory threshold.
+// that as a guideline and being more conservative, we set 4 MB as the safe
+// memory threshold.
// TODO(b/150029642): Android apps / framework have better understanding of how
// much memory is allowed, so it would be better to let clients pass in this
// value.
-constexpr int kSafeMemoryUsage = 16 * 1024 * 1024; // 16MB
+constexpr int kSafeMemoryUsage = 4 * 1024 * 1024; // 4MB
// The maximum number of hits that can fit below the kSafeMemoryUsage threshold.
constexpr int kMaxNumTotalHits = kSafeMemoryUsage / sizeof(ScoredDocumentHit);
diff --git a/icing/store/document-store.cc b/icing/store/document-store.cc
index 6479b97..226a96b 100644
--- a/icing/store/document-store.cc
+++ b/icing/store/document-store.cc
@@ -64,7 +64,6 @@ namespace {
// Used in DocumentId mapper to mark a document as deleted
constexpr int64_t kDocDeletedFlag = -1;
-constexpr char kDocumentLogFilename[] = "document_log";
constexpr char kDocumentIdMapperFilename[] = "document_id_mapper";
constexpr char kDocumentStoreHeaderFilename[] = "document_store_header";
constexpr char kScoreCacheFilename[] = "score_cache";
@@ -1578,6 +1577,7 @@ libtextclassifier3::Status DocumentStore::OptimizeInto(
int size = document_id_mapper_->num_elements();
int num_deleted = 0;
int num_expired = 0;
+ UsageStore::UsageScores default_usage;
for (DocumentId document_id = 0; document_id < size; document_id++) {
auto document_or = Get(document_id, /*clear_internal_fields=*/false);
if (absl_ports::IsNotFound(document_or.status())) {
@@ -1626,10 +1626,14 @@ libtextclassifier3::Status DocumentStore::OptimizeInto(
// Copy over usage scores.
ICING_ASSIGN_OR_RETURN(UsageStore::UsageScores usage_scores,
usage_store_->GetUsageScores(document_id));
-
- DocumentId new_document_id = new_document_id_or.ValueOrDie();
- ICING_RETURN_IF_ERROR(
- new_doc_store->SetUsageScores(new_document_id, usage_scores));
+ if (!(usage_scores == default_usage)) {
+ // If the usage scores for this document are the default (no usage), then
+ // don't bother setting it. No need to possibly allocate storage if
+ // there's nothing interesting to store.
+ DocumentId new_document_id = new_document_id_or.ValueOrDie();
+ ICING_RETURN_IF_ERROR(
+ new_doc_store->SetUsageScores(new_document_id, usage_scores));
+ }
}
if (stats != nullptr) {
stats->set_num_original_documents(size);
diff --git a/icing/store/document-store.h b/icing/store/document-store.h
index a60aab1..c85c989 100644
--- a/icing/store/document-store.h
+++ b/icing/store/document-store.h
@@ -497,28 +497,6 @@ class DocumentStore {
bool force_recovery_and_revalidate_documents,
InitializeStatsProto* initialize_stats);
- // Initializes a new DocumentStore and sets up any underlying files.
- //
- // Returns:
- // Data loss status on success, effectively always DataLoss::NONE
- // INTERNAL on I/O error
- libtextclassifier3::StatusOr<DataLoss> InitializeNewStore(
- InitializeStatsProto* initialize_stats);
-
- // Initializes a DocumentStore over an existing directory of files.
- //
- // stats will be set if non-null
- //
- // Returns:
- // Data loss status on success
- // INTERNAL on I/O error
- libtextclassifier3::StatusOr<DataLoss> InitializeExistingStore(
- bool force_recovery_and_revalidate_documents,
- InitializeStatsProto* initialize_stats);
-
- libtextclassifier3::StatusOr<DataLoss> MigrateFromV0ToV1(
- InitializeStatsProto* initialize_stats);
-
// Creates sub-components and verifies the integrity of each sub-component.
// This assumes that the the underlying files already exist, and will return
// an error if it doesn't find what it's expecting.
diff --git a/synced_AOSP_CL_number.txt b/synced_AOSP_CL_number.txt
index 71ea9fe..b5282cf 100644
--- a/synced_AOSP_CL_number.txt
+++ b/synced_AOSP_CL_number.txt
@@ -1 +1 @@
-set(synced_AOSP_CL_number=382354974)
+set(synced_AOSP_CL_number=384818601)