summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAkilesh Kailash <akailash@google.com>2021-07-29 00:55:14 +0000
committerAkilesh Kailash <akailash@google.com>2021-07-29 16:30:01 +0000
commit533c2f6d55f3036d21fdddfaa81633e5850848cb (patch)
treef0c6bc78433c083a7355226d31cf0a9bbae69de3
parentf3fea3777299843b34afd5f5453e80183fa3ad99 (diff)
downloadcore-533c2f6d55f3036d21fdddfaa81633e5850848cb.tar.gz
Crash during OTA merge may lead to blocks with stale data
This is a corner case wherein a crash during OTA merge can lead to missing of some COW operations to be merged thereby some blocks may end up with stale data. Fix here is to avoid any re-ordering of COW operations. Merge the COW operations as present in the COW file. New tests have been added to cow_snapuserd. Bug: 194955361 Test: cow_snapuserd_test, Incremental OTA Signed-off-by: Akilesh Kailash <akailash@google.com> Merged-In: Id895fe7a3d6b4510676490a86d0caf62dec9b079 Change-Id: I14900b9537c4deb7824547e1dfe80f15274bdda4 Ignore-AOSP-First: manual merge from aosp
-rw-r--r--fs_mgr/libsnapshot/cow_snapuserd_test.cpp258
-rw-r--r--fs_mgr/libsnapshot/snapuserd.cpp88
2 files changed, 257 insertions, 89 deletions
diff --git a/fs_mgr/libsnapshot/cow_snapuserd_test.cpp b/fs_mgr/libsnapshot/cow_snapuserd_test.cpp
index 3888eb168..d09c6e9c9 100644
--- a/fs_mgr/libsnapshot/cow_snapuserd_test.cpp
+++ b/fs_mgr/libsnapshot/cow_snapuserd_test.cpp
@@ -96,6 +96,8 @@ class TempDevice {
class CowSnapuserdTest final {
public:
bool Setup();
+ bool SetupOrderedOps();
+ bool SetupOrderedOpsInverted();
bool SetupCopyOverlap_1();
bool SetupCopyOverlap_2();
bool Merge();
@@ -103,6 +105,9 @@ class CowSnapuserdTest final {
void ReadSnapshotDeviceAndValidate();
void Shutdown();
void MergeInterrupt();
+ void MergeInterruptFixed(int duration);
+ void MergeInterruptRandomly(int max_duration);
+ void ReadDmUserBlockWithoutDaemon();
std::string snapshot_dev() const { return snapshot_dev_->path(); }
@@ -116,6 +121,8 @@ class CowSnapuserdTest final {
void StartMerge();
void CreateCowDevice();
+ void CreateCowDeviceOrderedOps();
+ void CreateCowDeviceOrderedOpsInverted();
void CreateCowDeviceWithCopyOverlap_1();
void CreateCowDeviceWithCopyOverlap_2();
bool SetupDaemon();
@@ -196,6 +203,18 @@ bool CowSnapuserdTest::Setup() {
return setup_ok_;
}
+bool CowSnapuserdTest::SetupOrderedOps() {
+ CreateBaseDevice();
+ CreateCowDeviceOrderedOps();
+ return SetupDaemon();
+}
+
+bool CowSnapuserdTest::SetupOrderedOpsInverted() {
+ CreateBaseDevice();
+ CreateCowDeviceOrderedOpsInverted();
+ return SetupDaemon();
+}
+
bool CowSnapuserdTest::SetupCopyOverlap_1() {
CreateBaseDevice();
CreateCowDeviceWithCopyOverlap_1();
@@ -382,6 +401,112 @@ void CowSnapuserdTest::CreateCowDeviceWithCopyOverlap_1() {
true);
}
+void CowSnapuserdTest::CreateCowDeviceOrderedOpsInverted() {
+ unique_fd rnd_fd;
+ loff_t offset = 0;
+
+ std::string path = android::base::GetExecutableDirectory();
+ cow_system_ = std::make_unique<TemporaryFile>(path);
+
+ rnd_fd.reset(open("/dev/random", O_RDONLY));
+ ASSERT_TRUE(rnd_fd > 0);
+
+ std::unique_ptr<uint8_t[]> random_buffer_1_ = std::make_unique<uint8_t[]>(size_);
+
+ // Fill random data
+ for (size_t j = 0; j < (size_ / 1_MiB); j++) {
+ ASSERT_EQ(ReadFullyAtOffset(rnd_fd, (char*)random_buffer_1_.get() + offset, 1_MiB, 0),
+ true);
+
+ offset += 1_MiB;
+ }
+
+ CowOptions options;
+ options.compression = "gz";
+ CowWriter writer(options);
+
+ ASSERT_TRUE(writer.Initialize(cow_system_->fd));
+
+ size_t num_blocks = size_ / options.block_size;
+ size_t blk_end_copy = num_blocks * 2;
+ size_t source_blk = num_blocks - 1;
+ size_t blk_src_copy = blk_end_copy - 1;
+
+ size_t x = num_blocks;
+ while (1) {
+ ASSERT_TRUE(writer.AddCopy(source_blk, blk_src_copy));
+ x -= 1;
+ if (x == 0) {
+ break;
+ }
+ source_blk -= 1;
+ blk_src_copy -= 1;
+ }
+
+ // Flush operations
+ ASSERT_TRUE(writer.Finalize());
+ // Construct the buffer required for validation
+ orig_buffer_ = std::make_unique<uint8_t[]>(total_base_size_);
+ // Read the entire base device
+ ASSERT_EQ(android::base::ReadFullyAtOffset(base_fd_, orig_buffer_.get(), total_base_size_, 0),
+ true);
+ // Merged Buffer
+ memmove(orig_buffer_.get(), (char*)orig_buffer_.get() + size_, size_);
+}
+
+void CowSnapuserdTest::CreateCowDeviceOrderedOps() {
+ unique_fd rnd_fd;
+ loff_t offset = 0;
+
+ std::string path = android::base::GetExecutableDirectory();
+ cow_system_ = std::make_unique<TemporaryFile>(path);
+
+ rnd_fd.reset(open("/dev/random", O_RDONLY));
+ ASSERT_TRUE(rnd_fd > 0);
+
+ std::unique_ptr<uint8_t[]> random_buffer_1_ = std::make_unique<uint8_t[]>(size_);
+
+ // Fill random data
+ for (size_t j = 0; j < (size_ / 1_MiB); j++) {
+ ASSERT_EQ(ReadFullyAtOffset(rnd_fd, (char*)random_buffer_1_.get() + offset, 1_MiB, 0),
+ true);
+
+ offset += 1_MiB;
+ }
+
+ CowOptions options;
+ options.compression = "gz";
+ CowWriter writer(options);
+
+ ASSERT_TRUE(writer.Initialize(cow_system_->fd));
+
+ size_t num_blocks = size_ / options.block_size;
+ size_t x = num_blocks;
+ size_t source_blk = 0;
+ size_t blk_src_copy = num_blocks;
+
+ while (1) {
+ ASSERT_TRUE(writer.AddCopy(source_blk, blk_src_copy));
+
+ x -= 1;
+ if (x == 0) {
+ break;
+ }
+ source_blk += 1;
+ blk_src_copy += 1;
+ }
+
+ // Flush operations
+ ASSERT_TRUE(writer.Finalize());
+ // Construct the buffer required for validation
+ orig_buffer_ = std::make_unique<uint8_t[]>(total_base_size_);
+ // Read the entire base device
+ ASSERT_EQ(android::base::ReadFullyAtOffset(base_fd_, orig_buffer_.get(), total_base_size_, 0),
+ true);
+ // Merged Buffer
+ memmove(orig_buffer_.get(), (char*)orig_buffer_.get() + size_, size_);
+}
+
void CowSnapuserdTest::CreateCowDevice() {
unique_fd rnd_fd;
loff_t offset = 0;
@@ -481,6 +606,36 @@ void CowSnapuserdTest::CreateDmUserDevice() {
ASSERT_TRUE(android::fs_mgr::WaitForFile(misc_device, 10s));
}
+void CowSnapuserdTest::ReadDmUserBlockWithoutDaemon() {
+ DmTable dmuser_table;
+ std::string dm_user_name = "dm-test-device";
+ unique_fd fd;
+
+ // Create a dm-user block device
+ ASSERT_TRUE(dmuser_table.AddTarget(std::make_unique<DmTargetUser>(0, 123456, dm_user_name)));
+ ASSERT_TRUE(dmuser_table.valid());
+
+ dmuser_dev_ = std::make_unique<TempDevice>(dm_user_name, dmuser_table);
+ ASSERT_TRUE(dmuser_dev_->valid());
+ ASSERT_FALSE(dmuser_dev_->path().empty());
+
+ fd.reset(open(dmuser_dev_->path().c_str(), O_RDONLY));
+ ASSERT_GE(fd, 0);
+
+ std::unique_ptr<uint8_t[]> buffer = std::make_unique<uint8_t[]>(1_MiB);
+
+ loff_t offset = 0;
+ // Every IO should fail as there is no daemon to process the IO
+ for (size_t j = 0; j < 10; j++) {
+ ASSERT_EQ(ReadFullyAtOffset(fd, (char*)buffer.get() + offset, BLOCK_SZ, offset), false);
+
+ offset += BLOCK_SZ;
+ }
+
+ fd = {};
+ ASSERT_TRUE(dmuser_dev_->Destroy());
+}
+
void CowSnapuserdTest::InitDaemon() {
bool ok = client_->AttachDmUser(system_device_ctrl_name_);
ASSERT_TRUE(ok);
@@ -566,6 +721,7 @@ void CowSnapuserdTest::ValidateMerge() {
void CowSnapuserdTest::SimulateDaemonRestart() {
Shutdown();
+ std::this_thread::sleep_for(500ms);
SetDeviceControlName();
StartSnapuserdDaemon();
InitCowDevice();
@@ -574,6 +730,34 @@ void CowSnapuserdTest::SimulateDaemonRestart() {
CreateSnapshotDevice();
}
+void CowSnapuserdTest::MergeInterruptRandomly(int max_duration) {
+ std::srand(std::time(nullptr));
+ StartMerge();
+
+ for (int i = 0; i < 20; i++) {
+ int duration = std::rand() % max_duration;
+ std::this_thread::sleep_for(std::chrono::milliseconds(duration));
+ SimulateDaemonRestart();
+ StartMerge();
+ }
+
+ SimulateDaemonRestart();
+ ASSERT_TRUE(Merge());
+}
+
+void CowSnapuserdTest::MergeInterruptFixed(int duration) {
+ StartMerge();
+
+ for (int i = 0; i < 25; i++) {
+ std::this_thread::sleep_for(std::chrono::milliseconds(duration));
+ SimulateDaemonRestart();
+ StartMerge();
+ }
+
+ SimulateDaemonRestart();
+ ASSERT_TRUE(Merge());
+}
+
void CowSnapuserdTest::MergeInterrupt() {
// Interrupt merge at various intervals
StartMerge();
@@ -638,10 +822,9 @@ void CowSnapuserdMetadataTest::ValidatePartialFilledArea() {
void* buffer = snapuserd_->GetExceptionBuffer(1);
loff_t offset = 0;
struct disk_exception* de;
- for (int i = 0; i < 12; i++) {
+ for (int i = 11; i >= 0; i--) {
de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
ASSERT_EQ(de->old_chunk, i);
- ASSERT_EQ(de->new_chunk, new_chunk);
offset += sizeof(struct disk_exception);
new_chunk += 1;
}
@@ -780,71 +963,71 @@ void CowSnapuserdMetadataTest::ValidateMetadata() {
de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
ASSERT_EQ(de->old_chunk, 100);
- ASSERT_EQ(de->new_chunk, 522);
+ ASSERT_EQ(de->new_chunk, 521);
offset += sizeof(struct disk_exception);
de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
ASSERT_EQ(de->old_chunk, 105);
- ASSERT_EQ(de->new_chunk, 524);
+ ASSERT_EQ(de->new_chunk, 522);
offset += sizeof(struct disk_exception);
de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
ASSERT_EQ(de->old_chunk, 110);
- ASSERT_EQ(de->new_chunk, 526);
+ ASSERT_EQ(de->new_chunk, 523);
offset += sizeof(struct disk_exception);
// The next 4 operations are batch merged as
// both old and new chunk are contiguous
de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
- ASSERT_EQ(de->old_chunk, 50);
- ASSERT_EQ(de->new_chunk, 528);
+ ASSERT_EQ(de->old_chunk, 53);
+ ASSERT_EQ(de->new_chunk, 524);
offset += sizeof(struct disk_exception);
de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
- ASSERT_EQ(de->old_chunk, 51);
- ASSERT_EQ(de->new_chunk, 529);
+ ASSERT_EQ(de->old_chunk, 52);
+ ASSERT_EQ(de->new_chunk, 525);
offset += sizeof(struct disk_exception);
de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
- ASSERT_EQ(de->old_chunk, 52);
- ASSERT_EQ(de->new_chunk, 530);
+ ASSERT_EQ(de->old_chunk, 51);
+ ASSERT_EQ(de->new_chunk, 526);
offset += sizeof(struct disk_exception);
de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
- ASSERT_EQ(de->old_chunk, 53);
- ASSERT_EQ(de->new_chunk, 531);
+ ASSERT_EQ(de->old_chunk, 50);
+ ASSERT_EQ(de->new_chunk, 527);
offset += sizeof(struct disk_exception);
// This is handling overlap operation with
// two batch merge operations.
de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
ASSERT_EQ(de->old_chunk, 18);
- ASSERT_EQ(de->new_chunk, 533);
+ ASSERT_EQ(de->new_chunk, 528);
offset += sizeof(struct disk_exception);
de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
ASSERT_EQ(de->old_chunk, 19);
- ASSERT_EQ(de->new_chunk, 534);
+ ASSERT_EQ(de->new_chunk, 529);
offset += sizeof(struct disk_exception);
de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
ASSERT_EQ(de->old_chunk, 20);
- ASSERT_EQ(de->new_chunk, 535);
+ ASSERT_EQ(de->new_chunk, 530);
offset += sizeof(struct disk_exception);
de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
ASSERT_EQ(de->old_chunk, 21);
- ASSERT_EQ(de->new_chunk, 537);
+ ASSERT_EQ(de->new_chunk, 532);
offset += sizeof(struct disk_exception);
de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
ASSERT_EQ(de->old_chunk, 22);
- ASSERT_EQ(de->new_chunk, 538);
+ ASSERT_EQ(de->new_chunk, 533);
offset += sizeof(struct disk_exception);
de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
ASSERT_EQ(de->old_chunk, 23);
- ASSERT_EQ(de->new_chunk, 539);
+ ASSERT_EQ(de->new_chunk, 534);
offset += sizeof(struct disk_exception);
// End of metadata
@@ -909,6 +1092,43 @@ TEST(Snapuserd_Test, Snapshot_COPY_Overlap_Merge_Resume_TEST) {
harness.Shutdown();
}
+TEST(Snapuserd_Test, ReadDmUserBlockWithoutDaemon) {
+ CowSnapuserdTest harness;
+ harness.ReadDmUserBlockWithoutDaemon();
+}
+
+TEST(Snapuserd_Test, Snapshot_Merge_Crash_Fixed_Ordered) {
+ CowSnapuserdTest harness;
+ ASSERT_TRUE(harness.SetupOrderedOps());
+ harness.MergeInterruptFixed(300);
+ harness.ValidateMerge();
+ harness.Shutdown();
+}
+
+TEST(Snapuserd_Test, Snapshot_Merge_Crash_Random_Ordered) {
+ CowSnapuserdTest harness;
+ ASSERT_TRUE(harness.SetupOrderedOps());
+ harness.MergeInterruptRandomly(500);
+ harness.ValidateMerge();
+ harness.Shutdown();
+}
+
+TEST(Snapuserd_Test, Snapshot_Merge_Crash_Fixed_Inverted) {
+ CowSnapuserdTest harness;
+ ASSERT_TRUE(harness.SetupOrderedOpsInverted());
+ harness.MergeInterruptFixed(50);
+ harness.ValidateMerge();
+ harness.Shutdown();
+}
+
+TEST(Snapuserd_Test, Snapshot_Merge_Crash_Random_Inverted) {
+ CowSnapuserdTest harness;
+ ASSERT_TRUE(harness.SetupOrderedOpsInverted());
+ harness.MergeInterruptRandomly(50);
+ harness.ValidateMerge();
+ harness.Shutdown();
+}
+
} // namespace snapshot
} // namespace android
diff --git a/fs_mgr/libsnapshot/snapuserd.cpp b/fs_mgr/libsnapshot/snapuserd.cpp
index 03c2ef6c7..57a61a791 100644
--- a/fs_mgr/libsnapshot/snapuserd.cpp
+++ b/fs_mgr/libsnapshot/snapuserd.cpp
@@ -442,8 +442,9 @@ bool Snapuserd::ReadMetadata() {
int num_ra_ops_per_iter = ((GetBufferDataSize()) / BLOCK_SZ);
std::optional<chunk_t> prev_id = {};
- std::map<uint64_t, const CowOperation*> map;
+ std::vector<const CowOperation*> vec;
std::set<uint64_t> dest_blocks;
+ std::set<uint64_t> source_blocks;
size_t pending_copy_ops = exceptions_per_area_ - num_ops;
uint64_t total_copy_ops = reader_->total_copy_ops();
@@ -500,99 +501,45 @@ bool Snapuserd::ReadMetadata() {
// scratch space and re-construct it thereby there
// is no loss of data.
//
+ // Note that we will follow the same order of COW operations
+ // as present in the COW file. This will make sure that\
+ // the merge of operations are done based on the ops present
+ // in the file.
//===========================================================
- //
- // Case 2:
- //
- // Let's say we have three copy operations written to COW file
- // in the following order:
- //
- // op-1: 15 -> 18
- // op-2: 16 -> 19
- // op-3: 17 -> 20
- //
- // As aforementioned, kernel will initiate merge in reverse order.
- // Hence, we will read these ops in reverse order so that all these
- // ops are exectued in the same order as requested. Thus, we will
- // read the metadata in reverse order and for the kernel it will
- // look like:
- //
- // op-3: 17 -> 20
- // op-2: 16 -> 19
- // op-1: 15 -> 18 <-- Merge starts here in the kernel
- //
- // Now, this is problematic as kernel cannot batch merge them.
- //
- // Merge sequence will look like:
- //
- // Merge-1: op-1: 15 -> 18
- // Merge-2: op-2: 16 -> 19
- // Merge-3: op-3: 17 -> 20
- //
- // We have three merge operations.
- //
- // Even though the blocks are contiguous, kernel can batch merge
- // them if the blocks are in descending order. Update engine
- // addresses this issue partially for overlapping operations as
- // we see that op-1 to op-3 and op-4 to op-6 operatiosn are in
- // descending order. However, if the copy operations are not
- // overlapping, update engine cannot write these blocks
- // in descending order. Hence, we will try to address it.
- // Thus, we will send these blocks to the kernel and it will
- // look like:
- //
- // op-3: 15 -> 18
- // op-2: 16 -> 19
- // op-1: 17 -> 20 <-- Merge starts here in the kernel
- //
- // Now with this change, we can batch merge all these three
- // operations. Merge sequence will look like:
- //
- // Merge-1: {op-1: 17 -> 20, op-2: 16 -> 19, op-3: 15 -> 18}
- //
- // Note that we have changed the ordering of merge; However, this
- // is ok as each of these copy operations are independent and there
- // is no overlap.
- //
- //===================================================================
if (prev_id.has_value()) {
- chunk_t diff = (cow_op->new_block > prev_id.value())
- ? (cow_op->new_block - prev_id.value())
- : (prev_id.value() - cow_op->new_block);
- if (diff != 1) {
- break;
- }
-
- if (dest_blocks.count(cow_op->new_block) || map.count(cow_op->source) > 0) {
+ if (dest_blocks.count(cow_op->new_block) || source_blocks.count(cow_op->source)) {
break;
}
}
metadata_found = true;
pending_copy_ops -= 1;
- map[cow_op->new_block] = cow_op;
+ vec.push_back(cow_op);
dest_blocks.insert(cow_op->source);
+ source_blocks.insert(cow_op->new_block);
prev_id = cow_op->new_block;
cowop_riter_->Next();
} while (!cowop_riter_->Done() && pending_copy_ops);
data_chunk_id = GetNextAllocatableChunkId(data_chunk_id);
- SNAP_LOG(DEBUG) << "Batch Merge copy-ops of size: " << map.size()
+ SNAP_LOG(DEBUG) << "Batch Merge copy-ops of size: " << vec.size()
<< " Area: " << vec_.size() << " Area offset: " << offset
<< " Pending-copy-ops in this area: " << pending_copy_ops;
- for (auto it = map.begin(); it != map.end(); it++) {
+ for (size_t i = 0; i < vec.size(); i++) {
struct disk_exception* de =
reinterpret_cast<struct disk_exception*>((char*)de_ptr.get() + offset);
- de->old_chunk = it->first;
+ const CowOperation* cow_op = vec[i];
+
+ de->old_chunk = cow_op->new_block;
de->new_chunk = data_chunk_id;
// Store operation pointer.
- chunk_vec_.push_back(std::make_pair(ChunkToSector(data_chunk_id), it->second));
+ chunk_vec_.push_back(std::make_pair(ChunkToSector(data_chunk_id), cow_op));
offset += sizeof(struct disk_exception);
num_ops += 1;
copy_ops++;
if (read_ahead_feature_) {
- read_ahead_ops_.push_back(it->second);
+ read_ahead_ops_.push_back(cow_op);
}
SNAP_LOG(DEBUG) << num_ops << ":"
@@ -635,8 +582,9 @@ bool Snapuserd::ReadMetadata() {
data_chunk_id = GetNextAllocatableChunkId(data_chunk_id);
}
}
- map.clear();
+ vec.clear();
dest_blocks.clear();
+ source_blocks.clear();
prev_id.reset();
}