diff options
Diffstat (limited to 'zucchini_gen.cc')
-rw-r--r-- | zucchini_gen.cc | 430 |
1 files changed, 430 insertions, 0 deletions
// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "components/zucchini/zucchini_gen.h"

#include <stddef.h>
#include <stdint.h>

#include <algorithm>
#include <map>
#include <memory>
#include <utility>

#include "base/logging.h"
#include "base/numerics/safe_conversions.h"
#include "components/zucchini/disassembler.h"
#include "components/zucchini/element_detection.h"
#include "components/zucchini/encoded_view.h"
#include "components/zucchini/ensemble_matcher.h"
#include "components/zucchini/equivalence_map.h"
#include "components/zucchini/heuristic_ensemble_matcher.h"
#include "components/zucchini/image_index.h"
#include "components/zucchini/label_manager.h"
#include "components/zucchini/patch_writer.h"
#include "components/zucchini/suffix_array.h"
#include "components/zucchini/targets_affinity.h"

namespace zucchini {

namespace {

// Parameters for patch generation.

// Minimum similarity score threshold passed to EquivalenceMap::Build(); an
// equivalence whose similarity falls below this is not kept.
constexpr double kMinEquivalenceSimilarity = 12.0;
// Minimum affinity required by TargetsAffinity::AssignLabels() for an
// old/new target pair to receive a common label.
constexpr double kMinLabelAffinity = 64.0;
// Number of alternating label-association / equivalence-building rounds
// performed by CreateEquivalenceMap().
constexpr size_t kNumIterations = 2;

}  // namespace

// Returns the targets in |new_targets| that are absent from
// |projected_old_targets|. std::set_difference requires both input ranges to
// be sorted; TargetPool iteration is assumed to satisfy this -- TODO confirm
// in target_pool.h.
std::vector<offset_t> FindExtraTargets(const TargetPool& projected_old_targets,
                                       const TargetPool& new_targets) {
  std::vector<offset_t> extra_targets;
  std::set_difference(
      new_targets.begin(), new_targets.end(), projected_old_targets.begin(),
      projected_old_targets.end(), std::back_inserter(extra_targets));
  return extra_targets;
}

// Builds an EquivalenceMap between |old_image_index| and |new_image_index| by
// iteratively refining target labels and equivalences.
EquivalenceMap CreateEquivalenceMap(const ImageIndex& old_image_index,
                                    const ImageIndex& new_image_index) {
  // Label matching (between "old" and "new") can guide EquivalenceMap
  // construction; but EquivalenceMap induces Label matching. This apparent
  // "chicken-and-egg" problem is solved by multiple iterations alternating 2
  // steps:
  // - Association of targets based on previous EquivalenceMap. Note that the
  //   EquivalenceMap is empty on first iteration, so this is a no-op.
  // - Construction of refined EquivalenceMap based on new targets
  //   associations.
  size_t pool_count = old_image_index.PoolCount();
  // |target_affinities| is outside the loop to reduce allocation.
  std::vector<TargetsAffinity> target_affinities(pool_count);

  EquivalenceMap equivalence_map;
  for (size_t i = 0; i < kNumIterations; ++i) {
    // Fresh views each round: labels assigned below depend on the current
    // |equivalence_map| and must not leak into the next iteration.
    EncodedView old_view(old_image_index);
    EncodedView new_view(new_image_index);

    // Associate targets from "old" to "new" image based on |equivalence_map|
    // for each reference pool.
    for (const auto& old_pool_tag_and_targets :
         old_image_index.target_pools()) {
      PoolTag pool_tag = old_pool_tag_and_targets.first;
      target_affinities[pool_tag.value()].InferFromSimilarities(
          equivalence_map, old_pool_tag_and_targets.second.targets(),
          new_image_index.pool(pool_tag).targets());

      // Creates labels for strongly associated targets.
      std::vector<uint32_t> old_labels;
      std::vector<uint32_t> new_labels;
      size_t label_bound = target_affinities[pool_tag.value()].AssignLabels(
          kMinLabelAffinity, &old_labels, &new_labels);
      old_view.SetLabels(pool_tag, std::move(old_labels), label_bound);
      new_view.SetLabels(pool_tag, std::move(new_labels), label_bound);
    }
    // Build equivalence map, where references in "old" and "new" that share
    // common semantics (i.e., their respective targets were associated earlier
    // on) are considered equivalent.
    equivalence_map.Build(
        MakeSuffixArray<InducedSuffixSort>(old_view, old_view.Cardinality()),
        old_view, new_view, target_affinities, kMinEquivalenceSimilarity);
  }

  return equivalence_map;
}

// Writes equivalences and the "extra data" (bytes of |new_image| not covered
// by any equivalence) into |patch_writer|. Always returns true.
bool GenerateEquivalencesAndExtraData(ConstBufferView new_image,
                                      const EquivalenceMap& equivalence_map,
                                      PatchElementWriter* patch_writer) {
  // Make 2 passes through |equivalence_map| to reduce write churn.
  // Pass 1: Write all equivalences.
  EquivalenceSink equivalences_sink;
  for (const EquivalenceCandidate& candidate : equivalence_map)
    equivalences_sink.PutNext(candidate.eq);
  patch_writer->SetEquivalenceSink(std::move(equivalences_sink));

  // Pass 2: Write data in gaps in |new_image| before / between / after
  // |equivalence_map| as "extra data".
  ExtraDataSink extra_data_sink;
  offset_t dst_offset = 0;
  for (const EquivalenceCandidate& candidate : equivalence_map) {
    // Emit the gap [dst_offset, candidate.eq.dst_offset); assumes candidates
    // are visited in increasing |dst_offset| order -- TODO confirm against
    // EquivalenceMap iteration contract.
    extra_data_sink.PutNext(
        new_image[{dst_offset, candidate.eq.dst_offset - dst_offset}]);
    dst_offset = candidate.eq.dst_end();
    DCHECK_LE(dst_offset, new_image.size());
  }
  // Trailing gap after the last equivalence (possibly empty).
  extra_data_sink.PutNext(
      new_image[{dst_offset, new_image.size() - dst_offset}]);
  patch_writer->SetExtraDataSink(std::move(extra_data_sink));
  return true;
}

// Writes the bytewise delta between equivalent regions of |old_image| and
// |new_image| into |patch_writer|. Bytes belonging to references are skipped;
// those are encoded separately by GenerateReferencesDelta(). Always returns
// true.
bool GenerateRawDelta(ConstBufferView old_image,
                      ConstBufferView new_image,
                      const EquivalenceMap& equivalence_map,
                      const ImageIndex& new_image_index,
                      PatchElementWriter* patch_writer) {
  RawDeltaSink raw_delta_sink;

  // Visit |equivalence_map| blocks in |new_image| order. Find and emit all
  // bytewise differences.
  offset_t base_copy_offset = 0;
  for (const EquivalenceCandidate& candidate : equivalence_map) {
    Equivalence equivalence = candidate.eq;
    // For each bytewise delta from |old_image| to |new_image|, compute "copy
    // offset" and pass it along with delta to the sink.
    for (offset_t i = 0; i < equivalence.length; ++i) {
      if (new_image_index.IsReference(equivalence.dst_offset + i))
        continue;  // Skip references since they're handled elsewhere.

      int8_t diff = new_image[equivalence.dst_offset + i] -
                    old_image[equivalence.src_offset + i];
      if (diff)
        raw_delta_sink.PutNext({base_copy_offset + i, diff});
    }
    // Copy offsets advance by the full equivalence length, including the
    // reference bytes that were skipped above.
    base_copy_offset += equivalence.length;
  }
  patch_writer->SetRawDeltaSink(std::move(raw_delta_sink));
  return true;
}

// For every "new" reference covered by an equivalence, emits the delta
// between its target key and the key estimated by projecting the matching
// "old" reference's target through |offset_mapper|. Always returns true.
bool GenerateReferencesDelta(const ReferenceSet& src_refs,
                             const ReferenceSet& dst_refs,
                             const TargetPool& projected_target_pool,
                             const OffsetMapper& offset_mapper,
                             const EquivalenceMap& equivalence_map,
                             ReferenceDeltaSink* reference_delta_sink) {
  size_t ref_width = src_refs.width();
  auto dst_ref = dst_refs.begin();

  // For each equivalence, for each covered |dst_ref| and the matching
  // |src_ref|, emit the delta between the respective target labels. Note: By
  // construction, each reference location (with |ref_width|) lies either
  // completely inside an equivalence or completely outside. We perform
  // "straddle checks" throughout to verify this assertion.
  for (const auto& candidate : equivalence_map) {
    const Equivalence equiv = candidate.eq;
    // Increment |dst_ref| until it catches up to |equiv|.
    while (dst_ref != dst_refs.end() && dst_ref->location < equiv.dst_offset)
      ++dst_ref;
    if (dst_ref == dst_refs.end())
      break;
    if (dst_ref->location >= equiv.dst_end())
      continue;  // No references in this equivalence; try the next one.
    // Straddle check.
    DCHECK_LE(dst_ref->location + ref_width, equiv.dst_end());

    // Find the "old" reference at the source location mirroring |dst_ref|.
    offset_t src_loc =
        equiv.src_offset + (dst_ref->location - equiv.dst_offset);
    auto src_ref = std::lower_bound(
        src_refs.begin(), src_refs.end(), src_loc,
        [](const IndirectReference& a, offset_t b) { return a.location < b; });
    for (; dst_ref != dst_refs.end() &&
           dst_ref->location + ref_width <= equiv.dst_end();
         ++dst_ref, ++src_ref) {
      // Local offset of |src_ref| should match that of |dst_ref|.
      DCHECK_EQ(src_ref->location - equiv.src_offset,
                dst_ref->location - equiv.dst_offset);
      // Project the old target forward to estimate where it lands in "new",
      // then encode only the (small) correction the patcher must apply.
      offset_t old_offset =
          src_refs.target_pool().OffsetForKey(src_ref->target_key);
      offset_t new_estimated_offset = offset_mapper.ForwardProject(old_offset);
      offset_t new_estimated_key =
          projected_target_pool.KeyForNearestOffset(new_estimated_offset);
      offset_t new_offset =
          dst_refs.target_pool().OffsetForKey(dst_ref->target_key);
      offset_t new_key = projected_target_pool.KeyForOffset(new_offset);

      reference_delta_sink->PutNext(
          static_cast<int32_t>(new_key - new_estimated_key));
    }
    if (dst_ref == dst_refs.end())
      break;  // Done.
    // Straddle check.
    DCHECK_GE(dst_ref->location, equiv.dst_end());
  }
  return true;
}

// Writes |extra_targets| for pool |pool_tag| into |patch_writer|. Always
// returns true.
bool GenerateExtraTargets(const std::vector<offset_t>& extra_targets,
                          PoolTag pool_tag,
                          PatchElementWriter* patch_writer) {
  TargetSink target_sink;
  for (offset_t target : extra_targets)
    target_sink.PutNext(target);
  patch_writer->SetTargetSink(pool_tag, std::move(target_sink));
  return true;
}

// Generates a patch element treating |old_image| and |new_image| as raw data
// (no disassembly, no references). |old_sa| is a precomputed suffix array of
// |old_image|, shared by callers to save memory.
bool GenerateRawElement(const std::vector<offset_t>& old_sa,
                        ConstBufferView old_image,
                        ConstBufferView new_image,
                        PatchElementWriter* patch_writer) {
  // Note: Initialize() is not called, so these indexes carry no references.
  ImageIndex old_image_index(old_image);
  ImageIndex new_image_index(new_image);

  EquivalenceMap equivalences;
  equivalences.Build(old_sa, EncodedView(old_image_index),
                     EncodedView(new_image_index), {},
                     kMinEquivalenceSimilarity);

  // Raw elements have no references, hence an empty reference delta sink.
  patch_writer->SetReferenceDeltaSink({});
  return GenerateEquivalencesAndExtraData(new_image, equivalences,
                                          patch_writer) &&
         GenerateRawDelta(old_image, new_image, equivalences, new_image_index,
                          patch_writer);
}

// Generates a patch element for a matched pair of executables of |exe_type|,
// using disassembly to patch references semantically. Returns false on
// failure (caller may fall back to raw patching).
bool GenerateExecutableElement(ExecutableType exe_type,
                               ConstBufferView old_image,
                               ConstBufferView new_image,
                               PatchElementWriter* patch_writer) {
  // Initialize Disassemblers.
  std::unique_ptr<Disassembler> old_disasm =
      MakeDisassemblerOfType(old_image, exe_type);
  std::unique_ptr<Disassembler> new_disasm =
      MakeDisassemblerOfType(new_image, exe_type);
  if (!old_disasm || !new_disasm) {
    LOG(ERROR) << "Failed to create Disassembler.";
    return false;
  }
  DCHECK_EQ(old_disasm->GetExeType(), new_disasm->GetExeType());

  // Initialize ImageIndexes.
  ImageIndex old_image_index(old_image);
  ImageIndex new_image_index(new_image);
  if (!old_image_index.Initialize(old_disasm.get()) ||
      !new_image_index.Initialize(new_disasm.get())) {
    LOG(ERROR) << "Failed to create ImageIndex: Overlapping references found?";
    return false;
  }
  DCHECK_EQ(old_image_index.PoolCount(), new_image_index.PoolCount());

  EquivalenceMap equivalences =
      CreateEquivalenceMap(old_image_index, new_image_index);
  OffsetMapper offset_mapper(equivalences);

  ReferenceDeltaSink reference_delta_sink;
  for (const auto& old_targets : old_image_index.target_pools()) {
    PoolTag pool_tag = old_targets.first;
    // Copy, then project "old" targets into "new" coordinates; targets that
    // exist only in "new" are stored explicitly as "extra targets".
    TargetPool projected_old_targets = old_targets.second;
    projected_old_targets.FilterAndProject(offset_mapper);
    std::vector<offset_t> extra_target =
        FindExtraTargets(projected_old_targets, new_image_index.pool(pool_tag));
    projected_old_targets.InsertTargets(extra_target);

    if (!GenerateExtraTargets(extra_target, pool_tag, patch_writer))
      return false;
    for (TypeTag type_tag : old_targets.second.types()) {
      if (!GenerateReferencesDelta(old_image_index.refs(type_tag),
                                   new_image_index.refs(type_tag),
                                   projected_old_targets, offset_mapper,
                                   equivalences, &reference_delta_sink)) {
        return false;
      }
    }
  }
  patch_writer->SetReferenceDeltaSink(std::move(reference_delta_sink));

  return GenerateEquivalencesAndExtraData(new_image, equivalences,
                                          patch_writer) &&
         GenerateRawDelta(old_image, new_image, equivalences, new_image_index,
                          patch_writer);
}

/******** Exported Functions ********/

// Generates a patch from |old_image| to |new_image|, matching embedded
// elements heuristically and patching gaps as raw data. Falls back to
// GenerateRaw() when matching fails or finds nothing.
status::Code GenerateEnsemble(ConstBufferView old_image,
                              ConstBufferView new_image,
                              EnsemblePatchWriter* patch_writer) {
  std::unique_ptr<EnsembleMatcher> matcher =
      std::make_unique<HeuristicEnsembleMatcher>(nullptr);
  if (!matcher->RunMatch(old_image, new_image)) {
    LOG(INFO) << "RunMatch() failed, generating raw patch.";
    return GenerateRaw(old_image, new_image, patch_writer);
  }

  const std::vector<ElementMatch>& matches = matcher->matches();
  LOG(INFO) << "Matching: Found " << matches.size()
            << " nontrivial matches and " << matcher->num_identical()
            << " identical matches.";
  size_t num_elements = matches.size();
  if (num_elements == 0) {
    LOG(INFO) << "No nontrival matches, generating raw patch.";
    return GenerateRaw(old_image, new_image, patch_writer);
  }

  PatchType patch_type = PatchType::kRawPatch;
  if (num_elements == 1 && matches[0].old_element.size == old_image.size() &&
      matches[0].new_element.size == new_image.size()) {
    // If |old_image| matches |new_image| entirely then we have single patch.
    LOG(INFO) << "Old and new files are executables, "
              << "generating single-file patch.";
    patch_type = PatchType::kSinglePatch;
  } else {
    LOG(INFO) << "Generating ensemble patch.";
    patch_type = PatchType::kEnsemblePatch;
  }

  // "Gaps" are |new_image| bytes not covered by new_elements in |matches|.
  // These are treated as raw data, and patched against the entire |old_image|.

  // |patch_element_map| (keyed by "new" offsets) stores PatchElementWriter
  // results so elements and "gap" results can be computed separately (to
  // reduce peak memory usage), and later, properly serialized to
  // |patch_writer| ordered by "new" offset.
  std::map<offset_t, PatchElementWriter> patch_element_map;

  // Variables to track element patching successes.
  std::vector<BufferRegion> covered_new_regions;
  size_t covered_new_bytes = 0;

  // Process elements first, since non-fatal failures may turn some into gaps.
  for (const ElementMatch& match : matches) {
    BufferRegion new_region = match.new_element.region();
    LOG(INFO) << "--- Match [" << new_region.lo() << "," << new_region.hi()
              << ")";

    auto it_and_success = patch_element_map.emplace(
        base::checked_cast<offset_t>(new_region.lo()), match);
    DCHECK(it_and_success.second);
    PatchElementWriter& patch_element = it_and_success.first->second;

    ConstBufferView old_sub_image = old_image[match.old_element.region()];
    ConstBufferView new_sub_image = new_image[new_region];
    if (GenerateExecutableElement(match.exe_type(), old_sub_image,
                                  new_sub_image, &patch_element)) {
      covered_new_regions.push_back(new_region);
      covered_new_bytes += new_region.size;
    } else {
      // Non-fatal: drop the element so its bytes get patched as a gap below.
      LOG(INFO) << "Fall back to raw patching.";
      patch_element_map.erase(it_and_success.first);
    }
  }

  if (covered_new_bytes == 0)
    patch_type = PatchType::kRawPatch;

  if (covered_new_bytes < new_image.size()) {
    // Process all "gaps", which are patched against the entire "old" image.
    // To compute equivalence maps, "gaps" share a common suffix array
    // |old_sa_raw|, whose lifetime is kept separated from elements' suffix
    // arrays to reduce peak memory.
    Element entire_old_element(old_image.local_region(), kExeTypeNoOp);
    ImageIndex old_image_index(old_image);
    EncodedView old_view_raw(old_image_index);
    // 256 = cardinality of a raw (byte-valued) encoded view.
    std::vector<offset_t> old_sa_raw =
        MakeSuffixArray<InducedSuffixSort>(old_view_raw, size_t(256));

    offset_t gap_lo = 0;
    // Add sentinel that points to end of "new" file, to simplify gap
    // iteration.
    covered_new_regions.emplace_back(BufferRegion{new_image.size(), 0});

    for (const BufferRegion& covered : covered_new_regions) {
      offset_t gap_hi = base::checked_cast<offset_t>(covered.lo());
      DCHECK_GE(gap_hi, gap_lo);
      offset_t gap_size = gap_hi - gap_lo;
      if (gap_size > 0) {
        LOG(INFO) << "--- Gap [" << gap_lo << "," << gap_hi << ")";

        ElementMatch gap_match{{entire_old_element, kExeTypeNoOp},
                               {{gap_lo, gap_size}, kExeTypeNoOp}};
        auto it_and_success = patch_element_map.emplace(gap_lo, gap_match);
        DCHECK(it_and_success.second);
        PatchElementWriter& patch_element = it_and_success.first->second;

        ConstBufferView new_sub_image = new_image[{gap_lo, gap_size}];
        if (!GenerateRawElement(old_sa_raw, old_image, new_sub_image,
                                &patch_element)) {
          return status::kStatusFatal;
        }
      }
      gap_lo = base::checked_cast<offset_t>(covered.hi());
    }
  }

  patch_writer->SetPatchType(patch_type);
  // Write all PatchElementWriter sorted by "new" offset.
  for (auto& new_lo_and_patch_element : patch_element_map)
    patch_writer->AddElement(std::move(new_lo_and_patch_element.second));

  return status::kStatusSuccess;
}

// Generates a patch treating the whole of |old_image| and |new_image| as one
// raw-data element.
status::Code GenerateRaw(ConstBufferView old_image,
                         ConstBufferView new_image,
                         EnsemblePatchWriter* patch_writer) {
  patch_writer->SetPatchType(PatchType::kRawPatch);

  ImageIndex old_image_index(old_image);
  EncodedView old_view(old_image_index);
  std::vector<offset_t> old_sa =
      MakeSuffixArray<InducedSuffixSort>(old_view, old_view.Cardinality());

  PatchElementWriter patch_element(
      {Element(old_image.local_region()), Element(new_image.local_region())});
  if (!GenerateRawElement(old_sa, old_image, new_image, &patch_element))
    return status::kStatusFatal;
  patch_writer->AddElement(std::move(patch_element));
  return status::kStatusSuccess;
}

}  // namespace zucchini