diff options
-rw-r--r-- | BUILD.gn | 7 | ||||
-rw-r--r-- | disassembler_ztf.cc | 647 | ||||
-rw-r--r-- | disassembler_ztf.h | 201 | ||||
-rw-r--r-- | disassembler_ztf_unittest.cc | 402 | ||||
-rw-r--r-- | element_detection.cc | 25 | ||||
-rw-r--r-- | image_utils.h | 2 | ||||
-rw-r--r-- | type_ztf.h | 52 |
7 files changed, 1335 insertions, 1 deletions
@@ -12,9 +12,13 @@ buildflag_header("buildflags") { # Disable DEX on Windows Official Builds. _enable_dex = !(is_win && is_official_build) _enable_win = true + + # Disable ZTF (Zucchini Text Format) on official builds it is for testing only. + _enable_ztf = !is_official_build flags = [ "ENABLE_DEX=$_enable_dex", "ENABLE_WIN=$_enable_win", + "ENABLE_ZTF=$_enable_ztf", ] } @@ -42,6 +46,8 @@ static_library("zucchini_lib") { "disassembler_no_op.h", "disassembler_win32.cc", "disassembler_win32.h", + "disassembler_ztf.cc", + "disassembler_ztf.h", "element_detection.cc", "element_detection.h", "encoded_view.cc", @@ -150,6 +156,7 @@ test("zucchini_unittests") { "buffer_view_unittest.cc", "crc32_unittest.cc", "disassembler_dex_unittest.cc", + "disassembler_ztf_unittest.cc", "element_detection_unittest.cc", "encoded_view_unittest.cc", "equivalence_map_unittest.cc", diff --git a/disassembler_ztf.cc b/disassembler_ztf.cc new file mode 100644 index 0000000..f938d42 --- /dev/null +++ b/disassembler_ztf.cc @@ -0,0 +1,647 @@ +// Copyright 2018 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/disassembler_ztf.h" + +#include <algorithm> +#include <cmath> +#include <iterator> +#include <numeric> + +#include "base/logging.h" +#include "base/macros.h" +#include "base/numerics/checked_math.h" +#include "components/zucchini/algorithm.h" +#include "components/zucchini/buffer_source.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/io_utils.h" + +namespace zucchini { + +namespace { + +constexpr uint8_t kDelimiter = ','; + +constexpr int kHeaderMagicSize = 4; +constexpr int kFooterMagicSize = 5; +constexpr int kTotalMagicSize = kHeaderMagicSize + kFooterMagicSize; + +// Number of characters that aren't digits in each type of reference. 
+constexpr int kNumConstCharInAbs = 3; +constexpr int kNumConstCharInRel = 5; + +/******** ZtfConfig ********/ + +// For passing around metadata about the type of reference to match. +// - |digits_per_dim| is the number of decimal digits used for each of the +// line and column fields of a reference. +// - |open_char| is an ASCII character representing the opening char. +// - |close_char| is an ASCII character representing the closing char. +struct ZtfConfig { + uint8_t digits_per_dim; + uint8_t open_char; + uint8_t close_char; + + // Width in bytes of an absolute reference: two digit fields plus the open, + // delimiter and close characters. + constexpr uint8_t abs_width() const { + return digits_per_dim * 2 + kNumConstCharInAbs; + } + + // Width in bytes of a relative reference: two digit fields, two signs, plus + // the open, delimiter and close characters. + constexpr uint8_t rel_width() const { + return digits_per_dim * 2 + kNumConstCharInRel; + } + + // Overloads that select the width from the reference kind; the argument's + // value is unused. + uint8_t Width(ztf::LineCol /* lc */) const { return abs_width(); } + + uint8_t Width(ztf::DeltaLineCol /* dlc */) const { return rel_width(); } +}; + +// Creates a ZtfConfig for parsing or writing based on the desired |digits| and +// |pool|. +template <DisassemblerZtf::ReferencePool pool> +constexpr ZtfConfig MakeZtfConfig(uint8_t digits) { + switch (pool) { + case DisassemblerZtf::kAngles: + return ZtfConfig{digits, '<', '>'}; + case DisassemblerZtf::kBraces: + return ZtfConfig{digits, '{', '}'}; + case DisassemblerZtf::kBrackets: + return ZtfConfig{digits, '[', ']'}; + case DisassemblerZtf::kParentheses: + break; // Handled below. + } + return ZtfConfig{digits, '(', ')'}; +} + +/******** ZtfParser ********/ + +// ZtfParser is used to extract (absolute) LineCol and (relative) DeltaLineCol +// from a ZTF file, and contains various helpers for character, digits, and sign +// matching. +class ZtfParser { + public: + ZtfParser(offset_t hi, ConstBufferView image, ZtfConfig config) + : image_(image), hi_(hi), config_(config) { + DCHECK_LE(static_cast<size_t>(std::pow(10U, config_.digits_per_dim)), + ztf::kMaxDimValue); + } + + // Attempts to match an absolute reference at |offset|. If successful then + // assigns the result to |abs_lc| and returns true. Otherwise returns false. 
+ // An absolute reference takes the form: + // <open><digits><delimiter><digits><close> + // On failure |abs_lc| may already have been partially overwritten. + bool MatchAtOffset(offset_t offset, ztf::LineCol* abs_lc) { + if (hi_ < config_.abs_width() || offset > hi_ - config_.abs_width()) + return false; + offset_ = offset; + return MatchChar(config_.open_char) && MatchDigits(+1, &abs_lc->line) && + MatchChar(kDelimiter) && MatchDigits(+1, &abs_lc->col) && + MatchChar(config_.close_char); + } + + // Attempts to match a relative reference at |offset|. If successful then + // assigns the result to |rel_dlc| and returns true. Otherwise returns false + // (|rel_dlc| may then have been partially overwritten). A relative reference + // takes the form: + // <open><sign><digits><delimiter><sign><digits><close> + bool MatchAtOffset(offset_t offset, ztf::DeltaLineCol* rel_dlc) { + if (hi_ < config_.rel_width() || offset > hi_ - config_.rel_width()) + return false; + offset_ = offset; + ztf::dim_t line_sign; + ztf::dim_t col_sign; + return MatchChar(config_.open_char) && MatchSign(&line_sign) && + MatchDigits(line_sign, &rel_dlc->line) && MatchChar(kDelimiter) && + MatchSign(&col_sign) && MatchDigits(col_sign, &rel_dlc->col) && + MatchChar(config_.close_char); + } + + private: + // The Match*() functions below can advance |offset_|, and return a bool to + // indicate success to allow chaining using &&. + + // Returns true if |character| is at location |offset_| in |image_|. + // |offset_| is incremented whether or not the character matches. + bool MatchChar(uint8_t character) { + return character == image_.read<uint8_t>(offset_++); + } + + // Looks for '+' or '-' at |offset_|. If found, stores +1 or -1 in |sign| and + // returns true. Otherwise returns false. 
+ bool MatchSign(ztf::dim_t* sign) { + uint8_t val = image_.read<uint8_t>(offset_++); + if (val == static_cast<uint8_t>(ztf::SignChar::kMinus)) { + *sign = -1; + return true; + } + if (val == static_cast<uint8_t>(ztf::SignChar::kPlus)) { + *sign = 1; + return true; + } + return false; + } + + // Attempts to extract a number with the number of base 10 digits equal to + // |config_.digits_per_dim| from |image_| starting from |offset_|. Returns + // true and assigns the integer value to |value| if successful. The stored + // value is |sign| times the parsed magnitude. + bool MatchDigits(ztf::dim_t sign, ztf::dim_t* value) { + ztf::dim_t output = 0; + for (int i = 0; i < config_.digits_per_dim; ++i) { + auto digit = image_.read<uint8_t>(offset_++); + if (digit >= '0' && digit < '0' + 10) + output = output * 10 + digit - '0'; + else + return false; + } + if (!output && sign < 0) // Disallow "-0", "-00", etc. + return false; + *value = sign * output; + return true; + } + + ConstBufferView image_; + const offset_t hi_; + const ZtfConfig config_; + offset_t offset_ = 0; + + DISALLOW_COPY_AND_ASSIGN(ZtfParser); +}; + +/******** ZtfWriter ********/ + +// ZtfWriter is used to write references to an image. This includes writing +// the enclosing characters around the reference. +class ZtfWriter { + public: + ZtfWriter(MutableBufferView image, ZtfConfig config) + : image_(image), + config_(config), + val_bound_( + static_cast<ztf::dim_t>(std::pow(10, config_.digits_per_dim))) {} + + // Write an absolute reference |abs_ref| at |offset|. Note that references + // that would overwrite a newline are skipped as this would invalidate all + // the other reference line numbers. References whose line or column needs + // more than |config_.digits_per_dim| digits, or that would run past the end + // of the image, are skipped too. 
+ void Write(offset_t offset, ztf::LineCol abs_ref) { + offset_ = offset; + if (!SafeToWriteNumber(abs_ref.line) || !SafeToWriteNumber(abs_ref.col) || + !SafeToWriteData(offset_, offset_ + config_.abs_width())) { + return; + } + WriteChar(config_.open_char); + WriteNumber(abs_ref.line); + WriteChar(kDelimiter); + WriteNumber(abs_ref.col); + WriteChar(config_.close_char); + } + + // Write a relative reference |rel_ref| at |offset|. Note that references + // that would overwrite a newline are skipped as this would invalidate all + // the other reference line numbers. References whose deltas need more than + // |config_.digits_per_dim| digits, or that would run past the end of the + // image, are skipped too. + void Write(offset_t offset, ztf::DeltaLineCol rel_ref) { + offset_ = offset; + if (!SafeToWriteNumber(rel_ref.line) || !SafeToWriteNumber(rel_ref.col) || + !SafeToWriteData(offset_, offset_ + config_.rel_width())) { + return; + } + WriteChar(config_.open_char); + WriteSign(rel_ref.line); + WriteNumber(rel_ref.line); + WriteChar(kDelimiter); + WriteSign(rel_ref.col); + WriteNumber(rel_ref.col); + WriteChar(config_.close_char); + } + + private: + // Returns whether it is safe to modify bytes in |[lo, hi)| in |image_| for + // Reference correction. Failure cases are: + // - Out-of-bound writes. + // - Overwriting '\n'. This is a ZTF special case since '\n' dictates file + // structure, and Reference correction should never mess with this. + bool SafeToWriteData(offset_t lo, offset_t hi) const { + DCHECK_LE(lo, hi); + // Out of bounds. + if (hi > image_.size()) + return false; + for (offset_t i = lo; i < hi; ++i) { + if (image_.read<uint8_t>(i) == '\n') + return false; + } + return true; + } + + // Checks whether it is safe to write a |val| based on + // |config_.digits_per_dim|, i.e. whether its magnitude is below + // |val_bound_|. + bool SafeToWriteNumber(ztf::dim_t val) const { + return std::abs(val) < val_bound_; + } + + // The Write*() functions each advance |offset_| by a fixed distance. The + // caller should ensure there's enough space to write data. + + // Write |character| at |offset_| and increment |offset_|. 
+ void WriteChar(uint8_t character) { image_.write(offset_++, character); } + + // Write the sign of |value| at |offset_| and increment |offset_|. + void WriteSign(ztf::dim_t value) { + image_.write(offset_++, + value >= 0 ? ztf::SignChar::kPlus : ztf::SignChar::kMinus); + } + + // Writes the absolute value of the number represented by |value| at |offset_| + // using zero padding to fill |config_.digits_per_dim|. + void WriteNumber(ztf::dim_t value) { + size_t size = config_.digits_per_dim + 1; + DCHECK_LE(size, kMaxDigitCount + 1); + char digits[kMaxDigitCount + 1]; // + 1 for terminator. + int len = + snprintf(digits, size, "%0*u", config_.digits_per_dim, std::abs(value)); + DCHECK_EQ(len, config_.digits_per_dim); + for (int i = 0; i < len; ++i) + image_.write(offset_++, digits[i]); + } + + MutableBufferView image_; + const ZtfConfig config_; + // Bound on numeric values, as limited by |config_.digits_per_dim|. + const ztf::dim_t val_bound_; + offset_t offset_ = 0; + DISALLOW_COPY_AND_ASSIGN(ZtfWriter); +}; + +// ReferenceReader implementation for reading text references. |T| is +// ztf::LineCol for absolute references or ztf::DeltaLineCol for relative ones. +template <typename T> +class ZtfReferenceReader : public ReferenceReader { + public: + ZtfReferenceReader(offset_t lo, + offset_t hi, + ConstBufferView image, + const ZtfTranslator& translator, + ZtfConfig config) + : offset_(lo), + hi_(hi), + translator_(translator), + config_(config), + parser_(hi_, image, config_) { + DCHECK_LE(hi_, image.size()); + } + + // Walks |offset_| towards |hi_| running |parser_|. Returns the first match + // whose target lies inside the file and advances |offset_| past it. Returns + // base::nullopt once |hi_| is reached. + base::Optional<Reference> GetNext() override { + T line_col; + for (; offset_ < hi_; ++offset_) { + if (!parser_.MatchAtOffset(offset_, &line_col)) + continue; + + auto target = ConvertToTargetOffset(offset_, line_col); + // Ignore targets that point outside the file. 
+ if (target == kInvalidOffset) + continue; + offset_t location = offset_; + offset_ += config_.Width(line_col); + return Reference{location, target}; + } + return base::nullopt; + } + + private: + // Converts |lc| (an absolute reference) to an offset using |translator_|. + offset_t ConvertToTargetOffset(offset_t /* location */, + ztf::LineCol lc) const { + return translator_.LineColToOffset(lc); + } + + // Converts |dlc| (a relative reference) to an offset using |translator_|. + // This requires converting the |dlc| to a ztf::LineCol to find the offset. + offset_t ConvertToTargetOffset(offset_t location, + ztf::DeltaLineCol dlc) const { + auto lc = translator_.OffsetToLineCol(location); + if (!lc.has_value()) + return kInvalidOffset; + return translator_.LineColToOffset(lc.value() + dlc); + } + + offset_t offset_; + const offset_t hi_; + const ZtfTranslator& translator_; + const ZtfConfig config_; + ZtfParser parser_; +}; + +// ReferenceWriter implementation for writing text references. |T| is +// ztf::LineCol for absolute references or ztf::DeltaLineCol for relative ones. +template <typename T> +class ZtfReferenceWriter : public ReferenceWriter { + public: + ZtfReferenceWriter(MutableBufferView image, + const ZtfTranslator& translator, + ZtfConfig config) + : translator_(translator), writer_(image, config) {} + + // Writes |reference| at |reference.location|. References whose target cannot + // be expressed as a valid |T| are silently skipped. + void PutNext(Reference reference) override { + T line_col; + if (!ConvertToTargetLineCol(reference, &line_col)) + return; + + writer_.Write(reference.location, line_col); + } + + private: + // Converts |reference| to an absolute reference to be stored in |out_lc|. + // Returns true on success, false if |reference.target| has no valid + // (line, col) in the file. + bool ConvertToTargetLineCol(Reference reference, ztf::LineCol* out_lc) { + auto temp_lc = translator_.OffsetToLineCol(reference.target); + // The emptiness test must come first and short-circuit, so that value() is + // never called on an empty Optional. + if (!temp_lc.has_value() || !translator_.IsValid(temp_lc.value())) + return false; + + *out_lc = temp_lc.value(); + return true; + } + + // Converts |reference| to a relative reference to be stored in |out_dlc|. + // Will return true on success. 
+ bool ConvertToTargetLineCol(Reference reference, ztf::DeltaLineCol* out_dlc) { + auto location_lc = translator_.OffsetToLineCol(reference.location); + if (!location_lc.has_value()) + return false; + + auto target_lc = translator_.OffsetToLineCol(reference.target); + if (!target_lc.has_value()) + return false; + + *out_dlc = target_lc.value() - location_lc.value(); + return translator_.IsValid(reference.location, *out_dlc); + } + + const ZtfTranslator& translator_; + ZtfWriter writer_; +}; + +// Reads a text header to check for the magic string "ZTxt" at the start +// indicating the file should be treated as a Zucchini text file. +bool ReadZtfHeader(ConstBufferView image) { + BufferSource source(image); + // Reject images too short to hold both header and footer, such as empty + // images and "ZTxtxTZ\n" (missing 't'). + if (source.size() < kTotalMagicSize) + return false; + return source.CheckNextBytes({'Z', 'T', 'x', 't'}); +} + +} // namespace + +/******** ZtfTranslator ********/ + +ZtfTranslator::ZtfTranslator() {} + +ZtfTranslator::~ZtfTranslator() = default; + +bool ZtfTranslator::Init(ConstBufferView image) { + line_starts_.clear(); + // Record the starting offset of every line in |image| into |line_starts_|. + line_starts_.push_back(0); + for (size_t i = 0; i < image.size(); ++i) { + if (image.read<uint8_t>(i) == '\n') { + // Maximum number of entries is |ztf::kMaxDimValue|, including the end + // sentinel. + if (line_starts_.size() >= ztf::kMaxDimValue) + return false; + line_starts_.push_back(i + 1); + // Check that the line length is reachable from an absolute reference. + if (line_starts_.back() - *std::next(line_starts_.rbegin()) >= + ztf::kMaxDimValue) { + return false; + } + } + } + // Since the last character of ZTF file is always '\n', |line_starts_| will + // always contain the file length as the last element, which serves as a + // sentinel. 
+ CHECK_EQ(image.size(), static_cast<size_t>(line_starts_.back())); + return true; +} + +bool ZtfTranslator::IsValid(ztf::LineCol lc) const { + DCHECK(!line_starts_.empty()); + return lc.line >= 1 && lc.col >= 1 && + static_cast<offset_t>(lc.line) <= NumLines() && + static_cast<offset_t>(lc.col) <= LineLength(lc.line); +} + +bool ZtfTranslator::IsValid(offset_t offset, ztf::DeltaLineCol dlc) const { + DCHECK(!line_starts_.empty()); + auto abs_lc = OffsetToLineCol(offset); + if (!abs_lc.has_value()) + return false; + + if (!base::CheckAdd(abs_lc->line, dlc.line).IsValid() || + !base::CheckAdd(abs_lc->col, dlc.col).IsValid()) { + return false; + } + return IsValid(abs_lc.value() + dlc); +} + +offset_t ZtfTranslator::LineColToOffset(ztf::LineCol lc) const { + // Guard against out of bounds access to |line_starts_| and ensure the + // |lc| falls within the file. + DCHECK(!line_starts_.empty()); + if (!IsValid(lc)) + return kInvalidOffset; + + offset_t target = line_starts_[lc.line - 1] + lc.col - 1; + DCHECK_LT(target, line_starts_.back()); + return target; +} + +base::Optional<ztf::LineCol> ZtfTranslator::OffsetToLineCol( + offset_t offset) const { + DCHECK(!line_starts_.empty()); + // Don't place a target outside the image. 
+ if (offset >= line_starts_.back()) + return base::nullopt; + auto it = SearchForRange(offset); + ztf::LineCol lc; + lc.line = std::distance(line_starts_.cbegin(), it) + 1; + lc.col = offset - line_starts_[lc.line - 1] + 1; + DCHECK_LE(static_cast<offset_t>(lc.col), LineLength(lc.line)); + return lc; +} + +std::vector<offset_t>::const_iterator ZtfTranslator::SearchForRange( + offset_t offset) const { + DCHECK(!line_starts_.empty()); + auto it = + std::upper_bound(line_starts_.cbegin(), line_starts_.cend(), offset); + DCHECK(it != line_starts_.cbegin()); + return --it; +} + +offset_t ZtfTranslator::LineLength(uint16_t line) const { + DCHECK_GE(line, 1); + DCHECK_LE(line, NumLines()); + return line_starts_[line] - line_starts_[line - 1]; +} + +/******** DisassemblerZtf ********/ + +// Use 2 even though reference "chaining" isn't present in ZTF as it is the +// usual case for other Disassemblers and this is meant to mimic that as closely +// as possible. +DisassemblerZtf::DisassemblerZtf() : Disassembler(2) {} + +DisassemblerZtf::~DisassemblerZtf() = default; + +// static. 
+bool DisassemblerZtf::QuickDetect(ConstBufferView image) { + return ReadZtfHeader(image); +} + +ExecutableType DisassemblerZtf::GetExeType() const { + return kExeTypeZtf; +} + +std::string DisassemblerZtf::GetExeTypeString() const { + return "Zucchini Text Format"; +} + +std::vector<ReferenceGroup> DisassemblerZtf::MakeReferenceGroups() const { + return { + {{5, TypeTag(kAnglesAbs1), PoolTag(kAngles)}, + &DisassemblerZtf::MakeReadAbs<1, kAngles>, + &DisassemblerZtf::MakeWriteAbs<1, kAngles>}, + {{7, TypeTag(kAnglesAbs2), PoolTag(kAngles)}, + &DisassemblerZtf::MakeReadAbs<2, kAngles>, + &DisassemblerZtf::MakeWriteAbs<2, kAngles>}, + {{9, TypeTag(kAnglesAbs3), PoolTag(kAngles)}, + &DisassemblerZtf::MakeReadAbs<3, kAngles>, + &DisassemblerZtf::MakeWriteAbs<3, kAngles>}, + {{7, TypeTag(kAnglesRel1), PoolTag(kAngles)}, + &DisassemblerZtf::MakeReadRel<1, kAngles>, + &DisassemblerZtf::MakeWriteRel<1, kAngles>}, + {{9, TypeTag(kAnglesRel2), PoolTag(kAngles)}, + &DisassemblerZtf::MakeReadRel<2, kAngles>, + &DisassemblerZtf::MakeWriteRel<2, kAngles>}, + {{11, TypeTag(kAnglesRel3), PoolTag(kAngles)}, + &DisassemblerZtf::MakeReadRel<3, kAngles>, + &DisassemblerZtf::MakeWriteRel<3, kAngles>}, + {{5, TypeTag(kBracesAbs1), PoolTag(kBraces)}, + &DisassemblerZtf::MakeReadAbs<1, kBraces>, + &DisassemblerZtf::MakeWriteAbs<1, kBraces>}, + {{7, TypeTag(kBracesAbs2), PoolTag(kBraces)}, + &DisassemblerZtf::MakeReadAbs<2, kBraces>, + &DisassemblerZtf::MakeWriteAbs<2, kBraces>}, + {{9, TypeTag(kBracesAbs3), PoolTag(kBraces)}, + &DisassemblerZtf::MakeReadAbs<3, kBraces>, + &DisassemblerZtf::MakeWriteAbs<3, kBraces>}, + {{7, TypeTag(kBracesRel1), PoolTag(kBraces)}, + &DisassemblerZtf::MakeReadRel<1, kBraces>, + &DisassemblerZtf::MakeWriteRel<1, kBraces>}, + {{9, TypeTag(kBracesRel2), PoolTag(kBraces)}, + &DisassemblerZtf::MakeReadRel<2, kBraces>, + &DisassemblerZtf::MakeWriteRel<2, kBraces>}, + {{11, TypeTag(kBracesRel3), PoolTag(kBraces)}, + &DisassemblerZtf::MakeReadRel<3, kBraces>, + 
&DisassemblerZtf::MakeWriteRel<3, kBraces>}, + {{5, TypeTag(kBracketsAbs1), PoolTag(kBrackets)}, + &DisassemblerZtf::MakeReadAbs<1, kBrackets>, + &DisassemblerZtf::MakeWriteAbs<1, kBrackets>}, + {{7, TypeTag(kBracketsAbs2), PoolTag(kBrackets)}, + &DisassemblerZtf::MakeReadAbs<2, kBrackets>, + &DisassemblerZtf::MakeWriteAbs<2, kBrackets>}, + {{9, TypeTag(kBracketsAbs3), PoolTag(kBrackets)}, + &DisassemblerZtf::MakeReadAbs<3, kBrackets>, + &DisassemblerZtf::MakeWriteAbs<3, kBrackets>}, + {{7, TypeTag(kBracketsRel1), PoolTag(kBrackets)}, + &DisassemblerZtf::MakeReadRel<1, kBrackets>, + &DisassemblerZtf::MakeWriteRel<1, kBrackets>}, + {{9, TypeTag(kBracketsRel2), PoolTag(kBrackets)}, + &DisassemblerZtf::MakeReadRel<2, kBrackets>, + &DisassemblerZtf::MakeWriteRel<2, kBrackets>}, + {{11, TypeTag(kBracketsRel3), PoolTag(kBrackets)}, + &DisassemblerZtf::MakeReadRel<3, kBrackets>, + &DisassemblerZtf::MakeWriteRel<3, kBrackets>}, + {{5, TypeTag(kParenthesesAbs1), PoolTag(kParentheses)}, + &DisassemblerZtf::MakeReadAbs<1, kParentheses>, + &DisassemblerZtf::MakeWriteAbs<1, kParentheses>}, + {{7, TypeTag(kParenthesesAbs2), PoolTag(kParentheses)}, + &DisassemblerZtf::MakeReadAbs<2, kParentheses>, + &DisassemblerZtf::MakeWriteAbs<2, kParentheses>}, + {{9, TypeTag(kParenthesesAbs3), PoolTag(kParentheses)}, + &DisassemblerZtf::MakeReadAbs<3, kParentheses>, + &DisassemblerZtf::MakeWriteAbs<3, kParentheses>}, + {{7, TypeTag(kParenthesesRel1), PoolTag(kParentheses)}, + &DisassemblerZtf::MakeReadRel<1, kParentheses>, + &DisassemblerZtf::MakeWriteRel<1, kParentheses>}, + {{9, TypeTag(kParenthesesRel2), PoolTag(kParentheses)}, + &DisassemblerZtf::MakeReadRel<2, kParentheses>, + &DisassemblerZtf::MakeWriteRel<2, kParentheses>}, + {{11, TypeTag(kParenthesesRel3), PoolTag(kParentheses)}, + &DisassemblerZtf::MakeReadRel<3, kParentheses>, + &DisassemblerZtf::MakeWriteRel<3, kParentheses>}, + }; +} + +template <uint8_t digits, DisassemblerZtf::ReferencePool pool> 
+std::unique_ptr<ReferenceReader> DisassemblerZtf::MakeReadAbs(offset_t lo, + offset_t hi) { + static_assert(digits >= 1 && digits <= kMaxDigitCount, + "|digits| must be in range [1, 3]"); + return std::make_unique<ZtfReferenceReader<ztf::LineCol>>( + lo, hi, image_, translator_, MakeZtfConfig<pool>(digits)); +} + +template <uint8_t digits, DisassemblerZtf::ReferencePool pool> +std::unique_ptr<ReferenceReader> DisassemblerZtf::MakeReadRel(offset_t lo, + offset_t hi) { + static_assert(digits >= 1 && digits <= kMaxDigitCount, + "|digits| must be in range [1, 3]"); + return std::make_unique<ZtfReferenceReader<ztf::DeltaLineCol>>( + lo, hi, image_, translator_, MakeZtfConfig<pool>(digits)); +} + +template <uint8_t digits, DisassemblerZtf::ReferencePool pool> +std::unique_ptr<ReferenceWriter> DisassemblerZtf::MakeWriteAbs( + MutableBufferView image) { + static_assert(digits >= 1 && digits <= kMaxDigitCount, + "|digits| must be in range [1, 3]"); + return std::make_unique<ZtfReferenceWriter<ztf::LineCol>>( + image, translator_, MakeZtfConfig<pool>(digits)); +} + +template <uint8_t digits, DisassemblerZtf::ReferencePool pool> +std::unique_ptr<ReferenceWriter> DisassemblerZtf::MakeWriteRel( + MutableBufferView image) { + static_assert(digits >= 1 && digits <= kMaxDigitCount, + "|digits| must be in range [1, 3]"); + return std::make_unique<ZtfReferenceWriter<ztf::DeltaLineCol>>( + image, translator_, MakeZtfConfig<pool>(digits)); +} + +bool DisassemblerZtf::Parse(ConstBufferView image) { + image_ = image; + if (!ReadZtfHeader(image_)) + return false; + + CHECK_GE(image_.size(), + static_cast<size_t>(kTotalMagicSize)); // Needs header and footer. + + // Find the terminating footer "txTZ\n" that indicates the end of the image. 
+ offset_t offset = 0; + for (; offset <= image_.size() - kFooterMagicSize; offset++) { + if (image_.read<uint8_t>(offset) == 't' && + image_.read<uint8_t>(offset + 1) == 'x' && + image_.read<uint8_t>(offset + 2) == 'T' && + image_.read<uint8_t>(offset + 3) == 'Z' && + image_.read<uint8_t>(offset + 4) == '\n') { + break; + } + } + + // If no footer is found before the end of the image then the parsing failed. + if (offset > image_.size() - kFooterMagicSize) + return false; + image_.shrink(offset + kFooterMagicSize); + + return translator_.Init(image_); +} + +} // namespace zucchini diff --git a/disassembler_ztf.h b/disassembler_ztf.h new file mode 100644 index 0000000..0719093 --- /dev/null +++ b/disassembler_ztf.h @@ -0,0 +1,201 @@ +// Copyright 2018 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_DISASSEMBLER_ZTF_H_ +#define COMPONENTS_ZUCCHINI_DISASSEMBLER_ZTF_H_ + +#include <stdint.h> +#include <stdlib.h> + +#include <memory> +#include <string> +#include <vector> + +#include "base/macros.h" +#include "base/optional.h" +#include "components/zucchini/disassembler.h" +#include "components/zucchini/image_utils.h" +#include "components/zucchini/type_ztf.h" + +namespace zucchini { + +// Disassembler for text based files. This file format is supported for +// debugging Zucchini and is not intended for production usage. +// +// A valid Zucchini Text Format (ZTF) file is specified as follows: +// +// Header: +// The first four bytes must be - 'Z' 'T' 'x' 't' +// Footer: +// The last five bytes must be - 't' 'x' 'T' 'Z' '\n' +// (note that terminating new line is required). +// Content: +// The content can be any sequence of printable ASCII characters and new line +// (but not carriage return). This excludes the sequence that comprises the +// Footer. +// References: +// A reference is either Absolute or Relative. 
All references must begin and +// end with a pair of enclosing characters <open>, <close>. The options are: +// - Angles: '<' and '>' +// - Braces: '{' and '}' +// - Brackets: '[' and ']' +// - Parentheses: '(' and ')' +// +// A reference contains three items: +// - A line number <line> +// - A delimiter ',' <delimiter> +// - A column number <col> +// <line> and <col> may contain 1-3 digits and both must contain the same +// number of digits. If a number is too short then it can be left-padded +// with '0'. +// +// For Absolute references, <line> and <col> are the 1-based (i.e. positive) +// line and column numbers of a character in the ZTF. This follows the +// standard convention for text editors. Note that "\n" is considered to be +// part of a preceding line. +// +// <open><line><delimiter><col><close> +// +// For Relative references, <line> and <col> are integer deltas of the +// target's (absolute) line and column relative to the line and column of the +// reference's first byte (i.e. <open>). Relative references have <sign> ('+' +// or '-') before <line> and <col>. For the special case of "0", "00", etc., +// <sign> must be "+". +// +// <open><sign><line><delimiter><sign><col><close> +// +// If a reference points outside the target either in writing or reading it is +// considered invalid and ignored. The same applies if it overflows a line, +// i.e. if a line is 10 characters long and a reference targets character 11 of +// that line, it is rejected. Lines are delimited with '\n' which is counted +// toward the line length. +// +// If a reference is to be written that would overwrite a '\n' character it is +// ignored as this would break all other line values. + +enum : size_t { kMaxDigitCount = 3 }; + +// Helper class for translating among offset_t, ztf::LineCol and +// ztf::DeltaLineCol. +class ZtfTranslator { + public: + ZtfTranslator(); + ~ZtfTranslator(); + + // Initializes |line_starts_| with the contents of |image|. Returns false if + // the number of lines, or the length of any line, reaches the + // |ztf::kMaxDimValue| limit. 
+ bool Init(ConstBufferView image); + + // Checks if |lc| is a valid location in the file. + bool IsValid(ztf::LineCol lc) const; + + // Checks if |dlc| relative to |offset| is a valid location in the file. + bool IsValid(offset_t offset, ztf::DeltaLineCol dlc) const; + + // Returns the offset corresponding to |line_col| if it is valid. Otherwise + // returns |kInvalidOffset|. + offset_t LineColToOffset(ztf::LineCol line_col) const; + + // Returns the ztf::LineCol for an |offset| if it is valid. Otherwise returns + // base::nullopt. + base::Optional<ztf::LineCol> OffsetToLineCol(offset_t offset) const; + + private: + // Returns an iterator to the range containing |offset|, where each range is + // represented by its starting offset. The next element contains the upper + // bound of the range. + std::vector<offset_t>::const_iterator SearchForRange(offset_t offset) const; + + // Returns the length of a 1-indexed line, including its terminating '\n'. + // The caller is expected to check that the requested line exists. + offset_t LineLength(uint16_t line) const; + + // Returns the number of lines. |line_starts_| holds one extra sentinel + // entry, hence the -1. + offset_t NumLines() const { + return static_cast<offset_t>(line_starts_.size() - 1); + } + + // |line_starts_| is a sorted list of each line's starting offset, along with + // the image size as the sentinel; it looks like {0, ..., image.size}. + std::vector<offset_t> line_starts_; + DISALLOW_COPY_AND_ASSIGN(ZtfTranslator); +}; + +// Disassembler for Zucchini Text Format (ZTF). +class DisassemblerZtf : public Disassembler { + public: + // Target Pools + enum ReferencePool : uint8_t { + kAngles, // <> + kBraces, // {} + kBrackets, // [] + kParentheses // () + }; + + // Type breakdown. Should contain all permutations of ReferencePool, Abs|Rel + // and the possible number of digits (1-3). 
+ enum ReferenceType : uint8_t { + kAnglesAbs1, + kAnglesAbs2, + kAnglesAbs3, + kAnglesRel1, + kAnglesRel2, + kAnglesRel3, + kBracesAbs1, + kBracesAbs2, + kBracesAbs3, + kBracesRel1, + kBracesRel2, + kBracesRel3, + kBracketsAbs1, + kBracketsAbs2, + kBracketsAbs3, + kBracketsRel1, + kBracketsRel2, + kBracketsRel3, + kParenthesesAbs1, + kParenthesesAbs2, + kParenthesesAbs3, + kParenthesesRel1, + kParenthesesRel2, + kParenthesesRel3, + kNumTypes + }; + + DisassemblerZtf(); + ~DisassemblerZtf() override; + + // Applies quick checks to determine if |image| *may* point to the start of a + // ZTF file. Returns true on success. + static bool QuickDetect(ConstBufferView image); + + // Disassembler: + ExecutableType GetExeType() const override; + std::string GetExeTypeString() const override; + std::vector<ReferenceGroup> MakeReferenceGroups() const override; + + // Reference Readers, templated to allow configurable digit count and pool. + template <uint8_t digits, ReferencePool pool> + std::unique_ptr<ReferenceReader> MakeReadAbs(offset_t lo, offset_t hi); + template <uint8_t digits, ReferencePool pool> + std::unique_ptr<ReferenceReader> MakeReadRel(offset_t lo, offset_t hi); + + // Reference Writers, templated to allow configurable digit count and pool. + template <uint8_t digits, ReferencePool pool> + std::unique_ptr<ReferenceWriter> MakeWriteAbs(MutableBufferView image); + template <uint8_t digits, ReferencePool pool> + std::unique_ptr<ReferenceWriter> MakeWriteRel(MutableBufferView image); + + private: + friend Disassembler; + + // Disassembler: + bool Parse(ConstBufferView image) override; + + ZtfTranslator translator_; + + DISALLOW_COPY_AND_ASSIGN(DisassemblerZtf); +}; + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_DISASSEMBLER_ZTF_H_ diff --git a/disassembler_ztf_unittest.cc b/disassembler_ztf_unittest.cc new file mode 100644 index 0000000..1e71359 --- /dev/null +++ b/disassembler_ztf_unittest.cc @@ -0,0 +1,402 @@ +// Copyright 2018 The Chromium Authors. 
All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/zucchini/disassembler_ztf.h" + +#include <stddef.h> +#include <stdint.h> + +#include <algorithm> +#include <map> +#include <set> +#include <utility> +#include <vector> + +#include "base/logging.h" +#include "base/stl_util.h" +#include "base/strings/string_piece.h" +#include "components/zucchini/buffer_view.h" +#include "components/zucchini/element_detection.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace zucchini { + +namespace { + +constexpr char kNormalText[] = R"(ZTxt +Hello World! +This is an example of an absolute reference <<1,1>> +And {-01,+05} is an example of a relative ref +txTZ +TRAILING DATA)"; +// -1 to exclude null byte. +constexpr size_t kNormalTextExtraBytes = base::size("TRAILING DATA") - 1; + +constexpr char kOutOfBoundsText[] = R"(ZTxt<1,1> +Hello World! +This is an example of an OOB absolute reference <890,605> +And {-050,+100} is an example of an OOB relative ref. +but [+00,+10] is valid at least. As is (1,5). +<1, 6> and { ,1} aren't nor is {4,5] +{7,6}<1,1><2,3>{+00,+00}{004,100}[+00,+60][+000,-100]<-000,-035>(-00,-00)txTZ +)"; + +// Converts a raw string into data. +std::vector<uint8_t> StrToData(base::StringPiece s) { + return std::vector<uint8_t>(s.begin(), s.end()); +} + +// Compare if |a.location < b.location| as references have unique locations. +struct ReferenceCompare { + bool operator()(const Reference& a, const Reference& b) const { + return a.location < b.location; + } +}; + +using ReferenceKey = + std::pair<DisassemblerZtf::ReferencePool, DisassemblerZtf::ReferenceType>; +using ReferenceSets = + std::map<ReferenceKey, std::set<Reference, ReferenceCompare>>; + +// Write references in |refs_to_write| to |image|. Also validate the +// disassembler parses |image| such that it is of |expected_size|. 
+void WriteReferences(MutableBufferView image, + size_t expected_size, + const ReferenceSets& refs_to_write) { + EXPECT_TRUE(DisassemblerZtf::QuickDetect(image)); + std::unique_ptr<DisassemblerZtf> dis = + Disassembler::Make<DisassemblerZtf>(image); + EXPECT_TRUE(dis); + EXPECT_EQ(expected_size, dis->size()); + image.shrink(dis->size()); + auto reference_groups = dis->MakeReferenceGroups(); + for (const auto& group : reference_groups) { + auto writer = group.GetWriter(image, dis.get()); + ReferenceKey key = { + static_cast<DisassemblerZtf::ReferencePool>(group.pool_tag().value()), + static_cast<DisassemblerZtf::ReferenceType>(group.type_tag().value())}; + if (!refs_to_write.count(key)) + continue; + for (const auto& ref : refs_to_write.at(key)) + writer->PutNext(ref); + } +} + +// Read references in |refs_to_read| from |image|. Once found +// the elements are removed from |refs_to_read|. Also validate the +// disassembler parses |image| such that it is of |expected_size|. +void ReadReferences(ConstBufferView image, + size_t expected_size, + ReferenceSets* refs_to_read) { + EXPECT_TRUE(DisassemblerZtf::QuickDetect(image)); + std::unique_ptr<DisassemblerZtf> dis = + Disassembler::Make<DisassemblerZtf>(image); + EXPECT_TRUE(dis); + EXPECT_EQ(expected_size, dis->size()); + auto reference_groups = dis->MakeReferenceGroups(); + for (const auto& group : reference_groups) { + auto reader = group.GetReader(dis.get()); + ReferenceKey key = { + static_cast<DisassemblerZtf::ReferencePool>(group.pool_tag().value()), + static_cast<DisassemblerZtf::ReferenceType>(group.type_tag().value())}; + if (!refs_to_read->count(key)) { + // No elements of this pool/type pair are expected so assert that none are + // found. + auto ref = reader->GetNext(); + EXPECT_FALSE(ref.has_value()); + continue; + } + // For each reference remove it from the set if it exists, error if + // unexpected references are found. 
+ for (auto ref = reader->GetNext(); ref.has_value(); + ref = reader->GetNext()) { + EXPECT_EQ(1UL, refs_to_read->at(key).erase(ref.value())); + } + EXPECT_EQ(0U, refs_to_read->at(key).size()); + } +} + +void TestTranslation(const ZtfTranslator& translator, + offset_t expected_location, + ztf::LineCol lc) { + // Check the lc is translated to the expected location. + EXPECT_EQ(expected_location, translator.LineColToOffset(lc)); + auto new_lc = translator.OffsetToLineCol(expected_location); + if (expected_location == kInvalidOffset) { + EXPECT_FALSE(translator.IsValid(lc)); + EXPECT_FALSE(new_lc.has_value()); + } else { + EXPECT_TRUE(translator.IsValid(lc)); + // Check that the reverse is true. |ztf::LineCol{0, 0}| is a sentinel and + // should never be valid. + EXPECT_EQ(lc.line, new_lc->line); + EXPECT_EQ(lc.col, new_lc->col); + } +} + +template <typename T> +size_t CountDistinct(const std::vector<T>& v) { + return std::set<T>(v.begin(), v.end()).size(); +} + +} // namespace + +TEST(ZtfTranslatorTest, Translate) { + ztf::dim_t kMaxVal = INT16_MAX; + ztf::dim_t kMinVal = INT16_MIN; + + const std::vector<uint8_t> text(StrToData(kOutOfBoundsText)); + ConstBufferView image(text.data(), text.size()); + ZtfTranslator translator; + EXPECT_TRUE(translator.Init(image)); + + // Absolute Translations: + + // Check a bunch of invalid locations. 
+ TestTranslation(translator, kInvalidOffset, ztf::LineCol{50, 60}); + TestTranslation(translator, kInvalidOffset, ztf::LineCol{0, 0}); + TestTranslation(translator, kInvalidOffset, ztf::LineCol{1, 0}); + TestTranslation(translator, kInvalidOffset, ztf::LineCol{0, 1}); + TestTranslation(translator, kInvalidOffset, ztf::LineCol{0, 1}); + TestTranslation(translator, kInvalidOffset, ztf::LineCol{1, -1}); + TestTranslation(translator, kInvalidOffset, ztf::LineCol{-1, 1}); + TestTranslation(translator, kInvalidOffset, ztf::LineCol{-1, -1}); + TestTranslation(translator, kInvalidOffset, ztf::LineCol{1, kMaxVal}); + TestTranslation(translator, kInvalidOffset, ztf::LineCol{kMaxVal, 1}); + TestTranslation(translator, kInvalidOffset, ztf::LineCol{1, kMinVal}); + TestTranslation(translator, kInvalidOffset, ztf::LineCol{kMinVal, 1}); + + // Check the start of the file. + TestTranslation(translator, 0, ztf::LineCol{1, 1}); + TestTranslation(translator, 1, ztf::LineCol{1, 2}); + + // Check the boundary around a newline. + TestTranslation(translator, 9, ztf::LineCol{1, 10}); + TestTranslation(translator, kInvalidOffset, ztf::LineCol{1, 11}); + TestTranslation(translator, 10, ztf::LineCol{2, 1}); + TestTranslation(translator, kInvalidOffset, ztf::LineCol{2, 0}); + + // Check the end of the file. + TestTranslation(translator, kInvalidOffset, ztf::LineCol{8, 1}); + TestTranslation(translator, kInvalidOffset, ztf::LineCol{7, 79}); + // Need to subtract to account for the newline. + TestTranslation(translator, text.size() - 1, ztf::LineCol{7, 78}); + TestTranslation(translator, text.size() - 2, ztf::LineCol{7, 77}); + + // Delta Validity + // - Reminder! 0 -> 1:1 + + // Common possible edge cases. 
+ EXPECT_TRUE(translator.IsValid(0, ztf::DeltaLineCol{0, 0})); + EXPECT_TRUE(translator.IsValid(0, ztf::DeltaLineCol{0, 1})); + EXPECT_TRUE(translator.IsValid(0, ztf::DeltaLineCol{1, 0})); + EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{-1, -1})); + EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{-1, 0})); + EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{0, -1})); + EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{0, -1})); + EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{0, kMaxVal})); + EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{kMaxVal, 0})); + EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{0, kMinVal})); + EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{kMinVal, 0})); + EXPECT_FALSE(translator.IsValid(233, ztf::DeltaLineCol{0, kMaxVal})); + EXPECT_FALSE(translator.IsValid(233, ztf::DeltaLineCol{kMaxVal, 0})); + EXPECT_FALSE(translator.IsValid(233, ztf::DeltaLineCol{kMaxVal, kMaxVal})); + + // Newline area. + EXPECT_TRUE(translator.IsValid(0, ztf::DeltaLineCol{0, 9})); + EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{0, 10})); + EXPECT_FALSE(translator.IsValid(9, ztf::DeltaLineCol{0, 1})); + EXPECT_FALSE(translator.IsValid(9, ztf::DeltaLineCol{-1, 0})); + EXPECT_FALSE(translator.IsValid(9, ztf::DeltaLineCol{1, -10})); + EXPECT_TRUE(translator.IsValid(9, ztf::DeltaLineCol{1, -9})); + + // End of file. 
+ EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{7, 78})); + EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{7, 77})); + EXPECT_FALSE(translator.IsValid(0, ztf::DeltaLineCol{6, 78})); + EXPECT_TRUE(translator.IsValid(0, ztf::DeltaLineCol{6, 77})); + EXPECT_FALSE(translator.IsValid(text.size() - 1, ztf::DeltaLineCol{0, 1})); + EXPECT_FALSE(translator.IsValid(text.size() - 1, ztf::DeltaLineCol{1, 0})); + EXPECT_TRUE(translator.IsValid(text.size() - 2, ztf::DeltaLineCol{0, 1})); + EXPECT_FALSE(translator.IsValid(text.size() - 2, ztf::DeltaLineCol{1, 0})); +} + +// Ensures that ReferenceGroups from DisassemblerZtf::MakeReferenceGroups() +// cover each non-sentinel element in ReferenceType in order, exactly once. Also +// ensures that the ReferenceType elements are grouped by ReferencePool, and +// listed in increasing order. +TEST(DisassemblerZtfTest, ReferenceGroups) { + std::vector<uint32_t> pool_list; + std::vector<uint32_t> type_list; + DisassemblerZtf dis; + for (ReferenceGroup group : dis.MakeReferenceGroups()) { + pool_list.push_back(static_cast<uint32_t>(group.pool_tag().value())); + type_list.push_back(static_cast<uint32_t>(group.type_tag().value())); + } + + // Check ReferenceByte coverage. + constexpr size_t kNumTypes = DisassemblerZtf::kNumTypes; + EXPECT_EQ(kNumTypes, type_list.size()); + EXPECT_EQ(kNumTypes, CountDistinct(type_list)); + EXPECT_TRUE(std::is_sorted(type_list.begin(), type_list.end())); + + // Check that ReferenceType elements are grouped by ReferencePool. Note that + // repeats can occur, and pools can be skipped. + EXPECT_TRUE(std::is_sorted(pool_list.begin(), pool_list.end())); +} + +TEST(DisassemblerZtfTest, BadMagic) { + // Test a case where there is no header so a disassembler cannot be created. 
+ { + const std::vector<uint8_t> text(StrToData("foobarbaz bazbarfoo")); + ConstBufferView image(text.data(), text.size()); + EXPECT_FALSE(DisassemblerZtf::QuickDetect(image)); + EXPECT_FALSE(Disassembler::Make<DisassemblerZtf>(image)); + } + // Test a case where there is no footer so a disassembler cannot be created. + { + const std::vector<uint8_t> text(StrToData("ZTxtfoobarbaz bazbarfootxTZ")); + ConstBufferView image(text.data(), text.size()); + EXPECT_TRUE(DisassemblerZtf::QuickDetect(image)); + EXPECT_FALSE(Disassembler::Make<DisassemblerZtf>(image)); + } + // Test when the header is too short + { + const std::vector<uint8_t> text(StrToData("ZTxtxTZ\n")); + ConstBufferView image(text.data(), text.size()); + EXPECT_FALSE(DisassemblerZtf::QuickDetect(image)); + EXPECT_FALSE(Disassembler::Make<DisassemblerZtf>(image)); + } +} + +TEST(DisassemblerZtfTest, ZtfSizeBound) { + { + std::vector<uint8_t> text(StrToData("ZTxt")); + std::fill_n(std::back_inserter(text), ztf::kMaxDimValue - 2, '\n'); + text.insert(text.end(), {'t', 'x', 'T', 'Z', '\n'}); + ConstBufferView image(text.data(), text.size()); + EXPECT_TRUE(DisassemblerZtf::QuickDetect(image)); + EXPECT_TRUE(Disassembler::Make<DisassemblerZtf>(image)); + } + { + std::vector<uint8_t> text(StrToData("ZTxt")); + std::fill_n(std::back_inserter(text), ztf::kMaxDimValue - 1, '\n'); + text.insert(text.end(), {'t', 'x', 'T', 'Z', '\n'}); + ConstBufferView image(text.data(), text.size()); + EXPECT_TRUE(DisassemblerZtf::QuickDetect(image)); + EXPECT_FALSE(Disassembler::Make<DisassemblerZtf>(image)); + } +} + +// Try reading from a well formed source. 
+TEST(DisassemblerZtfTest, NormalRead) { + const std::vector<uint8_t> text(StrToData(kNormalText)); + ConstBufferView image(text.data(), text.size()); + ReferenceSets expected_map = { + {{DisassemblerZtf::kAngles, DisassemblerZtf::kAnglesAbs1}, + {Reference({63, 0})}}, + {{DisassemblerZtf::kBraces, DisassemblerZtf::kBracesRel2}, + {Reference({74, 27})}}, + }; + ReadReferences(image, text.size() - kNormalTextExtraBytes, &expected_map); +} + +// Try writing to a well formed source and ensure that what is read back +// reflects what was written. +TEST(DisassemblerZtfTest, NormalWrite) { + std::vector<uint8_t> mutable_text(StrToData(kNormalText)); + MutableBufferView image(mutable_text.data(), mutable_text.size()); + ReferenceSets change_map = { + {{DisassemblerZtf::kParentheses, DisassemblerZtf::kParenthesesAbs1}, + {Reference({63, 71})}}, + {{DisassemblerZtf::kBrackets, DisassemblerZtf::kBracketsRel3}, + {Reference({74, 4})}}, + }; + WriteReferences(image, mutable_text.size() - kNormalTextExtraBytes, + change_map); + + // As a sanity check see if a disassembler can identify the same references. + ConstBufferView const_image(image); + ReadReferences(const_image, mutable_text.size() - kNormalTextExtraBytes, + &change_map); +} + +// Try reading from a source rife with errors. 
+TEST(DisassemblerZtfTest, ReadOutOfBoundsRefs) { + const std::vector<uint8_t> text(StrToData(kOutOfBoundsText)); + ConstBufferView image(text.data(), text.size()); + ReferenceSets expected_map = { + {{DisassemblerZtf::kAngles, DisassemblerZtf::kAnglesAbs1}, + {Reference({4, 0}), Reference({223, 0}), Reference({228, 12})}}, + {{DisassemblerZtf::kBrackets, DisassemblerZtf::kBracketsRel2}, + {Reference({139, 149})}}, + {{DisassemblerZtf::kBraces, DisassemblerZtf::kBracesAbs1}, + {Reference({218, 223})}}, + {{DisassemblerZtf::kBraces, DisassemblerZtf::kBracesRel2}, + {Reference({233, 233})}}, + {{DisassemblerZtf::kParentheses, DisassemblerZtf::kParenthesesAbs1}, + {Reference({174, 4})}}, + }; + ReadReferences(image, text.size(), &expected_map); +} + +// Try writing to a source rife with errors (malformed references or ones that +// reference non-existent locations. Some of the values written are also bad. To +// validate check if the expected set of references are read back. +TEST(DisassemblerZtfTest, WriteOutOfBoundsRefs) { + // Replace |old_val| (provided for checking) with |new_val| in |set|. + auto update_set = [](Reference old_ref, Reference new_ref, + std::set<Reference, ReferenceCompare>* set) { + auto it = set->find(old_ref); + EXPECT_NE(it, set->cend()); + EXPECT_EQ(*it, old_ref); + set->erase(it); + set->insert(new_ref); + }; + + // Replace |old_val| (provided for checking) with |new_val| in the set which + // is the value corresponding to |key| in |map|. 
+ auto update_map = + [update_set]( + ReferenceKey key, Reference old_ref, Reference new_ref, + std::map<ReferenceKey, std::set<Reference, ReferenceCompare>>* map) { + auto it = map->find(key); + EXPECT_NE(it, map->cend()); + update_set(old_ref, new_ref, &(it->second)); + }; + + std::vector<uint8_t> mutable_text(StrToData(kOutOfBoundsText)); + MutableBufferView image(mutable_text.data(), mutable_text.size()); + ReferenceSets change_map = { + {{DisassemblerZtf::kAngles, DisassemblerZtf::kAnglesAbs1}, + {Reference({223, 15}), Reference({228, 13})}}, + {{DisassemblerZtf::kAngles, DisassemblerZtf::kAnglesAbs3}, + {Reference({4, 50})}}, // This should fail to write. + {{DisassemblerZtf::kBrackets, DisassemblerZtf::kBracketsRel2}, + {Reference({139, mutable_text.size()})}}, // This should fail. + {{DisassemblerZtf::kParentheses, DisassemblerZtf::kParenthesesAbs1}, + {Reference({174, 21})}}, // This should fail. + {{DisassemblerZtf::kBraces, DisassemblerZtf::kBracesAbs1}, + {Reference({218, 219})}}, + {{DisassemblerZtf::kBraces, DisassemblerZtf::kBracesRel2}, + {Reference({233, 174})}}, + }; + WriteReferences(image, mutable_text.size(), change_map); + + // As a sanity check see if a disassembler can identify the same references + // (excluding the invalid ones). 
+ change_map.erase(change_map.find( + {DisassemblerZtf::kAngles, DisassemblerZtf::kAnglesAbs3})); + change_map.at({DisassemblerZtf::kAngles, DisassemblerZtf::kAnglesAbs1}) + .emplace(Reference{4, 0}); + update_map({DisassemblerZtf::kBrackets, DisassemblerZtf::kBracketsRel2}, + Reference({139, mutable_text.size()}), Reference({139, 149}), + &change_map); + update_map({DisassemblerZtf::kParentheses, DisassemblerZtf::kParenthesesAbs1}, + Reference({174, 21}), Reference({174, 4}), &change_map); + ConstBufferView const_image(image); + ReadReferences(const_image, mutable_text.size(), &change_map); +} + +} // namespace zucchini diff --git a/element_detection.cc b/element_detection.cc index a826f54..6b31f61 100644 --- a/element_detection.cc +++ b/element_detection.cc @@ -9,9 +9,19 @@ #include "base/logging.h" #include "components/zucchini/buildflags.h" #include "components/zucchini/disassembler.h" -#include "components/zucchini/disassembler_dex.h" #include "components/zucchini/disassembler_no_op.h" + +#if BUILDFLAG(ENABLE_DEX) +#include "components/zucchini/disassembler_dex.h" +#endif // BUILDFLAG(ENABLE_DEX) + +#if BUILDFLAG(ENABLE_WIN) #include "components/zucchini/disassembler_win32.h" +#endif // BUILDFLAG(ENABLE_WIN) + +#if BUILDFLAG(ENABLE_ZTF) +#include "components/zucchini/disassembler_ztf.h" +#endif // BUILDFLAG(ENABLE_ZTF) namespace zucchini { @@ -48,6 +58,15 @@ std::unique_ptr<Disassembler> MakeDisassemblerWithoutFallback( } #endif // BUILDFLAG(ENABLE_DEX) +#if BUILDFLAG(ENABLE_ZTF) + if (DisassemblerZtf::QuickDetect(image)) { + // This disallows very short examples like "ZTxtxtZ\n" in ensemble patching. 
+ auto disasm = Disassembler::Make<DisassemblerZtf>(image); + if (disasm && disasm->size() >= kMinProgramSize) + return disasm; + } +#endif // BUILDFLAG(ENABLE_ZTF) + return nullptr; } @@ -64,6 +83,10 @@ std::unique_ptr<Disassembler> MakeDisassemblerOfType(ConstBufferView image, case kExeTypeDex: return Disassembler::Make<DisassemblerDex>(image); #endif // BUILDFLAG(ENABLE_DEX) +#if BUILDFLAG(ENABLE_ZTF) + case kExeTypeZtf: + return Disassembler::Make<DisassemblerZtf>(image); +#endif // BUILDFLAG(ENABLE_ZTF) case kExeTypeNoOp: return Disassembler::Make<DisassemblerNoOp>(image); default: diff --git a/image_utils.h b/image_utils.h index 9f561ba..9aba0a6 100644 --- a/image_utils.h +++ b/image_utils.h @@ -162,6 +162,7 @@ enum ExecutableType : uint32_t { kExeTypeElfArm32 = ExeTypeToUint32("EA32"), kExeTypeElfAArch64 = ExeTypeToUint32("EA64"), kExeTypeDex = ExeTypeToUint32("DEX "), + kExeTypeZtf = ExeTypeToUint32("ZTF "), }; constexpr ExecutableType CastToExecutableType(uint32_t possible_exe_type) { @@ -174,6 +175,7 @@ constexpr ExecutableType CastToExecutableType(uint32_t possible_exe_type) { case kExeTypeElfArm32: // Falls through. case kExeTypeElfAArch64: // Falls through. case kExeTypeDex: // Falls through. + case kExeTypeZtf: // Falls through. case kExeTypeUnknown: return static_cast<ExecutableType>(possible_exe_type); default: diff --git a/type_ztf.h b/type_ztf.h new file mode 100644 index 0000000..42798b2 --- /dev/null +++ b/type_ztf.h @@ -0,0 +1,52 @@ +// Copyright 2018 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef COMPONENTS_ZUCCHINI_TYPE_ZTF_H_ +#define COMPONENTS_ZUCCHINI_TYPE_ZTF_H_ + +#include <stddef.h> +#include <stdint.h> + +namespace zucchini { + +namespace ztf { + +typedef int16_t dim_t; + +// A exclusive upper bound on number of lines and/or columns. 
Throughout the ZTF +// code a dimension (dim) refers to a block of 1-3 digits which contain a line +// or column number. +enum : size_t { kMaxDimValue = 1000 }; + +enum SignChar : uint8_t { + kMinus = '-', + kPlus = '+', +}; + +// Lines and columns are 1-based to follow the convention of most modern text +// editing software. |line| and |col| should be positive, but int16_t is used to +// limit ranges such that it matches DeltaLineCol. +struct LineCol { + dim_t line; + dim_t col; +}; + +struct DeltaLineCol { + dim_t line; + dim_t col; +}; + +constexpr DeltaLineCol operator-(const LineCol& lhs, const LineCol& rhs) { + return DeltaLineCol{lhs.line - rhs.line, lhs.col - rhs.col}; +} + +constexpr LineCol operator+(const LineCol& lhs, const DeltaLineCol& rhs) { + return LineCol{lhs.line + rhs.line, lhs.col + rhs.col}; +} + +} // namespace ztf + +} // namespace zucchini + +#endif // COMPONENTS_ZUCCHINI_TYPE_ZTF_H_ |