diff options
author | Etienne Pierre-doray <etiennep@chromium.org> | 2021-04-16 18:20:33 +0000 |
---|---|---|
committer | Copybara-Service <copybara-worker@google.com> | 2021-07-25 21:11:19 -0700 |
commit | e1c6a71ae0b029b30d69960900d9bb1a1ad45945 (patch) | |
tree | d0a23cf895e7098b14d360fb8eead4982479f91f | |
parent | fc058e3b20ed8f45828c24d0da6f4bfcdcf737b1 (diff) | |
download | zucchini-e1c6a71ae0b029b30d69960900d9bb1a1ad45945.tar.gz |
[zucchini] Use deque to store rel32_locations.
An optimization to reduce zucchini peak memory footprint.
MakeReadRel32 brings peak memory unnecessarily high due to std::vector's
allocation heuristic, along with shrink_to_fit, which temporarily
forces an additional allocation: on the order of 32MB+19MB while only
19MB was needed.
std::deque puts less memory pressure than std::vector since it has no
contiguity requirement and shrink_to_fit can be done in place.
Measurements with deque:
Zucchini.PeakPagefileUsage 738876 KiB
Zucchini.PeakPagefileUsageChange 717436 KiB
Zucchini.PeakWorkingSetSize 632284 KiB
Zucchini.PeakWorkingSetSizeChange 623464 KiB
Zucchini.TotalTime 9.40955 s
Measurements with vector:
Zucchini.PeakPagefileUsage 755252 KiB
Zucchini.PeakPagefileUsageChange 733820 KiB
Zucchini.PeakWorkingSetSize 632660 KiB
Zucchini.PeakWorkingSetSizeChange 624616 KiB
Zucchini.TotalTime 10.3224 s
Bug: 1194281
Change-Id: Ic5a7f529d4465241990dbe27a485a67b32ab44b1
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2830864
Commit-Queue: Etienne Pierre-Doray <etiennep@chromium.org>
Reviewed-by: Samuel Huang <huangs@chromium.org>
Cr-Commit-Position: refs/heads/master@{#873383}
NOKEYCHECK=True
GitOrigin-RevId: 52f908bb699f8d106a1c43e10257759cce90500d
-rw-r--r-- | disassembler_elf.h | 4 | ||||
-rw-r--r-- | disassembler_win32.h | 4 | ||||
-rw-r--r-- | rel32_utils.cc | 2 | ||||
-rw-r--r-- | rel32_utils.h | 14 | ||||
-rw-r--r-- | rel32_utils_unittest.cc | 13 |
5 files changed, 21 insertions, 16 deletions
diff --git a/disassembler_elf.h b/disassembler_elf.h index e279c29..60d524c 100644 --- a/disassembler_elf.h +++ b/disassembler_elf.h @@ -7,6 +7,7 @@ #include <stdint.h> +#include <deque> #include <memory> #include <string> #include <vector> @@ -178,7 +179,8 @@ class DisassemblerElfIntel : public DisassemblerElf<Traits> { private: // Sorted file offsets of rel32 locations. - std::vector<offset_t> rel32_locations_; + // Using std::deque to reduce peak memory footprint. + std::deque<offset_t> rel32_locations_; DISALLOW_COPY_AND_ASSIGN(DisassemblerElfIntel); }; diff --git a/disassembler_win32.h b/disassembler_win32.h index 6c7ba91..8f9fd58 100644 --- a/disassembler_win32.h +++ b/disassembler_win32.h @@ -8,6 +8,7 @@ #include <stddef.h> #include <stdint.h> +#include <deque> #include <memory> #include <string> #include <utility> @@ -108,7 +109,8 @@ class DisassemblerWin32 : public Disassembler { std::vector<offset_t> reloc_block_offsets_; offset_t reloc_end_ = 0; std::vector<offset_t> abs32_locations_; - std::vector<offset_t> rel32_locations_; + // Using std::deque to reduce peak memory footprint. + std::deque<offset_t> rel32_locations_; // Initialization states of reference storage, used for lazy initialization. 
// TODO(huangs): Investigate whether lazy initialization is useful for memory diff --git a/rel32_utils.cc b/rel32_utils.cc index 37bcb6f..e6d187d 100644 --- a/rel32_utils.cc +++ b/rel32_utils.cc @@ -16,7 +16,7 @@ namespace zucchini { Rel32ReaderX86::Rel32ReaderX86(ConstBufferView image, offset_t lo, offset_t hi, - const std::vector<offset_t>* locations, + const std::deque<offset_t>* locations, const AddressTranslator& translator) : image_(image), target_rva_to_offset_(translator), diff --git a/rel32_utils.h b/rel32_utils.h index 618ed0d..946fcc6 100644 --- a/rel32_utils.h +++ b/rel32_utils.h @@ -6,8 +6,8 @@ #define COMPONENTS_ZUCCHINI_REL32_UTILS_H_ #include <algorithm> +#include <deque> #include <memory> -#include <vector> #include "base/logging.h" #include "base/macros.h" @@ -32,7 +32,7 @@ class Rel32ReaderX86 : public ReferenceReader { Rel32ReaderX86(ConstBufferView image, offset_t lo, offset_t hi, - const std::vector<offset_t>* locations, + const std::deque<offset_t>* locations, const AddressTranslator& translator); ~Rel32ReaderX86() override; @@ -44,8 +44,8 @@ class Rel32ReaderX86 : public ReferenceReader { AddressTranslator::RvaToOffsetCache target_rva_to_offset_; AddressTranslator::OffsetToRvaCache location_offset_to_rva_; const offset_t hi_; - const std::vector<offset_t>::const_iterator last_; - std::vector<offset_t>::const_iterator current_; + const std::deque<offset_t>::const_iterator last_; + std::deque<offset_t>::const_iterator current_; DISALLOW_COPY_AND_ASSIGN(Rel32ReaderX86); }; @@ -79,7 +79,7 @@ class Rel32ReaderArm : public ReferenceReader { Rel32ReaderArm(const AddressTranslator& translator, ConstBufferView view, - const std::vector<offset_t>& rel32_locations, + const std::deque<offset_t>& rel32_locations, offset_t lo, offset_t hi) : view_(view), @@ -110,8 +110,8 @@ class Rel32ReaderArm : public ReferenceReader { ConstBufferView view_; AddressTranslator::OffsetToRvaCache offset_to_rva_; AddressTranslator::RvaToOffsetCache rva_to_offset_; - 
std::vector<offset_t>::const_iterator cur_it_; - std::vector<offset_t>::const_iterator rel32_end_; + std::deque<offset_t>::const_iterator cur_it_; + std::deque<offset_t>::const_iterator rel32_end_; offset_t hi_; DISALLOW_COPY_AND_ASSIGN(Rel32ReaderArm); diff --git a/rel32_utils_unittest.cc b/rel32_utils_unittest.cc index e3d34f4..3fdf5d6 100644 --- a/rel32_utils_unittest.cc +++ b/rel32_utils_unittest.cc @@ -6,6 +6,7 @@ #include <stdint.h> +#include <deque> #include <memory> #include <utility> #include <vector> @@ -88,7 +89,7 @@ TEST(Rel32UtilsTest, Rel32ReaderX86) { }; ConstBufferView buffer(bytes.data(), bytes.size()); // Specify rel32 locations directly, instead of parsing. - std::vector<offset_t> rel32_locations = {0x0008U, 0x0010U, 0x0018U, 0x001CU}; + std::deque<offset_t> rel32_locations = {0x0008U, 0x0010U, 0x0018U, 0x001CU}; // Generate everything. auto reader1 = std::make_unique<Rel32ReaderX86>(buffer, 0x0000U, 0x0020U, @@ -163,8 +164,8 @@ TEST(Rel32UtilsTest, Rel32ReaderArm_Arm32) { }; ConstBufferView region(&bytes[0], bytes.size()); // Specify rel32 locations directly, instead of parsing. - std::vector<offset_t> rel32_locations_A24 = {0x0008U, 0x0010U, 0x0018U, - 0x001CU}; + std::deque<offset_t> rel32_locations_A24 = {0x0008U, 0x0010U, 0x0018U, + 0x001CU}; // Generate everything. auto reader1 = @@ -427,21 +428,21 @@ TEST(Rel32UtilsTest, Rel32ReaderArm_AArch64) { MutableBufferView region(&bytes[0], bytes.size()); // Generate Immd26. We specify rel32 locations directly. - std::vector<offset_t> rel32_locations_Immd26 = {0x0008U}; + std::deque<offset_t> rel32_locations_Immd26 = {0x0008U}; auto reader1 = std::make_unique< Rel32ReaderArm<AArch64Rel32Translator::AddrTraits_Immd26>>( translator, region, rel32_locations_Immd26, 0x0000U, 0x0020U); CheckReader({{0x0008U, 0x0010U}}, std::move(reader1)); // Generate Immd19. 
- std::vector<offset_t> rel32_locations_Immd19 = {0x0010U, 0x0018U}; + std::deque<offset_t> rel32_locations_Immd19 = {0x0010U, 0x0018U}; auto reader2 = std::make_unique< Rel32ReaderArm<AArch64Rel32Translator::AddrTraits_Immd19>>( translator, region, rel32_locations_Immd19, 0x0000U, 0x0020U); CheckReader({{0x0010U, 0x0014U}, {0x0018U, 0x0010U}}, std::move(reader2)); // Generate Immd14. - std::vector<offset_t> rel32_locations_Immd14 = {0x001CU}; + std::deque<offset_t> rel32_locations_Immd14 = {0x001CU}; auto reader3 = std::make_unique< Rel32ReaderArm<AArch64Rel32Translator::AddrTraits_Immd14>>( translator, region, rel32_locations_Immd14, 0x0000U, 0x0020U); |