From 901a08a73484b0989530fb9fc7de0829d13d6757 Mon Sep 17 00:00:00 2001 From: Yabin Cui Date: Tue, 31 Mar 2020 11:59:47 -0700 Subject: simpleperf: adjust based on opencsd change. Currently, simpleperf gets branch_addr from instruction decoder. But OpenCSD is changed to cache InstrRange elements. Thus when simpleperf gets an InstrRange element, the instruction decoder may point to an instruction executed later. Fix it by using the start_addr of the next InstrRange element as the branch_addr of the current InstrRange element. Also add a test. Also use recorded binary path in output to help test. Bug: 153039105 Test: run simpleperf manually so the inject result doesn't Test: change for a 10s system wide recording data. Test: run simpleperf_unit_test. Change-Id: I6fdf32d3bac18ed3762c944c282ec881d09395b4 Merged-In: I6fdf32d3bac18ed3762c944c282ec881d09395b4 (cherry picked from commit 2c294914c1eeb30754a8a2449795b296bd2176e9) --- simpleperf/ETMDecoder.cpp | 62 +++++++++++++++++++++++++++----- simpleperf/ETMDecoder.h | 1 + simpleperf/cmd_inject.cpp | 35 ++++++++++++------ simpleperf/cmd_inject_test.cpp | 5 +++ simpleperf/testdata/etm/perf_inject.data | 24 +++++++++++++ 5 files changed, 108 insertions(+), 19 deletions(-) create mode 100644 simpleperf/testdata/etm/perf_inject.data diff --git a/simpleperf/ETMDecoder.cpp b/simpleperf/ETMDecoder.cpp index 5c8d4570..136bbcdf 100644 --- a/simpleperf/ETMDecoder.cpp +++ b/simpleperf/ETMDecoder.cpp @@ -396,6 +396,12 @@ class DataDumper : public ElementCallback { // It decodes each ETMV4IPacket into TraceElements, and generates ETMInstrRanges from TraceElements. // Decoding each packet is slow, but ensures correctness. class InstrRangeParser : public ElementCallback { + private: + struct TraceData { + ETMInstrRange instr_range; + bool wait_for_branch_to_addr_fix = false; + }; + public: InstrRangeParser(MapLocator& map_locator, const ETMDecoder::CallbackFn& callback) : map_locator_(map_locator), callback_(callback) {} @@ -404,33 +410,64 @@ class InstrRangeParser : public ElementCallback { const OcsdTraceElement& elem, const ocsd_instr_info* next_instr) override { if (elem.getType() == OCSD_GEN_TRC_ELEM_INSTR_RANGE) { + TraceData& data = trace_data_[trace_id]; const MapEntry* map = map_locator_.FindMap(trace_id, elem.st_addr); if (map == nullptr) { + FlushData(data); return OCSD_RESP_CONT; } - instr_range_.dso = map->dso; - instr_range_.start_addr = map->GetVaddrInFile(elem.st_addr); - instr_range_.end_addr = map->GetVaddrInFile(elem.en_addr - elem.last_instr_sz); + uint64_t start_addr = map->GetVaddrInFile(elem.st_addr); + auto& instr_range = data.instr_range; + + if (data.wait_for_branch_to_addr_fix) { + // OpenCSD may cache a list of InstrRange elements, making it inaccurate to get branch to + // address from next_instr->branch_addr. So fix it by using the start address of the next + // InstrRange element. + instr_range.branch_to_addr = start_addr; + } + FlushData(data); + instr_range.dso = map->dso; + instr_range.start_addr = start_addr; + instr_range.end_addr = map->GetVaddrInFile(elem.en_addr - elem.last_instr_sz); bool end_with_branch = elem.last_i_type == OCSD_INSTR_BR || elem.last_i_type == OCSD_INSTR_BR_INDIRECT; bool branch_taken = end_with_branch && elem.last_instr_exec; if (elem.last_i_type == OCSD_INSTR_BR && branch_taken) { // It is based on the assumption that we only do immediate branch inside a binary, // which may not be true for all cases. TODO: http://b/151665001. - instr_range_.branch_to_addr = map->GetVaddrInFile(next_instr->branch_addr); + instr_range.branch_to_addr = map->GetVaddrInFile(next_instr->branch_addr); + data.wait_for_branch_to_addr_fix = true; } else { - instr_range_.branch_to_addr = 0; + instr_range.branch_to_addr = 0; } - instr_range_.branch_taken_count = branch_taken ? 1 : 0; - instr_range_.branch_not_taken_count = branch_taken ? 0 : 1; - callback_(instr_range_); + instr_range.branch_taken_count = branch_taken ? 1 : 0; + instr_range.branch_not_taken_count = branch_taken ? 0 : 1; + + } else if (elem.getType() == OCSD_GEN_TRC_ELEM_TRACE_ON) { + // According to the ETM Specification, the Trace On element indicates a discontinuity in the + // instruction trace stream. So it cuts the connection between instr ranges. + FlushData(trace_data_[trace_id]); } return OCSD_RESP_CONT; } + void FinishData() { + for (auto& pair : trace_data_) { + FlushData(pair.second); + } + } + private: + void FlushData(TraceData& data) { + if (data.instr_range.dso != nullptr) { + callback_(data.instr_range); + data.instr_range.dso = nullptr; + } + data.wait_for_branch_to_addr_fix = false; + } + MapLocator& map_locator_; - ETMInstrRange instr_range_; + std::unordered_map trace_data_; ETMDecoder::CallbackFn callback_; }; @@ -522,6 +559,13 @@ class ETMDecoderImpl : public ETMDecoder { return true; } + bool FinishData() override { + if (instr_range_parser_) { + instr_range_parser_->FinishData(); + } + return true; + } + private: void InstallMapLocator() { if (!map_locator_) { diff --git a/simpleperf/ETMDecoder.h b/simpleperf/ETMDecoder.h index 3ebbd7dd..b9493acd 100644 --- a/simpleperf/ETMDecoder.h +++ b/simpleperf/ETMDecoder.h @@ -61,6 +61,7 @@ class ETMDecoder { virtual void RegisterCallback(const CallbackFn& callback) = 0; virtual bool ProcessData(const uint8_t* data, size_t size) = 0; + virtual bool FinishData() = 0; }; } // namespace simpleperf \ No newline at end of file diff --git a/simpleperf/cmd_inject.cpp b/simpleperf/cmd_inject.cpp index 0a6719d8..ee27b0b8 100644 --- a/simpleperf/cmd_inject.cpp +++ b/simpleperf/cmd_inject.cpp @@ -80,6 +80,9 @@ class InjectCommand : public Command { if (!record_file_reader_->ReadDataSection([this](auto r) { return ProcessRecord(r.get()); })) { return false; } + if (etm_decoder_ && !etm_decoder_->FinishData()) { + return false; + } PostProcess(); output_fp_.reset(nullptr); return true; @@ -164,7 +167,7 @@ class InjectCommand : public Command { return; } - auto& binary = binary_map_[instr_range.dso->GetDebugFilePath()]; + auto& binary = binary_map_[instr_range.dso]; binary.range_count_map[AddrPair(instr_range.start_addr, instr_range.end_addr)] += instr_range.branch_taken_count + instr_range.branch_not_taken_count; if (instr_range.branch_taken_count > 0) { @@ -174,13 +177,23 @@ class InjectCommand : public Command { } void PostProcess() { - for (const auto& pair : binary_map_) { - const std::string& binary_path = pair.first; - const BinaryInfo& binary = pair.second; + // binary_map is used to store instruction ranges, which can have a large amount. And it has + // a larger access time (instruction ranges * executed time). So it's better to use + // unorder_maps to speed up access time. But we also want a stable output here, to compare + // output changes result from code changes. So generate a sorted output here. + std::vector dso_v; + for (auto& p : binary_map_) { + dso_v.emplace_back(p.first); + } + std::sort(dso_v.begin(), dso_v.end(), [](Dso* d1, Dso* d2) { return d1->Path() < d2->Path(); }); + for (auto dso : dso_v) { + const BinaryInfo& binary = binary_map_[dso]; // Write range_count_map. - fprintf(output_fp_.get(), "%zu\n", binary.range_count_map.size()); - for (const auto& pair2 : binary.range_count_map) { + std::map range_count_map(binary.range_count_map.begin(), + binary.range_count_map.end()); + fprintf(output_fp_.get(), "%zu\n", range_count_map.size()); + for (const auto& pair2 : range_count_map) { const AddrPair& addr_range = pair2.first; uint64_t count = pair2.second; @@ -192,8 +205,10 @@ class InjectCommand : public Command { fprintf(output_fp_.get(), "0\n"); // Write branch_count_map. - fprintf(output_fp_.get(), "%zu\n", binary.branch_count_map.size()); - for (const auto& pair2 : binary.branch_count_map) { + std::map branch_count_map(binary.branch_count_map.begin(), + binary.branch_count_map.end()); + fprintf(output_fp_.get(), "%zu\n", branch_count_map.size()); + for (const auto& pair2 : branch_count_map) { const AddrPair& branch = pair2.first; uint64_t count = pair2.second; @@ -202,7 +217,7 @@ class InjectCommand : public Command { } // Write the binary path in comment. - fprintf(output_fp_.get(), "// %s\n\n", binary_path.c_str()); + fprintf(output_fp_.get(), "// %s\n\n", dso->Path().c_str()); } } @@ -217,7 +232,7 @@ class InjectCommand : public Command { std::unique_ptr output_fp_; // Store results for AutoFDO. - std::unordered_map binary_map_; + std::unordered_map binary_map_; }; } // namespace diff --git a/simpleperf/cmd_inject_test.cpp b/simpleperf/cmd_inject_test.cpp index 0d66d6cd..1f057cb0 100644 --- a/simpleperf/cmd_inject_test.cpp +++ b/simpleperf/cmd_inject_test.cpp @@ -20,6 +20,7 @@ #include "command.h" #include "get_test_data.h" #include "test_util.h" +#include "utils.h" static std::unique_ptr InjectCmd() { return CreateCommandInstance("inject"); } @@ -31,6 +32,10 @@ TEST(cmd_inject, smoke) { ASSERT_TRUE(android::base::ReadFileToString(tmpfile.path, &data)); // Test that we can find instr range in etm_test_loop binary. ASSERT_NE(data.find("etm_test_loop"), std::string::npos); + std::string expected_data; + ASSERT_TRUE(android::base::ReadFileToString( + GetTestData(std::string("etm") + OS_PATH_SEPARATOR + "perf_inject.data"), &expected_data)); + ASSERT_EQ(data, expected_data); } TEST(cmd_inject, binary_option) { diff --git a/simpleperf/testdata/etm/perf_inject.data b/simpleperf/testdata/etm/perf_inject.data new file mode 100644 index 00000000..4c121a32 --- /dev/null +++ b/simpleperf/testdata/etm/perf_inject.data @@ -0,0 +1,24 @@ +10 +1000-1004:1 +100c-1050:1 +1054-106c:1 +105c-106c:100 +1070-1070:1 +1074-1080:100 +1084-1088:1 +108c-10a0:1 +10a4-10b0:1 +10e0-10ec:1 +0 +9 +1004->100c:1 +1050->10e0:1 +106c->1074:100 +1070->1084:1 +1080->105c:100 +1088->0:1 +10a0->1054:1 +10b0->0:1 +10ec->0:1 +// /data/local/tmp/etm_test_loop + -- cgit v1.2.3