about summary refs log tree commit diff
path: root/test/benchmark_all_sizes.cc
diff options
context:
space:
mode:
Diffstat (limited to 'test/benchmark_all_sizes.cc')
-rw-r--r-- test/benchmark_all_sizes.cc 19
1 files changed, 17 insertions, 2 deletions
diff --git a/test/benchmark_all_sizes.cc b/test/benchmark_all_sizes.cc
index 16cc57c..527aad6 100644
--- a/test/benchmark_all_sizes.cc
+++ b/test/benchmark_all_sizes.cc
@@ -16,6 +16,10 @@ test/benchmark_all_sizes.cc -o /tmp/b -O3 --std=c++11 -fPIE -static \
#include "../public/gemmlowp.h"
+#ifdef GEMMLOWP_PROFILING
+#include "../profiling/profiler.h"
+#endif
+
#if defined GEMMLOWP_ANDROID && defined GEMMLOWP_ARM_32
// Compilation workaround
namespace std {
@@ -122,10 +126,10 @@ float benchmark_8bit(int rows, int depth, int cols) {
MakeZero(&rhs);
MakeZero(&result);
- typedef std::tuple<OutputStageQuantizeDownInt32ToUint8ScaleByFixedPoint,
+ typedef std::tuple<OutputStageQuantizeDownInt32ByFixedPoint,
OutputStageSaturatingCastToUint8>
Pipeline;
- gemmlowp::OutputStageQuantizeDownInt32ToUint8ScaleByFixedPoint
+ gemmlowp::OutputStageQuantizeDownInt32ByFixedPoint
quantize_down_stage;
quantize_down_stage.result_offset_after_shift = 128;
quantize_down_stage.result_fixedpoint_multiplier = 1234567890;
@@ -345,7 +349,18 @@ void run_benchmarks(std::map<Shape, float>* results) {
int main() {
std::map<Shape, float> results;
+
+#ifdef GEMMLOWP_PROFILING
+ gemmlowp::RegisterCurrentThreadForProfiling();
+ gemmlowp::StartProfiling();
+#endif
+
run_benchmarks(&results);
+
+#ifdef GEMMLOWP_PROFILING
+ gemmlowp::FinishProfiling();
+#endif
+
printf("Using %d thread(s)\n", kNumThreads);
printf("depth,rows,cols,latency(s),Gop/s\n");
for (const auto& result : results) {