diff options
Diffstat (limited to 'test/benchmark_all_sizes.cc')
-rw-r--r-- | test/benchmark_all_sizes.cc | 19 |
1 file changed, 17 insertions, 2 deletions
diff --git a/test/benchmark_all_sizes.cc b/test/benchmark_all_sizes.cc
index 16cc57c..527aad6 100644
--- a/test/benchmark_all_sizes.cc
+++ b/test/benchmark_all_sizes.cc
@@ -16,6 +16,10 @@ test/benchmark_all_sizes.cc -o /tmp/b -O3 --std=c++11 -fPIE -static \
 
 #include "../public/gemmlowp.h"
 
+#ifdef GEMMLOWP_PROFILING
+#include "../profiling/profiler.h"
+#endif
+
 #if defined GEMMLOWP_ANDROID && defined GEMMLOWP_ARM_32
 // Compilation workaround
 namespace std {
@@ -122,10 +126,10 @@ float benchmark_8bit(int rows, int depth, int cols) {
   MakeZero(&rhs);
   MakeZero(&result);
 
-  typedef std::tuple<OutputStageQuantizeDownInt32ToUint8ScaleByFixedPoint,
+  typedef std::tuple<OutputStageQuantizeDownInt32ByFixedPoint,
                      OutputStageSaturatingCastToUint8>
       Pipeline;
-  gemmlowp::OutputStageQuantizeDownInt32ToUint8ScaleByFixedPoint
+  gemmlowp::OutputStageQuantizeDownInt32ByFixedPoint
       quantize_down_stage;
   quantize_down_stage.result_offset_after_shift = 128;
   quantize_down_stage.result_fixedpoint_multiplier = 1234567890;
@@ -345,7 +349,18 @@ void run_benchmarks(std::map<Shape, float>* results) {
 
 int main() {
   std::map<Shape, float> results;
+
+#ifdef GEMMLOWP_PROFILING
+  gemmlowp::RegisterCurrentThreadForProfiling();
+  gemmlowp::StartProfiling();
+#endif
+
   run_benchmarks(&results);
+
+#ifdef GEMMLOWP_PROFILING
+  gemmlowp::FinishProfiling();
+#endif
+
   printf("Using %d thread(s)\n", kNumThreads);
   printf("depth,rows,cols,latency(s),Gop/s\n");
   for (const auto& result : results) {