diff options
author | Christopher Ferris <cferris@google.com> | 2013-07-03 01:12:23 +0000 |
---|---|---|
committer | Gerrit Code Review <noreply-gerritcodereview@google.com> | 2013-07-03 01:12:23 +0000 |
commit | a8f6f3218aabdd5a691679d06034dbb9a7812854 (patch) | |
tree | 3c3ab0ce3844b27c412f497a21460dbef3daf31c | |
parent | faec68b05e13ce31566feabd01fc23960a93aff1 (diff) | |
parent | 55e29061da35c13e47e49cfd6cac97ecc5de5e0c (diff) | |
download | extras-a8f6f3218aabdd5a691679d06034dbb9a7812854.tar.gz |
Merge "Add vldr vstr type benchmarking."
-rw-r--r-- | tests/memtest/bandwidth.cpp | 27 | ||||
-rw-r--r-- | tests/memtest/bandwidth.h | 185 | ||||
-rw-r--r-- | tests/memtest/memtest.cpp | 16 |
3 files changed, 193 insertions, 35 deletions
diff --git a/tests/memtest/bandwidth.cpp b/tests/memtest/bandwidth.cpp index 523cabe3..cf406e6c 100644 --- a/tests/memtest/bandwidth.cpp +++ b/tests/memtest/bandwidth.cpp @@ -101,8 +101,10 @@ BandwidthBenchmark *createBandwidthBenchmarkObject(arg_t values) { bench = new CopyLdrdStrdBenchmark(); } else if (strcmp(name, "copy_ldmia_stmia") == 0) { bench = new CopyLdmiaStmiaBenchmark(); - } else if (strcmp(name, "copy_vld_vst") == 0) { - bench = new CopyVldVstBenchmark(); + } else if (strcmp(name, "copy_vld1_vst1") == 0) { + bench = new CopyVld1Vst1Benchmark(); + } else if (strcmp(name, "copy_vldr_vstr") == 0) { + bench = new CopyVldrVstrBenchmark(); } else if (strcmp(name, "copy_vldmia_vstmia") == 0) { bench = new CopyVldmiaVstmiaBenchmark(); } else if (strcmp(name, "memcpy") == 0) { @@ -111,8 +113,10 @@ BandwidthBenchmark *createBandwidthBenchmarkObject(arg_t values) { bench = new WriteStrdBenchmark(); } else if (strcmp(name, "write_stmia") == 0) { bench = new WriteStmiaBenchmark(); - } else if (strcmp(name, "write_vst") == 0) { - bench = new WriteVstBenchmark(); + } else if (strcmp(name, "write_vst1") == 0) { + bench = new WriteVst1Benchmark(); + } else if (strcmp(name, "write_vstr") == 0) { + bench = new WriteVstrBenchmark(); } else if (strcmp(name, "write_vstmia") == 0) { bench = new WriteVstmiaBenchmark(); } else if (strcmp(name, "memset") == 0) { @@ -121,8 +125,10 @@ BandwidthBenchmark *createBandwidthBenchmarkObject(arg_t values) { bench = new ReadLdrdBenchmark(); } else if (strcmp(name, "read_ldmia") == 0) { bench = new ReadLdmiaBenchmark(); - } else if (strcmp(name, "read_vld") == 0) { - bench = new ReadVldBenchmark(); + } else if (strcmp(name, "read_vld1") == 0) { + bench = new ReadVld1Benchmark(); + } else if (strcmp(name, "read_vldr") == 0) { + bench = new ReadVldrBenchmark(); } else if (strcmp(name, "read_vldmia") == 0) { bench = new ReadVldmiaBenchmark(); } else { @@ -418,7 +424,8 @@ int copy_bandwidth(int argc, char** argv) { std::vector<BandwidthBenchmark*> bench_objs; bench_objs.push_back(new CopyLdrdStrdBenchmark()); bench_objs.push_back(new CopyLdmiaStmiaBenchmark()); - bench_objs.push_back(new CopyVldVstBenchmark()); + bench_objs.push_back(new CopyVld1Vst1Benchmark()); + bench_objs.push_back(new CopyVldrVstrBenchmark()); bench_objs.push_back(new CopyVldmiaVstmiaBenchmark()); bench_objs.push_back(new MemcpyBenchmark()); @@ -432,7 +439,8 @@ int write_bandwidth(int argc, char** argv) { std::vector<BandwidthBenchmark*> bench_objs; bench_objs.push_back(new WriteStrdBenchmark()); bench_objs.push_back(new WriteStmiaBenchmark()); - bench_objs.push_back(new WriteVstBenchmark()); + bench_objs.push_back(new WriteVst1Benchmark()); + bench_objs.push_back(new WriteVstrBenchmark()); bench_objs.push_back(new WriteVstmiaBenchmark()); bench_objs.push_back(new MemsetBenchmark()); @@ -447,7 +455,8 @@ int read_bandwidth(int argc, char** argv) { std::vector<BandwidthBenchmark*> bench_objs; bench_objs.push_back(new ReadLdrdBenchmark()); bench_objs.push_back(new ReadLdmiaBenchmark()); - bench_objs.push_back(new ReadVldBenchmark()); + bench_objs.push_back(new ReadVld1Benchmark()); + bench_objs.push_back(new ReadVldrBenchmark()); bench_objs.push_back(new ReadVldmiaBenchmark()); if (!run_bandwidth_benchmark(argc, argv, "read", bench_objs)) { diff --git a/tests/memtest/bandwidth.h b/tests/memtest/bandwidth.h index b890f809..a09d082d 100644 --- a/tests/memtest/bandwidth.h +++ b/tests/memtest/bandwidth.h @@ -141,7 +141,7 @@ public: memset(_dst, 0, _size); bench(1); if (memcmp(_src, _dst, _size) != 0) { - printf("Strings failed to compare after one loop.\n"); + printf("Buffers failed to compare after one loop.\n"); return false; } @@ -150,7 +150,7 @@ public: _num_loops = 2; bench(2); if (memcmp(_src, _dst, _size) != 0) { - printf("Strings failed to compare after two loops.\n"); + printf("Buffers failed to compare after two loops.\n"); return false; } @@ -257,17 +257,17 @@ protected: } }; -class CopyVldVstBenchmark : public CopyBandwidthBenchmark { +class CopyVld1Vst1Benchmark : public CopyBandwidthBenchmark { public: - CopyVldVstBenchmark() : CopyBandwidthBenchmark() { } - virtual ~CopyVldVstBenchmark() {} + CopyVld1Vst1Benchmark() : CopyBandwidthBenchmark() { } + virtual ~CopyVld1Vst1Benchmark() {} - const char *getName() { return "vld/vst"; } + const char *getName() { return "vld1/vst1"; } bool usesNeon() { return true; } protected: - // Copy using vld/vst instructions. + // Copy using vld1/vst1 instructions. void bench(size_t num_loops) { #if defined(__ARM_NEON__) asm volatile( @@ -300,6 +300,63 @@ protected: } }; +class CopyVldrVstrBenchmark : public CopyBandwidthBenchmark { +public: + CopyVldrVstrBenchmark() : CopyBandwidthBenchmark() { } + virtual ~CopyVldrVstrBenchmark() {} + + const char *getName() { return "vldr/vstr"; } + + bool usesNeon() { return true; } + +protected: + // Copy using vldr/vstr instructions. + void bench(size_t num_loops) { +#if defined(__ARM_NEON__) + asm volatile( + "stmfd sp!, {r0,r1,r2,r3,r4}\n" + + "mov r0, %0\n" + "mov r1, %1\n" + "mov r2, %2\n" + "mov r3, %3\n" + + "0:\n" + "mov r4, r2, lsr #6\n" + + "1:\n" + "vldr d0, [r0, #0]\n" + "subs r4, r4, #1\n" + "vldr d1, [r0, #8]\n" + "vstr d0, [r1, #0]\n" + "vldr d0, [r0, #16]\n" + "vstr d1, [r1, #8]\n" + "vldr d1, [r0, #24]\n" + "vstr d0, [r1, #16]\n" + "vldr d0, [r0, #32]\n" + "vstr d1, [r1, #24]\n" + "vldr d1, [r0, #40]\n" + "vstr d0, [r1, #32]\n" + "vldr d0, [r0, #48]\n" + "vstr d1, [r1, #40]\n" + "vldr d1, [r0, #56]\n" + "vstr d0, [r1, #48]\n" + "add r0, r0, #64\n" + "vstr d1, [r1, #56]\n" + "add r1, r1, #64\n" + "bgt 1b\n" + + "sub r0, r0, r2\n" + "sub r1, r1, r2\n" + "subs r3, r3, #1\n" + "bgt 0b\n" + + "ldmfd sp!, {r0,r1,r2,r3,r4}\n" + :: "r" (_src), "r" (_dst), "r" (_size), "r" (num_loops) : "r0", "r1", "r2", "r3"); +#endif + } +}; + class CopyVldmiaVstmiaBenchmark : public CopyBandwidthBenchmark { public: CopyVldmiaVstmiaBenchmark() : CopyBandwidthBenchmark() { } @@ -310,7 +367,7 @@ public: bool usesNeon() { return true; } protected: - // Copy using vld/vst instructions. + // Copy using vldmia/vstmia instructions. void bench(size_t num_loops) { #if defined(__ARM_NEON__) asm volatile( @@ -406,7 +463,7 @@ public: bench(1); for (size_t i = 0; i < _size; i++) { if (_buffer[i] != 1) { - printf("Strings failed to compare after one loop.\n"); + printf("Buffer failed to compare after one loop.\n"); return false; } } @@ -415,7 +472,7 @@ public: bench(2); for (size_t i = 0; i < _size; i++) { if (_buffer[i] != 2) { - printf("Strings failed to compare after two loops.\n"); + printf("Buffer failed to compare after two loops.\n"); return false; } } @@ -513,12 +570,12 @@ protected: } }; -class WriteVstBenchmark : public WriteBandwidthBenchmark { +class WriteVst1Benchmark : public WriteBandwidthBenchmark { public: - WriteVstBenchmark() : WriteBandwidthBenchmark() { } - virtual ~WriteVstBenchmark() {} + WriteVst1Benchmark() : WriteBandwidthBenchmark() { } + virtual ~WriteVst1Benchmark() {} - const char *getName() { return "vst"; } + const char *getName() { return "vst1"; } bool usesNeon() { return true; } @@ -558,6 +615,55 @@ protected: } }; +class WriteVstrBenchmark : public WriteBandwidthBenchmark { +public: + WriteVstrBenchmark() : WriteBandwidthBenchmark() { } + virtual ~WriteVstrBenchmark() {} + + const char *getName() { return "vstr"; } + + bool usesNeon() { return true; } + +protected: + // Write a given value using vst. + void bench(size_t num_loops) { +#if defined(__ARM_NEON__) + asm volatile( + "stmfd sp!, {r0,r1,r2,r3,r4}\n" + + "mov r0, %0\n" + "mov r1, %1\n" + "mov r2, %2\n" + "mov r4, #0\n" + + "0:\n" + "mov r3, r1, lsr #5\n" + + "add r4, r4, #1\n" + "vdup.8 d0, r4\n" + "vmov d1, d0\n" + "vmov d2, d0\n" + "vmov d3, d0\n" + + "1:\n" + "vstr d0, [r0, #0]\n" + "subs r3, r3, #1\n" + "vstr d1, [r0, #8]\n" + "vstr d0, [r0, #16]\n" + "vstr d1, [r0, #24]\n" + "add r0, r0, #32\n" + "bgt 1b\n" + + "sub r0, r0, r1\n" + "subs r2, r2, #1\n" + "bgt 0b\n" + + "ldmfd sp!, {r0,r1,r2,r3,r4}\n" + :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2"); +#endif + } +}; + class WriteVstmiaBenchmark : public WriteBandwidthBenchmark { public: WriteVstmiaBenchmark() : WriteBandwidthBenchmark() { } @@ -690,12 +796,12 @@ protected: } }; -class ReadVldBenchmark : public SingleBufferBandwidthBenchmark { +class ReadVld1Benchmark : public SingleBufferBandwidthBenchmark { public: - ReadVldBenchmark() : SingleBufferBandwidthBenchmark() { } - virtual ~ReadVldBenchmark() {} + ReadVld1Benchmark() : SingleBufferBandwidthBenchmark() { } + virtual ~ReadVld1Benchmark() {} - const char *getName() { return "vld"; } + const char *getName() { return "vld1"; } bool usesNeon() { return true; } @@ -728,6 +834,49 @@ protected: } }; +class ReadVldrBenchmark : public SingleBufferBandwidthBenchmark { +public: + ReadVldrBenchmark() : SingleBufferBandwidthBenchmark() { } + virtual ~ReadVldrBenchmark() {} + + const char *getName() { return "vldr"; } + + bool usesNeon() { return true; } + +protected: + // Write a given value using vst. + void bench(size_t num_loops) { +#if defined(__ARM_NEON__) + asm volatile( + "stmfd sp!, {r0,r1,r2,r3}\n" + + "mov r0, %0\n" + "mov r1, %1\n" + "mov r2, %2\n" + + "0:\n" + "mov r3, r1, lsr #5\n" + + "1:\n" + "vldr d0, [r0, #0]\n" + "subs r3, r3, #1\n" + "vldr d1, [r0, #8]\n" + "vldr d0, [r0, #16]\n" + "vldr d1, [r0, #24]\n" + "add r0, r0, #32\n" + "bgt 1b\n" + + "sub r0, r0, r1\n" + "subs r2, r2, #1\n" + "bgt 0b\n" + + "ldmfd sp!, {r0,r1,r2,r3}\n" + :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2"); +#endif + } +}; + + class ReadVldmiaBenchmark : public SingleBufferBandwidthBenchmark { public: ReadVldmiaBenchmark() : SingleBufferBandwidthBenchmark() { } diff --git a/tests/memtest/memtest.cpp b/tests/memtest/memtest.cpp index 86ec9a41..7b097d39 100644 --- a/tests/memtest/memtest.cpp +++ b/tests/memtest/memtest.cpp @@ -43,15 +43,15 @@ static void usage(char* p) { " write_bandwidth [--size BYTES_TO_WRITE]\n" " read_bandwidth [--size BYTES_TO_COPY]\n" " per_core_bandwidth [--size BYTES]\n" - " --type copy_ldrd_strd | copy_ldmia_stmia | copy_vld_vst |\n" - " copy_vldmia_vstmia | memcpy | write_strd | write_stmia |\n" - " write_vst | write_vstmia | memset | read_ldrd |\n" - " read_ldmia | read_vld | read_vldmia\n" + " --type copy_ldrd_strd | copy_ldmia_stmia | copy_vld1_vst1 |\n" + " copy_vldr_vstr | copy_vldmia_vstmia | memcpy | write_strd |\n" + " write_stmia | write_vst1 | write_vstr | write_vstmia | memset |\n" + " read_ldrd | read_ldmia | read_vld1 | read_vldr | read_vldmia\n" " multithread_bandwidth [--size BYTES]\n" - " --type copy_ldrd_strd | copy_ldmia_stmia | copy_vld_vst |\n" - " copy_vldmia_vstmia | memcpy | write_strd | write_stmia |\n" - " write_vst | write_vstmia | memset | read_ldrd |\n" - " read_ldmia | read_vld | read_vldmia\n" + " --type copy_ldrd_strd | copy_ldmia_stmia | copy_vld1_vst1 |\n" + " copy_vldr_vstr | copy_vldmia_vstmia | memcpy | write_strd |\n" + " write_stmia | write_vst1 | write_vstr | write_vstmia | memset |\n" + " read_ldrd | read_ldmia | read_vld1 | read_vldr | read_vldmia\n" " --num_threads NUM_THREADS_TO_RUN\n" " malloc [fill]\n" " madvise\n" |