From 1a3794a84074d7f22b8ddaba840aedd758a14cdd Mon Sep 17 00:00:00 2001 From: Christopher Ferris Date: Thu, 2 May 2013 15:12:11 -0700 Subject: Add read bandwidth and small refactor. - Addition of read bandwidth test. - Move the allocation of all buffers used for testing into the setSize() function. - Add a canRun function to indicate whether a test is able to run on this platform. Currently, this only disables neon bandwidth tests on platforms that don't support neon. - Refactor the thread testing functions to use one common routine. Change-Id: I4ebed7d20100f57e93fd10bbe9dbbb53b50cd4a6 --- tests/memtest/bandwidth.cpp | 166 +++++++++++++++----------- tests/memtest/bandwidth.h | 284 +++++++++++++++++++++++++++++++++++++++----- tests/memtest/memtest.cpp | 11 +- 3 files changed, 359 insertions(+), 102 deletions(-) (limited to 'tests') diff --git a/tests/memtest/bandwidth.cpp b/tests/memtest/bandwidth.cpp index d0f09108..0e0ec2e0 100644 --- a/tests/memtest/bandwidth.cpp +++ b/tests/memtest/bandwidth.cpp @@ -98,34 +98,48 @@ BandwidthBenchmark *createBandwidthBenchmarkObject(arg_t values) { size = values["size"].int_value; } if (strcmp(name, "copy_ldrd_strd") == 0) { - bench = new CopyLdrdStrdBenchmark(size); + bench = new CopyLdrdStrdBenchmark(); } else if (strcmp(name, "copy_ldmia_stmia") == 0) { - bench = new CopyLdmiaStmiaBenchmark(size); + bench = new CopyLdmiaStmiaBenchmark(); } else if (strcmp(name, "copy_vld_vst") == 0) { - bench = new CopyVldVstBenchmark(size); + bench = new CopyVldVstBenchmark(); } else if (strcmp(name, "copy_vldmia_vstmia") == 0) { - bench = new CopyVldmiaVstmiaBenchmark(size); + bench = new CopyVldmiaVstmiaBenchmark(); } else if (strcmp(name, "memcpy") == 0) { - bench = new MemcpyBenchmark(size); + bench = new MemcpyBenchmark(); } else if (strcmp(name, "write_strd") == 0) { - bench = new WriteStrdBenchmark(size); + bench = new WriteStrdBenchmark(); } else if (strcmp(name, "write_stmia") == 0) { - bench = new WriteStmiaBenchmark(size); + bench = 
new WriteStmiaBenchmark(); } else if (strcmp(name, "write_vst") == 0) { - bench = new WriteVstBenchmark(size); + bench = new WriteVstBenchmark(); } else if (strcmp(name, "write_vstmia") == 0) { - bench = new WriteVstmiaBenchmark(size); + bench = new WriteVstmiaBenchmark(); } else if (strcmp(name, "memset") == 0) { - bench = new MemsetBenchmark(size); + bench = new MemsetBenchmark(); + } else if (strcmp(name, "read_ldrd") == 0) { + bench = new ReadLdrdBenchmark(); + } else if (strcmp(name, "read_ldmia") == 0) { + bench = new ReadLdmiaBenchmark(); + } else if (strcmp(name, "read_vld") == 0) { + bench = new ReadVldBenchmark(); + } else if (strcmp(name, "read_vldmia") == 0) { + bench = new ReadVldmiaBenchmark(); + } else { + printf("Unknown type name %s\n", name); + return NULL; } - if (bench) { - if (values.count("num_warm_loops") > 0) { - bench->set_num_loops(values["num_warm_loops"].int_value); - } - if (values.count("num_loops") > 0) { - bench->set_num_loops(values["num_loops"].int_value); - } + if (!bench->setSize(values["size"].int_value)) { + printf("Failed to allocate buffers for benchmark.\n"); + return NULL; + } + + if (values.count("num_warm_loops") > 0) { + bench->set_num_loops(values["num_warm_loops"].int_value); + } + if (values.count("num_loops") > 0) { + bench->set_num_loops(values["num_loops"].int_value); } return bench; @@ -208,7 +222,6 @@ bool processThreadArgs(int argc, char** argv, option_t options[], BandwidthBenchmark *bench = createBandwidthBenchmarkObject(*values); if (!bench) { - printf("Unknown type %s\n", (*values)["type"].char_value); return false; } @@ -292,6 +305,9 @@ int per_core_bandwidth(int argc, char** argv) { it != cpu_list.end(); ++it, ++i) { args[i].core = *it; args[i].bench = createBandwidthBenchmarkObject(values); + if (!args[i].bench) { + return 0; + } } printf("Running on %d cores\n", cpu_list.size()); @@ -325,6 +341,9 @@ int multithread_bandwidth(int argc, char** argv) { for (int i = 0; i < num_threads; i++) { args[i].core = 
-1; args[i].bench = createBandwidthBenchmarkObject(values); + if (!args[i].bench) { + return 0; + } } printf("Running %d threads\n", num_threads); @@ -341,18 +360,20 @@ int multithread_bandwidth(int argc, char** argv) { return 0; } -int copy_bandwidth(int argc, char** argv) { +bool run_bandwidth_benchmark(int argc, char** argv, const char *name, + std::vector bench_objs) { arg_t values; values["size"].int_value = 0; - values["num_loops"].int_value = BandwidthBenchmark::DEFAULT_NUM_LOOPS; - values["num_warm_loops"].int_value = BandwidthBenchmark::DEFAULT_NUM_WARM_LOOPS; + values["num_warm_loops"].int_value = 0; + values["num_loops"].int_value = 0; if (!processBandwidthOptions(argc, argv, bandwidth_opts, &values)) { return -1; } + size_t size = values["size"].int_value; if ((size % 64) != 0) { printf("The size value must be a multiple of 64.\n"); - return -1; + return 1; } if (setpriority(PRIO_PROCESS, 0, -20)) { @@ -360,68 +381,77 @@ int copy_bandwidth(int argc, char** argv) { return -1; } - std::vector bench_objs; - bench_objs.push_back(new CopyLdrdStrdBenchmark(size)); - bench_objs.push_back(new CopyLdmiaStmiaBenchmark(size)); - bench_objs.push_back(new CopyVldVstBenchmark(size)); - bench_objs.push_back(new CopyVldmiaVstmiaBenchmark(size)); - bench_objs.push_back(new MemcpyBenchmark(size)); - - printf("Benchmarking copy bandwidth\n"); - printf(" size = %d\n", bench_objs[0]->size()); - printf(" num_warm_loops = %d\n", values["num_warm_loops"].int_value); - printf(" num_loops = %d\n\n", values["num_loops"].int_value); + bool preamble_printed = false; + size_t num_warm_loops = values["num_warm_loops"].int_value; + size_t num_loops = values["num_loops"].int_value; for (std::vector::iterator it = bench_objs.begin(); it != bench_objs.end(); ++it) { - (*it)->set_num_warm_loops(values["num_warm_loops"].int_value); - (*it)->set_num_loops(values["num_loops"].int_value); + if (!(*it)->canRun()) { + continue; + } + if (!(*it)->setSize(values["num_warm_loops"].int_value)) { + 
printf("Failed creating buffer for bandwidth test.\n"); + return false; + } + if (num_warm_loops) { + (*it)->set_num_warm_loops(num_warm_loops); + } + if (num_loops) { + (*it)->set_num_loops(num_loops); + } + if (!preamble_printed) { + preamble_printed = true; + printf("Benchmarking %s bandwidth\n", name); + printf(" size = %d\n", (*it)->size()); + printf(" num_warm_loops = %d\n", (*it)->num_warm_loops()); + printf(" num_loops = %d\n\n", (*it)->num_loops()); + } (*it)->run(); - printf(" Copy bandwidth with %s: %0.2f MB/s\n", (*it)->getName(), + printf(" %s bandwidth with %s: %0.2f MB/s\n", name, (*it)->getName(), (*it)->mb_per_sec()); } - return 0; + return true; } -int write_bandwidth(int argc, char** argv) { - arg_t values; - values["size"].int_value = 0; - values["num_loops"].int_value = BandwidthBenchmark::DEFAULT_NUM_LOOPS; - values["num_warm_loops"].int_value = BandwidthBenchmark::DEFAULT_NUM_WARM_LOOPS; - if (!processBandwidthOptions(argc, argv, bandwidth_opts, &values)) { +int copy_bandwidth(int argc, char** argv) { + std::vector bench_objs; + bench_objs.push_back(new CopyLdrdStrdBenchmark()); + bench_objs.push_back(new CopyLdmiaStmiaBenchmark()); + bench_objs.push_back(new CopyVldVstBenchmark()); + bench_objs.push_back(new CopyVldmiaVstmiaBenchmark()); + bench_objs.push_back(new MemcpyBenchmark()); + + if (!run_bandwidth_benchmark(argc, argv, "copy", bench_objs)) { return -1; } + return 0; +} - size_t size = values["size"].int_value; - if ((size % 64) != 0) { - printf("The size value must be a multiple of 64.\n"); - return 1; - } +int write_bandwidth(int argc, char** argv) { + std::vector bench_objs; + bench_objs.push_back(new WriteStrdBenchmark()); + bench_objs.push_back(new WriteStmiaBenchmark()); + bench_objs.push_back(new WriteVstBenchmark()); + bench_objs.push_back(new WriteVstmiaBenchmark()); + bench_objs.push_back(new MemsetBenchmark()); - if (setpriority(PRIO_PROCESS, 0, -20)) { - perror("Unable to raise priority of process."); + if 
(!run_bandwidth_benchmark(argc, argv, "write", bench_objs)) { return -1; } + return 0; +} + +int read_bandwidth(int argc, char** argv) { std::vector bench_objs; - bench_objs.push_back(new WriteStrdBenchmark(size)); - bench_objs.push_back(new WriteStmiaBenchmark(size)); - bench_objs.push_back(new WriteVstBenchmark(size)); - bench_objs.push_back(new WriteVstmiaBenchmark(size)); - bench_objs.push_back(new MemsetBenchmark(size)); - - printf("Benchmarking write bandwidth\n"); - printf(" size = %d\n", bench_objs[0]->size()); - printf(" num_warm_loops = %d\n", values["num_warm_loops"].int_value); - printf(" num_loops = %d\n\n", values["num_loops"].int_value); - for (std::vector::iterator it = bench_objs.begin(); - it != bench_objs.end(); ++it) { - (*it)->set_num_warm_loops(values["num_warm_loops"].int_value); - (*it)->set_num_loops(values["num_loops"].int_value); - (*it)->run(); - printf(" Write bandwidth with %s: %0.2f MB/s\n", (*it)->getName(), - (*it)->mb_per_sec()); - } + bench_objs.push_back(new ReadLdrdBenchmark()); + bench_objs.push_back(new ReadLdmiaBenchmark()); + bench_objs.push_back(new ReadVldBenchmark()); + bench_objs.push_back(new ReadVldmiaBenchmark()); + if (!run_bandwidth_benchmark(argc, argv, "read", bench_objs)) { + return -1; + } return 0; } diff --git a/tests/memtest/bandwidth.h b/tests/memtest/bandwidth.h index cef5cdd5..414977b9 100644 --- a/tests/memtest/bandwidth.h +++ b/tests/memtest/bandwidth.h @@ -22,13 +22,20 @@ // Bandwidth Class definitions. 
class BandwidthBenchmark { public: - BandwidthBenchmark(size_t size) - : _size(size), + BandwidthBenchmark() + : _size(0), _num_warm_loops(DEFAULT_NUM_WARM_LOOPS), _num_loops(DEFAULT_NUM_LOOPS) {} virtual ~BandwidthBenchmark() {} - void run() { + bool run() { + if (_size == 0) { + return false; + } + if (!canRun()) { + return false; + } + bench(_num_warm_loops); nsecs_t t = system_time(); @@ -36,12 +43,28 @@ public: t = system_time() - t; _mb_per_sec = (_size*(_num_loops/_BYTES_PER_MB))/(t/_NUM_NS_PER_SEC); + + return true; } + bool canRun() { return !usesNeon() || isNeonSupported(); } + + virtual bool setSize(size_t size) = 0; + virtual const char *getName() = 0; virtual bool verify() = 0; + virtual bool usesNeon() { return false; } + + bool isNeonSupported() { +#if defined(__ARM_NEON__) + return true; +#else + return false; +#endif + } + // Accessors/mutators. double mb_per_sec() { return _mb_per_sec; } size_t num_warm_loops() { return _num_warm_loops; } @@ -73,22 +96,45 @@ private: class CopyBandwidthBenchmark : public BandwidthBenchmark { public: - CopyBandwidthBenchmark(size_t size) : BandwidthBenchmark(size) { - if (_size == 0) { + CopyBandwidthBenchmark() : BandwidthBenchmark(), _src(NULL), _dst(NULL) { } + + bool setSize(size_t size) { + if (_src) { + free(_src); + } + if (_dst) { + free(_dst); + } + + if (size == 0) { _size = DEFAULT_COPY_SIZE; + } else { + _size = size; } + _src = reinterpret_cast(memalign(64, _size)); if (!_src) { - perror("Failed to allocate memory for test."); - exit(1); + perror("Failed to allocate memory for test."); + return false; } _dst = reinterpret_cast(memalign(64, _size)); if (!_dst) { - perror("Failed to allocate memory for test."); - exit(1); + perror("Failed to allocate memory for test."); + return false; + } + + return true; + } + virtual ~CopyBandwidthBenchmark() { + if (_src) { + free(_src); + _src = NULL; + } + if (_dst) { + free(_dst); + _dst = NULL; } } - virtual ~CopyBandwidthBenchmark() { free(_src); free(_dst); } 
bool verify() { memset(_src, 0x23, _size); @@ -120,7 +166,7 @@ protected: class CopyLdrdStrdBenchmark : public CopyBandwidthBenchmark { public: - CopyLdrdStrdBenchmark(size_t size) : CopyBandwidthBenchmark(size) { } + CopyLdrdStrdBenchmark() : CopyBandwidthBenchmark() { } virtual ~CopyLdrdStrdBenchmark() {} const char *getName() { return "ldrd/strd"; } @@ -174,7 +220,7 @@ protected: class CopyLdmiaStmiaBenchmark : public CopyBandwidthBenchmark { public: - CopyLdmiaStmiaBenchmark(size_t size) : CopyBandwidthBenchmark(size) { } + CopyLdmiaStmiaBenchmark() : CopyBandwidthBenchmark() { } virtual ~CopyLdmiaStmiaBenchmark() {} const char *getName() { return "ldmia/stmia"; } @@ -213,7 +259,7 @@ protected: class CopyVldVstBenchmark : public CopyBandwidthBenchmark { public: - CopyVldVstBenchmark(size_t size) : CopyBandwidthBenchmark(size) { } + CopyVldVstBenchmark() : CopyBandwidthBenchmark() { } virtual ~CopyVldVstBenchmark() {} const char *getName() { return "vld/vst"; } @@ -252,7 +298,7 @@ protected: class CopyVldmiaVstmiaBenchmark : public CopyBandwidthBenchmark { public: - CopyVldmiaVstmiaBenchmark(size_t size) : CopyBandwidthBenchmark(size) { } + CopyVldmiaVstmiaBenchmark() : CopyBandwidthBenchmark() { } virtual ~CopyVldmiaVstmiaBenchmark() {} const char *getName() { return "vldmia/vstmia"; } @@ -289,7 +335,7 @@ protected: class MemcpyBenchmark : public CopyBandwidthBenchmark { public: - MemcpyBenchmark(size_t size) : CopyBandwidthBenchmark(size) { } + MemcpyBenchmark() : CopyBandwidthBenchmark() { } virtual ~MemcpyBenchmark() {} const char *getName() { return "memcpy"; } @@ -302,21 +348,50 @@ protected: } }; -class WriteBandwidthBenchmark : public BandwidthBenchmark { +class SingleBufferBandwidthBenchmark : public BandwidthBenchmark { public: - WriteBandwidthBenchmark(size_t size) : BandwidthBenchmark(size) { + SingleBufferBandwidthBenchmark() : BandwidthBenchmark(), _buffer(NULL) { } + virtual ~SingleBufferBandwidthBenchmark() { + if (_buffer) { + free(_buffer); + 
_buffer = NULL; + } + } + + bool setSize(size_t size) { + if (_buffer) { + free(_buffer); + _buffer = NULL; + } + if (_size == 0) { - _size = DEFAULT_WRITE_SIZE; + _size = DEFAULT_SINGLE_BUFFER_SIZE; + } else { + _size = size; } _buffer = reinterpret_cast(memalign(64, _size)); if (!_buffer) { - perror("Failed to allocate memory for test."); - exit(1); + perror("Failed to allocate memory for test."); + return false; } memset(_buffer, 0, _size); + + return true; } - virtual ~WriteBandwidthBenchmark() { free(_buffer); } + + bool verify() { return true; } + +protected: + char *_buffer; + + static const unsigned int DEFAULT_SINGLE_BUFFER_SIZE = 16000; +}; + +class WriteBandwidthBenchmark : public SingleBufferBandwidthBenchmark { +public: + WriteBandwidthBenchmark() : SingleBufferBandwidthBenchmark() { } + virtual ~WriteBandwidthBenchmark() { } bool verify() { memset(_buffer, 0, _size); @@ -339,16 +414,11 @@ public: return true; } - -protected: - char *_buffer; - - static const unsigned int DEFAULT_WRITE_SIZE = 16000; }; class WriteStrdBenchmark : public WriteBandwidthBenchmark { public: - WriteStrdBenchmark(size_t size) : WriteBandwidthBenchmark(size) { } + WriteStrdBenchmark() : WriteBandwidthBenchmark() { } virtual ~WriteStrdBenchmark() {} const char *getName() { return "strd"; } @@ -392,7 +462,7 @@ protected: class WriteStmiaBenchmark : public WriteBandwidthBenchmark { public: - WriteStmiaBenchmark(size_t size) : WriteBandwidthBenchmark(size) { } + WriteStmiaBenchmark() : WriteBandwidthBenchmark() { } virtual ~WriteStmiaBenchmark() {} const char *getName() { return "stmia"; } @@ -437,11 +507,13 @@ protected: class WriteVstBenchmark : public WriteBandwidthBenchmark { public: - WriteVstBenchmark(size_t size) : WriteBandwidthBenchmark(size) { } + WriteVstBenchmark() : WriteBandwidthBenchmark() { } virtual ~WriteVstBenchmark() {} const char *getName() { return "vst"; } + bool usesNeon() { return true; } + protected: // Write a given value using vst. 
void bench(size_t num_loops) { @@ -480,11 +552,13 @@ protected: class WriteVstmiaBenchmark : public WriteBandwidthBenchmark { public: - WriteVstmiaBenchmark(size_t size) : WriteBandwidthBenchmark(size) { } + WriteVstmiaBenchmark() : WriteBandwidthBenchmark() { } virtual ~WriteVstmiaBenchmark() {} const char *getName() { return "vstmia"; } + bool usesNeon() { return true; } + protected: // Write a given value using vstmia. void bench(size_t num_loops) { @@ -523,7 +597,7 @@ protected: class MemsetBenchmark : public WriteBandwidthBenchmark { public: - MemsetBenchmark(size_t size) : WriteBandwidthBenchmark(size) { } + MemsetBenchmark() : WriteBandwidthBenchmark() { } virtual ~MemsetBenchmark() {} const char *getName() { return "memset"; } @@ -536,4 +610,152 @@ protected: } }; +class ReadLdrdBenchmark : public SingleBufferBandwidthBenchmark { +public: + ReadLdrdBenchmark() : SingleBufferBandwidthBenchmark() { } + virtual ~ReadLdrdBenchmark() {} + + const char *getName() { return "ldrd"; } + +protected: + // Read the buffer using ldrd. + void bench(size_t num_loops) { + asm volatile( + "stmfd sp!, {r0,r1,r2,r3,r4,r5}\n" + + "mov r0, %0\n" + "mov r1, %1\n" + "mov r2, %2\n" + + "0:\n" + "mov r3, r1, lsr #5\n" + + "1:\n" + "subs r3, r3, #1\n" + "ldrd r4, r5, [r0]\n" + "ldrd r4, r5, [r0, #8]\n" + "ldrd r4, r5, [r0, #16]\n" + "ldrd r4, r5, [r0, #24]\n" + "add r0, r0, #32\n" + "bgt 1b\n" + + "sub r0, r0, r1\n" + "subs r2, r2, #1\n" + "bgt 0b\n" + + "ldmfd sp!, {r0,r1,r2,r3,r4,r5}\n" + :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2"); + } +}; + +class ReadLdmiaBenchmark : public SingleBufferBandwidthBenchmark { +public: + ReadLdmiaBenchmark() : SingleBufferBandwidthBenchmark() { } + virtual ~ReadLdmiaBenchmark() {} + + const char *getName() { return "ldmia"; } + +protected: + // Read the buffer using ldmia.
+ void bench(size_t num_loops) { + asm volatile( + "stmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n" + + "mov r0, %0\n" + "mov r1, %1\n" + "mov r2, %2\n" + + "0:\n" + "mov r3, r1, lsr #5\n" + + "1:\n" + "subs r3, r3, #1\n" + "ldmia r0!, {r4, r5, r6, r7, r8, r9, r10, r11}\n" + "bgt 1b\n" + + "sub r0, r0, r1\n" + "subs r2, r2, #1\n" + "bgt 0b\n" + + "ldmfd sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11}\n" + :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2"); + } +}; + +class ReadVldBenchmark : public SingleBufferBandwidthBenchmark { +public: + ReadVldBenchmark() : SingleBufferBandwidthBenchmark() { } + virtual ~ReadVldBenchmark() {} + + const char *getName() { return "vld"; } + + bool usesNeon() { return true; } + +protected: + // Read the buffer using vld. + void bench(size_t num_loops) { +#if defined(__ARM_NEON__) + asm volatile( + "stmfd sp!, {r0,r1,r2,r3}\n" + + "mov r0, %0\n" + "mov r1, %1\n" + "mov r2, %2\n" + + "0:\n" + "mov r3, r1, lsr #5\n" + + "1:\n" + "subs r3, r3, #1\n" + "vld1.8 {d0-d3}, [r0:128]!\n" + "bgt 1b\n" + + "sub r0, r0, r1\n" + "subs r2, r2, #1\n" + "bgt 0b\n" + + "ldmfd sp!, {r0,r1,r2,r3}\n" + :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2"); +#endif + } +}; + +class ReadVldmiaBenchmark : public SingleBufferBandwidthBenchmark { +public: + ReadVldmiaBenchmark() : SingleBufferBandwidthBenchmark() { } + virtual ~ReadVldmiaBenchmark() {} + + const char *getName() { return "vldmia"; } + + bool usesNeon() { return true; } + +protected: + // Read the buffer using vldmia.
+ void bench(size_t num_loops) { +#if defined(__ARM_NEON__) + asm volatile( + "stmfd sp!, {r0,r1,r2,r3}\n" + + "mov r0, %0\n" + "mov r1, %1\n" + "mov r2, %2\n" + + "0:\n" + "mov r3, r1, lsr #5\n" + + "1:\n" + "subs r3, r3, #1\n" + "vldmia r0!, {d0-d3}\n" + "bgt 1b\n" + + "sub r0, r0, r1\n" + "subs r2, r2, #1\n" + "bgt 0b\n" + + "ldmfd sp!, {r0,r1,r2,r3}\n" + :: "r" (_buffer), "r" (_size), "r" (num_loops) : "r0", "r1", "r2"); +#endif + } +}; + #endif // __BANDWIDTH_H__ diff --git a/tests/memtest/memtest.cpp b/tests/memtest/memtest.cpp index 0d76b45d..86ec9a41 100644 --- a/tests/memtest/memtest.cpp +++ b/tests/memtest/memtest.cpp @@ -41,14 +41,17 @@ static void usage(char* p) { " is one of the following:\n" " copy_bandwidth [--size BYTES_TO_COPY]\n" " write_bandwidth [--size BYTES_TO_WRITE]\n" + " read_bandwidth [--size BYTES_TO_COPY]\n" " per_core_bandwidth [--size BYTES]\n" " --type copy_ldrd_strd | copy_ldmia_stmia | copy_vld_vst |\n" " copy_vldmia_vstmia | memcpy | write_strd | write_stmia |\n" - " write_vst | write_vstmia | memset\n" + " write_vst | write_vstmia | memset | read_ldrd |\n" + " read_ldmia | read_vld | read_vldmia\n" " multithread_bandwidth [--size BYTES]\n" " --type copy_ldrd_strd | copy_ldmia_stmia | copy_vld_vst |\n" " copy_vldmia_vstmia | memcpy | write_strd | write_stmia |\n" - " write_vst | write_vstmia | memset\n" + " write_vst | write_vstmia | memset | read_ldrd |\n" + " read_ldmia | read_vld | read_vldmia\n" " --num_threads NUM_THREADS_TO_RUN\n" " malloc [fill]\n" " madvise\n" @@ -61,6 +64,7 @@ static void usage(char* p) { int copy_bandwidth(int argc, char** argv); int write_bandwidth(int argc, char** argv); +int read_bandwidth(int argc, char** argv); int per_core_bandwidth(int argc, char** argv); int multithread_bandwidth(int argc, char** argv); int malloc_test(int argc, char** argv); @@ -83,7 +87,8 @@ function_t function_table[] = { { "crawl", crawl_test }, { "fp", fp_test }, { "copy_bandwidth", copy_bandwidth }, - { "write_bandwidth", 
write_bandwidth}, + { "write_bandwidth", write_bandwidth }, + { "read_bandwidth", read_bandwidth }, { "per_core_bandwidth", per_core_bandwidth }, { "multithread_bandwidth", multithread_bandwidth }, }; -- cgit v1.2.3