aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Zhi An Ng <zhin@google.com> 2022-02-03 13:35:48 -0800
committer XNNPACK Team <xnnpack-github-robot@google.com> 2022-02-03 13:36:47 -0800
commit 2188833bcb0a80a253f6ad8315ed647c758c2ea6 (patch)
tree 6461d686eef5b27e2a5235f2e063277f655e5bc2
parent 94def8a8a34fb38c64a45ed201bad696e75be87a (diff)
download XNNPACK-2188833bcb0a80a253f6ad8315ed647c758c2ea6.tar.gz
Fix F32 IGEMM benchmark loop to not require capping NC to NR
PiperOrigin-RevId: 426226711
-rw-r--r-- bench/f32-igemm.cc 15
1 files changed, 6 insertions, 9 deletions
diff --git a/bench/f32-igemm.cc b/bench/f32-igemm.cc
index 80662596b..0c41e07ed 100644
--- a/bench/f32-igemm.cc
+++ b/bench/f32-igemm.cc
@@ -130,15 +130,12 @@ static void IGEMMBenchmark(benchmark::State& state,
for (uint32_t m = 0; m < output_size; m += mr) {
const uint32_t mb = min(output_size - m, mr);
- for (uint32_t n = 0; n < group_output_channels; n += nr) {
- const uint32_t nb = min(group_output_channels - n, nr);
- f32_igemm(
- mb, nb, group_input_channels * sizeof(float), kernel_size * mr * sizeof(void*),
- i.data() + buffer_index * i_elements + m,
- w.data() + buffer_index * w_elements + n * (kc_stride * kernel_size + 1),
- c.data() + buffer_index * c_elements + m * group_output_channels + n, group_output_channels * sizeof(float), nr * sizeof(float),
- 0, z.data(), &params);
- }
+ f32_igemm(
+ mb, group_output_channels, group_input_channels * sizeof(float), kernel_size * mr * sizeof(void*),
+ i.data() + buffer_index * i_elements + m,
+ w.data() + buffer_index * w_elements,
+ c.data() + buffer_index * c_elements + m * group_output_channels, group_output_channels * sizeof(float), nr * sizeof(float),
+ 0, z.data(), &params);
}
}