about | summary | refs | log | tree | commit | diff
path: root/scripts/generate-f32-vmulcaddc.sh
diff options
context:
space:
mode:
author: Marat Dukhan <maratek@google.com> 2019-12-04 15:10:12 -0800
committer: XNNPACK Team <xnnpack-github-robot@google.com> 2019-12-04 15:10:53 -0800
commit: 436ebe6cc2a7a6cc746ac4bcb8cf95f665ae6c29 (patch)
tree: 367cdc7cbcbcaa139c4a55fc72e2c4b26b7bdfdb /scripts/generate-f32-vmulcaddc.sh
parent: 05f3f6dc940ea45796c009bd09779f597a99151d (diff)
download: XNNPACK-436ebe6cc2a7a6cc746ac4bcb8cf95f665ae6c29.tar.gz
Separate WAsm micro-kernels and scalar micro-kernels
- WAsm-specific microkernels explicitly use f32.min/f32.max WAsm instructions
- About 2% end-to-end improvement on x86, no change on ARM64

PiperOrigin-RevId: 283845483
Diffstat (limited to 'scripts/generate-f32-vmulcaddc.sh')
-rwxr-xr-x scripts/generate-f32-vmulcaddc.sh 12
1 files changed, 9 insertions, 3 deletions
diff --git a/scripts/generate-f32-vmulcaddc.sh b/scripts/generate-f32-vmulcaddc.sh
index f438fc609..014670e97 100755
--- a/scripts/generate-f32-vmulcaddc.sh
+++ b/scripts/generate-f32-vmulcaddc.sh
@@ -5,9 +5,15 @@
# LICENSE file in the root directory of this source tree.
#################################### Scalar ###################################
-tools/xngen src/f32-vmulcaddc/scalar.c.in -D CHANNEL_TILE=1 -D ROW_TILE=2 -o src/f32-vmulcaddc/gen/c1-scalar-2x.c
-tools/xngen src/f32-vmulcaddc/scalar.c.in -D CHANNEL_TILE=2 -D ROW_TILE=2 -o src/f32-vmulcaddc/gen/c2-scalar-2x.c
-tools/xngen src/f32-vmulcaddc/scalar.c.in -D CHANNEL_TILE=4 -D ROW_TILE=2 -o src/f32-vmulcaddc/gen/c4-scalar-2x.c
+### Generic C micro-kernels
+tools/xngen src/f32-vmulcaddc/scalar.c.in -D CHANNEL_TILE=1 -D ROW_TILE=2 -D WASM=0 -o src/f32-vmulcaddc/gen/c1-scalar-2x.c
+tools/xngen src/f32-vmulcaddc/scalar.c.in -D CHANNEL_TILE=2 -D ROW_TILE=2 -D WASM=0 -o src/f32-vmulcaddc/gen/c2-scalar-2x.c
+tools/xngen src/f32-vmulcaddc/scalar.c.in -D CHANNEL_TILE=4 -D ROW_TILE=2 -D WASM=0 -o src/f32-vmulcaddc/gen/c4-scalar-2x.c
+
+### WAsm-specific micro-kernels
+tools/xngen src/f32-vmulcaddc/scalar.c.in -D CHANNEL_TILE=1 -D ROW_TILE=2 -D WASM=1 -o src/f32-vmulcaddc/gen/c1-wasm-2x.c
+tools/xngen src/f32-vmulcaddc/scalar.c.in -D CHANNEL_TILE=2 -D ROW_TILE=2 -D WASM=1 -o src/f32-vmulcaddc/gen/c2-wasm-2x.c
+tools/xngen src/f32-vmulcaddc/scalar.c.in -D CHANNEL_TILE=4 -D ROW_TILE=2 -D WASM=1 -o src/f32-vmulcaddc/gen/c4-wasm-2x.c
################################### ARM NEON ##################################
tools/xngen src/f32-vmulcaddc/neon.c.in -D CHANNEL_TILE=4 -D ROW_TILE=2 -D FMA=0 -o src/f32-vmulcaddc/gen/c4-neon-2x.c