From 436ebe6cc2a7a6cc746ac4bcb8cf95f665ae6c29 Mon Sep 17 00:00:00 2001
From: Marat Dukhan
Date: Wed, 4 Dec 2019 15:10:12 -0800
Subject: Separate WAsm micro-kernels and scalar micro-kernels

- WAsm-specific microkernels explicitly use f32.min/f32.max WAsm instructions
- About 2% end-to-end improvement on x86, no change on ARM64

PiperOrigin-RevId: 283845483
---
 scripts/generate-f32-vmulcaddc.sh | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'scripts/generate-f32-vmulcaddc.sh')

diff --git a/scripts/generate-f32-vmulcaddc.sh b/scripts/generate-f32-vmulcaddc.sh
index f438fc609..014670e97 100755
--- a/scripts/generate-f32-vmulcaddc.sh
+++ b/scripts/generate-f32-vmulcaddc.sh
@@ -5,9 +5,15 @@
 # LICENSE file in the root directory of this source tree.
 
 #################################### Scalar ###################################
-tools/xngen src/f32-vmulcaddc/scalar.c.in -D CHANNEL_TILE=1 -D ROW_TILE=2 -o src/f32-vmulcaddc/gen/c1-scalar-2x.c
-tools/xngen src/f32-vmulcaddc/scalar.c.in -D CHANNEL_TILE=2 -D ROW_TILE=2 -o src/f32-vmulcaddc/gen/c2-scalar-2x.c
-tools/xngen src/f32-vmulcaddc/scalar.c.in -D CHANNEL_TILE=4 -D ROW_TILE=2 -o src/f32-vmulcaddc/gen/c4-scalar-2x.c
+### Generic C micro-kernels
+tools/xngen src/f32-vmulcaddc/scalar.c.in -D CHANNEL_TILE=1 -D ROW_TILE=2 -D WASM=0 -o src/f32-vmulcaddc/gen/c1-scalar-2x.c
+tools/xngen src/f32-vmulcaddc/scalar.c.in -D CHANNEL_TILE=2 -D ROW_TILE=2 -D WASM=0 -o src/f32-vmulcaddc/gen/c2-scalar-2x.c
+tools/xngen src/f32-vmulcaddc/scalar.c.in -D CHANNEL_TILE=4 -D ROW_TILE=2 -D WASM=0 -o src/f32-vmulcaddc/gen/c4-scalar-2x.c
+
+### WAsm-specific micro-kernels
+tools/xngen src/f32-vmulcaddc/scalar.c.in -D CHANNEL_TILE=1 -D ROW_TILE=2 -D WASM=1 -o src/f32-vmulcaddc/gen/c1-wasm-2x.c
+tools/xngen src/f32-vmulcaddc/scalar.c.in -D CHANNEL_TILE=2 -D ROW_TILE=2 -D WASM=1 -o src/f32-vmulcaddc/gen/c2-wasm-2x.c
+tools/xngen src/f32-vmulcaddc/scalar.c.in -D CHANNEL_TILE=4 -D ROW_TILE=2 -D WASM=1 -o src/f32-vmulcaddc/gen/c4-wasm-2x.c
 
 ################################### ARM NEON ##################################
 tools/xngen src/f32-vmulcaddc/neon.c.in -D CHANNEL_TILE=4 -D ROW_TILE=2 -D FMA=0 -o src/f32-vmulcaddc/gen/c4-neon-2x.c
-- 
cgit v1.2.3