aboutsummaryrefslogtreecommitdiff
path: root/scripts/generate-f16-gemm.sh
diff options
context:
space:
mode:
author Frank Barchard <fbarchard@google.com> 2020-05-06 01:12:04 -0700
committer XNNPACK Team <xnnpack-github-robot@google.com> 2020-05-06 01:12:34 -0700
commit 3f9f99f372081793f1f3765af7fd51046209b8ee (patch)
tree c2a6d79bd0d4adfa2167c8d25fe4648e43d35be2 /scripts/generate-f16-gemm.sh
parent 5d761c426d3978a1c364dc391e187fe70e10bf13 (diff)
download XNNPACK-3f9f99f372081793f1f3765af7fd51046209b8ee.tar.gz
Nx16 FP16 intrinsic GEMM and IGEMM ukernels
IGEMM performance mobilenet_v2 on Pixel 4
f16_igemm_6x16__neonfp16arith_ld64 8986179
f16_igemm_4x16__neonfp16arith_ld64 9969704
f16_igemm_8x16__neonfp16arith_ld64 10405198
f16_igemm_1x16__neonfp16arith_ld64 11418367
f16_igemm_6x8__neonfp16arith_ld64 11488318
f16_igemm_4x8__neonfp16arith_ld64 11566898
f16_igemm_8x8__neonfp16arith_ld64 11992713
f16_igemm_1x8__neonfp16arith_ld64 24999989

GEMM performance mobilenet_v2 on Pixel 4
f16_gemm_6x8__aarch64_neonfp16arith_ld64 5026706
f16_gemm_6x16__aarch64_neonfp16arith_ld32 5067141
f16_gemm_4x16__aarch64_neonfp16arith_ld32 5083430
f16_gemm_8x8__aarch64_neonfp16arith_ld64 5157512
f16_gemm_4x8__aarch64_neonfp16arith_ld64 5215077
f16_gemm_6x16__neonfp16arith_ld64 8605063
f16_gemm_4x16__neonfp16arith_ld64 8856811
f16_gemm_1x16__aarch64_neonfp16arith_ld32 9828810
f16_gemm_1x16__neonfp16arith_ld64 10187374
f16_gemm_1x8__aarch64_neonfp16arith_ld64 10449552
f16_gemm_4x8__neonfp16arith_ld64 10537397
f16_gemm_6x8__neonfp16arith_ld64 10797656
f16_gemm_8x8__neonfp16arith_ld64 11174740
f16_gemm_8x16__neonfp16arith_ld64 12283543
f16_gemm_1x8__neonfp16arith_ld64 16208999

PiperOrigin-RevId: 310106462
Diffstat (limited to 'scripts/generate-f16-gemm.sh')
-rwxr-xr-x  scripts/generate-f16-gemm.sh  9
1 files changed, 9 insertions, 0 deletions
diff --git a/scripts/generate-f16-gemm.sh b/scripts/generate-f16-gemm.sh
index b91daf8b0..443af5ad0 100755
--- a/scripts/generate-f16-gemm.sh
+++ b/scripts/generate-f16-gemm.sh
@@ -32,5 +32,14 @@ tools/xngen src/f16-gemm/neonfp16arith-ld64.c.in -D MR=4 -D NR=8 -D INC=1 -o src
tools/xngen src/f16-gemm/neonfp16arith-ld64.c.in -D MR=6 -D NR=8 -D INC=1 -o src/f16-gemm/gen-inc/6x8inc-minmax-neonfp16arith-ld64.c
tools/xngen src/f16-gemm/neonfp16arith-ld64.c.in -D MR=8 -D NR=8 -D INC=1 -o src/f16-gemm/gen-inc/8x8inc-minmax-neonfp16arith-ld64.c
+tools/xngen src/f16-gemm/neonfp16arith-ld64.c.in -D MR=1 -D NR=16 -D INC=0 -o src/f16-gemm/gen/1x16-minmax-neonfp16arith-ld64.c
+tools/xngen src/f16-gemm/neonfp16arith-ld64.c.in -D MR=4 -D NR=16 -D INC=0 -o src/f16-gemm/gen/4x16-minmax-neonfp16arith-ld64.c
+tools/xngen src/f16-gemm/neonfp16arith-ld64.c.in -D MR=6 -D NR=16 -D INC=0 -o src/f16-gemm/gen/6x16-minmax-neonfp16arith-ld64.c
+tools/xngen src/f16-gemm/neonfp16arith-ld64.c.in -D MR=8 -D NR=16 -D INC=0 -o src/f16-gemm/gen/8x16-minmax-neonfp16arith-ld64.c
+tools/xngen src/f16-gemm/neonfp16arith-ld64.c.in -D MR=1 -D NR=16 -D INC=1 -o src/f16-gemm/gen-inc/1x16inc-minmax-neonfp16arith-ld64.c
+tools/xngen src/f16-gemm/neonfp16arith-ld64.c.in -D MR=4 -D NR=16 -D INC=1 -o src/f16-gemm/gen-inc/4x16inc-minmax-neonfp16arith-ld64.c
+tools/xngen src/f16-gemm/neonfp16arith-ld64.c.in -D MR=6 -D NR=16 -D INC=1 -o src/f16-gemm/gen-inc/6x16inc-minmax-neonfp16arith-ld64.c
+tools/xngen src/f16-gemm/neonfp16arith-ld64.c.in -D MR=8 -D NR=16 -D INC=1 -o src/f16-gemm/gen-inc/8x16inc-minmax-neonfp16arith-ld64.c
+
################################## Unit tests #################################
tools/generate-gemm-test.py --spec test/f16-gemm-minmax.yaml --output test/f16-gemm-minmax.cc