about | summary | refs | log | tree | commit | diff
path: root/scripts/generate-qs8-vadd.sh
diff options
context:
space:
mode:
author: Marat Dukhan <maratek@google.com> 2020-09-06 22:40:56 -0700
committer: XNNPACK Team <xnnpack-github-robot@google.com> 2020-09-06 22:41:34 -0700
commit: bb9225e24044023b0cc0ae14c8b53ff3f0d94419 (patch)
tree: 5a8a69d4a07f649d5a15ab62f99c6576500cb991 /scripts/generate-qs8-vadd.sh
parent: 2ffc5e6f7cab6e516c1c21bc9688e4be001b9b0c (diff)
download: XNNPACK-bb9225e24044023b0cc0ae14c8b53ff3f0d94419.tar.gz
SSE4.1 and XOP versions of MUL32 VADD[C] microkernels
PiperOrigin-RevId: 330328493
Diffstat (limited to 'scripts/generate-qs8-vadd.sh')
-rwxr-xr-x scripts/generate-qs8-vadd.sh 20
1 files changed, 20 insertions, 0 deletions
diff --git a/scripts/generate-qs8-vadd.sh b/scripts/generate-qs8-vadd.sh
index bea8907dd..da86632df 100755
--- a/scripts/generate-qs8-vadd.sh
+++ b/scripts/generate-qs8-vadd.sh
@@ -37,6 +37,16 @@ tools/xngen src/qs8-vadd/sse-mul16-ld64.c.in -D BATCH_TILE=16 -D SSE=4 -o src/qs
tools/xngen src/qs8-vadd/sse-mul16-ld64.c.in -D BATCH_TILE=24 -D SSE=4 -o src/qs8-vadd/gen/minmax-sse41-mul16-ld64-x24.c
tools/xngen src/qs8-vadd/sse-mul16-ld64.c.in -D BATCH_TILE=32 -D SSE=4 -o src/qs8-vadd/gen/minmax-sse41-mul16-ld64-x32.c
+tools/xngen src/qs8-vadd/sse-mul32-ld32.c.in -D BATCH_TILE=8 -D SSE=4 -o src/qs8-vadd/gen/minmax-sse41-mul32-ld32-x8.c
+tools/xngen src/qs8-vadd/sse-mul32-ld32.c.in -D BATCH_TILE=16 -D SSE=4 -o src/qs8-vadd/gen/minmax-sse41-mul32-ld32-x16.c
+tools/xngen src/qs8-vadd/sse-mul32-ld32.c.in -D BATCH_TILE=24 -D SSE=4 -o src/qs8-vadd/gen/minmax-sse41-mul32-ld32-x24.c
+tools/xngen src/qs8-vadd/sse-mul32-ld32.c.in -D BATCH_TILE=32 -D SSE=4 -o src/qs8-vadd/gen/minmax-sse41-mul32-ld32-x32.c
+
+tools/xngen src/qs8-vadd/sse-mul32-ld32.c.in -D BATCH_TILE=8 -D SSE=5 -o src/qs8-vadd/gen/minmax-xop-mul32-ld32-x8.c
+tools/xngen src/qs8-vadd/sse-mul32-ld32.c.in -D BATCH_TILE=16 -D SSE=5 -o src/qs8-vadd/gen/minmax-xop-mul32-ld32-x16.c
+tools/xngen src/qs8-vadd/sse-mul32-ld32.c.in -D BATCH_TILE=24 -D SSE=5 -o src/qs8-vadd/gen/minmax-xop-mul32-ld32-x24.c
+tools/xngen src/qs8-vadd/sse-mul32-ld32.c.in -D BATCH_TILE=32 -D SSE=5 -o src/qs8-vadd/gen/minmax-xop-mul32-ld32-x32.c
+
tools/xngen src/qs8-vaddc/sse-mul16-ld64.c.in -D BATCH_TILE=8 -D SSE=2 -o src/qs8-vaddc/gen/minmax-sse2-mul16-ld64-x8.c
tools/xngen src/qs8-vaddc/sse-mul16-ld64.c.in -D BATCH_TILE=16 -D SSE=2 -o src/qs8-vaddc/gen/minmax-sse2-mul16-ld64-x16.c
tools/xngen src/qs8-vaddc/sse-mul16-ld64.c.in -D BATCH_TILE=24 -D SSE=2 -o src/qs8-vaddc/gen/minmax-sse2-mul16-ld64-x24.c
@@ -47,6 +57,16 @@ tools/xngen src/qs8-vaddc/sse-mul16-ld64.c.in -D BATCH_TILE=16 -D SSE=4 -o src/q
tools/xngen src/qs8-vaddc/sse-mul16-ld64.c.in -D BATCH_TILE=24 -D SSE=4 -o src/qs8-vaddc/gen/minmax-sse41-mul16-ld64-x24.c
tools/xngen src/qs8-vaddc/sse-mul16-ld64.c.in -D BATCH_TILE=32 -D SSE=4 -o src/qs8-vaddc/gen/minmax-sse41-mul16-ld64-x32.c
+tools/xngen src/qs8-vaddc/sse-mul32-ld32.c.in -D BATCH_TILE=8 -D SSE=4 -o src/qs8-vaddc/gen/minmax-sse41-mul32-ld32-x8.c
+tools/xngen src/qs8-vaddc/sse-mul32-ld32.c.in -D BATCH_TILE=16 -D SSE=4 -o src/qs8-vaddc/gen/minmax-sse41-mul32-ld32-x16.c
+tools/xngen src/qs8-vaddc/sse-mul32-ld32.c.in -D BATCH_TILE=24 -D SSE=4 -o src/qs8-vaddc/gen/minmax-sse41-mul32-ld32-x24.c
+tools/xngen src/qs8-vaddc/sse-mul32-ld32.c.in -D BATCH_TILE=32 -D SSE=4 -o src/qs8-vaddc/gen/minmax-sse41-mul32-ld32-x32.c
+
+tools/xngen src/qs8-vaddc/sse-mul32-ld32.c.in -D BATCH_TILE=8 -D SSE=5 -o src/qs8-vaddc/gen/minmax-xop-mul32-ld32-x8.c
+tools/xngen src/qs8-vaddc/sse-mul32-ld32.c.in -D BATCH_TILE=16 -D SSE=5 -o src/qs8-vaddc/gen/minmax-xop-mul32-ld32-x16.c
+tools/xngen src/qs8-vaddc/sse-mul32-ld32.c.in -D BATCH_TILE=24 -D SSE=5 -o src/qs8-vaddc/gen/minmax-xop-mul32-ld32-x24.c
+tools/xngen src/qs8-vaddc/sse-mul32-ld32.c.in -D BATCH_TILE=32 -D SSE=5 -o src/qs8-vaddc/gen/minmax-xop-mul32-ld32-x32.c
+
################################## Unit tests #################################
tools/generate-vbinary-test.py --tester VAddMicrokernelTester --spec test/qs8-vadd-minmax.yaml --output test/qs8-vadd-minmax.cc
tools/generate-vbinary-test.py --tester VAddCMicrokernelTester --spec test/qs8-vaddc-minmax.yaml --output test/qs8-vaddc-minmax.cc