diff options
author | Marat Dukhan <maratek@google.com> | 2020-09-06 22:40:56 -0700 |
---|---|---|
committer | XNNPACK Team <xnnpack-github-robot@google.com> | 2020-09-06 22:41:34 -0700 |
commit | bb9225e24044023b0cc0ae14c8b53ff3f0d94419 (patch) | |
tree | 5a8a69d4a07f649d5a15ab62f99c6576500cb991 /scripts/generate-qs8-vadd.sh | |
parent | 2ffc5e6f7cab6e516c1c21bc9688e4be001b9b0c (diff) | |
download | XNNPACK-bb9225e24044023b0cc0ae14c8b53ff3f0d94419.tar.gz |
SSE4.1 and XOP versions of MUL32 VADD[C] microkernels
PiperOrigin-RevId: 330328493
Diffstat (limited to 'scripts/generate-qs8-vadd.sh')
-rwxr-xr-x | scripts/generate-qs8-vadd.sh | 20 |
1 files changed, 20 insertions, 0 deletions
```diff
diff --git a/scripts/generate-qs8-vadd.sh b/scripts/generate-qs8-vadd.sh
index bea8907dd..da86632df 100755
--- a/scripts/generate-qs8-vadd.sh
+++ b/scripts/generate-qs8-vadd.sh
@@ -37,6 +37,16 @@ tools/xngen src/qs8-vadd/sse-mul16-ld64.c.in -D BATCH_TILE=16 -D SSE=4 -o src/qs
 tools/xngen src/qs8-vadd/sse-mul16-ld64.c.in -D BATCH_TILE=24 -D SSE=4 -o src/qs8-vadd/gen/minmax-sse41-mul16-ld64-x24.c
 tools/xngen src/qs8-vadd/sse-mul16-ld64.c.in -D BATCH_TILE=32 -D SSE=4 -o src/qs8-vadd/gen/minmax-sse41-mul16-ld64-x32.c
 
+tools/xngen src/qs8-vadd/sse-mul32-ld32.c.in -D BATCH_TILE=8 -D SSE=4 -o src/qs8-vadd/gen/minmax-sse41-mul32-ld32-x8.c
+tools/xngen src/qs8-vadd/sse-mul32-ld32.c.in -D BATCH_TILE=16 -D SSE=4 -o src/qs8-vadd/gen/minmax-sse41-mul32-ld32-x16.c
+tools/xngen src/qs8-vadd/sse-mul32-ld32.c.in -D BATCH_TILE=24 -D SSE=4 -o src/qs8-vadd/gen/minmax-sse41-mul32-ld32-x24.c
+tools/xngen src/qs8-vadd/sse-mul32-ld32.c.in -D BATCH_TILE=32 -D SSE=4 -o src/qs8-vadd/gen/minmax-sse41-mul32-ld32-x32.c
+
+tools/xngen src/qs8-vadd/sse-mul32-ld32.c.in -D BATCH_TILE=8 -D SSE=5 -o src/qs8-vadd/gen/minmax-xop-mul32-ld32-x8.c
+tools/xngen src/qs8-vadd/sse-mul32-ld32.c.in -D BATCH_TILE=16 -D SSE=5 -o src/qs8-vadd/gen/minmax-xop-mul32-ld32-x16.c
+tools/xngen src/qs8-vadd/sse-mul32-ld32.c.in -D BATCH_TILE=24 -D SSE=5 -o src/qs8-vadd/gen/minmax-xop-mul32-ld32-x24.c
+tools/xngen src/qs8-vadd/sse-mul32-ld32.c.in -D BATCH_TILE=32 -D SSE=5 -o src/qs8-vadd/gen/minmax-xop-mul32-ld32-x32.c
+
 tools/xngen src/qs8-vaddc/sse-mul16-ld64.c.in -D BATCH_TILE=8 -D SSE=2 -o src/qs8-vaddc/gen/minmax-sse2-mul16-ld64-x8.c
 tools/xngen src/qs8-vaddc/sse-mul16-ld64.c.in -D BATCH_TILE=16 -D SSE=2 -o src/qs8-vaddc/gen/minmax-sse2-mul16-ld64-x16.c
 tools/xngen src/qs8-vaddc/sse-mul16-ld64.c.in -D BATCH_TILE=24 -D SSE=2 -o src/qs8-vaddc/gen/minmax-sse2-mul16-ld64-x24.c
@@ -47,6 +57,16 @@ tools/xngen src/qs8-vaddc/sse-mul16-ld64.c.in -D BATCH_TILE=16 -D SSE=4 -o src/q
 tools/xngen src/qs8-vaddc/sse-mul16-ld64.c.in -D BATCH_TILE=24 -D SSE=4 -o src/qs8-vaddc/gen/minmax-sse41-mul16-ld64-x24.c
 tools/xngen src/qs8-vaddc/sse-mul16-ld64.c.in -D BATCH_TILE=32 -D SSE=4 -o src/qs8-vaddc/gen/minmax-sse41-mul16-ld64-x32.c
 
+tools/xngen src/qs8-vaddc/sse-mul32-ld32.c.in -D BATCH_TILE=8 -D SSE=4 -o src/qs8-vaddc/gen/minmax-sse41-mul32-ld32-x8.c
+tools/xngen src/qs8-vaddc/sse-mul32-ld32.c.in -D BATCH_TILE=16 -D SSE=4 -o src/qs8-vaddc/gen/minmax-sse41-mul32-ld32-x16.c
+tools/xngen src/qs8-vaddc/sse-mul32-ld32.c.in -D BATCH_TILE=24 -D SSE=4 -o src/qs8-vaddc/gen/minmax-sse41-mul32-ld32-x24.c
+tools/xngen src/qs8-vaddc/sse-mul32-ld32.c.in -D BATCH_TILE=32 -D SSE=4 -o src/qs8-vaddc/gen/minmax-sse41-mul32-ld32-x32.c
+
+tools/xngen src/qs8-vaddc/sse-mul32-ld32.c.in -D BATCH_TILE=8 -D SSE=5 -o src/qs8-vaddc/gen/minmax-xop-mul32-ld32-x8.c
+tools/xngen src/qs8-vaddc/sse-mul32-ld32.c.in -D BATCH_TILE=16 -D SSE=5 -o src/qs8-vaddc/gen/minmax-xop-mul32-ld32-x16.c
+tools/xngen src/qs8-vaddc/sse-mul32-ld32.c.in -D BATCH_TILE=24 -D SSE=5 -o src/qs8-vaddc/gen/minmax-xop-mul32-ld32-x24.c
+tools/xngen src/qs8-vaddc/sse-mul32-ld32.c.in -D BATCH_TILE=32 -D SSE=5 -o src/qs8-vaddc/gen/minmax-xop-mul32-ld32-x32.c
+
 ################################## Unit tests #################################
 tools/generate-vbinary-test.py --tester VAddMicrokernelTester --spec test/qs8-vadd-minmax.yaml --output test/qs8-vadd-minmax.cc
 tools/generate-vbinary-test.py --tester VAddCMicrokernelTester --spec test/qs8-vaddc-minmax.yaml --output test/qs8-vaddc-minmax.cc
```