diff options
author | Marat Dukhan <maratek@google.com> | 2020-10-25 19:17:35 -0700 |
---|---|---|
committer | XNNPACK Team <xnnpack-github-robot@google.com> | 2020-10-25 19:18:13 -0700 |
commit | 82f0c3247dd04cc196880792c5b5cad64722db92 (patch) | |
tree | 9d90721589c3990ebb1596e23c3bdf5b1bf72a03 /scripts | |
parent | 0ff9718d746f1770847a6b19e4c3e5ba9be38556 (diff) | |
download | XNNPACK-82f0c3247dd04cc196880792c5b5cad64722db92.tar.gz |
Auto-generate NEON/NEONFMA versions of DWCONV2D CHW 3x3s2p1 micro-kernels
PiperOrigin-RevId: 338962587
Diffstat (limited to 'scripts')
-rwxr-xr-x | scripts/generate-f32-dwconv2d-chw.sh | 20 |
1 file changed, 20 insertions, 0 deletions
```diff
diff --git a/scripts/generate-f32-dwconv2d-chw.sh b/scripts/generate-f32-dwconv2d-chw.sh
index 47f588792..c8b405b2f 100755
--- a/scripts/generate-f32-dwconv2d-chw.sh
+++ b/scripts/generate-f32-dwconv2d-chw.sh
@@ -29,6 +29,26 @@ tools/xngen src/f32-dwconv2d-chw/3x3p1-neon.c.in -D ROW_TILE=1 -D ACCUMULATORS=3
 tools/xngen src/f32-dwconv2d-chw/3x3p1-neon.c.in -D ROW_TILE=1 -D ACCUMULATORS=4 -D FMA=1 -o src/f32-dwconv2d-chw/gen/3x3p1-minmax-neonfma-1x4-acc4.c
 tools/xngen src/f32-dwconv2d-chw/3x3p1-neon.c.in -D ROW_TILE=2 -D ACCUMULATORS=2 -D FMA=1 -o src/f32-dwconv2d-chw/gen/3x3p1-minmax-neonfma-2x4-acc2.c
 
+tools/xngen src/f32-dwconv2d-chw/3x3s2p1-neon.c.in -D ROW_TILE=1 -D ACCUMULATORS=1 -D FMA=0 -o src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neon-1x4.c
+tools/xngen src/f32-dwconv2d-chw/3x3s2p1-neon.c.in -D ROW_TILE=2 -D ACCUMULATORS=1 -D FMA=0 -o src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neon-2x4.c
+tools/xngen src/f32-dwconv2d-chw/3x3s2p1-neon.c.in -D ROW_TILE=3 -D ACCUMULATORS=1 -D FMA=0 -o src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neon-3x4.c
+tools/xngen src/f32-dwconv2d-chw/3x3s2p1-neon.c.in -D ROW_TILE=4 -D ACCUMULATORS=1 -D FMA=0 -o src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neon-4x4.c
+
+tools/xngen src/f32-dwconv2d-chw/3x3s2p1-neon.c.in -D ROW_TILE=1 -D ACCUMULATORS=2 -D FMA=0 -o src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neon-1x4-acc2.c
+tools/xngen src/f32-dwconv2d-chw/3x3s2p1-neon.c.in -D ROW_TILE=1 -D ACCUMULATORS=3 -D FMA=0 -o src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neon-1x4-acc3.c
+tools/xngen src/f32-dwconv2d-chw/3x3s2p1-neon.c.in -D ROW_TILE=1 -D ACCUMULATORS=4 -D FMA=0 -o src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neon-1x4-acc4.c
+tools/xngen src/f32-dwconv2d-chw/3x3s2p1-neon.c.in -D ROW_TILE=2 -D ACCUMULATORS=2 -D FMA=0 -o src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neon-2x4-acc2.c
+
+tools/xngen src/f32-dwconv2d-chw/3x3s2p1-neon.c.in -D ROW_TILE=1 -D ACCUMULATORS=1 -D FMA=1 -o src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neonfma-1x4.c
+tools/xngen src/f32-dwconv2d-chw/3x3s2p1-neon.c.in -D ROW_TILE=2 -D ACCUMULATORS=1 -D FMA=1 -o src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neonfma-2x4.c
+tools/xngen src/f32-dwconv2d-chw/3x3s2p1-neon.c.in -D ROW_TILE=3 -D ACCUMULATORS=1 -D FMA=1 -o src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neonfma-3x4.c
+tools/xngen src/f32-dwconv2d-chw/3x3s2p1-neon.c.in -D ROW_TILE=4 -D ACCUMULATORS=1 -D FMA=1 -o src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neonfma-4x4.c
+
+tools/xngen src/f32-dwconv2d-chw/3x3s2p1-neon.c.in -D ROW_TILE=1 -D ACCUMULATORS=2 -D FMA=1 -o src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neonfma-1x4-acc2.c
+tools/xngen src/f32-dwconv2d-chw/3x3s2p1-neon.c.in -D ROW_TILE=1 -D ACCUMULATORS=3 -D FMA=1 -o src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neonfma-1x4-acc3.c
+tools/xngen src/f32-dwconv2d-chw/3x3s2p1-neon.c.in -D ROW_TILE=1 -D ACCUMULATORS=4 -D FMA=1 -o src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neonfma-1x4-acc4.c
+tools/xngen src/f32-dwconv2d-chw/3x3s2p1-neon.c.in -D ROW_TILE=2 -D ACCUMULATORS=2 -D FMA=1 -o src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neonfma-2x4-acc2.c
+
 ################################### x86 SSE ###################################
 tools/xngen src/f32-dwconv2d-chw/3x3p1-sse.c.in -D ROW_TILE=1 -D ACCUMULATORS=1 -o src/f32-dwconv2d-chw/gen/3x3p1-minmax-sse-1x4.c
 tools/xngen src/f32-dwconv2d-chw/3x3p1-sse.c.in -D ROW_TILE=2 -D ACCUMULATORS=1 -o src/f32-dwconv2d-chw/gen/3x3p1-minmax-sse-2x4.c
```