about | summary | refs | log | tree | commit | diff
path: root/scripts
diff options
context:
space:
mode:
author: Marat Dukhan <maratek@google.com> 2020-10-25 19:17:35 -0700
committer: XNNPACK Team <xnnpack-github-robot@google.com> 2020-10-25 19:18:13 -0700
commit: 82f0c3247dd04cc196880792c5b5cad64722db92 (patch)
tree: 9d90721589c3990ebb1596e23c3bdf5b1bf72a03 /scripts
parent: 0ff9718d746f1770847a6b19e4c3e5ba9be38556 (diff)
download: XNNPACK-82f0c3247dd04cc196880792c5b5cad64722db92.tar.gz
Auto-generate NEON/NEONFMA versions of DWCONV2D CHW 3x3s2p1 micro-kernels
PiperOrigin-RevId: 338962587
Diffstat (limited to 'scripts')
-rwxr-xr-x scripts/generate-f32-dwconv2d-chw.sh 20
1 files changed, 20 insertions, 0 deletions
diff --git a/scripts/generate-f32-dwconv2d-chw.sh b/scripts/generate-f32-dwconv2d-chw.sh
index 47f588792..c8b405b2f 100755
--- a/scripts/generate-f32-dwconv2d-chw.sh
+++ b/scripts/generate-f32-dwconv2d-chw.sh
@@ -29,6 +29,26 @@ tools/xngen src/f32-dwconv2d-chw/3x3p1-neon.c.in -D ROW_TILE=1 -D ACCUMULATORS=3
tools/xngen src/f32-dwconv2d-chw/3x3p1-neon.c.in -D ROW_TILE=1 -D ACCUMULATORS=4 -D FMA=1 -o src/f32-dwconv2d-chw/gen/3x3p1-minmax-neonfma-1x4-acc4.c
tools/xngen src/f32-dwconv2d-chw/3x3p1-neon.c.in -D ROW_TILE=2 -D ACCUMULATORS=2 -D FMA=1 -o src/f32-dwconv2d-chw/gen/3x3p1-minmax-neonfma-2x4-acc2.c
+tools/xngen src/f32-dwconv2d-chw/3x3s2p1-neon.c.in -D ROW_TILE=1 -D ACCUMULATORS=1 -D FMA=0 -o src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neon-1x4.c
+tools/xngen src/f32-dwconv2d-chw/3x3s2p1-neon.c.in -D ROW_TILE=2 -D ACCUMULATORS=1 -D FMA=0 -o src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neon-2x4.c
+tools/xngen src/f32-dwconv2d-chw/3x3s2p1-neon.c.in -D ROW_TILE=3 -D ACCUMULATORS=1 -D FMA=0 -o src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neon-3x4.c
+tools/xngen src/f32-dwconv2d-chw/3x3s2p1-neon.c.in -D ROW_TILE=4 -D ACCUMULATORS=1 -D FMA=0 -o src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neon-4x4.c
+
+tools/xngen src/f32-dwconv2d-chw/3x3s2p1-neon.c.in -D ROW_TILE=1 -D ACCUMULATORS=2 -D FMA=0 -o src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neon-1x4-acc2.c
+tools/xngen src/f32-dwconv2d-chw/3x3s2p1-neon.c.in -D ROW_TILE=1 -D ACCUMULATORS=3 -D FMA=0 -o src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neon-1x4-acc3.c
+tools/xngen src/f32-dwconv2d-chw/3x3s2p1-neon.c.in -D ROW_TILE=1 -D ACCUMULATORS=4 -D FMA=0 -o src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neon-1x4-acc4.c
+tools/xngen src/f32-dwconv2d-chw/3x3s2p1-neon.c.in -D ROW_TILE=2 -D ACCUMULATORS=2 -D FMA=0 -o src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neon-2x4-acc2.c
+
+tools/xngen src/f32-dwconv2d-chw/3x3s2p1-neon.c.in -D ROW_TILE=1 -D ACCUMULATORS=1 -D FMA=1 -o src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neonfma-1x4.c
+tools/xngen src/f32-dwconv2d-chw/3x3s2p1-neon.c.in -D ROW_TILE=2 -D ACCUMULATORS=1 -D FMA=1 -o src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neonfma-2x4.c
+tools/xngen src/f32-dwconv2d-chw/3x3s2p1-neon.c.in -D ROW_TILE=3 -D ACCUMULATORS=1 -D FMA=1 -o src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neonfma-3x4.c
+tools/xngen src/f32-dwconv2d-chw/3x3s2p1-neon.c.in -D ROW_TILE=4 -D ACCUMULATORS=1 -D FMA=1 -o src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neonfma-4x4.c
+
+tools/xngen src/f32-dwconv2d-chw/3x3s2p1-neon.c.in -D ROW_TILE=1 -D ACCUMULATORS=2 -D FMA=1 -o src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neonfma-1x4-acc2.c
+tools/xngen src/f32-dwconv2d-chw/3x3s2p1-neon.c.in -D ROW_TILE=1 -D ACCUMULATORS=3 -D FMA=1 -o src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neonfma-1x4-acc3.c
+tools/xngen src/f32-dwconv2d-chw/3x3s2p1-neon.c.in -D ROW_TILE=1 -D ACCUMULATORS=4 -D FMA=1 -o src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neonfma-1x4-acc4.c
+tools/xngen src/f32-dwconv2d-chw/3x3s2p1-neon.c.in -D ROW_TILE=2 -D ACCUMULATORS=2 -D FMA=1 -o src/f32-dwconv2d-chw/gen/3x3s2p1-minmax-neonfma-2x4-acc2.c
+
################################### x86 SSE ###################################
tools/xngen src/f32-dwconv2d-chw/3x3p1-sse.c.in -D ROW_TILE=1 -D ACCUMULATORS=1 -o src/f32-dwconv2d-chw/gen/3x3p1-minmax-sse-1x4.c
tools/xngen src/f32-dwconv2d-chw/3x3p1-sse.c.in -D ROW_TILE=2 -D ACCUMULATORS=1 -o src/f32-dwconv2d-chw/gen/3x3p1-minmax-sse-2x4.c