about | summary | refs | log | tree | commit | diff
path: root/vpx_dsp/vpx_dsp_rtcd_defs.pl
diff options
context:
space:
mode:
Diffstat (limited to 'vpx_dsp/vpx_dsp_rtcd_defs.pl')
-rw-r--r-- vpx_dsp/vpx_dsp_rtcd_defs.pl 414
1 files changed, 292 insertions, 122 deletions
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl
index 8725821b6..e9d63f6ef 100644
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -17,6 +17,10 @@ print <<EOF
#include "vpx/vpx_integer.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_dsp/vpx_filter.h"
+#if CONFIG_VP9_ENCODER
+ struct macroblock_plane;
+ struct ScanOrder;
+#endif
EOF
}
@@ -38,7 +42,7 @@ if ($opts{arch} eq "x86_64") {
#
add_proto qw/void vpx_d207_predictor_4x4/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_d207_predictor_4x4 sse2/;
+specialize qw/vpx_d207_predictor_4x4 neon sse2/;
add_proto qw/void vpx_d45_predictor_4x4/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d45_predictor_4x4 neon sse2/;
@@ -46,7 +50,7 @@ specialize qw/vpx_d45_predictor_4x4 neon sse2/;
add_proto qw/void vpx_d45e_predictor_4x4/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
add_proto qw/void vpx_d63_predictor_4x4/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_d63_predictor_4x4 ssse3/;
+specialize qw/vpx_d63_predictor_4x4 neon ssse3/;
add_proto qw/void vpx_d63e_predictor_4x4/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
@@ -57,12 +61,13 @@ specialize qw/vpx_h_predictor_4x4 neon dspr2 msa sse2/;
add_proto qw/void vpx_he_predictor_4x4/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
add_proto qw/void vpx_d117_predictor_4x4/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d117_predictor_4x4 neon/;
add_proto qw/void vpx_d135_predictor_4x4/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d135_predictor_4x4 neon/;
add_proto qw/void vpx_d153_predictor_4x4/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_d153_predictor_4x4 ssse3/;
+specialize qw/vpx_d153_predictor_4x4 neon ssse3/;
add_proto qw/void vpx_v_predictor_4x4/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_v_predictor_4x4 neon msa sse2/;
@@ -86,7 +91,7 @@ add_proto qw/void vpx_dc_128_predictor_4x4/, "uint8_t *dst, ptrdiff_t stride, co
specialize qw/vpx_dc_128_predictor_4x4 msa neon sse2/;
add_proto qw/void vpx_d207_predictor_8x8/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_d207_predictor_8x8 ssse3/;
+specialize qw/vpx_d207_predictor_8x8 neon ssse3/;
add_proto qw/void vpx_d45_predictor_8x8/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
# TODO(crbug.com/webm/1522): Re-enable vsx implementation.
@@ -94,19 +99,20 @@ specialize qw/vpx_d45_predictor_8x8 neon sse2/;
add_proto qw/void vpx_d63_predictor_8x8/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
# TODO(crbug.com/webm/1522): Re-enable vsx implementation.
-specialize qw/vpx_d63_predictor_8x8 ssse3/;
+specialize qw/vpx_d63_predictor_8x8 neon ssse3/;
add_proto qw/void vpx_h_predictor_8x8/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
# TODO(crbug.com/webm/1522): Re-enable vsx implementation.
specialize qw/vpx_h_predictor_8x8 neon dspr2 msa sse2/;
add_proto qw/void vpx_d117_predictor_8x8/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d117_predictor_8x8 neon/;
add_proto qw/void vpx_d135_predictor_8x8/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d135_predictor_8x8 neon/;
add_proto qw/void vpx_d153_predictor_8x8/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_d153_predictor_8x8 ssse3/;
+specialize qw/vpx_d153_predictor_8x8 neon ssse3/;
add_proto qw/void vpx_v_predictor_8x8/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_v_predictor_8x8 neon msa sse2/;
@@ -129,24 +135,25 @@ add_proto qw/void vpx_dc_128_predictor_8x8/, "uint8_t *dst, ptrdiff_t stride, co
specialize qw/vpx_dc_128_predictor_8x8 neon msa sse2/;
add_proto qw/void vpx_d207_predictor_16x16/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_d207_predictor_16x16 ssse3/;
+specialize qw/vpx_d207_predictor_16x16 neon ssse3/;
add_proto qw/void vpx_d45_predictor_16x16/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d45_predictor_16x16 neon ssse3 vsx/;
add_proto qw/void vpx_d63_predictor_16x16/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_d63_predictor_16x16 ssse3 vsx/;
+specialize qw/vpx_d63_predictor_16x16 neon ssse3 vsx/;
add_proto qw/void vpx_h_predictor_16x16/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_h_predictor_16x16 neon dspr2 msa sse2 vsx/;
add_proto qw/void vpx_d117_predictor_16x16/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d117_predictor_16x16 neon/;
add_proto qw/void vpx_d135_predictor_16x16/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d135_predictor_16x16 neon/;
add_proto qw/void vpx_d153_predictor_16x16/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_d153_predictor_16x16 ssse3/;
+specialize qw/vpx_d153_predictor_16x16 neon ssse3/;
add_proto qw/void vpx_v_predictor_16x16/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_v_predictor_16x16 neon msa sse2 vsx/;
@@ -167,24 +174,25 @@ add_proto qw/void vpx_dc_128_predictor_16x16/, "uint8_t *dst, ptrdiff_t stride,
specialize qw/vpx_dc_128_predictor_16x16 neon msa sse2 vsx/;
add_proto qw/void vpx_d207_predictor_32x32/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_d207_predictor_32x32 ssse3/;
+specialize qw/vpx_d207_predictor_32x32 neon ssse3/;
add_proto qw/void vpx_d45_predictor_32x32/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d45_predictor_32x32 neon ssse3 vsx/;
add_proto qw/void vpx_d63_predictor_32x32/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_d63_predictor_32x32 ssse3 vsx/;
+specialize qw/vpx_d63_predictor_32x32 neon ssse3 vsx/;
add_proto qw/void vpx_h_predictor_32x32/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_h_predictor_32x32 neon msa sse2 vsx/;
add_proto qw/void vpx_d117_predictor_32x32/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
+specialize qw/vpx_d117_predictor_32x32 neon/;
add_proto qw/void vpx_d135_predictor_32x32/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_d135_predictor_32x32 neon/;
add_proto qw/void vpx_d153_predictor_32x32/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_d153_predictor_32x32 ssse3/;
+specialize qw/vpx_d153_predictor_32x32 neon ssse3/;
add_proto qw/void vpx_v_predictor_32x32/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left";
specialize qw/vpx_v_predictor_32x32 neon msa sse2 vsx/;
@@ -207,25 +215,25 @@ specialize qw/vpx_dc_128_predictor_32x32 msa neon sse2 vsx/;
# High bitdepth functions
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vpx_highbd_d207_predictor_4x4/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_d207_predictor_4x4 sse2/;
+ specialize qw/vpx_highbd_d207_predictor_4x4 neon sse2/;
add_proto qw/void vpx_highbd_d45_predictor_4x4/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vpx_highbd_d45_predictor_4x4 neon ssse3/;
add_proto qw/void vpx_highbd_d63_predictor_4x4/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_d63_predictor_4x4 sse2/;
+ specialize qw/vpx_highbd_d63_predictor_4x4 neon sse2/;
add_proto qw/void vpx_highbd_h_predictor_4x4/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vpx_highbd_h_predictor_4x4 neon sse2/;
add_proto qw/void vpx_highbd_d117_predictor_4x4/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_d117_predictor_4x4 sse2/;
+ specialize qw/vpx_highbd_d117_predictor_4x4 neon sse2/;
add_proto qw/void vpx_highbd_d135_predictor_4x4/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vpx_highbd_d135_predictor_4x4 neon sse2/;
add_proto qw/void vpx_highbd_d153_predictor_4x4/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_d153_predictor_4x4 sse2/;
+ specialize qw/vpx_highbd_d153_predictor_4x4 neon sse2/;
add_proto qw/void vpx_highbd_v_predictor_4x4/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vpx_highbd_v_predictor_4x4 neon sse2/;
@@ -246,25 +254,25 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vpx_highbd_dc_128_predictor_4x4 neon sse2/;
add_proto qw/void vpx_highbd_d207_predictor_8x8/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_d207_predictor_8x8 ssse3/;
+ specialize qw/vpx_highbd_d207_predictor_8x8 neon ssse3/;
add_proto qw/void vpx_highbd_d45_predictor_8x8/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vpx_highbd_d45_predictor_8x8 neon ssse3/;
add_proto qw/void vpx_highbd_d63_predictor_8x8/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_d63_predictor_8x8 ssse3/;
+ specialize qw/vpx_highbd_d63_predictor_8x8 neon ssse3/;
add_proto qw/void vpx_highbd_h_predictor_8x8/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vpx_highbd_h_predictor_8x8 neon sse2/;
add_proto qw/void vpx_highbd_d117_predictor_8x8/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_d117_predictor_8x8 ssse3/;
+ specialize qw/vpx_highbd_d117_predictor_8x8 neon ssse3/;
add_proto qw/void vpx_highbd_d135_predictor_8x8/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vpx_highbd_d135_predictor_8x8 neon ssse3/;
add_proto qw/void vpx_highbd_d153_predictor_8x8/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_d153_predictor_8x8 ssse3/;
+ specialize qw/vpx_highbd_d153_predictor_8x8 neon ssse3/;
add_proto qw/void vpx_highbd_v_predictor_8x8/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vpx_highbd_v_predictor_8x8 neon sse2/;
@@ -285,25 +293,25 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vpx_highbd_dc_128_predictor_8x8 neon sse2/;
add_proto qw/void vpx_highbd_d207_predictor_16x16/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_d207_predictor_16x16 ssse3/;
+ specialize qw/vpx_highbd_d207_predictor_16x16 neon ssse3/;
add_proto qw/void vpx_highbd_d45_predictor_16x16/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vpx_highbd_d45_predictor_16x16 neon ssse3/;
add_proto qw/void vpx_highbd_d63_predictor_16x16/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_d63_predictor_16x16 ssse3/;
+ specialize qw/vpx_highbd_d63_predictor_16x16 neon ssse3/;
add_proto qw/void vpx_highbd_h_predictor_16x16/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vpx_highbd_h_predictor_16x16 neon sse2/;
add_proto qw/void vpx_highbd_d117_predictor_16x16/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_d117_predictor_16x16 ssse3/;
+ specialize qw/vpx_highbd_d117_predictor_16x16 neon ssse3/;
add_proto qw/void vpx_highbd_d135_predictor_16x16/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vpx_highbd_d135_predictor_16x16 neon ssse3/;
add_proto qw/void vpx_highbd_d153_predictor_16x16/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_d153_predictor_16x16 ssse3/;
+ specialize qw/vpx_highbd_d153_predictor_16x16 neon ssse3/;
add_proto qw/void vpx_highbd_v_predictor_16x16/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vpx_highbd_v_predictor_16x16 neon sse2/;
@@ -324,25 +332,25 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vpx_highbd_dc_128_predictor_16x16 neon sse2/;
add_proto qw/void vpx_highbd_d207_predictor_32x32/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_d207_predictor_32x32 ssse3/;
+ specialize qw/vpx_highbd_d207_predictor_32x32 neon ssse3/;
add_proto qw/void vpx_highbd_d45_predictor_32x32/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vpx_highbd_d45_predictor_32x32 neon ssse3/;
add_proto qw/void vpx_highbd_d63_predictor_32x32/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_d63_predictor_32x32 ssse3/;
+ specialize qw/vpx_highbd_d63_predictor_32x32 neon ssse3/;
add_proto qw/void vpx_highbd_h_predictor_32x32/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vpx_highbd_h_predictor_32x32 neon sse2/;
add_proto qw/void vpx_highbd_d117_predictor_32x32/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_d117_predictor_32x32 ssse3/;
+ specialize qw/vpx_highbd_d117_predictor_32x32 neon ssse3/;
add_proto qw/void vpx_highbd_d135_predictor_32x32/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vpx_highbd_d135_predictor_32x32 neon ssse3/;
add_proto qw/void vpx_highbd_d153_predictor_32x32/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
- specialize qw/vpx_highbd_d153_predictor_32x32 ssse3/;
+ specialize qw/vpx_highbd_d153_predictor_32x32 neon ssse3/;
add_proto qw/void vpx_highbd_v_predictor_32x32/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd";
specialize qw/vpx_highbd_v_predictor_32x32 neon sse2/;
@@ -374,22 +382,22 @@ add_proto qw/void vpx_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride,
specialize qw/vpx_convolve_avg neon dspr2 msa sse2 vsx mmi lsx/;
add_proto qw/void vpx_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
-specialize qw/vpx_convolve8 sse2 ssse3 avx2 neon dspr2 msa vsx mmi lsx/;
+specialize qw/vpx_convolve8 sse2 ssse3 avx2 neon neon_dotprod neon_i8mm dspr2 msa vsx mmi lsx/;
add_proto qw/void vpx_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
-specialize qw/vpx_convolve8_horiz sse2 ssse3 avx2 neon dspr2 msa vsx mmi lsx/;
+specialize qw/vpx_convolve8_horiz sse2 ssse3 avx2 neon neon_dotprod neon_i8mm dspr2 msa vsx mmi lsx/;
add_proto qw/void vpx_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
-specialize qw/vpx_convolve8_vert sse2 ssse3 avx2 neon dspr2 msa vsx mmi lsx/;
+specialize qw/vpx_convolve8_vert sse2 ssse3 avx2 neon neon_dotprod neon_i8mm dspr2 msa vsx mmi lsx/;
add_proto qw/void vpx_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
-specialize qw/vpx_convolve8_avg sse2 ssse3 avx2 neon dspr2 msa vsx mmi lsx/;
+specialize qw/vpx_convolve8_avg sse2 ssse3 avx2 neon neon_dotprod neon_i8mm dspr2 msa vsx mmi lsx/;
add_proto qw/void vpx_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
-specialize qw/vpx_convolve8_avg_horiz sse2 ssse3 avx2 neon dspr2 msa vsx mmi lsx/;
+specialize qw/vpx_convolve8_avg_horiz sse2 ssse3 avx2 neon neon_dotprod neon_i8mm dspr2 msa vsx mmi lsx/;
add_proto qw/void vpx_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
-specialize qw/vpx_convolve8_avg_vert sse2 ssse3 avx2 neon dspr2 msa vsx mmi lsx/;
+specialize qw/vpx_convolve8_avg_vert sse2 ssse3 avx2 neon neon_dotprod neon_i8mm dspr2 msa vsx mmi lsx/;
add_proto qw/void vpx_scaled_2d/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
specialize qw/vpx_scaled_2d ssse3 neon msa/;
@@ -589,7 +597,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vpx_fdct8x8_1 sse2 neon msa/;
add_proto qw/void vpx_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride";
- specialize qw/vpx_fdct16x16 neon sse2 msa lsx/;
+ specialize qw/vpx_fdct16x16 neon sse2 avx2 msa lsx/;
add_proto qw/void vpx_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride";
specialize qw/vpx_fdct16x16_1 sse2 neon msa/;
@@ -633,12 +641,12 @@ if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") {
specialize qw/vpx_idct8x8_64_add neon sse2 vsx/;
specialize qw/vpx_idct8x8_12_add neon sse2 ssse3/;
specialize qw/vpx_idct8x8_1_add neon sse2/;
- specialize qw/vpx_idct16x16_256_add neon sse2 vsx/;
+ specialize qw/vpx_idct16x16_256_add neon sse2 avx2 vsx/;
specialize qw/vpx_idct16x16_38_add neon sse2/;
specialize qw/vpx_idct16x16_10_add neon sse2/;
specialize qw/vpx_idct16x16_1_add neon sse2/;
- specialize qw/vpx_idct32x32_1024_add neon sse2 vsx/;
- specialize qw/vpx_idct32x32_135_add neon sse2 ssse3/;
+ specialize qw/vpx_idct32x32_1024_add neon sse2 avx2 vsx/;
+ specialize qw/vpx_idct32x32_135_add neon sse2 ssse3 avx2/;
specialize qw/vpx_idct32x32_34_add neon sse2 ssse3/;
specialize qw/vpx_idct32x32_1_add neon sse2/;
specialize qw/vpx_iwht4x4_16_add sse2 vsx/;
@@ -714,17 +722,17 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
# Quantization
#
if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {
- add_proto qw/void vpx_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ add_proto qw/void vpx_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const struct macroblock_plane *const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order";
specialize qw/vpx_quantize_b neon sse2 ssse3 avx avx2 vsx lsx/;
- add_proto qw/void vpx_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ add_proto qw/void vpx_quantize_b_32x32/, "const tran_low_t *coeff_ptr, const struct macroblock_plane *const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order";
specialize qw/vpx_quantize_b_32x32 neon ssse3 avx avx2 vsx lsx/;
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
- add_proto qw/void vpx_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ add_proto qw/void vpx_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const struct macroblock_plane *const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order";
specialize qw/vpx_highbd_quantize_b neon sse2 avx2/;
- add_proto qw/void vpx_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
+ add_proto qw/void vpx_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, const struct macroblock_plane *const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order";
specialize qw/vpx_highbd_quantize_b_32x32 neon sse2 avx2/;
} # CONFIG_VP9_HIGHBITDEPTH
} # CONFIG_VP9_ENCODER
@@ -736,32 +744,35 @@ if (vpx_config("CONFIG_ENCODERS") eq "yes") {
add_proto qw/void vpx_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride";
specialize qw/vpx_subtract_block neon msa mmi sse2 avx2 vsx lsx/;
+add_proto qw/int64_t/, "vpx_sse", "const uint8_t *a, int a_stride, const uint8_t *b,int b_stride, int width, int height";
+specialize qw/vpx_sse sse4_1 avx2 neon neon_dotprod/;
+
#
# Single block SAD
#
add_proto qw/unsigned int vpx_sad64x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad64x64 neon avx2 msa sse2 vsx mmi lsx/;
+specialize qw/vpx_sad64x64 neon neon_dotprod avx2 msa sse2 vsx mmi lsx/;
add_proto qw/unsigned int vpx_sad64x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad64x32 neon avx2 msa sse2 vsx mmi/;
+specialize qw/vpx_sad64x32 neon neon_dotprod avx2 msa sse2 vsx mmi/;
add_proto qw/unsigned int vpx_sad32x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad32x64 neon avx2 msa sse2 vsx mmi/;
+specialize qw/vpx_sad32x64 neon neon_dotprod avx2 msa sse2 vsx mmi/;
add_proto qw/unsigned int vpx_sad32x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad32x32 neon avx2 msa sse2 vsx mmi lsx/;
+specialize qw/vpx_sad32x32 neon neon_dotprod avx2 msa sse2 vsx mmi lsx/;
add_proto qw/unsigned int vpx_sad32x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad32x16 neon avx2 msa sse2 vsx mmi/;
+specialize qw/vpx_sad32x16 neon neon_dotprod avx2 msa sse2 vsx mmi/;
add_proto qw/unsigned int vpx_sad16x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad16x32 neon msa sse2 vsx mmi/;
+specialize qw/vpx_sad16x32 neon neon_dotprod msa sse2 vsx mmi/;
add_proto qw/unsigned int vpx_sad16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad16x16 neon msa sse2 vsx mmi lsx/;
+specialize qw/vpx_sad16x16 neon neon_dotprod msa sse2 vsx mmi lsx/;
add_proto qw/unsigned int vpx_sad16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
-specialize qw/vpx_sad16x8 neon msa sse2 vsx mmi/;
+specialize qw/vpx_sad16x8 neon neon_dotprod msa sse2 vsx mmi/;
add_proto qw/unsigned int vpx_sad8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
specialize qw/vpx_sad8x16 neon msa sse2 vsx mmi/;
@@ -778,6 +789,45 @@ specialize qw/vpx_sad4x8 neon msa sse2 mmi/;
add_proto qw/unsigned int vpx_sad4x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
specialize qw/vpx_sad4x4 neon msa sse2 mmi/;
+add_proto qw/unsigned int vpx_sad_skip_64x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+specialize qw/vpx_sad_skip_64x64 neon neon_dotprod avx2 sse2/;
+
+add_proto qw/unsigned int vpx_sad_skip_64x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+specialize qw/vpx_sad_skip_64x32 neon neon_dotprod avx2 sse2/;
+
+add_proto qw/unsigned int vpx_sad_skip_32x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+specialize qw/vpx_sad_skip_32x64 neon neon_dotprod avx2 sse2/;
+
+add_proto qw/unsigned int vpx_sad_skip_32x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+specialize qw/vpx_sad_skip_32x32 neon neon_dotprod avx2 sse2/;
+
+add_proto qw/unsigned int vpx_sad_skip_32x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+specialize qw/vpx_sad_skip_32x16 neon neon_dotprod avx2 sse2/;
+
+add_proto qw/unsigned int vpx_sad_skip_16x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+specialize qw/vpx_sad_skip_16x32 neon neon_dotprod sse2/;
+
+add_proto qw/unsigned int vpx_sad_skip_16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+specialize qw/vpx_sad_skip_16x16 neon neon_dotprod sse2/;
+
+add_proto qw/unsigned int vpx_sad_skip_16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+specialize qw/vpx_sad_skip_16x8 neon neon_dotprod sse2/;
+
+add_proto qw/unsigned int vpx_sad_skip_8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+specialize qw/vpx_sad_skip_8x16 neon sse2/;
+
+add_proto qw/unsigned int vpx_sad_skip_8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+specialize qw/vpx_sad_skip_8x8 neon sse2/;
+
+add_proto qw/unsigned int vpx_sad_skip_8x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+specialize qw/vpx_sad_skip_8x4 neon/;
+
+add_proto qw/unsigned int vpx_sad_skip_4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+specialize qw/vpx_sad_skip_4x8 neon sse2/;
+
+add_proto qw/unsigned int vpx_sad_skip_4x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+specialize qw/vpx_sad_skip_4x4 neon/;
+
#
# Avg
#
@@ -802,19 +852,19 @@ if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {
specialize qw/vpx_hadamard_32x32 sse2 avx2 neon/;
add_proto qw/void vpx_highbd_hadamard_8x8/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
- specialize qw/vpx_highbd_hadamard_8x8 avx2/;
+ specialize qw/vpx_highbd_hadamard_8x8 avx2 neon/;
add_proto qw/void vpx_highbd_hadamard_16x16/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
- specialize qw/vpx_highbd_hadamard_16x16 avx2/;
+ specialize qw/vpx_highbd_hadamard_16x16 avx2 neon/;
add_proto qw/void vpx_highbd_hadamard_32x32/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff";
- specialize qw/vpx_highbd_hadamard_32x32 avx2/;
+ specialize qw/vpx_highbd_hadamard_32x32 avx2 neon/;
add_proto qw/int vpx_satd/, "const tran_low_t *coeff, int length";
specialize qw/vpx_satd avx2 sse2 neon/;
add_proto qw/int vpx_highbd_satd/, "const tran_low_t *coeff, int length";
- specialize qw/vpx_highbd_satd avx2/;
+ specialize qw/vpx_highbd_satd avx2 neon/;
} else {
add_proto qw/void vpx_hadamard_8x8/, "const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff";
specialize qw/vpx_hadamard_8x8 sse2 neon msa vsx lsx/, "$ssse3_x86_64";
@@ -830,38 +880,37 @@ if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") {
}
add_proto qw/void vpx_int_pro_row/, "int16_t hbuf[16], const uint8_t *ref, const int ref_stride, const int height";
- specialize qw/vpx_int_pro_row sse2 neon msa/;
-
+ specialize qw/vpx_int_pro_row neon sse2 msa/;
add_proto qw/int16_t vpx_int_pro_col/, "const uint8_t *ref, const int width";
- specialize qw/vpx_int_pro_col sse2 neon msa/;
+ specialize qw/vpx_int_pro_col neon sse2 msa/;
add_proto qw/int vpx_vector_var/, "const int16_t *ref, const int16_t *src, const int bwl";
specialize qw/vpx_vector_var neon sse2 msa/;
} # CONFIG_VP9_ENCODER
add_proto qw/unsigned int vpx_sad64x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad64x64_avg neon avx2 msa sse2 vsx mmi lsx/;
+specialize qw/vpx_sad64x64_avg neon neon_dotprod avx2 msa sse2 vsx mmi lsx/;
add_proto qw/unsigned int vpx_sad64x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad64x32_avg neon avx2 msa sse2 vsx mmi/;
+specialize qw/vpx_sad64x32_avg neon neon_dotprod avx2 msa sse2 vsx mmi/;
add_proto qw/unsigned int vpx_sad32x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad32x64_avg neon avx2 msa sse2 vsx mmi/;
+specialize qw/vpx_sad32x64_avg neon neon_dotprod avx2 msa sse2 vsx mmi/;
add_proto qw/unsigned int vpx_sad32x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad32x32_avg neon avx2 msa sse2 vsx mmi lsx/;
+specialize qw/vpx_sad32x32_avg neon neon_dotprod avx2 msa sse2 vsx mmi lsx/;
add_proto qw/unsigned int vpx_sad32x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad32x16_avg neon avx2 msa sse2 vsx mmi/;
+specialize qw/vpx_sad32x16_avg neon neon_dotprod avx2 msa sse2 vsx mmi/;
add_proto qw/unsigned int vpx_sad16x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad16x32_avg neon msa sse2 vsx mmi/;
+specialize qw/vpx_sad16x32_avg neon neon_dotprod msa sse2 vsx mmi/;
add_proto qw/unsigned int vpx_sad16x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad16x16_avg neon msa sse2 vsx mmi/;
+specialize qw/vpx_sad16x16_avg neon neon_dotprod msa sse2 vsx mmi/;
add_proto qw/unsigned int vpx_sad16x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
-specialize qw/vpx_sad16x8_avg neon msa sse2 vsx mmi/;
+specialize qw/vpx_sad16x8_avg neon neon_dotprod msa sse2 vsx mmi/;
add_proto qw/unsigned int vpx_sad8x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
specialize qw/vpx_sad8x16_avg neon msa sse2 mmi/;
@@ -881,45 +930,84 @@ specialize qw/vpx_sad4x4_avg neon msa sse2 mmi/;
#
# Multi-block SAD, comparing a reference to N independent blocks
#
-add_proto qw/void vpx_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
-specialize qw/vpx_sad64x64x4d avx512 avx2 neon msa sse2 vsx mmi lsx/;
+add_proto qw/void vpx_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+specialize qw/vpx_sad64x64x4d avx512 avx2 neon neon_dotprod msa sse2 vsx mmi lsx/;
-add_proto qw/void vpx_sad64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
-specialize qw/vpx_sad64x32x4d neon msa sse2 vsx mmi lsx/;
+add_proto qw/void vpx_sad64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+specialize qw/vpx_sad64x32x4d neon neon_dotprod msa sse2 vsx mmi lsx/;
-add_proto qw/void vpx_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
-specialize qw/vpx_sad32x64x4d neon msa sse2 vsx mmi lsx/;
+add_proto qw/void vpx_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+specialize qw/vpx_sad32x64x4d neon neon_dotprod msa sse2 vsx mmi lsx/;
-add_proto qw/void vpx_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
-specialize qw/vpx_sad32x32x4d avx2 neon msa sse2 vsx mmi lsx/;
+add_proto qw/void vpx_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+specialize qw/vpx_sad32x32x4d avx2 neon neon_dotprod msa sse2 vsx mmi lsx/;
-add_proto qw/void vpx_sad32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
-specialize qw/vpx_sad32x16x4d neon msa sse2 vsx mmi/;
+add_proto qw/void vpx_sad32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+specialize qw/vpx_sad32x16x4d neon neon_dotprod msa sse2 vsx mmi/;
-add_proto qw/void vpx_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
-specialize qw/vpx_sad16x32x4d neon msa sse2 vsx mmi/;
+add_proto qw/void vpx_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+specialize qw/vpx_sad16x32x4d neon neon_dotprod msa sse2 vsx mmi/;
-add_proto qw/void vpx_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
-specialize qw/vpx_sad16x16x4d neon msa sse2 vsx mmi lsx/;
+add_proto qw/void vpx_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+specialize qw/vpx_sad16x16x4d neon neon_dotprod msa sse2 vsx mmi lsx/;
-add_proto qw/void vpx_sad16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
-specialize qw/vpx_sad16x8x4d neon msa sse2 vsx mmi/;
+add_proto qw/void vpx_sad16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+specialize qw/vpx_sad16x8x4d neon neon_dotprod msa sse2 vsx mmi/;
-add_proto qw/void vpx_sad8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+add_proto qw/void vpx_sad8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
specialize qw/vpx_sad8x16x4d neon msa sse2 mmi/;
-add_proto qw/void vpx_sad8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+add_proto qw/void vpx_sad8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
specialize qw/vpx_sad8x8x4d neon msa sse2 mmi lsx/;
-add_proto qw/void vpx_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+add_proto qw/void vpx_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
specialize qw/vpx_sad8x4x4d neon msa sse2 mmi/;
-add_proto qw/void vpx_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+add_proto qw/void vpx_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
specialize qw/vpx_sad4x8x4d neon msa sse2 mmi/;
-add_proto qw/void vpx_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+add_proto qw/void vpx_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
specialize qw/vpx_sad4x4x4d neon msa sse2 mmi/;
+add_proto qw/void vpx_sad_skip_64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+specialize qw/vpx_sad_skip_64x64x4d neon neon_dotprod avx2 sse2/;
+
+add_proto qw/void vpx_sad_skip_64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+specialize qw/vpx_sad_skip_64x32x4d neon neon_dotprod avx2 sse2/;
+
+add_proto qw/void vpx_sad_skip_32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+specialize qw/vpx_sad_skip_32x64x4d neon neon_dotprod avx2 sse2/;
+
+add_proto qw/void vpx_sad_skip_32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+specialize qw/vpx_sad_skip_32x32x4d neon neon_dotprod avx2 sse2/;
+
+add_proto qw/void vpx_sad_skip_32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+specialize qw/vpx_sad_skip_32x16x4d neon neon_dotprod avx2 sse2/;
+
+add_proto qw/void vpx_sad_skip_16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+specialize qw/vpx_sad_skip_16x32x4d neon neon_dotprod sse2/;
+
+add_proto qw/void vpx_sad_skip_16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+specialize qw/vpx_sad_skip_16x16x4d neon neon_dotprod sse2/;
+
+add_proto qw/void vpx_sad_skip_16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+specialize qw/vpx_sad_skip_16x8x4d neon neon_dotprod sse2/;
+
+add_proto qw/void vpx_sad_skip_8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+specialize qw/vpx_sad_skip_8x16x4d neon sse2/;
+
+add_proto qw/void vpx_sad_skip_8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+specialize qw/vpx_sad_skip_8x8x4d neon sse2/;
+
+add_proto qw/void vpx_sad_skip_8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+specialize qw/vpx_sad_skip_8x4x4d neon/;
+
+add_proto qw/void vpx_sad_skip_4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+specialize qw/vpx_sad_skip_4x8x4d neon sse2/;
+
+add_proto qw/void vpx_sad_skip_4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+specialize qw/vpx_sad_skip_4x4x4d neon/;
+
add_proto qw/uint64_t vpx_sum_squares_2d_i16/, "const int16_t *src, int stride, int size";
specialize qw/vpx_sum_squares_2d_i16 neon sse2 msa/;
@@ -941,6 +1029,9 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vpx_highbd_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src8_ptr, ptrdiff_t src_stride, const uint8_t *pred8_ptr, ptrdiff_t pred_stride, int bd";
specialize qw/vpx_highbd_subtract_block neon avx2/;
+  add_proto qw/int64_t vpx_highbd_sse/, "const uint8_t *a8, int a_stride, const uint8_t *b8, int b_stride, int width, int height";
+  specialize qw/vpx_highbd_sse sse4_1 avx2 neon/;
+
#
# Single block SAD
#
@@ -983,16 +1074,56 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/unsigned int vpx_highbd_sad4x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
specialize qw/vpx_highbd_sad4x4 neon/;
+ add_proto qw/unsigned int vpx_highbd_sad_skip_64x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+ specialize qw/vpx_highbd_sad_skip_64x64 neon sse2 avx2/;
+
+ add_proto qw/unsigned int vpx_highbd_sad_skip_64x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+ specialize qw/vpx_highbd_sad_skip_64x32 neon sse2 avx2/;
+
+ add_proto qw/unsigned int vpx_highbd_sad_skip_32x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+ specialize qw/vpx_highbd_sad_skip_32x64 neon sse2 avx2/;
+
+ add_proto qw/unsigned int vpx_highbd_sad_skip_32x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+ specialize qw/vpx_highbd_sad_skip_32x32 neon sse2 avx2/;
+
+ add_proto qw/unsigned int vpx_highbd_sad_skip_32x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+ specialize qw/vpx_highbd_sad_skip_32x16 neon sse2 avx2/;
+
+ add_proto qw/unsigned int vpx_highbd_sad_skip_16x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+ specialize qw/vpx_highbd_sad_skip_16x32 neon sse2 avx2/;
+
+ add_proto qw/unsigned int vpx_highbd_sad_skip_16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+ specialize qw/vpx_highbd_sad_skip_16x16 neon sse2 avx2/;
+
+ add_proto qw/unsigned int vpx_highbd_sad_skip_16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+ specialize qw/vpx_highbd_sad_skip_16x8 neon sse2 avx2/;
+
+ add_proto qw/unsigned int vpx_highbd_sad_skip_8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+ specialize qw/vpx_highbd_sad_skip_8x16 neon sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_sad_skip_8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+ specialize qw/vpx_highbd_sad_skip_8x8 neon sse2/;
+
+ add_proto qw/unsigned int vpx_highbd_sad_skip_8x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+ specialize qw/vpx_highbd_sad_skip_8x4 neon/;
+
+ add_proto qw/unsigned int vpx_highbd_sad_skip_4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+ specialize qw/vpx_highbd_sad_skip_4x8 neon/;
+
+ add_proto qw/unsigned int vpx_highbd_sad_skip_4x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride";
+ specialize qw/vpx_highbd_sad_skip_4x4 neon/;
+
#
# Avg
#
add_proto qw/unsigned int vpx_highbd_avg_8x8/, "const uint8_t *s8, int p";
- specialize qw/vpx_highbd_avg_8x8 sse2/;
+ specialize qw/vpx_highbd_avg_8x8 sse2 neon/;
add_proto qw/unsigned int vpx_highbd_avg_4x4/, "const uint8_t *s8, int p";
- specialize qw/vpx_highbd_avg_4x4 sse2/;
+ specialize qw/vpx_highbd_avg_4x4 sse2 neon/;
add_proto qw/void vpx_highbd_minmax_8x8/, "const uint8_t *s8, int p, const uint8_t *d8, int dp, int *min, int *max";
+ specialize qw/vpx_highbd_minmax_8x8 neon/;
add_proto qw/unsigned int vpx_highbd_sad64x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred";
specialize qw/vpx_highbd_sad64x64_avg sse2 neon avx2/;
@@ -1036,45 +1167,84 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
#
# Multi-block SAD, comparing a reference to N independent blocks
#
- add_proto qw/void vpx_highbd_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+ add_proto qw/void vpx_highbd_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
specialize qw/vpx_highbd_sad64x64x4d sse2 neon avx2/;
- add_proto qw/void vpx_highbd_sad64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+ add_proto qw/void vpx_highbd_sad64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
specialize qw/vpx_highbd_sad64x32x4d sse2 neon avx2/;
- add_proto qw/void vpx_highbd_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+ add_proto qw/void vpx_highbd_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
specialize qw/vpx_highbd_sad32x64x4d sse2 neon avx2/;
- add_proto qw/void vpx_highbd_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+ add_proto qw/void vpx_highbd_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
specialize qw/vpx_highbd_sad32x32x4d sse2 neon avx2/;
- add_proto qw/void vpx_highbd_sad32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+ add_proto qw/void vpx_highbd_sad32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
specialize qw/vpx_highbd_sad32x16x4d sse2 neon avx2/;
- add_proto qw/void vpx_highbd_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+ add_proto qw/void vpx_highbd_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
specialize qw/vpx_highbd_sad16x32x4d sse2 neon avx2/;
- add_proto qw/void vpx_highbd_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+ add_proto qw/void vpx_highbd_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
specialize qw/vpx_highbd_sad16x16x4d sse2 neon avx2/;
- add_proto qw/void vpx_highbd_sad16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+ add_proto qw/void vpx_highbd_sad16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
specialize qw/vpx_highbd_sad16x8x4d sse2 neon avx2/;
- add_proto qw/void vpx_highbd_sad8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+ add_proto qw/void vpx_highbd_sad8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
specialize qw/vpx_highbd_sad8x16x4d sse2 neon/;
- add_proto qw/void vpx_highbd_sad8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+ add_proto qw/void vpx_highbd_sad8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
specialize qw/vpx_highbd_sad8x8x4d sse2 neon/;
- add_proto qw/void vpx_highbd_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+ add_proto qw/void vpx_highbd_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
specialize qw/vpx_highbd_sad8x4x4d sse2 neon/;
- add_proto qw/void vpx_highbd_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+ add_proto qw/void vpx_highbd_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
specialize qw/vpx_highbd_sad4x8x4d sse2 neon/;
- add_proto qw/void vpx_highbd_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+ add_proto qw/void vpx_highbd_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
specialize qw/vpx_highbd_sad4x4x4d sse2 neon/;
+ add_proto qw/void vpx_highbd_sad_skip_64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+ specialize qw/vpx_highbd_sad_skip_64x64x4d neon sse2 avx2/;
+
+ add_proto qw/void vpx_highbd_sad_skip_64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+ specialize qw/vpx_highbd_sad_skip_64x32x4d neon sse2 avx2/;
+
+ add_proto qw/void vpx_highbd_sad_skip_32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+ specialize qw/vpx_highbd_sad_skip_32x64x4d neon sse2 avx2/;
+
+ add_proto qw/void vpx_highbd_sad_skip_32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+ specialize qw/vpx_highbd_sad_skip_32x32x4d neon sse2 avx2/;
+
+ add_proto qw/void vpx_highbd_sad_skip_32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+ specialize qw/vpx_highbd_sad_skip_32x16x4d neon sse2 avx2/;
+
+ add_proto qw/void vpx_highbd_sad_skip_16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+ specialize qw/vpx_highbd_sad_skip_16x32x4d neon sse2 avx2/;
+
+ add_proto qw/void vpx_highbd_sad_skip_16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+ specialize qw/vpx_highbd_sad_skip_16x16x4d neon sse2 avx2/;
+
+ add_proto qw/void vpx_highbd_sad_skip_16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+ specialize qw/vpx_highbd_sad_skip_16x8x4d neon sse2 avx2/;
+
+ add_proto qw/void vpx_highbd_sad_skip_8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+ specialize qw/vpx_highbd_sad_skip_8x16x4d neon sse2/;
+
+ add_proto qw/void vpx_highbd_sad_skip_8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+ specialize qw/vpx_highbd_sad_skip_8x8x4d neon sse2/;
+
+ add_proto qw/void vpx_highbd_sad_skip_8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+ specialize qw/vpx_highbd_sad_skip_8x4x4d neon/;
+
+ add_proto qw/void vpx_highbd_sad_skip_4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+ specialize qw/vpx_highbd_sad_skip_4x8x4d neon sse2/;
+
+ add_proto qw/void vpx_highbd_sad_skip_4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]";
+ specialize qw/vpx_highbd_sad_skip_4x4x4d neon/;
+
#
# Structured Similarity (SSIM)
#
@@ -1090,73 +1260,73 @@ if (vpx_config("CONFIG_ENCODERS") eq "yes" || vpx_config("CONFIG_POSTPROC") eq "
# Variance
#
add_proto qw/unsigned int vpx_variance64x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance64x64 sse2 avx2 neon msa mmi vsx lsx/;
+ specialize qw/vpx_variance64x64 sse2 avx2 neon neon_dotprod msa mmi vsx lsx/;
add_proto qw/unsigned int vpx_variance64x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance64x32 sse2 avx2 neon msa mmi vsx/;
+ specialize qw/vpx_variance64x32 sse2 avx2 neon neon_dotprod msa mmi vsx/;
add_proto qw/unsigned int vpx_variance32x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance32x64 sse2 avx2 neon msa mmi vsx/;
+ specialize qw/vpx_variance32x64 sse2 avx2 neon neon_dotprod msa mmi vsx/;
add_proto qw/unsigned int vpx_variance32x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance32x32 sse2 avx2 neon msa mmi vsx lsx/;
+ specialize qw/vpx_variance32x32 sse2 avx2 neon neon_dotprod msa mmi vsx lsx/;
add_proto qw/unsigned int vpx_variance32x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance32x16 sse2 avx2 neon msa mmi vsx/;
+ specialize qw/vpx_variance32x16 sse2 avx2 neon neon_dotprod msa mmi vsx/;
add_proto qw/unsigned int vpx_variance16x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance16x32 sse2 avx2 neon msa mmi vsx/;
+ specialize qw/vpx_variance16x32 sse2 avx2 neon neon_dotprod msa mmi vsx/;
add_proto qw/unsigned int vpx_variance16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance16x16 sse2 avx2 neon msa mmi vsx lsx/;
+ specialize qw/vpx_variance16x16 sse2 avx2 neon neon_dotprod msa mmi vsx lsx/;
add_proto qw/unsigned int vpx_variance16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance16x8 sse2 avx2 neon msa mmi vsx/;
+ specialize qw/vpx_variance16x8 sse2 avx2 neon neon_dotprod msa mmi vsx/;
add_proto qw/unsigned int vpx_variance8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance8x16 sse2 neon msa mmi vsx/;
+ specialize qw/vpx_variance8x16 sse2 avx2 neon neon_dotprod msa mmi vsx/;
add_proto qw/unsigned int vpx_variance8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance8x8 sse2 neon msa mmi vsx lsx/;
+ specialize qw/vpx_variance8x8 sse2 avx2 neon neon_dotprod msa mmi vsx lsx/;
add_proto qw/unsigned int vpx_variance8x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance8x4 sse2 neon msa mmi vsx/;
+ specialize qw/vpx_variance8x4 sse2 avx2 neon neon_dotprod msa mmi vsx/;
add_proto qw/unsigned int vpx_variance4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance4x8 sse2 neon msa mmi vsx/;
+ specialize qw/vpx_variance4x8 sse2 neon neon_dotprod msa mmi vsx/;
add_proto qw/unsigned int vpx_variance4x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_variance4x4 sse2 neon msa mmi vsx/;
+ specialize qw/vpx_variance4x4 sse2 neon neon_dotprod msa mmi vsx/;
#
# Specialty Variance
#
add_proto qw/void vpx_get16x16var/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- specialize qw/vpx_get16x16var sse2 avx2 neon msa vsx lsx/;
+ specialize qw/vpx_get16x16var sse2 avx2 neon neon_dotprod msa vsx lsx/;
add_proto qw/void vpx_get8x8var/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum";
- specialize qw/vpx_get8x8var sse2 neon msa vsx/;
+ specialize qw/vpx_get8x8var sse2 neon neon_dotprod msa vsx/;
add_proto qw/unsigned int vpx_mse16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_mse16x16 sse2 avx2 neon msa mmi vsx lsx/;
+ specialize qw/vpx_mse16x16 sse2 avx2 neon neon_dotprod msa mmi vsx lsx/;
add_proto qw/unsigned int vpx_mse16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_mse16x8 sse2 avx2 msa mmi vsx/;
+ specialize qw/vpx_mse16x8 sse2 avx2 neon neon_dotprod msa mmi vsx/;
add_proto qw/unsigned int vpx_mse8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_mse8x16 sse2 msa mmi vsx/;
+ specialize qw/vpx_mse8x16 sse2 neon neon_dotprod msa mmi vsx/;
add_proto qw/unsigned int vpx_mse8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse";
- specialize qw/vpx_mse8x8 sse2 msa mmi vsx/;
+ specialize qw/vpx_mse8x8 sse2 neon neon_dotprod msa mmi vsx/;
add_proto qw/unsigned int vpx_get_mb_ss/, "const int16_t *";
specialize qw/vpx_get_mb_ss sse2 msa vsx/;
add_proto qw/unsigned int vpx_get4x4sse_cs/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride";
- specialize qw/vpx_get4x4sse_cs neon msa vsx/;
+ specialize qw/vpx_get4x4sse_cs neon neon_dotprod msa vsx/;
add_proto qw/void vpx_comp_avg_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride";
- specialize qw/vpx_comp_avg_pred neon sse2 vsx lsx/;
+ specialize qw/vpx_comp_avg_pred neon sse2 avx2 vsx lsx/;
#
# Subpixel Variance