diff options
Diffstat (limited to 'vpx_dsp/vpx_dsp_rtcd_defs.pl')
-rw-r--r-- | vpx_dsp/vpx_dsp_rtcd_defs.pl | 414 |
1 files changed, 292 insertions, 122 deletions
diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.pl b/vpx_dsp/vpx_dsp_rtcd_defs.pl index 8725821b6..e9d63f6ef 100644 --- a/vpx_dsp/vpx_dsp_rtcd_defs.pl +++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl @@ -17,6 +17,10 @@ print <<EOF #include "vpx/vpx_integer.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_dsp/vpx_filter.h" +#if CONFIG_VP9_ENCODER + struct macroblock_plane; + struct ScanOrder; +#endif EOF } @@ -38,7 +42,7 @@ if ($opts{arch} eq "x86_64") { # add_proto qw/void vpx_d207_predictor_4x4/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_d207_predictor_4x4 sse2/; +specialize qw/vpx_d207_predictor_4x4 neon sse2/; add_proto qw/void vpx_d45_predictor_4x4/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_d45_predictor_4x4 neon sse2/; @@ -46,7 +50,7 @@ specialize qw/vpx_d45_predictor_4x4 neon sse2/; add_proto qw/void vpx_d45e_predictor_4x4/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; add_proto qw/void vpx_d63_predictor_4x4/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_d63_predictor_4x4 ssse3/; +specialize qw/vpx_d63_predictor_4x4 neon ssse3/; add_proto qw/void vpx_d63e_predictor_4x4/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; @@ -57,12 +61,13 @@ specialize qw/vpx_h_predictor_4x4 neon dspr2 msa sse2/; add_proto qw/void vpx_he_predictor_4x4/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; add_proto qw/void vpx_d117_predictor_4x4/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d117_predictor_4x4 neon/; add_proto qw/void vpx_d135_predictor_4x4/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_d135_predictor_4x4 neon/; add_proto qw/void vpx_d153_predictor_4x4/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_d153_predictor_4x4 ssse3/; +specialize qw/vpx_d153_predictor_4x4 neon ssse3/; add_proto qw/void vpx_v_predictor_4x4/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_v_predictor_4x4 neon msa sse2/; @@ -86,7 +91,7 @@ add_proto qw/void vpx_dc_128_predictor_4x4/, "uint8_t *dst, ptrdiff_t stride, co specialize qw/vpx_dc_128_predictor_4x4 msa neon sse2/; add_proto qw/void vpx_d207_predictor_8x8/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_d207_predictor_8x8 ssse3/; +specialize qw/vpx_d207_predictor_8x8 neon ssse3/; add_proto qw/void vpx_d45_predictor_8x8/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; # TODO(crbug.com/webm/1522): Re-enable vsx implementation. @@ -94,19 +99,20 @@ specialize qw/vpx_d45_predictor_8x8 neon sse2/; add_proto qw/void vpx_d63_predictor_8x8/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; # TODO(crbug.com/webm/1522): Re-enable vsx implementation. -specialize qw/vpx_d63_predictor_8x8 ssse3/; +specialize qw/vpx_d63_predictor_8x8 neon ssse3/; add_proto qw/void vpx_h_predictor_8x8/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; # TODO(crbug.com/webm/1522): Re-enable vsx implementation. specialize qw/vpx_h_predictor_8x8 neon dspr2 msa sse2/; add_proto qw/void vpx_d117_predictor_8x8/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d117_predictor_8x8 neon/; add_proto qw/void vpx_d135_predictor_8x8/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_d135_predictor_8x8 neon/; add_proto qw/void vpx_d153_predictor_8x8/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_d153_predictor_8x8 ssse3/; +specialize qw/vpx_d153_predictor_8x8 neon ssse3/; add_proto qw/void vpx_v_predictor_8x8/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_v_predictor_8x8 neon msa sse2/; @@ -129,24 +135,25 @@ add_proto qw/void vpx_dc_128_predictor_8x8/, "uint8_t *dst, ptrdiff_t stride, co specialize qw/vpx_dc_128_predictor_8x8 neon msa sse2/; add_proto qw/void vpx_d207_predictor_16x16/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_d207_predictor_16x16 ssse3/; +specialize qw/vpx_d207_predictor_16x16 neon ssse3/; add_proto qw/void vpx_d45_predictor_16x16/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_d45_predictor_16x16 neon ssse3 vsx/; add_proto qw/void vpx_d63_predictor_16x16/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_d63_predictor_16x16 ssse3 vsx/; +specialize qw/vpx_d63_predictor_16x16 neon ssse3 vsx/; add_proto qw/void vpx_h_predictor_16x16/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_h_predictor_16x16 neon dspr2 msa sse2 vsx/; add_proto qw/void vpx_d117_predictor_16x16/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d117_predictor_16x16 neon/; add_proto qw/void vpx_d135_predictor_16x16/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_d135_predictor_16x16 neon/; add_proto qw/void vpx_d153_predictor_16x16/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_d153_predictor_16x16 ssse3/; +specialize qw/vpx_d153_predictor_16x16 neon ssse3/; add_proto qw/void vpx_v_predictor_16x16/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_v_predictor_16x16 neon msa sse2 vsx/; @@ -167,24 +174,25 @@ add_proto qw/void vpx_dc_128_predictor_16x16/, "uint8_t *dst, ptrdiff_t stride, specialize qw/vpx_dc_128_predictor_16x16 neon msa sse2 vsx/; add_proto qw/void vpx_d207_predictor_32x32/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_d207_predictor_32x32 ssse3/; +specialize qw/vpx_d207_predictor_32x32 neon ssse3/; add_proto qw/void vpx_d45_predictor_32x32/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_d45_predictor_32x32 neon ssse3 vsx/; add_proto qw/void vpx_d63_predictor_32x32/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_d63_predictor_32x32 ssse3 vsx/; +specialize qw/vpx_d63_predictor_32x32 neon ssse3 vsx/; add_proto qw/void vpx_h_predictor_32x32/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_h_predictor_32x32 neon msa sse2 vsx/; add_proto qw/void vpx_d117_predictor_32x32/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; +specialize qw/vpx_d117_predictor_32x32 neon/; add_proto qw/void vpx_d135_predictor_32x32/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_d135_predictor_32x32 neon/; add_proto qw/void vpx_d153_predictor_32x32/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; -specialize qw/vpx_d153_predictor_32x32 ssse3/; +specialize qw/vpx_d153_predictor_32x32 neon ssse3/; add_proto qw/void vpx_v_predictor_32x32/, "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left"; specialize qw/vpx_v_predictor_32x32 neon msa sse2 vsx/; @@ -207,25 +215,25 @@ specialize qw/vpx_dc_128_predictor_32x32 msa neon sse2 vsx/; # High bitdepth functions if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/void vpx_highbd_d207_predictor_4x4/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d207_predictor_4x4 sse2/; + specialize qw/vpx_highbd_d207_predictor_4x4 neon sse2/; add_proto qw/void vpx_highbd_d45_predictor_4x4/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd"; specialize qw/vpx_highbd_d45_predictor_4x4 neon ssse3/; add_proto qw/void vpx_highbd_d63_predictor_4x4/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d63_predictor_4x4 sse2/; + specialize qw/vpx_highbd_d63_predictor_4x4 neon sse2/; add_proto qw/void vpx_highbd_h_predictor_4x4/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd"; specialize qw/vpx_highbd_h_predictor_4x4 neon sse2/; add_proto qw/void vpx_highbd_d117_predictor_4x4/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d117_predictor_4x4 sse2/; + specialize qw/vpx_highbd_d117_predictor_4x4 neon sse2/; add_proto qw/void vpx_highbd_d135_predictor_4x4/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd"; specialize qw/vpx_highbd_d135_predictor_4x4 neon sse2/; add_proto qw/void vpx_highbd_d153_predictor_4x4/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d153_predictor_4x4 sse2/; + specialize qw/vpx_highbd_d153_predictor_4x4 neon sse2/; add_proto qw/void vpx_highbd_v_predictor_4x4/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd"; specialize qw/vpx_highbd_v_predictor_4x4 neon sse2/; @@ -246,25 +254,25 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vpx_highbd_dc_128_predictor_4x4 neon sse2/; add_proto qw/void vpx_highbd_d207_predictor_8x8/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d207_predictor_8x8 ssse3/; + specialize qw/vpx_highbd_d207_predictor_8x8 neon ssse3/; add_proto qw/void vpx_highbd_d45_predictor_8x8/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd"; specialize qw/vpx_highbd_d45_predictor_8x8 neon ssse3/; add_proto qw/void vpx_highbd_d63_predictor_8x8/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d63_predictor_8x8 ssse3/; + specialize qw/vpx_highbd_d63_predictor_8x8 neon ssse3/; add_proto qw/void vpx_highbd_h_predictor_8x8/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd"; specialize qw/vpx_highbd_h_predictor_8x8 neon sse2/; add_proto qw/void vpx_highbd_d117_predictor_8x8/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d117_predictor_8x8 ssse3/; + specialize qw/vpx_highbd_d117_predictor_8x8 neon ssse3/; add_proto qw/void vpx_highbd_d135_predictor_8x8/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd"; specialize qw/vpx_highbd_d135_predictor_8x8 neon ssse3/; add_proto qw/void vpx_highbd_d153_predictor_8x8/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d153_predictor_8x8 ssse3/; + specialize qw/vpx_highbd_d153_predictor_8x8 neon ssse3/; add_proto qw/void vpx_highbd_v_predictor_8x8/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd"; specialize qw/vpx_highbd_v_predictor_8x8 neon sse2/; @@ -285,25 +293,25 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vpx_highbd_dc_128_predictor_8x8 neon sse2/; add_proto qw/void vpx_highbd_d207_predictor_16x16/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d207_predictor_16x16 ssse3/; + specialize qw/vpx_highbd_d207_predictor_16x16 neon ssse3/; add_proto qw/void vpx_highbd_d45_predictor_16x16/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd"; specialize qw/vpx_highbd_d45_predictor_16x16 neon ssse3/; add_proto qw/void vpx_highbd_d63_predictor_16x16/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d63_predictor_16x16 ssse3/; + specialize qw/vpx_highbd_d63_predictor_16x16 neon ssse3/; add_proto qw/void vpx_highbd_h_predictor_16x16/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd"; specialize qw/vpx_highbd_h_predictor_16x16 neon sse2/; add_proto qw/void vpx_highbd_d117_predictor_16x16/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d117_predictor_16x16 ssse3/; + specialize qw/vpx_highbd_d117_predictor_16x16 neon ssse3/; add_proto qw/void vpx_highbd_d135_predictor_16x16/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd"; specialize qw/vpx_highbd_d135_predictor_16x16 neon ssse3/; add_proto qw/void vpx_highbd_d153_predictor_16x16/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d153_predictor_16x16 ssse3/; + specialize qw/vpx_highbd_d153_predictor_16x16 neon ssse3/; add_proto qw/void vpx_highbd_v_predictor_16x16/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd"; specialize qw/vpx_highbd_v_predictor_16x16 neon sse2/; @@ -324,25 +332,25 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vpx_highbd_dc_128_predictor_16x16 neon sse2/; add_proto qw/void vpx_highbd_d207_predictor_32x32/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d207_predictor_32x32 ssse3/; + specialize qw/vpx_highbd_d207_predictor_32x32 neon ssse3/; add_proto qw/void vpx_highbd_d45_predictor_32x32/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd"; specialize qw/vpx_highbd_d45_predictor_32x32 neon ssse3/; add_proto qw/void vpx_highbd_d63_predictor_32x32/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d63_predictor_32x32 ssse3/; + specialize qw/vpx_highbd_d63_predictor_32x32 neon ssse3/; add_proto qw/void vpx_highbd_h_predictor_32x32/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd"; specialize qw/vpx_highbd_h_predictor_32x32 neon sse2/; add_proto qw/void vpx_highbd_d117_predictor_32x32/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d117_predictor_32x32 ssse3/; + specialize qw/vpx_highbd_d117_predictor_32x32 neon ssse3/; add_proto qw/void vpx_highbd_d135_predictor_32x32/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd"; specialize qw/vpx_highbd_d135_predictor_32x32 neon ssse3/; add_proto qw/void vpx_highbd_d153_predictor_32x32/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd"; - specialize qw/vpx_highbd_d153_predictor_32x32 ssse3/; + specialize qw/vpx_highbd_d153_predictor_32x32 neon ssse3/; add_proto qw/void vpx_highbd_v_predictor_32x32/, "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd"; specialize qw/vpx_highbd_v_predictor_32x32 neon sse2/; @@ -374,22 +382,22 @@ add_proto qw/void vpx_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, specialize qw/vpx_convolve_avg neon dspr2 msa sse2 vsx mmi lsx/; add_proto qw/void vpx_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h"; -specialize qw/vpx_convolve8 sse2 ssse3 avx2 neon dspr2 msa vsx mmi lsx/; +specialize qw/vpx_convolve8 sse2 ssse3 avx2 neon neon_dotprod neon_i8mm dspr2 msa vsx mmi lsx/; add_proto qw/void vpx_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h"; -specialize qw/vpx_convolve8_horiz sse2 ssse3 avx2 neon dspr2 msa vsx mmi lsx/; +specialize qw/vpx_convolve8_horiz sse2 ssse3 avx2 neon neon_dotprod neon_i8mm dspr2 msa vsx mmi lsx/; add_proto qw/void vpx_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h"; -specialize qw/vpx_convolve8_vert sse2 ssse3 avx2 neon dspr2 msa vsx mmi lsx/; +specialize qw/vpx_convolve8_vert sse2 ssse3 avx2 neon neon_dotprod neon_i8mm dspr2 msa vsx mmi lsx/; add_proto qw/void vpx_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h"; -specialize qw/vpx_convolve8_avg sse2 ssse3 avx2 neon dspr2 msa vsx mmi lsx/; +specialize qw/vpx_convolve8_avg sse2 ssse3 avx2 neon neon_dotprod neon_i8mm dspr2 msa vsx mmi lsx/; add_proto qw/void vpx_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h"; -specialize qw/vpx_convolve8_avg_horiz sse2 ssse3 avx2 neon dspr2 msa vsx mmi lsx/; +specialize qw/vpx_convolve8_avg_horiz sse2 ssse3 avx2 neon neon_dotprod neon_i8mm dspr2 msa vsx mmi lsx/; add_proto qw/void vpx_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h"; -specialize qw/vpx_convolve8_avg_vert sse2 ssse3 avx2 neon dspr2 msa vsx mmi lsx/; +specialize qw/vpx_convolve8_avg_vert sse2 ssse3 avx2 neon neon_dotprod neon_i8mm dspr2 msa vsx mmi lsx/; add_proto qw/void vpx_scaled_2d/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h"; specialize qw/vpx_scaled_2d ssse3 neon msa/; @@ -589,7 +597,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { specialize qw/vpx_fdct8x8_1 sse2 neon msa/; add_proto qw/void vpx_fdct16x16/, "const int16_t *input, tran_low_t *output, int stride"; - specialize qw/vpx_fdct16x16 neon sse2 msa lsx/; + specialize qw/vpx_fdct16x16 neon sse2 avx2 msa lsx/; add_proto qw/void vpx_fdct16x16_1/, "const int16_t *input, tran_low_t *output, int stride"; specialize qw/vpx_fdct16x16_1 sse2 neon msa/; @@ -633,12 +641,12 @@ if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") { specialize qw/vpx_idct8x8_64_add neon sse2 vsx/; specialize qw/vpx_idct8x8_12_add neon sse2 ssse3/; specialize qw/vpx_idct8x8_1_add neon sse2/; - specialize qw/vpx_idct16x16_256_add neon sse2 vsx/; + specialize qw/vpx_idct16x16_256_add neon sse2 avx2 vsx/; specialize qw/vpx_idct16x16_38_add neon sse2/; specialize qw/vpx_idct16x16_10_add neon sse2/; specialize qw/vpx_idct16x16_1_add neon sse2/; - specialize qw/vpx_idct32x32_1024_add neon sse2 vsx/; - specialize qw/vpx_idct32x32_135_add neon sse2 ssse3/; + specialize qw/vpx_idct32x32_1024_add neon sse2 avx2 vsx/; + specialize qw/vpx_idct32x32_135_add neon sse2 ssse3 avx2/; specialize qw/vpx_idct32x32_34_add neon sse2 ssse3/; specialize qw/vpx_idct32x32_1_add neon sse2/; specialize qw/vpx_iwht4x4_16_add sse2 vsx/; @@ -714,17 +722,17 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { # Quantization # if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") { - add_proto qw/void vpx_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + add_proto qw/void vpx_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const struct macroblock_plane *const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order"; specialize qw/vpx_quantize_b neon sse2 ssse3 avx avx2 vsx lsx/; - add_proto qw/void vpx_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + add_proto qw/void vpx_quantize_b_32x32/, "const tran_low_t *coeff_ptr, const struct macroblock_plane *const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order"; specialize qw/vpx_quantize_b_32x32 neon ssse3 avx avx2 vsx lsx/; if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { - add_proto qw/void vpx_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + add_proto qw/void vpx_highbd_quantize_b/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const struct macroblock_plane *const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order"; specialize qw/vpx_highbd_quantize_b neon sse2 avx2/; - add_proto qw/void vpx_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan"; + add_proto qw/void vpx_highbd_quantize_b_32x32/, "const tran_low_t *coeff_ptr, const struct macroblock_plane *const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order"; specialize qw/vpx_highbd_quantize_b_32x32 neon sse2 avx2/; } # CONFIG_VP9_HIGHBITDEPTH } # CONFIG_VP9_ENCODER @@ -736,32 +744,35 @@ if (vpx_config("CONFIG_ENCODERS") eq "yes") { add_proto qw/void vpx_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride"; specialize qw/vpx_subtract_block neon msa mmi sse2 avx2 vsx lsx/; +add_proto qw/int64_t/, "vpx_sse", "const uint8_t *a, int a_stride, const uint8_t *b,int b_stride, int width, int height"; +specialize qw/vpx_sse sse4_1 avx2 neon neon_dotprod/; + # # Single block SAD # add_proto qw/unsigned int vpx_sad64x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vpx_sad64x64 neon avx2 msa sse2 vsx mmi lsx/; +specialize qw/vpx_sad64x64 neon neon_dotprod avx2 msa sse2 vsx mmi lsx/; add_proto qw/unsigned int vpx_sad64x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vpx_sad64x32 neon avx2 msa sse2 vsx mmi/; +specialize qw/vpx_sad64x32 neon neon_dotprod avx2 msa sse2 vsx mmi/; add_proto qw/unsigned int vpx_sad32x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vpx_sad32x64 neon avx2 msa sse2 vsx mmi/; +specialize qw/vpx_sad32x64 neon neon_dotprod avx2 msa sse2 vsx mmi/; add_proto qw/unsigned int vpx_sad32x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vpx_sad32x32 neon avx2 msa sse2 vsx mmi lsx/; +specialize qw/vpx_sad32x32 neon neon_dotprod avx2 msa sse2 vsx mmi lsx/; add_proto qw/unsigned int vpx_sad32x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vpx_sad32x16 neon avx2 msa sse2 vsx mmi/; +specialize qw/vpx_sad32x16 neon neon_dotprod avx2 msa sse2 vsx mmi/; add_proto qw/unsigned int vpx_sad16x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vpx_sad16x32 neon msa sse2 vsx mmi/; +specialize qw/vpx_sad16x32 neon neon_dotprod msa sse2 vsx mmi/; add_proto qw/unsigned int vpx_sad16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vpx_sad16x16 neon msa sse2 vsx mmi lsx/; +specialize qw/vpx_sad16x16 neon neon_dotprod msa sse2 vsx mmi lsx/; add_proto qw/unsigned int vpx_sad16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; -specialize qw/vpx_sad16x8 neon msa sse2 vsx mmi/; +specialize qw/vpx_sad16x8 neon neon_dotprod msa sse2 vsx mmi/; add_proto qw/unsigned int vpx_sad8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; specialize qw/vpx_sad8x16 neon msa sse2 vsx mmi/; @@ -778,6 +789,45 @@ specialize qw/vpx_sad4x8 neon msa sse2 mmi/; add_proto qw/unsigned int vpx_sad4x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; specialize qw/vpx_sad4x4 neon msa sse2 mmi/; +add_proto qw/unsigned int vpx_sad_skip_64x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; +specialize qw/vpx_sad_skip_64x64 neon neon_dotprod avx2 sse2/; + +add_proto qw/unsigned int vpx_sad_skip_64x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; +specialize qw/vpx_sad_skip_64x32 neon neon_dotprod avx2 sse2/; + +add_proto qw/unsigned int vpx_sad_skip_32x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; +specialize qw/vpx_sad_skip_32x64 neon neon_dotprod avx2 sse2/; + +add_proto qw/unsigned int vpx_sad_skip_32x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; +specialize qw/vpx_sad_skip_32x32 neon neon_dotprod avx2 sse2/; + +add_proto qw/unsigned int vpx_sad_skip_32x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; +specialize qw/vpx_sad_skip_32x16 neon neon_dotprod avx2 sse2/; + +add_proto qw/unsigned int vpx_sad_skip_16x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; +specialize qw/vpx_sad_skip_16x32 neon neon_dotprod sse2/; + +add_proto qw/unsigned int vpx_sad_skip_16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; +specialize qw/vpx_sad_skip_16x16 neon neon_dotprod sse2/; + +add_proto qw/unsigned int vpx_sad_skip_16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; +specialize qw/vpx_sad_skip_16x8 neon neon_dotprod sse2/; + +add_proto qw/unsigned int vpx_sad_skip_8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; +specialize qw/vpx_sad_skip_8x16 neon sse2/; + +add_proto qw/unsigned int vpx_sad_skip_8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; +specialize qw/vpx_sad_skip_8x8 neon sse2/; + +add_proto qw/unsigned int vpx_sad_skip_8x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; +specialize qw/vpx_sad_skip_8x4 neon/; + +add_proto qw/unsigned int vpx_sad_skip_4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; +specialize qw/vpx_sad_skip_4x8 neon sse2/; + +add_proto qw/unsigned int vpx_sad_skip_4x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; +specialize qw/vpx_sad_skip_4x4 neon/; + # # Avg # @@ -802,19 +852,19 @@ if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") { specialize qw/vpx_hadamard_32x32 sse2 avx2 neon/; add_proto qw/void vpx_highbd_hadamard_8x8/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff"; - specialize qw/vpx_highbd_hadamard_8x8 avx2/; + specialize qw/vpx_highbd_hadamard_8x8 avx2 neon/; add_proto qw/void vpx_highbd_hadamard_16x16/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff"; - specialize qw/vpx_highbd_hadamard_16x16 avx2/; + specialize qw/vpx_highbd_hadamard_16x16 avx2 neon/; add_proto qw/void vpx_highbd_hadamard_32x32/, "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff"; - specialize qw/vpx_highbd_hadamard_32x32 avx2/; + specialize qw/vpx_highbd_hadamard_32x32 avx2 neon/; add_proto qw/int vpx_satd/, "const tran_low_t *coeff, int length"; specialize qw/vpx_satd avx2 sse2 neon/; add_proto qw/int vpx_highbd_satd/, "const tran_low_t *coeff, int length"; - specialize qw/vpx_highbd_satd avx2/; + specialize qw/vpx_highbd_satd avx2 neon/; } else { add_proto qw/void vpx_hadamard_8x8/, "const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff"; specialize qw/vpx_hadamard_8x8 sse2 neon msa vsx lsx/, "$ssse3_x86_64"; @@ -830,38 +880,37 @@ if (vpx_config("CONFIG_VP9_ENCODER") eq "yes") { } add_proto qw/void vpx_int_pro_row/, "int16_t hbuf[16], const uint8_t *ref, const int ref_stride, const int height"; - specialize qw/vpx_int_pro_row sse2 neon msa/; - + specialize qw/vpx_int_pro_row neon sse2 msa/; add_proto qw/int16_t vpx_int_pro_col/, "const uint8_t *ref, const int width"; - specialize qw/vpx_int_pro_col sse2 neon msa/; + specialize qw/vpx_int_pro_col neon sse2 msa/; add_proto qw/int vpx_vector_var/, "const int16_t *ref, const int16_t *src, const int bwl"; specialize qw/vpx_vector_var neon sse2 msa/; } # CONFIG_VP9_ENCODER add_proto qw/unsigned int vpx_sad64x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; -specialize qw/vpx_sad64x64_avg neon avx2 msa sse2 vsx mmi lsx/; +specialize qw/vpx_sad64x64_avg neon neon_dotprod avx2 msa sse2 vsx mmi lsx/; add_proto qw/unsigned int vpx_sad64x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; -specialize qw/vpx_sad64x32_avg neon avx2 msa sse2 vsx mmi/; +specialize qw/vpx_sad64x32_avg neon neon_dotprod avx2 msa sse2 vsx mmi/; add_proto qw/unsigned int vpx_sad32x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; -specialize qw/vpx_sad32x64_avg neon avx2 msa sse2 vsx mmi/; +specialize qw/vpx_sad32x64_avg neon neon_dotprod avx2 msa sse2 vsx mmi/; add_proto qw/unsigned int vpx_sad32x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; -specialize qw/vpx_sad32x32_avg neon avx2 msa sse2 vsx mmi lsx/; +specialize qw/vpx_sad32x32_avg neon neon_dotprod avx2 msa sse2 vsx mmi lsx/; add_proto qw/unsigned int vpx_sad32x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; -specialize qw/vpx_sad32x16_avg neon avx2 msa sse2 vsx mmi/; +specialize qw/vpx_sad32x16_avg neon neon_dotprod avx2 msa sse2 vsx mmi/; add_proto qw/unsigned int vpx_sad16x32_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; -specialize qw/vpx_sad16x32_avg neon msa sse2 vsx mmi/; +specialize qw/vpx_sad16x32_avg neon neon_dotprod msa sse2 vsx mmi/; add_proto qw/unsigned int vpx_sad16x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; -specialize qw/vpx_sad16x16_avg neon msa sse2 vsx mmi/; +specialize qw/vpx_sad16x16_avg neon neon_dotprod msa sse2 vsx mmi/; add_proto qw/unsigned int vpx_sad16x8_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; -specialize qw/vpx_sad16x8_avg neon msa sse2 vsx mmi/; +specialize qw/vpx_sad16x8_avg neon neon_dotprod msa sse2 vsx mmi/; add_proto qw/unsigned int vpx_sad8x16_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; specialize qw/vpx_sad8x16_avg neon msa sse2 mmi/; @@ -881,45 +930,84 @@ specialize qw/vpx_sad4x4_avg neon msa sse2 mmi/; # # Multi-block SAD, comparing a reference to N independent blocks # -add_proto qw/void vpx_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]"; -specialize qw/vpx_sad64x64x4d avx512 avx2 neon msa sse2 vsx mmi lsx/; +add_proto qw/void vpx_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; +specialize qw/vpx_sad64x64x4d avx512 avx2 neon neon_dotprod msa sse2 vsx mmi lsx/; -add_proto qw/void vpx_sad64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]"; -specialize qw/vpx_sad64x32x4d neon msa sse2 vsx mmi lsx/; +add_proto qw/void vpx_sad64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; +specialize qw/vpx_sad64x32x4d neon neon_dotprod msa sse2 vsx mmi lsx/; -add_proto qw/void vpx_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]"; -specialize qw/vpx_sad32x64x4d neon msa sse2 vsx mmi lsx/; +add_proto qw/void vpx_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; +specialize qw/vpx_sad32x64x4d neon neon_dotprod msa sse2 vsx mmi lsx/; -add_proto qw/void vpx_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]"; -specialize qw/vpx_sad32x32x4d avx2 neon msa sse2 vsx mmi lsx/; +add_proto qw/void vpx_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; +specialize qw/vpx_sad32x32x4d avx2 neon neon_dotprod msa sse2 vsx mmi lsx/; -add_proto qw/void vpx_sad32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]"; -specialize qw/vpx_sad32x16x4d neon msa sse2 vsx mmi/; +add_proto qw/void vpx_sad32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; +specialize qw/vpx_sad32x16x4d neon neon_dotprod msa sse2 vsx mmi/; -add_proto qw/void vpx_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]"; -specialize qw/vpx_sad16x32x4d neon msa sse2 vsx mmi/; +add_proto qw/void vpx_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; +specialize qw/vpx_sad16x32x4d neon neon_dotprod msa sse2 vsx mmi/; -add_proto qw/void vpx_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]"; -specialize qw/vpx_sad16x16x4d neon msa sse2 vsx mmi lsx/; +add_proto qw/void vpx_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; +specialize qw/vpx_sad16x16x4d neon neon_dotprod msa sse2 vsx mmi lsx/; -add_proto qw/void vpx_sad16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]"; -specialize qw/vpx_sad16x8x4d neon msa sse2 vsx mmi/; +add_proto qw/void vpx_sad16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; +specialize qw/vpx_sad16x8x4d neon neon_dotprod msa sse2 vsx mmi/; -add_proto qw/void vpx_sad8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]"; +add_proto qw/void vpx_sad8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; specialize qw/vpx_sad8x16x4d neon msa sse2 mmi/; -add_proto qw/void vpx_sad8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]"; +add_proto qw/void vpx_sad8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; specialize qw/vpx_sad8x8x4d neon msa sse2 mmi lsx/; -add_proto qw/void vpx_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]"; +add_proto qw/void vpx_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; specialize qw/vpx_sad8x4x4d neon msa sse2 mmi/; -add_proto qw/void vpx_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]"; +add_proto qw/void vpx_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; specialize qw/vpx_sad4x8x4d neon msa sse2 mmi/; -add_proto qw/void vpx_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]"; +add_proto qw/void vpx_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; specialize qw/vpx_sad4x4x4d neon msa sse2 mmi/; +add_proto qw/void vpx_sad_skip_64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; +specialize qw/vpx_sad_skip_64x64x4d neon neon_dotprod avx2 sse2/; + +add_proto qw/void vpx_sad_skip_64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; +specialize qw/vpx_sad_skip_64x32x4d neon neon_dotprod avx2 sse2/; + +add_proto qw/void vpx_sad_skip_32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; +specialize qw/vpx_sad_skip_32x64x4d neon neon_dotprod avx2 sse2/; + +add_proto qw/void vpx_sad_skip_32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; +specialize qw/vpx_sad_skip_32x32x4d neon neon_dotprod avx2 sse2/; + +add_proto qw/void vpx_sad_skip_32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; +specialize qw/vpx_sad_skip_32x16x4d neon neon_dotprod avx2 sse2/; + +add_proto qw/void vpx_sad_skip_16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; +specialize qw/vpx_sad_skip_16x32x4d neon neon_dotprod sse2/; + +add_proto qw/void vpx_sad_skip_16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; +specialize qw/vpx_sad_skip_16x16x4d neon neon_dotprod sse2/; + +add_proto qw/void vpx_sad_skip_16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; +specialize qw/vpx_sad_skip_16x8x4d neon neon_dotprod sse2/; + +add_proto qw/void vpx_sad_skip_8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; +specialize qw/vpx_sad_skip_8x16x4d neon sse2/; + +add_proto qw/void vpx_sad_skip_8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; +specialize qw/vpx_sad_skip_8x8x4d neon sse2/; + +add_proto qw/void vpx_sad_skip_8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; +specialize qw/vpx_sad_skip_8x4x4d neon/; + +add_proto qw/void vpx_sad_skip_4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; +specialize qw/vpx_sad_skip_4x8x4d neon sse2/; + +add_proto qw/void vpx_sad_skip_4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; +specialize qw/vpx_sad_skip_4x4x4d neon/; + add_proto qw/uint64_t vpx_sum_squares_2d_i16/, "const int16_t *src, int stride, int size"; specialize qw/vpx_sum_squares_2d_i16 neon sse2 msa/; @@ -941,6 +1029,9 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/void vpx_highbd_subtract_block/, "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src8_ptr, ptrdiff_t src_stride, const uint8_t *pred8_ptr, ptrdiff_t pred_stride, int bd"; specialize qw/vpx_highbd_subtract_block neon avx2/; + add_proto qw/int64_t/, "vpx_highbd_sse", "const uint8_t *a8, int a_stride, const uint8_t *b8,int b_stride, int width, int height"; + specialize qw/vpx_highbd_sse sse4_1 avx2 neon/; + # # Single block SAD # @@ -983,16 +1074,56 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { add_proto qw/unsigned int vpx_highbd_sad4x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; specialize qw/vpx_highbd_sad4x4 neon/; + add_proto qw/unsigned int vpx_highbd_sad_skip_64x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; + specialize qw/vpx_highbd_sad_skip_64x64 neon sse2 avx2/; + + add_proto qw/unsigned int vpx_highbd_sad_skip_64x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; + specialize qw/vpx_highbd_sad_skip_64x32 neon sse2 avx2/; + + add_proto qw/unsigned int vpx_highbd_sad_skip_32x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; + specialize qw/vpx_highbd_sad_skip_32x64 neon sse2 avx2/; + + add_proto qw/unsigned int vpx_highbd_sad_skip_32x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; + specialize qw/vpx_highbd_sad_skip_32x32 neon sse2 avx2/; + + add_proto qw/unsigned int vpx_highbd_sad_skip_32x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; + specialize qw/vpx_highbd_sad_skip_32x16 neon sse2 avx2/; + + add_proto qw/unsigned int vpx_highbd_sad_skip_16x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; + specialize qw/vpx_highbd_sad_skip_16x32 neon sse2 avx2/; + + add_proto qw/unsigned int vpx_highbd_sad_skip_16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; + specialize qw/vpx_highbd_sad_skip_16x16 neon sse2 avx2/; + + add_proto qw/unsigned int vpx_highbd_sad_skip_16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; + specialize qw/vpx_highbd_sad_skip_16x8 neon sse2 avx2/; + + add_proto qw/unsigned int vpx_highbd_sad_skip_8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; + specialize qw/vpx_highbd_sad_skip_8x16 neon sse2/; + + add_proto qw/unsigned int vpx_highbd_sad_skip_8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; + specialize qw/vpx_highbd_sad_skip_8x8 neon sse2/; + + add_proto qw/unsigned int vpx_highbd_sad_skip_8x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; + specialize qw/vpx_highbd_sad_skip_8x4 neon/; + + add_proto qw/unsigned int vpx_highbd_sad_skip_4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; + specialize qw/vpx_highbd_sad_skip_4x8 neon/; + + add_proto qw/unsigned int vpx_highbd_sad_skip_4x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride"; + specialize qw/vpx_highbd_sad_skip_4x4 neon/; + # # Avg # add_proto qw/unsigned int vpx_highbd_avg_8x8/, "const uint8_t *s8, int p"; - specialize qw/vpx_highbd_avg_8x8 sse2/; + specialize qw/vpx_highbd_avg_8x8 sse2 neon/; add_proto qw/unsigned int vpx_highbd_avg_4x4/, "const uint8_t *s8, int p"; - specialize qw/vpx_highbd_avg_4x4 sse2/; + specialize qw/vpx_highbd_avg_4x4 sse2 neon/; add_proto qw/void vpx_highbd_minmax_8x8/, "const uint8_t *s8, int p, const uint8_t *d8, int dp, int *min, int *max"; + specialize qw/vpx_highbd_minmax_8x8 neon/; add_proto qw/unsigned int vpx_highbd_sad64x64_avg/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred"; specialize qw/vpx_highbd_sad64x64_avg sse2 neon avx2/; @@ -1036,45 +1167,84 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") { # # Multi-block SAD, comparing a reference to N independent blocks # - add_proto qw/void vpx_highbd_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]"; + add_proto qw/void vpx_highbd_sad64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; specialize qw/vpx_highbd_sad64x64x4d sse2 neon avx2/; - add_proto qw/void vpx_highbd_sad64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]"; + add_proto qw/void vpx_highbd_sad64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; specialize qw/vpx_highbd_sad64x32x4d sse2 neon avx2/; - add_proto qw/void vpx_highbd_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]"; + add_proto qw/void vpx_highbd_sad32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; specialize qw/vpx_highbd_sad32x64x4d sse2 neon avx2/; - add_proto qw/void vpx_highbd_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]"; + add_proto qw/void vpx_highbd_sad32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; specialize qw/vpx_highbd_sad32x32x4d sse2 neon avx2/; - add_proto qw/void vpx_highbd_sad32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]"; + add_proto qw/void vpx_highbd_sad32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; specialize qw/vpx_highbd_sad32x16x4d sse2 neon avx2/; - add_proto qw/void vpx_highbd_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]"; + add_proto qw/void vpx_highbd_sad16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; specialize qw/vpx_highbd_sad16x32x4d sse2 neon avx2/; - add_proto qw/void vpx_highbd_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]"; + add_proto qw/void vpx_highbd_sad16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; specialize qw/vpx_highbd_sad16x16x4d sse2 neon avx2/; - add_proto qw/void vpx_highbd_sad16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]"; + add_proto qw/void vpx_highbd_sad16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; specialize qw/vpx_highbd_sad16x8x4d sse2 neon avx2/; - add_proto qw/void vpx_highbd_sad8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]"; + add_proto qw/void vpx_highbd_sad8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; specialize qw/vpx_highbd_sad8x16x4d sse2 neon/; - add_proto qw/void vpx_highbd_sad8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]"; + add_proto qw/void vpx_highbd_sad8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; specialize qw/vpx_highbd_sad8x8x4d sse2 neon/; - add_proto qw/void vpx_highbd_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]"; + add_proto qw/void vpx_highbd_sad8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; specialize qw/vpx_highbd_sad8x4x4d sse2 neon/; - add_proto qw/void vpx_highbd_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]"; + add_proto qw/void vpx_highbd_sad4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; specialize qw/vpx_highbd_sad4x8x4d sse2 neon/; - add_proto qw/void vpx_highbd_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]"; + add_proto qw/void vpx_highbd_sad4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; specialize qw/vpx_highbd_sad4x4x4d sse2 neon/; + add_proto qw/void vpx_highbd_sad_skip_64x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; + specialize qw/vpx_highbd_sad_skip_64x64x4d neon sse2 avx2/; + + add_proto qw/void vpx_highbd_sad_skip_64x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; + specialize qw/vpx_highbd_sad_skip_64x32x4d neon sse2 avx2/; + + add_proto qw/void vpx_highbd_sad_skip_32x64x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; + specialize qw/vpx_highbd_sad_skip_32x64x4d neon sse2 avx2/; + + add_proto qw/void vpx_highbd_sad_skip_32x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; + specialize qw/vpx_highbd_sad_skip_32x32x4d neon sse2 avx2/; + + add_proto qw/void vpx_highbd_sad_skip_32x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; + specialize qw/vpx_highbd_sad_skip_32x16x4d neon sse2 avx2/; + + add_proto qw/void vpx_highbd_sad_skip_16x32x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; + specialize qw/vpx_highbd_sad_skip_16x32x4d neon sse2 avx2/; + + add_proto qw/void vpx_highbd_sad_skip_16x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; + specialize qw/vpx_highbd_sad_skip_16x16x4d neon sse2 avx2/; + + add_proto qw/void vpx_highbd_sad_skip_16x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; + specialize qw/vpx_highbd_sad_skip_16x8x4d neon sse2 avx2/; + + add_proto qw/void vpx_highbd_sad_skip_8x16x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; + specialize qw/vpx_highbd_sad_skip_8x16x4d neon sse2/; + + add_proto qw/void vpx_highbd_sad_skip_8x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; + specialize qw/vpx_highbd_sad_skip_8x8x4d neon sse2/; + + add_proto qw/void vpx_highbd_sad_skip_8x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; + specialize qw/vpx_highbd_sad_skip_8x4x4d neon/; + + add_proto qw/void vpx_highbd_sad_skip_4x8x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; + specialize qw/vpx_highbd_sad_skip_4x8x4d neon sse2/; + + add_proto qw/void vpx_highbd_sad_skip_4x4x4d/, "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]"; + specialize qw/vpx_highbd_sad_skip_4x4x4d neon/; + # # Structured Similarity (SSIM) # @@ -1090,73 +1260,73 @@ if (vpx_config("CONFIG_ENCODERS") eq "yes" || vpx_config("CONFIG_POSTPROC") eq " # Variance # add_proto qw/unsigned int vpx_variance64x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_variance64x64 sse2 avx2 neon msa mmi vsx lsx/; + specialize qw/vpx_variance64x64 sse2 avx2 neon neon_dotprod msa mmi vsx lsx/; add_proto qw/unsigned int vpx_variance64x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_variance64x32 sse2 avx2 neon msa mmi vsx/; + specialize qw/vpx_variance64x32 sse2 avx2 neon neon_dotprod msa mmi vsx/; add_proto qw/unsigned int vpx_variance32x64/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_variance32x64 sse2 avx2 neon msa mmi vsx/; + specialize qw/vpx_variance32x64 sse2 avx2 neon neon_dotprod msa mmi vsx/; add_proto qw/unsigned int vpx_variance32x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_variance32x32 sse2 avx2 neon msa mmi vsx lsx/; + specialize qw/vpx_variance32x32 sse2 avx2 neon neon_dotprod msa mmi vsx lsx/; add_proto qw/unsigned int vpx_variance32x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_variance32x16 sse2 avx2 neon msa mmi vsx/; + specialize qw/vpx_variance32x16 sse2 avx2 neon neon_dotprod msa mmi vsx/; add_proto qw/unsigned int vpx_variance16x32/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_variance16x32 sse2 avx2 neon msa mmi vsx/; + specialize qw/vpx_variance16x32 sse2 avx2 neon neon_dotprod msa mmi vsx/; add_proto qw/unsigned int vpx_variance16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_variance16x16 sse2 avx2 neon msa mmi vsx lsx/; + specialize qw/vpx_variance16x16 sse2 avx2 neon neon_dotprod msa mmi vsx lsx/; add_proto qw/unsigned int vpx_variance16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_variance16x8 sse2 avx2 neon msa mmi vsx/; + specialize qw/vpx_variance16x8 sse2 avx2 neon neon_dotprod msa mmi vsx/; add_proto qw/unsigned int vpx_variance8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_variance8x16 sse2 neon msa mmi vsx/; + specialize qw/vpx_variance8x16 sse2 avx2 neon neon_dotprod msa mmi vsx/; add_proto qw/unsigned int vpx_variance8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_variance8x8 sse2 neon msa mmi vsx lsx/; + specialize qw/vpx_variance8x8 sse2 avx2 neon neon_dotprod msa mmi vsx lsx/; add_proto qw/unsigned int vpx_variance8x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_variance8x4 sse2 neon msa mmi vsx/; + specialize qw/vpx_variance8x4 sse2 avx2 neon neon_dotprod msa mmi vsx/; add_proto qw/unsigned int vpx_variance4x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_variance4x8 sse2 neon msa mmi vsx/; + specialize qw/vpx_variance4x8 sse2 neon neon_dotprod msa mmi vsx/; add_proto qw/unsigned int vpx_variance4x4/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_variance4x4 sse2 neon msa mmi vsx/; + specialize qw/vpx_variance4x4 sse2 neon neon_dotprod msa mmi vsx/; # # Specialty Variance # add_proto qw/void vpx_get16x16var/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; - specialize qw/vpx_get16x16var sse2 avx2 neon msa vsx lsx/; + specialize qw/vpx_get16x16var sse2 avx2 neon neon_dotprod msa vsx lsx/; add_proto qw/void vpx_get8x8var/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum"; - specialize qw/vpx_get8x8var sse2 neon msa vsx/; + specialize qw/vpx_get8x8var sse2 neon neon_dotprod msa vsx/; add_proto qw/unsigned int vpx_mse16x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_mse16x16 sse2 avx2 neon msa mmi vsx lsx/; + specialize qw/vpx_mse16x16 sse2 avx2 neon neon_dotprod msa mmi vsx lsx/; add_proto qw/unsigned int vpx_mse16x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_mse16x8 sse2 avx2 msa mmi vsx/; + specialize qw/vpx_mse16x8 sse2 avx2 neon neon_dotprod msa mmi vsx/; add_proto qw/unsigned int vpx_mse8x16/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_mse8x16 sse2 msa mmi vsx/; + specialize qw/vpx_mse8x16 sse2 neon neon_dotprod msa mmi vsx/; add_proto qw/unsigned int vpx_mse8x8/, "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse"; - specialize qw/vpx_mse8x8 sse2 msa mmi vsx/; + specialize qw/vpx_mse8x8 sse2 neon neon_dotprod msa mmi vsx/; add_proto qw/unsigned int vpx_get_mb_ss/, "const int16_t *"; specialize qw/vpx_get_mb_ss sse2 msa vsx/; add_proto qw/unsigned int vpx_get4x4sse_cs/, "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride"; - specialize qw/vpx_get4x4sse_cs neon msa vsx/; + specialize qw/vpx_get4x4sse_cs neon neon_dotprod msa vsx/; add_proto qw/void vpx_comp_avg_pred/, "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride"; - specialize qw/vpx_comp_avg_pred neon sse2 vsx lsx/; + specialize qw/vpx_comp_avg_pred neon sse2 avx2 vsx lsx/; # # Subpixel Variance |