aboutsummaryrefslogtreecommitdiff
path: root/vpx_dsp/x86/sad4d_sse2.asm
diff options
context:
space:
mode:
Diffstat (limited to 'vpx_dsp/x86/sad4d_sse2.asm')
-rw-r--r--vpx_dsp/x86/sad4d_sse2.asm43
1 files changed, 41 insertions, 2 deletions
diff --git a/vpx_dsp/x86/sad4d_sse2.asm b/vpx_dsp/x86/sad4d_sse2.asm
index 3f6e55ce9..ed4ea3ef9 100644
--- a/vpx_dsp/x86/sad4d_sse2.asm
+++ b/vpx_dsp/x86/sad4d_sse2.asm
@@ -179,7 +179,16 @@ SECTION .text
; uint8_t *ref[4], int ref_stride,
; uint32_t res[4]);
; where NxN = 64x64, 32x32, 16x16, 16x8, 8x16, 8x8, 8x4, 4x8 and 4x4
-%macro SADNXN4D 2
+%macro SADNXN4D 2-3 0
+%if %3 == 1 ; skip rows
+%if UNIX64
+cglobal sad_skip_%1x%2x4d, 5, 8, 8, src, src_stride, ref1, ref_stride, \
+ res, ref2, ref3, ref4
+%else
+cglobal sad_skip_%1x%2x4d, 4, 7, 8, src, src_stride, ref1, ref_stride, \
+ ref2, ref3, ref4
+%endif
+%else ; normal sad
%if UNIX64
cglobal sad%1x%2x4d, 5, 8, 8, src, src_stride, ref1, ref_stride, \
res, ref2, ref3, ref4
@@ -187,6 +196,11 @@ cglobal sad%1x%2x4d, 5, 8, 8, src, src_stride, ref1, ref_stride, \
cglobal sad%1x%2x4d, 4, 7, 8, src, src_stride, ref1, ref_stride, \
ref2, ref3, ref4
%endif
+%endif
+%if %3 == 1
+ lea src_strided, [2*src_strided]
+ lea ref_strided, [2*ref_strided]
+%endif
movsxdifnidn src_strideq, src_strided
movsxdifnidn ref_strideq, ref_strided
mov ref2q, [ref1q+gprsize*1]
@@ -195,9 +209,15 @@ cglobal sad%1x%2x4d, 4, 7, 8, src, src_stride, ref1, ref_stride, \
mov ref1q, [ref1q+gprsize*0]
PROCESS_%1x2x4 1, 0, 0, src_strideq, ref_strideq, 1
-%rep (%2-4)/2
+%if %3 == 1 ; downsample number of rows by 2
+%define num_rep (%2-8)/4
+%else
+%define num_rep (%2-4)/2
+%endif
+%rep num_rep
PROCESS_%1x2x4 0, 0, 0, src_strideq, ref_strideq, 1
%endrep
+%undef num_rep
PROCESS_%1x2x4 0, 0, 0, src_strideq, ref_strideq, 0
%if %1 > 4
@@ -211,12 +231,19 @@ cglobal sad%1x%2x4d, 4, 7, 8, src, src_stride, ref1, ref_stride, \
punpckhqdq m5, m7
movifnidn r4, r4mp
paddd m4, m5
+%if %3 == 1
+ pslld m4, 1
+%endif
movu [r4], m4
RET
%else
movifnidn r4, r4mp
pshufd m6, m6, 0x08
pshufd m7, m7, 0x08
+%if %3 == 1
+ pslld m6, 1
+ pslld m7, 1
+%endif
movq [r4+0], m6
movq [r4+8], m7
RET
@@ -237,3 +264,15 @@ SADNXN4D 8, 8
SADNXN4D 8, 4
SADNXN4D 4, 8
SADNXN4D 4, 4
+
+SADNXN4D 64, 64, 1
+SADNXN4D 64, 32, 1
+SADNXN4D 32, 64, 1
+SADNXN4D 32, 32, 1
+SADNXN4D 32, 16, 1
+SADNXN4D 16, 32, 1
+SADNXN4D 16, 16, 1
+SADNXN4D 16, 8, 1
+SADNXN4D 8, 16, 1
+SADNXN4D 8, 8, 1
+SADNXN4D 4, 8, 1