diff options
Diffstat (limited to 'vp9/encoder/x86/highbd_temporal_filter_sse4.c')
-rw-r--r-- | vp9/encoder/x86/highbd_temporal_filter_sse4.c | 7 |
1 file changed, 4 insertions, 3 deletions
diff --git a/vp9/encoder/x86/highbd_temporal_filter_sse4.c b/vp9/encoder/x86/highbd_temporal_filter_sse4.c
index a7f5117cf..97f182c66 100644
--- a/vp9/encoder/x86/highbd_temporal_filter_sse4.c
+++ b/vp9/encoder/x86/highbd_temporal_filter_sse4.c
@@ -16,7 +16,7 @@
 #include "vpx/vpx_integer.h"
 #include "vp9/encoder/vp9_encoder.h"
 #include "vp9/encoder/vp9_temporal_filter.h"
-#include "vp9/encoder/x86/temporal_filter_constants.h"
+#include "vp9/encoder/vp9_temporal_filter_constants.h"
 
 // Compute (a-b)**2 for 8 pixels with size 16-bit
 static INLINE void highbd_store_dist_8(const uint16_t *a, const uint16_t *b,
@@ -141,11 +141,12 @@ static INLINE void highbd_accumulate_and_store_8(const __m128i sum_first_u32,
   count_u16 = _mm_adds_epu16(count_u16, sum_u16);
   _mm_storeu_si128((__m128i *)count, count_u16);
 
-  pred_u16 = _mm_mullo_epi16(sum_u16, pred_u16);
-
   pred_0_u32 = _mm_cvtepu16_epi32(pred_u16);
   pred_1_u32 = _mm_unpackhi_epi16(pred_u16, zero);
 
+  pred_0_u32 = _mm_mullo_epi32(sum_first_u32, pred_0_u32);
+  pred_1_u32 = _mm_mullo_epi32(sum_second_u32, pred_1_u32);
+
   accum_0_u32 = _mm_loadu_si128((const __m128i *)accumulator);
   accum_1_u32 = _mm_loadu_si128((const __m128i *)(accumulator + 4));
 