diff options
Diffstat (limited to 'av1/common/x86/convolve_sse2.c')
-rw-r--r-- | av1/common/x86/convolve_sse2.c | 30 |
1 file changed, 15 insertions, 15 deletions
diff --git a/av1/common/x86/convolve_sse2.c b/av1/common/x86/convolve_sse2.c index 0e7782250..012e75c1a 100644 --- a/av1/common/x86/convolve_sse2.c +++ b/av1/common/x86/convolve_sse2.c @@ -200,31 +200,31 @@ void av1_convolve_y_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst, if (w <= 4) { __m128i s[8], src6, res, res_round, res16; int res_int; - src6 = _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 6 * src_stride)); + src6 = _mm_cvtsi32_si128(*(int *)(src_ptr + 6 * src_stride)); s[0] = _mm_unpacklo_epi8( - _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 0 * src_stride)), - _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 1 * src_stride))); + _mm_cvtsi32_si128(*(int *)(src_ptr + 0 * src_stride)), + _mm_cvtsi32_si128(*(int *)(src_ptr + 1 * src_stride))); s[1] = _mm_unpacklo_epi8( - _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 1 * src_stride)), - _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 2 * src_stride))); + _mm_cvtsi32_si128(*(int *)(src_ptr + 1 * src_stride)), + _mm_cvtsi32_si128(*(int *)(src_ptr + 2 * src_stride))); s[2] = _mm_unpacklo_epi8( - _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 2 * src_stride)), - _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 3 * src_stride))); + _mm_cvtsi32_si128(*(int *)(src_ptr + 2 * src_stride)), + _mm_cvtsi32_si128(*(int *)(src_ptr + 3 * src_stride))); s[3] = _mm_unpacklo_epi8( - _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 3 * src_stride)), - _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 4 * src_stride))); + _mm_cvtsi32_si128(*(int *)(src_ptr + 3 * src_stride)), + _mm_cvtsi32_si128(*(int *)(src_ptr + 4 * src_stride))); s[4] = _mm_unpacklo_epi8( - _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 4 * src_stride)), - _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 5 * src_stride))); + _mm_cvtsi32_si128(*(int *)(src_ptr + 4 * src_stride)), + _mm_cvtsi32_si128(*(int *)(src_ptr + 5 * src_stride))); s[5] = _mm_unpacklo_epi8( - _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 5 * src_stride)), src6); + _mm_cvtsi32_si128(*(int *)(src_ptr + 5 * src_stride)), src6); do { s[6] = 
_mm_unpacklo_epi8( - src6, _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 7 * src_stride))); - src6 = _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 8 * src_stride)); + src6, _mm_cvtsi32_si128(*(int *)(src_ptr + 7 * src_stride))); + src6 = _mm_cvtsi32_si128(*(int *)(src_ptr + 8 * src_stride)); s[7] = _mm_unpacklo_epi8( - _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 7 * src_stride)), src6); + _mm_cvtsi32_si128(*(int *)(src_ptr + 7 * src_stride)), src6); res = convolve_lo_y(s + 0, coeffs); res_round = _mm_sra_epi32(_mm_add_epi32(res, round_const), round_shift); |