aboutsummaryrefslogtreecommitdiff
path: root/av1/common/x86/convolve_sse2.c
diff options
context:
space:
mode:
Diffstat (limited to 'av1/common/x86/convolve_sse2.c')
-rw-r--r-- av1/common/x86/convolve_sse2.c | 30
1 file changed, 15 insertions, 15 deletions
diff --git a/av1/common/x86/convolve_sse2.c b/av1/common/x86/convolve_sse2.c
index 0e7782250..012e75c1a 100644
--- a/av1/common/x86/convolve_sse2.c
+++ b/av1/common/x86/convolve_sse2.c
@@ -200,31 +200,31 @@ void av1_convolve_y_sr_sse2(const uint8_t *src, int src_stride, uint8_t *dst,
if (w <= 4) {
__m128i s[8], src6, res, res_round, res16;
int res_int;
- src6 = _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 6 * src_stride));
+ src6 = _mm_cvtsi32_si128(*(int *)(src_ptr + 6 * src_stride));
s[0] = _mm_unpacklo_epi8(
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 0 * src_stride)),
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 1 * src_stride)));
+ _mm_cvtsi32_si128(*(int *)(src_ptr + 0 * src_stride)),
+ _mm_cvtsi32_si128(*(int *)(src_ptr + 1 * src_stride)));
s[1] = _mm_unpacklo_epi8(
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 1 * src_stride)),
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 2 * src_stride)));
+ _mm_cvtsi32_si128(*(int *)(src_ptr + 1 * src_stride)),
+ _mm_cvtsi32_si128(*(int *)(src_ptr + 2 * src_stride)));
s[2] = _mm_unpacklo_epi8(
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 2 * src_stride)),
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 3 * src_stride)));
+ _mm_cvtsi32_si128(*(int *)(src_ptr + 2 * src_stride)),
+ _mm_cvtsi32_si128(*(int *)(src_ptr + 3 * src_stride)));
s[3] = _mm_unpacklo_epi8(
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 3 * src_stride)),
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 4 * src_stride)));
+ _mm_cvtsi32_si128(*(int *)(src_ptr + 3 * src_stride)),
+ _mm_cvtsi32_si128(*(int *)(src_ptr + 4 * src_stride)));
s[4] = _mm_unpacklo_epi8(
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 4 * src_stride)),
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 5 * src_stride)));
+ _mm_cvtsi32_si128(*(int *)(src_ptr + 4 * src_stride)),
+ _mm_cvtsi32_si128(*(int *)(src_ptr + 5 * src_stride)));
s[5] = _mm_unpacklo_epi8(
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 5 * src_stride)), src6);
+ _mm_cvtsi32_si128(*(int *)(src_ptr + 5 * src_stride)), src6);
do {
s[6] = _mm_unpacklo_epi8(
- src6, _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 7 * src_stride)));
- src6 = _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 8 * src_stride));
+ src6, _mm_cvtsi32_si128(*(int *)(src_ptr + 7 * src_stride)));
+ src6 = _mm_cvtsi32_si128(*(int *)(src_ptr + 8 * src_stride));
s[7] = _mm_unpacklo_epi8(
- _mm_cvtsi32_si128(*(uint32_t *)(src_ptr + 7 * src_stride)), src6);
+ _mm_cvtsi32_si128(*(int *)(src_ptr + 7 * src_stride)), src6);
res = convolve_lo_y(s + 0, coeffs);
res_round = _mm_sra_epi32(_mm_add_epi32(res, round_const), round_shift);