diff options
Diffstat (limited to 'simd')
-rw-r--r-- | simd/arm/common/jidctint-neon.c | 34 | ||||
-rw-r--r-- | simd/arm/common/jidctred-neon.c | 12 |
2 files changed, 26 insertions, 20 deletions
diff --git a/simd/arm/common/jidctint-neon.c b/simd/arm/common/jidctint-neon.c index 7fb683b4..11076a0c 100644 --- a/simd/arm/common/jidctint-neon.c +++ b/simd/arm/common/jidctint-neon.c @@ -644,15 +644,16 @@ static inline void jsimd_idct_islow_pass2_regular(int16_t *workspace, vreinterpret_u16_u8(cols_45_67.val[0]), vreinterpret_u16_u8(cols_45_67.val[1]) }; + + JSAMPROW outptr0 = output_buf[buf_offset + 0] + output_col; + JSAMPROW outptr1 = output_buf[buf_offset + 1] + output_col; + JSAMPROW outptr2 = output_buf[buf_offset + 2] + output_col; + JSAMPROW outptr3 = output_buf[buf_offset + 3] + output_col; /* VST4 of 16-bit elements completes the transpose. */ - vst4_lane_u16((uint16_t *)(output_buf[buf_offset + 0] + output_col), - cols_01_23_45_67, 0); - vst4_lane_u16((uint16_t *)(output_buf[buf_offset + 1] + output_col), - cols_01_23_45_67, 1); - vst4_lane_u16((uint16_t *)(output_buf[buf_offset + 2] + output_col), - cols_01_23_45_67, 2); - vst4_lane_u16((uint16_t *)(output_buf[buf_offset + 3] + output_col), - cols_01_23_45_67, 3); + vst4_lane_u16((uint16_t *)outptr0, cols_01_23_45_67, 0); + vst4_lane_u16((uint16_t *)outptr1, cols_01_23_45_67, 1); + vst4_lane_u16((uint16_t *)outptr2, cols_01_23_45_67, 2); + vst4_lane_u16((uint16_t *)outptr3, cols_01_23_45_67, 3); } @@ -735,13 +736,14 @@ static inline void jsimd_idct_islow_pass2_sparse(int16_t *workspace, vreinterpret_u16_u8(cols_45_67.val[0]), vreinterpret_u16_u8(cols_45_67.val[1]) }; + + JSAMPROW outptr0 = output_buf[buf_offset + 0] + output_col; + JSAMPROW outptr1 = output_buf[buf_offset + 1] + output_col; + JSAMPROW outptr2 = output_buf[buf_offset + 2] + output_col; + JSAMPROW outptr3 = output_buf[buf_offset + 3] + output_col; /* VST4 of 16-bit elements completes the transpose. */ - vst4_lane_u16((uint16_t *)(output_buf[buf_offset + 0] + output_col), - cols_01_23_45_67, 0); - vst4_lane_u16((uint16_t *)(output_buf[buf_offset + 1] + output_col), - cols_01_23_45_67, 1); - vst4_lane_u16((uint16_t *)(output_buf[buf_offset + 2] + output_col), - cols_01_23_45_67, 2); - vst4_lane_u16((uint16_t *)(output_buf[buf_offset + 3] + output_col), - cols_01_23_45_67, 3); + vst4_lane_u16((uint16_t *)outptr0, cols_01_23_45_67, 0); + vst4_lane_u16((uint16_t *)outptr1, cols_01_23_45_67, 1); + vst4_lane_u16((uint16_t *)outptr2, cols_01_23_45_67, 2); + vst4_lane_u16((uint16_t *)outptr3, cols_01_23_45_67, 3); } diff --git a/simd/arm/common/jidctred-neon.c b/simd/arm/common/jidctred-neon.c index aa107995..7e95bf35 100644 --- a/simd/arm/common/jidctred-neon.c +++ b/simd/arm/common/jidctred-neon.c @@ -455,8 +455,12 @@ void jsimd_idct_4x4_neon(void *dct_table, uint16x4x2_t output_01_23 = { output_0123.val[0], output_0123.val[1] }; /* Store 4x4 block to memory. */ - vst2_lane_u16((uint16_t *)(output_buf[0] + output_col), output_01_23, 0); - vst2_lane_u16((uint16_t *)(output_buf[1] + output_col), output_01_23, 1); - vst2_lane_u16((uint16_t *)(output_buf[2] + output_col), output_01_23, 2); - vst2_lane_u16((uint16_t *)(output_buf[3] + output_col), output_01_23, 3); + JSAMPROW outptr0 = output_buf[0] + output_col; + JSAMPROW outptr1 = output_buf[1] + output_col; + JSAMPROW outptr2 = output_buf[2] + output_col; + JSAMPROW outptr3 = output_buf[3] + output_col; + vst2_lane_u16((uint16_t *)outptr0, output_01_23, 0); + vst2_lane_u16((uint16_t *)outptr1, output_01_23, 1); + vst2_lane_u16((uint16_t *)outptr2, output_01_23, 2); + vst2_lane_u16((uint16_t *)outptr3, output_01_23, 3); } |