aboutsummaryrefslogtreecommitdiff
path: root/simd
diff options
context:
space:
mode:
Diffstat (limited to 'simd')
-rw-r--r-- simd/arm/common/jidctint-neon.c | 34
-rw-r--r-- simd/arm/common/jidctred-neon.c | 12
2 files changed, 26 insertions, 20 deletions
diff --git a/simd/arm/common/jidctint-neon.c b/simd/arm/common/jidctint-neon.c
index 7fb683b4..11076a0c 100644
--- a/simd/arm/common/jidctint-neon.c
+++ b/simd/arm/common/jidctint-neon.c
@@ -644,15 +644,16 @@ static inline void jsimd_idct_islow_pass2_regular(int16_t *workspace,
vreinterpret_u16_u8(cols_45_67.val[0]),
vreinterpret_u16_u8(cols_45_67.val[1])
};
+
+ JSAMPROW outptr0 = output_buf[buf_offset + 0] + output_col;
+ JSAMPROW outptr1 = output_buf[buf_offset + 1] + output_col;
+ JSAMPROW outptr2 = output_buf[buf_offset + 2] + output_col;
+ JSAMPROW outptr3 = output_buf[buf_offset + 3] + output_col;
/* VST4 of 16-bit elements completes the transpose. */
- vst4_lane_u16((uint16_t *)(output_buf[buf_offset + 0] + output_col),
- cols_01_23_45_67, 0);
- vst4_lane_u16((uint16_t *)(output_buf[buf_offset + 1] + output_col),
- cols_01_23_45_67, 1);
- vst4_lane_u16((uint16_t *)(output_buf[buf_offset + 2] + output_col),
- cols_01_23_45_67, 2);
- vst4_lane_u16((uint16_t *)(output_buf[buf_offset + 3] + output_col),
- cols_01_23_45_67, 3);
+ vst4_lane_u16((uint16_t *)outptr0, cols_01_23_45_67, 0);
+ vst4_lane_u16((uint16_t *)outptr1, cols_01_23_45_67, 1);
+ vst4_lane_u16((uint16_t *)outptr2, cols_01_23_45_67, 2);
+ vst4_lane_u16((uint16_t *)outptr3, cols_01_23_45_67, 3);
}
@@ -735,13 +736,14 @@ static inline void jsimd_idct_islow_pass2_sparse(int16_t *workspace,
vreinterpret_u16_u8(cols_45_67.val[0]),
vreinterpret_u16_u8(cols_45_67.val[1])
};
+
+ JSAMPROW outptr0 = output_buf[buf_offset + 0] + output_col;
+ JSAMPROW outptr1 = output_buf[buf_offset + 1] + output_col;
+ JSAMPROW outptr2 = output_buf[buf_offset + 2] + output_col;
+ JSAMPROW outptr3 = output_buf[buf_offset + 3] + output_col;
/* VST4 of 16-bit elements completes the transpose. */
- vst4_lane_u16((uint16_t *)(output_buf[buf_offset + 0] + output_col),
- cols_01_23_45_67, 0);
- vst4_lane_u16((uint16_t *)(output_buf[buf_offset + 1] + output_col),
- cols_01_23_45_67, 1);
- vst4_lane_u16((uint16_t *)(output_buf[buf_offset + 2] + output_col),
- cols_01_23_45_67, 2);
- vst4_lane_u16((uint16_t *)(output_buf[buf_offset + 3] + output_col),
- cols_01_23_45_67, 3);
+ vst4_lane_u16((uint16_t *)outptr0, cols_01_23_45_67, 0);
+ vst4_lane_u16((uint16_t *)outptr1, cols_01_23_45_67, 1);
+ vst4_lane_u16((uint16_t *)outptr2, cols_01_23_45_67, 2);
+ vst4_lane_u16((uint16_t *)outptr3, cols_01_23_45_67, 3);
}
diff --git a/simd/arm/common/jidctred-neon.c b/simd/arm/common/jidctred-neon.c
index aa107995..7e95bf35 100644
--- a/simd/arm/common/jidctred-neon.c
+++ b/simd/arm/common/jidctred-neon.c
@@ -455,8 +455,12 @@ void jsimd_idct_4x4_neon(void *dct_table,
uint16x4x2_t output_01_23 = { output_0123.val[0], output_0123.val[1] };
/* Store 4x4 block to memory. */
- vst2_lane_u16((uint16_t *)(output_buf[0] + output_col), output_01_23, 0);
- vst2_lane_u16((uint16_t *)(output_buf[1] + output_col), output_01_23, 1);
- vst2_lane_u16((uint16_t *)(output_buf[2] + output_col), output_01_23, 2);
- vst2_lane_u16((uint16_t *)(output_buf[3] + output_col), output_01_23, 3);
+ JSAMPROW outptr0 = output_buf[0] + output_col;
+ JSAMPROW outptr1 = output_buf[1] + output_col;
+ JSAMPROW outptr2 = output_buf[2] + output_col;
+ JSAMPROW outptr3 = output_buf[3] + output_col;
+ vst2_lane_u16((uint16_t *)outptr0, output_01_23, 0);
+ vst2_lane_u16((uint16_t *)outptr1, output_01_23, 1);
+ vst2_lane_u16((uint16_t *)outptr2, output_01_23, 2);
+ vst2_lane_u16((uint16_t *)outptr3, output_01_23, 3);
}