diff options
Diffstat (limited to 'vpx_dsp/arm/quantize_neon.c')
-rw-r--r-- | vpx_dsp/arm/quantize_neon.c | 54 |
1 files changed, 23 insertions, 31 deletions
diff --git a/vpx_dsp/arm/quantize_neon.c b/vpx_dsp/arm/quantize_neon.c index 9c227d560..e2351fa2c 100644 --- a/vpx_dsp/arm/quantize_neon.c +++ b/vpx_dsp/arm/quantize_neon.c @@ -14,6 +14,8 @@ #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/arm/mem_neon.h" +#include "vp9/common/vp9_scan.h" +#include "vp9/encoder/vp9_block.h" static INLINE void calculate_dqcoeff_and_store(const int16x8_t qcoeff, const int16x8_t dequant, @@ -69,20 +71,19 @@ quantize_b_neon(const tran_low_t *coeff_ptr, tran_low_t *qcoeff_ptr, } void vpx_quantize_b_neon(const tran_low_t *coeff_ptr, intptr_t n_coeffs, - const int16_t *zbin_ptr, const int16_t *round_ptr, - const int16_t *quant_ptr, - const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, - tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, - uint16_t *eob_ptr, const int16_t *scan, - const int16_t *iscan) { + const struct macroblock_plane *const mb_plane, + tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, + const int16_t *dequant_ptr, uint16_t *eob_ptr, + const struct ScanOrder *const scan_order) { const int16x8_t neg_one = vdupq_n_s16(-1); uint16x8_t eob_max; + int16_t const *iscan = scan_order->iscan; // Only the first element of each vector is DC. - int16x8_t zbin = vld1q_s16(zbin_ptr); - int16x8_t round = vld1q_s16(round_ptr); - int16x8_t quant = vld1q_s16(quant_ptr); - int16x8_t quant_shift = vld1q_s16(quant_shift_ptr); + int16x8_t zbin = vld1q_s16(mb_plane->zbin); + int16x8_t round = vld1q_s16(mb_plane->round); + int16x8_t quant = vld1q_s16(mb_plane->quant); + int16x8_t quant_shift = vld1q_s16(mb_plane->quant_shift); int16x8_t dequant = vld1q_s16(dequant_ptr); // Process first 8 values which include a dc component. @@ -132,7 +133,7 @@ void vpx_quantize_b_neon(const tran_low_t *coeff_ptr, intptr_t n_coeffs, } while (n_coeffs > 0); } -#ifdef __aarch64__ +#if VPX_ARCH_AARCH64 *eob_ptr = vmaxvq_u16(eob_max); #else { @@ -142,10 +143,7 @@ void vpx_quantize_b_neon(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const uint16x4_t eob_max_2 = vpmax_u16(eob_max_1, eob_max_1); vst1_lane_u16(eob_ptr, eob_max_2, 0); } -#endif // __aarch64__ - // Need these here, else the compiler complains about mixing declarations and - // code in C90 - (void)scan; +#endif // VPX_ARCH_AARCH64 } static INLINE int32x4_t extract_sign_bit(int32x4_t a) { @@ -213,23 +211,21 @@ quantize_b_32x32_neon(const tran_low_t *coeff_ptr, tran_low_t *qcoeff_ptr, // Main difference is that zbin values are halved before comparison and dqcoeff // values are divided by 2. zbin is rounded but dqcoeff is not. -void vpx_quantize_b_32x32_neon(const tran_low_t *coeff_ptr, intptr_t n_coeffs, - const int16_t *zbin_ptr, - const int16_t *round_ptr, - const int16_t *quant_ptr, - const int16_t *quant_shift_ptr, +void vpx_quantize_b_32x32_neon(const tran_low_t *coeff_ptr, + const struct macroblock_plane *mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, - const int16_t *scan, const int16_t *iscan) { + const struct ScanOrder *const scan_order) { const int16x8_t neg_one = vdupq_n_s16(-1); uint16x8_t eob_max; int i; + const int16_t *iscan = scan_order->iscan; // Only the first element of each vector is DC. - int16x8_t zbin = vrshrq_n_s16(vld1q_s16(zbin_ptr), 1); - int16x8_t round = vrshrq_n_s16(vld1q_s16(round_ptr), 1); - int16x8_t quant = vld1q_s16(quant_ptr); - int16x8_t quant_shift = vld1q_s16(quant_shift_ptr); + int16x8_t zbin = vrshrq_n_s16(vld1q_s16(mb_plane->zbin), 1); + int16x8_t round = vrshrq_n_s16(vld1q_s16(mb_plane->round), 1); + int16x8_t quant = vld1q_s16(mb_plane->quant); + int16x8_t quant_shift = vld1q_s16(mb_plane->quant_shift); int16x8_t dequant = vld1q_s16(dequant_ptr); // Process first 8 values which include a dc component. @@ -276,7 +272,7 @@ void vpx_quantize_b_32x32_neon(const tran_low_t *coeff_ptr, intptr_t n_coeffs, } } -#ifdef __aarch64__ +#if VPX_ARCH_AARCH64 *eob_ptr = vmaxvq_u16(eob_max); #else { @@ -286,9 +282,5 @@ void vpx_quantize_b_32x32_neon(const tran_low_t *coeff_ptr, intptr_t n_coeffs, const uint16x4_t eob_max_2 = vpmax_u16(eob_max_1, eob_max_1); vst1_lane_u16(eob_ptr, eob_max_2, 0); } -#endif // __aarch64__ - // Need these here, else the compiler complains about mixing declarations and - // code in C90 - (void)n_coeffs; - (void)scan; +#endif // VPX_ARCH_AARCH64 } |