diff options
Diffstat (limited to 'vpx_dsp/x86/quantize_sse2.h')
-rw-r--r-- | vpx_dsp/x86/quantize_sse2.h | 51 |
1 files changed, 39 insertions, 12 deletions
diff --git a/vpx_dsp/x86/quantize_sse2.h b/vpx_dsp/x86/quantize_sse2.h index 27bfb4e41..82c755a0c 100644 --- a/vpx_dsp/x86/quantize_sse2.h +++ b/vpx_dsp/x86/quantize_sse2.h @@ -15,26 +15,53 @@ #include "./vpx_config.h" #include "vpx/vpx_integer.h" +#include "vp9/encoder/vp9_block.h" -static INLINE void load_b_values(const int16_t *zbin_ptr, __m128i *zbin, - const int16_t *round_ptr, __m128i *round, - const int16_t *quant_ptr, __m128i *quant, +static INLINE void load_b_values(const struct macroblock_plane *const mb_plane, + __m128i *zbin, __m128i *round, __m128i *quant, const int16_t *dequant_ptr, __m128i *dequant, - const int16_t *shift_ptr, __m128i *shift) { - *zbin = _mm_load_si128((const __m128i *)zbin_ptr); - *round = _mm_load_si128((const __m128i *)round_ptr); - *quant = _mm_load_si128((const __m128i *)quant_ptr); + __m128i *shift) { + *zbin = _mm_load_si128((const __m128i *)mb_plane->zbin); + *round = _mm_load_si128((const __m128i *)mb_plane->round); + *quant = _mm_load_si128((const __m128i *)mb_plane->quant); *zbin = _mm_sub_epi16(*zbin, _mm_set1_epi16(1)); *dequant = _mm_load_si128((const __m128i *)dequant_ptr); - *shift = _mm_load_si128((const __m128i *)shift_ptr); + *shift = _mm_load_si128((const __m128i *)mb_plane->quant_shift); } -static INLINE void load_fp_values(const int16_t *round_ptr, __m128i *round, - const int16_t *quant_ptr, __m128i *quant, +static INLINE void load_b_values32x32( + const struct macroblock_plane *const mb_plane, __m128i *zbin, + __m128i *round, __m128i *quant, const int16_t *dequant_ptr, + __m128i *dequant, __m128i *shift) { + const __m128i one = _mm_set1_epi16(1); + // The 32x32 halves zbin and round. + *zbin = _mm_load_si128((const __m128i *)mb_plane->zbin); + // Shift with rounding. + *zbin = _mm_add_epi16(*zbin, one); + *zbin = _mm_srli_epi16(*zbin, 1); + // x86 has no "greater *or equal*" comparison. Subtract 1 from zbin so + // it is a strict "greater" comparison. + *zbin = _mm_sub_epi16(*zbin, one); + + *round = _mm_load_si128((const __m128i *)mb_plane->round); + *round = _mm_add_epi16(*round, one); + *round = _mm_srli_epi16(*round, 1); + + *quant = _mm_load_si128((const __m128i *)mb_plane->quant); + *dequant = _mm_load_si128((const __m128i *)dequant_ptr); + *shift = _mm_load_si128((const __m128i *)mb_plane->quant_shift); + // I suspect this is not technically OK because quant_shift can be up + // to 1 << 16 and shifting up again will outrange that, but the test is not + // comprehensive enough to catch that and "it's been that way forever" + *shift = _mm_slli_epi16(*shift, 1); +} + +static INLINE void load_fp_values(const struct macroblock_plane *mb_plane, + __m128i *round, __m128i *quant, const int16_t *dequant_ptr, __m128i *dequant) { - *round = _mm_load_si128((const __m128i *)round_ptr); - *quant = _mm_load_si128((const __m128i *)quant_ptr); + *round = _mm_load_si128((const __m128i *)mb_plane->round_fp); + *quant = _mm_load_si128((const __m128i *)mb_plane->quant_fp); *dequant = _mm_load_si128((const __m128i *)dequant_ptr); } |