aboutsummaryrefslogtreecommitdiff
path: root/vpx_dsp/x86/quantize_sse2.h
diff options
context:
space:
mode:
Diffstat (limited to 'vpx_dsp/x86/quantize_sse2.h')
-rw-r--r--vpx_dsp/x86/quantize_sse2.h51
1 files changed, 39 insertions, 12 deletions
diff --git a/vpx_dsp/x86/quantize_sse2.h b/vpx_dsp/x86/quantize_sse2.h
index 27bfb4e41..82c755a0c 100644
--- a/vpx_dsp/x86/quantize_sse2.h
+++ b/vpx_dsp/x86/quantize_sse2.h
@@ -15,26 +15,53 @@
#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
+#include "vp9/encoder/vp9_block.h"
-static INLINE void load_b_values(const int16_t *zbin_ptr, __m128i *zbin,
- const int16_t *round_ptr, __m128i *round,
- const int16_t *quant_ptr, __m128i *quant,
+static INLINE void load_b_values(const struct macroblock_plane *const mb_plane,
+ __m128i *zbin, __m128i *round, __m128i *quant,
const int16_t *dequant_ptr, __m128i *dequant,
- const int16_t *shift_ptr, __m128i *shift) {
- *zbin = _mm_load_si128((const __m128i *)zbin_ptr);
- *round = _mm_load_si128((const __m128i *)round_ptr);
- *quant = _mm_load_si128((const __m128i *)quant_ptr);
+ __m128i *shift) {
+ *zbin = _mm_load_si128((const __m128i *)mb_plane->zbin);
+ *round = _mm_load_si128((const __m128i *)mb_plane->round);
+ *quant = _mm_load_si128((const __m128i *)mb_plane->quant);
*zbin = _mm_sub_epi16(*zbin, _mm_set1_epi16(1));
*dequant = _mm_load_si128((const __m128i *)dequant_ptr);
- *shift = _mm_load_si128((const __m128i *)shift_ptr);
+ *shift = _mm_load_si128((const __m128i *)mb_plane->quant_shift);
}
-static INLINE void load_fp_values(const int16_t *round_ptr, __m128i *round,
- const int16_t *quant_ptr, __m128i *quant,
+static INLINE void load_b_values32x32(
+ const struct macroblock_plane *const mb_plane, __m128i *zbin,
+ __m128i *round, __m128i *quant, const int16_t *dequant_ptr,
+ __m128i *dequant, __m128i *shift) {
+ const __m128i one = _mm_set1_epi16(1);
+ // The 32x32 variant uses half of zbin and round.
+ *zbin = _mm_load_si128((const __m128i *)mb_plane->zbin);
+ // Shift with rounding.
+ *zbin = _mm_add_epi16(*zbin, one);
+ *zbin = _mm_srli_epi16(*zbin, 1);
+ // x86 has no "greater *or equal*" comparison. Subtract 1 from zbin so
+ // it is a strict "greater" comparison.
+ *zbin = _mm_sub_epi16(*zbin, one);
+
+ *round = _mm_load_si128((const __m128i *)mb_plane->round);
+ *round = _mm_add_epi16(*round, one);
+ *round = _mm_srli_epi16(*round, 1);
+
+ *quant = _mm_load_si128((const __m128i *)mb_plane->quant);
+ *dequant = _mm_load_si128((const __m128i *)dequant_ptr);
+ *shift = _mm_load_si128((const __m128i *)mb_plane->quant_shift);
+ // I suspect this is not technically OK because quant_shift can be up to
+ // 1 << 16 and shifting left again will overflow its 16-bit lane, but the test
+ // is not comprehensive enough to catch that and "it's been that way forever".
+ *shift = _mm_slli_epi16(*shift, 1);
+}
+
+static INLINE void load_fp_values(const struct macroblock_plane *mb_plane,
+ __m128i *round, __m128i *quant,
const int16_t *dequant_ptr,
__m128i *dequant) {
- *round = _mm_load_si128((const __m128i *)round_ptr);
- *quant = _mm_load_si128((const __m128i *)quant_ptr);
+ *round = _mm_load_si128((const __m128i *)mb_plane->round_fp);
+ *quant = _mm_load_si128((const __m128i *)mb_plane->quant_fp);
*dequant = _mm_load_si128((const __m128i *)dequant_ptr);
}