aboutsummaryrefslogtreecommitdiff
path: root/vp9/encoder/x86/vp9_quantize_avx2.c
diff options
context:
space:
mode:
Diffstat (limited to 'vp9/encoder/x86/vp9_quantize_avx2.c')
-rw-r--r--vp9/encoder/x86/vp9_quantize_avx2.c65
1 files changed, 32 insertions, 33 deletions
diff --git a/vp9/encoder/x86/vp9_quantize_avx2.c b/vp9/encoder/x86/vp9_quantize_avx2.c
index da285be8e..bf44b0867 100644
--- a/vp9/encoder/x86/vp9_quantize_avx2.c
+++ b/vp9/encoder/x86/vp9_quantize_avx2.c
@@ -16,6 +16,8 @@
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_dsp/x86/bitdepth_conversion_avx2.h"
#include "vpx_dsp/x86/quantize_sse2.h"
+#include "vp9/common/vp9_scan.h"
+#include "vp9/encoder/vp9_block.h"
// Zero fill 8 positions in the output buffer.
static VPX_FORCE_INLINE void store_zero_tran_low(tran_low_t *a) {
@@ -29,11 +31,13 @@ static VPX_FORCE_INLINE void store_zero_tran_low(tran_low_t *a) {
}
static VPX_FORCE_INLINE void load_fp_values_avx2(
- const int16_t *round_ptr, __m256i *round, const int16_t *quant_ptr,
- __m256i *quant, const int16_t *dequant_ptr, __m256i *dequant) {
- *round = _mm256_castsi128_si256(_mm_load_si128((const __m128i *)round_ptr));
+ const struct macroblock_plane *mb_plane, __m256i *round, __m256i *quant,
+ const int16_t *dequant_ptr, __m256i *dequant) {
+ *round = _mm256_castsi128_si256(
+ _mm_load_si128((const __m128i *)mb_plane->round_fp));
*round = _mm256_permute4x64_epi64(*round, 0x54);
- *quant = _mm256_castsi128_si256(_mm_load_si128((const __m128i *)quant_ptr));
+ *quant = _mm256_castsi128_si256(
+ _mm_load_si128((const __m128i *)mb_plane->quant_fp));
*quant = _mm256_permute4x64_epi64(*quant, 0x54);
*dequant =
_mm256_castsi128_si256(_mm_load_si128((const __m128i *)dequant_ptr));
@@ -98,13 +102,13 @@ static VPX_FORCE_INLINE void quantize_fp_16(
}
void vp9_quantize_fp_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
- const int16_t *round_ptr, const int16_t *quant_ptr,
+ const struct macroblock_plane *const mb_plane,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan) {
+ const struct ScanOrder *const scan_order) {
__m256i round, quant, dequant, thr;
__m256i eob_max = _mm256_setzero_si256();
- (void)scan;
+ const int16_t *iscan = scan_order->iscan;
coeff_ptr += n_coeffs;
iscan += n_coeffs;
@@ -113,8 +117,7 @@ void vp9_quantize_fp_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
n_coeffs = -n_coeffs;
// Setup global values
- load_fp_values_avx2(round_ptr, &round, quant_ptr, &quant, dequant_ptr,
- &dequant);
+ load_fp_values_avx2(mb_plane, &round, &quant, dequant_ptr, &dequant);
thr = _mm256_setzero_si256();
quantize_fp_16(&round, &quant, &dequant, &thr, coeff_ptr + n_coeffs,
@@ -203,14 +206,13 @@ static VPX_FORCE_INLINE void quantize_fp_32x32_16(
}
void vp9_quantize_fp_32x32_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
- const int16_t *round_ptr,
- const int16_t *quant_ptr,
+ const struct macroblock_plane *const mb_plane,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan) {
+ const struct ScanOrder *const scan_order) {
__m256i round, quant, dequant, thr;
__m256i eob_max = _mm256_setzero_si256();
- (void)scan;
+ const int16_t *iscan = scan_order->iscan;
coeff_ptr += n_coeffs;
iscan += n_coeffs;
@@ -219,8 +221,7 @@ void vp9_quantize_fp_32x32_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
n_coeffs = -n_coeffs;
// Setup global values
- load_fp_values_avx2(round_ptr, &round, quant_ptr, &quant, dequant_ptr,
- &dequant);
+ load_fp_values_avx2(mb_plane, &round, &quant, dequant_ptr, &dequant);
thr = _mm256_srli_epi16(dequant, 2);
quant = _mm256_slli_epi16(quant, 1);
{
@@ -286,16 +287,17 @@ static VPX_FORCE_INLINE __m256i highbd_init_256(const int16_t *val_ptr) {
}
static VPX_FORCE_INLINE void highbd_load_fp_values(
- const int16_t *round_ptr, __m256i *round, const int16_t *quant_ptr,
- __m256i *quant, const int16_t *dequant_ptr, __m256i *dequant) {
- *round = highbd_init_256(round_ptr);
- *quant = highbd_init_256(quant_ptr);
+ const struct macroblock_plane *mb_plane, __m256i *round, __m256i *quant,
+ const int16_t *dequant_ptr, __m256i *dequant) {
+ *round = highbd_init_256(mb_plane->round_fp);
+ *quant = highbd_init_256(mb_plane->quant_fp);
*dequant = highbd_init_256(dequant_ptr);
}
static VPX_FORCE_INLINE __m256i highbd_get_max_lane_eob(
const int16_t *iscan_ptr, __m256i eobmax, __m256i nz_mask) {
- const __m256i packed_nz_mask = _mm256_packs_epi32(nz_mask, nz_mask);
+ const __m256i packed_nz_mask =
+ _mm256_packs_epi32(nz_mask, _mm256_setzero_si256());
const __m256i packed_nz_mask_perm =
_mm256_permute4x64_epi64(packed_nz_mask, 0xD8);
const __m256i iscan =
@@ -324,16 +326,15 @@ static VPX_FORCE_INLINE void highbd_quantize_fp(
}
void vp9_highbd_quantize_fp_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
- const int16_t *round_ptr,
- const int16_t *quant_ptr,
+ const struct macroblock_plane *const mb_plane,
tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan) {
+ const struct ScanOrder *const scan_order) {
const int step = 8;
__m256i round, quant, dequant;
__m256i eob_max = _mm256_setzero_si256();
- (void)scan;
+ const int16_t *iscan = scan_order->iscan;
coeff_ptr += n_coeffs;
iscan += n_coeffs;
@@ -342,8 +343,7 @@ void vp9_highbd_quantize_fp_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
n_coeffs = -n_coeffs;
// Setup global values
- highbd_load_fp_values(round_ptr, &round, quant_ptr, &quant, dequant_ptr,
- &dequant);
+ highbd_load_fp_values(mb_plane, &round, &quant, dequant_ptr, &dequant);
highbd_quantize_fp(&round, &quant, &dequant, coeff_ptr + n_coeffs,
iscan + n_coeffs, qcoeff_ptr + n_coeffs,
@@ -390,14 +390,14 @@ static VPX_FORCE_INLINE void highbd_quantize_fp_32x32(
}
void vp9_highbd_quantize_fp_32x32_avx2(
- const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *round_ptr,
- const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
- const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan,
- const int16_t *iscan) {
+ const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ const struct macroblock_plane *const mb_plane, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
+ const struct ScanOrder *const scan_order) {
const int step = 8;
__m256i round, quant, dequant, thr;
__m256i eob_max = _mm256_setzero_si256();
- (void)scan;
+ const int16_t *iscan = scan_order->iscan;
coeff_ptr += n_coeffs;
iscan += n_coeffs;
@@ -406,8 +406,7 @@ void vp9_highbd_quantize_fp_32x32_avx2(
n_coeffs = -n_coeffs;
// Setup global values
- highbd_load_fp_values(round_ptr, &round, quant_ptr, &quant, dequant_ptr,
- &dequant);
+ highbd_load_fp_values(mb_plane, &round, &quant, dequant_ptr, &dequant);
thr = _mm256_srli_epi32(dequant, 2);
// Subtracting 1 here eliminates a _mm256_cmpeq_epi32() instruction when
// calculating the zbin mask.