1 files changed, 32 insertions, 33 deletions
diff --git a/vp9/encoder/x86/vp9_quantize_avx2.c b/vp9/encoder/x86/vp9_quantize_avx2.c
index da285be8e..bf44b0867 100644
--- a/vp9/encoder/x86/vp9_quantize_avx2.c
+++ b/vp9/encoder/x86/vp9_quantize_avx2.c
@@ -16,6 +16,8 @@
 #include "vpx_dsp/vpx_dsp_common.h"
 #include "vpx_dsp/x86/bitdepth_conversion_avx2.h"
 #include "vpx_dsp/x86/quantize_sse2.h"
+#include "vp9/common/vp9_scan.h"
+#include "vp9/encoder/vp9_block.h"
 
 // Zero fill 8 positions in the output buffer.
 static VPX_FORCE_INLINE void store_zero_tran_low(tran_low_t *a) {
@@ -29,11 +31,13 @@ static VPX_FORCE_INLINE void store_zero_tran_low(tran_low_t *a) {
 }
 
 static VPX_FORCE_INLINE void load_fp_values_avx2(
-    const int16_t *round_ptr, __m256i *round, const int16_t *quant_ptr,
-    __m256i *quant, const int16_t *dequant_ptr, __m256i *dequant) {
-  *round = _mm256_castsi128_si256(_mm_load_si128((const __m128i *)round_ptr));
+    const struct macroblock_plane *mb_plane, __m256i *round, __m256i *quant,
+    const int16_t *dequant_ptr, __m256i *dequant) {
+  *round = _mm256_castsi128_si256(
+      _mm_load_si128((const __m128i *)mb_plane->round_fp));
   *round = _mm256_permute4x64_epi64(*round, 0x54);
-  *quant = _mm256_castsi128_si256(_mm_load_si128((const __m128i *)quant_ptr));
+  *quant = _mm256_castsi128_si256(
+      _mm_load_si128((const __m128i *)mb_plane->quant_fp));
   *quant = _mm256_permute4x64_epi64(*quant, 0x54);
   *dequant =
       _mm256_castsi128_si256(_mm_load_si128((const __m128i *)dequant_ptr));
@@ -98,13 +102,13 @@ static VPX_FORCE_INLINE void quantize_fp_16(
 }
 
 void vp9_quantize_fp_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
-                          const int16_t *round_ptr, const int16_t *quant_ptr,
+                          const struct macroblock_plane *const mb_plane,
                           tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                           const int16_t *dequant_ptr, uint16_t *eob_ptr,
-                          const int16_t *scan, const int16_t *iscan) {
+                          const struct ScanOrder *const scan_order) {
   __m256i round, quant, dequant, thr;
   __m256i eob_max = _mm256_setzero_si256();
-  (void)scan;
+  const int16_t *iscan = scan_order->iscan;
 
   coeff_ptr += n_coeffs;
   iscan += n_coeffs;
@@ -113,8 +117,7 @@ void vp9_quantize_fp_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
   n_coeffs = -n_coeffs;
 
   // Setup global values
-  load_fp_values_avx2(round_ptr, &round, quant_ptr, &quant, dequant_ptr,
-                      &dequant);
+  load_fp_values_avx2(mb_plane, &round, &quant, dequant_ptr, &dequant);
   thr = _mm256_setzero_si256();
 
   quantize_fp_16(&round, &quant, &dequant, &thr, coeff_ptr + n_coeffs,
@@ -203,14 +206,13 @@ static VPX_FORCE_INLINE void quantize_fp_32x32_16(
 }
 
 void vp9_quantize_fp_32x32_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
-                                const int16_t *round_ptr,
-                                const int16_t *quant_ptr,
+                                const struct macroblock_plane *const mb_plane,
                                 tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                                 const int16_t *dequant_ptr, uint16_t *eob_ptr,
-                                const int16_t *scan, const int16_t *iscan) {
+                                const struct ScanOrder *const scan_order) {
   __m256i round, quant, dequant, thr;
   __m256i eob_max = _mm256_setzero_si256();
-  (void)scan;
+  const int16_t *iscan = scan_order->iscan;
 
   coeff_ptr += n_coeffs;
   iscan += n_coeffs;
@@ -219,8 +221,7 @@ void vp9_quantize_fp_32x32_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
   n_coeffs = -n_coeffs;
 
   // Setup global values
-  load_fp_values_avx2(round_ptr, &round, quant_ptr, &quant, dequant_ptr,
-                      &dequant);
+  load_fp_values_avx2(mb_plane, &round, &quant, dequant_ptr, &dequant);
   thr = _mm256_srli_epi16(dequant, 2);
   quant = _mm256_slli_epi16(quant, 1);
   {
@@ -286,16 +287,17 @@ static VPX_FORCE_INLINE __m256i highbd_init_256(const int16_t *val_ptr) {
 }
 
 static VPX_FORCE_INLINE void highbd_load_fp_values(
-    const int16_t *round_ptr, __m256i *round, const int16_t *quant_ptr,
-    __m256i *quant, const int16_t *dequant_ptr, __m256i *dequant) {
-  *round = highbd_init_256(round_ptr);
-  *quant = highbd_init_256(quant_ptr);
+    const struct macroblock_plane *mb_plane, __m256i *round, __m256i *quant,
+    const int16_t *dequant_ptr, __m256i *dequant) {
+  *round = highbd_init_256(mb_plane->round_fp);
+  *quant = highbd_init_256(mb_plane->quant_fp);
   *dequant = highbd_init_256(dequant_ptr);
 }
 
 static VPX_FORCE_INLINE __m256i highbd_get_max_lane_eob(
     const int16_t *iscan_ptr, __m256i eobmax, __m256i nz_mask) {
-  const __m256i packed_nz_mask = _mm256_packs_epi32(nz_mask, nz_mask);
+  const __m256i packed_nz_mask =
+      _mm256_packs_epi32(nz_mask, _mm256_setzero_si256());
   const __m256i packed_nz_mask_perm =
       _mm256_permute4x64_epi64(packed_nz_mask, 0xD8);
   const __m256i iscan =
@@ -324,16 +326,15 @@ static VPX_FORCE_INLINE void highbd_quantize_fp(
 }
 
 void vp9_highbd_quantize_fp_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
-                                 const int16_t *round_ptr,
-                                 const int16_t *quant_ptr,
+                                 const struct macroblock_plane *const mb_plane,
                                  tran_low_t *qcoeff_ptr,
                                  tran_low_t *dqcoeff_ptr,
                                  const int16_t *dequant_ptr, uint16_t *eob_ptr,
-                                 const int16_t *scan, const int16_t *iscan) {
+                                 const struct ScanOrder *const scan_order) {
   const int step = 8;
   __m256i round, quant, dequant;
   __m256i eob_max = _mm256_setzero_si256();
-  (void)scan;
+  const int16_t *iscan = scan_order->iscan;
 
   coeff_ptr += n_coeffs;
   iscan += n_coeffs;
@@ -342,8 +343,7 @@ void vp9_highbd_quantize_fp_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
   n_coeffs = -n_coeffs;
 
   // Setup global values
-  highbd_load_fp_values(round_ptr, &round, quant_ptr, &quant, dequant_ptr,
-                        &dequant);
+  highbd_load_fp_values(mb_plane, &round, &quant, dequant_ptr, &dequant);
 
   highbd_quantize_fp(&round, &quant, &dequant, coeff_ptr + n_coeffs,
                      iscan + n_coeffs, qcoeff_ptr + n_coeffs,
@@ -390,14 +390,14 @@ static VPX_FORCE_INLINE void highbd_quantize_fp_32x32(
 }
 
 void vp9_highbd_quantize_fp_32x32_avx2(
-    const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *round_ptr,
-    const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
-    const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan,
-    const int16_t *iscan) {
+    const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+    const struct macroblock_plane *const mb_plane, tran_low_t *qcoeff_ptr,
+    tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr,
+    const struct ScanOrder *const scan_order) {
   const int step = 8;
   __m256i round, quant, dequant, thr;
   __m256i eob_max = _mm256_setzero_si256();
-  (void)scan;
+  const int16_t *iscan = scan_order->iscan;
 
   coeff_ptr += n_coeffs;
   iscan += n_coeffs;
@@ -406,8 +406,7 @@ void vp9_highbd_quantize_fp_32x32_avx2(
   n_coeffs = -n_coeffs;
 
   // Setup global values
-  highbd_load_fp_values(round_ptr, &round, quant_ptr, &quant, dequant_ptr,
-                        &dequant);
+  highbd_load_fp_values(mb_plane, &round, &quant, dequant_ptr, &dequant);
   thr = _mm256_srli_epi32(dequant, 2);
   // Subtracting 1 here eliminates a _mm256_cmpeq_epi32() instruction when
   // calculating the zbin mask.