about | summary | refs | log | tree | commit | diff
path: root/test/vp9_quantize_test.cc
diff options
context:
space:
mode:
Diffstat (limited to 'test/vp9_quantize_test.cc')
-rw-r--r-- test/vp9_quantize_test.cc 309
1 files changed, 205 insertions, 104 deletions
diff --git a/test/vp9_quantize_test.cc b/test/vp9_quantize_test.cc
index ca1062a76..587cec692 100644
--- a/test/vp9_quantize_test.cc
+++ b/test/vp9_quantize_test.cc
@@ -67,6 +67,45 @@ void QuantFPWrapper(const tran_low_t *coeff, intptr_t count,
fn(coeff, count, round, quant, qcoeff, dqcoeff, dequant, eob, scan, iscan);
}
+void GenerateHelperArrays(ACMRandom *rnd, int16_t *zbin, int16_t *round,
+ int16_t *quant, int16_t *quant_shift,
+ int16_t *dequant, int16_t *round_fp,
+ int16_t *quant_fp) {
+ // Max when q == 0. Otherwise, it is 48 for Y and 42 for U/V.
+ constexpr int kMaxQRoundingFactorFp = 64;
+
+ for (int j = 0; j < 2; j++) {
+ // The range is 4 to 1828 in the VP9 tables.
+ const int qlookup = rnd->RandRange(1825) + 4;
+ round_fp[j] = (kMaxQRoundingFactorFp * qlookup) >> 7;
+ quant_fp[j] = (1 << 16) / qlookup;
+
+ // Values determined by deconstructing vp9_init_quantizer().
+ // zbin may be up to 1143 for 8 and 10 bit Y values, or 1200 for 12 bit Y
+ // values or U/V values of any bit depth. This is because y_delta is not
+ // factored into the vp9_ac_quant() call.
+ zbin[j] = rnd->RandRange(1200);
+
+ // round may be up to 685 for Y values or 914 for U/V.
+ round[j] = rnd->RandRange(914);
+ // quant ranges from 1 to -32703 per vp9_init_quantizer(); the draw below
+ // covers -32703 to 0.
+ quant[j] = static_cast<int>(rnd->RandRange(32704)) - 32703;
+ // quant_shift goes up to 1 << 16 in vp9_init_quantizer(); the int16_t draw
+ // below stays under 16384 (1 << 14).
+ quant_shift[j] = rnd->RandRange(16384);
+ // dequant maxes out at 1828 for all cases.
+ dequant[j] = rnd->RandRange(1828);
+ }
+ for (int j = 2; j < 8; j++) {
+ zbin[j] = zbin[1];
+ round_fp[j] = round_fp[1];
+ quant_fp[j] = quant_fp[1];
+ round[j] = round[1];
+ quant[j] = quant[1];
+ quant_shift[j] = quant_shift[1];
+ dequant[j] = dequant[1];
+ }
+}
+
class VP9QuantizeBase : public AbstractBench {
public:
VP9QuantizeBase(vpx_bit_depth_t bit_depth, int max_size, bool is_fp)
@@ -148,6 +187,7 @@ class VP9QuantizeTest : public VP9QuantizeBase,
protected:
virtual void Run();
+ void Speed(bool is_median);
const QuantizeFunc quantize_op_;
const QuantizeFunc ref_quantize_op_;
};
@@ -159,6 +199,101 @@ void VP9QuantizeTest::Run() {
scan_->iscan);
}
+void VP9QuantizeTest::Speed(bool is_median) {
+ ACMRandom rnd(ACMRandom::DeterministicSeed());
+ ASSERT_TRUE(coeff_.Init());
+ ASSERT_TRUE(qcoeff_.Init());
+ ASSERT_TRUE(dqcoeff_.Init());
+ TX_SIZE starting_sz, ending_sz;
+
+ if (max_size_ == 16) {
+ starting_sz = TX_4X4;
+ ending_sz = TX_16X16;
+ } else {
+ starting_sz = TX_32X32;
+ ending_sz = TX_32X32;
+ }
+
+ for (TX_SIZE sz = starting_sz; sz <= ending_sz; ++sz) {
+ // Two passes: i == 0 keeps every |coeff| below zbin, i == 1 lets coeff exceed zbin.
+ for (int i = 0; i < 2; ++i) {
+ // TX_TYPE defines the scan order. That is not relevant to the speed test.
+ // Pick the first one.
+ const TX_TYPE tx_type = DCT_DCT;
+ count_ = (4 << sz) * (4 << sz);
+ scan_ = &vp9_scan_orders[sz][tx_type];
+
+ GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
+ quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
+ quant_fp_ptr_);
+
+ if (i == 0) {
+ // When |coeff values| are less than zbin the results are 0.
+ int threshold = 100;
+ if (max_size_ == 32) {
+ // For 32x32, the threshold is halved. Double it to keep the values
+ // from clearing it.
+ threshold = 200;
+ }
+ for (int j = 0; j < 8; ++j) zbin_ptr_[j] = threshold;
+ coeff_.Set(&rnd, -99, 99);
+ } else if (i == 1) {
+ for (int j = 0; j < 8; ++j) zbin_ptr_[j] = 50;
+ coeff_.Set(&rnd, -500, 500);
+ }
+
+ const char *type =
+ (i == 0) ? "Bypass calculations " : "Full calculations ";
+ char block_size[16];
+ snprintf(block_size, sizeof(block_size), "%dx%d", 4 << sz, 4 << sz);
+ char title[100];
+ snprintf(title, sizeof(title), "%25s %8s ", type, block_size);
+
+ if (is_median) {
+ RunNTimes(10000000 / count_);
+ PrintMedian(title);
+ } else {
+ Buffer<tran_low_t> ref_qcoeff =
+ Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
+ ASSERT_TRUE(ref_qcoeff.Init());
+ Buffer<tran_low_t> ref_dqcoeff =
+ Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
+ ASSERT_TRUE(ref_dqcoeff.Init());
+ uint16_t ref_eob = 0;
+
+ const int kNumTests = 5000000;
+ vpx_usec_timer timer, simd_timer;
+
+ vpx_usec_timer_start(&timer);
+ for (int n = 0; n < kNumTests; ++n) {
+ ref_quantize_op_(coeff_.TopLeftPixel(), count_, zbin_ptr_, r_ptr_,
+ q_ptr_, quant_shift_ptr_, ref_qcoeff.TopLeftPixel(),
+ ref_dqcoeff.TopLeftPixel(), dequant_ptr_, &ref_eob,
+ scan_->scan, scan_->iscan);
+ }
+ vpx_usec_timer_mark(&timer);
+
+ vpx_usec_timer_start(&simd_timer);
+ for (int n = 0; n < kNumTests; ++n) {
+ quantize_op_(coeff_.TopLeftPixel(), count_, zbin_ptr_, r_ptr_, q_ptr_,
+ quant_shift_ptr_, qcoeff_.TopLeftPixel(),
+ dqcoeff_.TopLeftPixel(), dequant_ptr_, &eob_,
+ scan_->scan, scan_->iscan);
+ }
+ vpx_usec_timer_mark(&simd_timer);
+
+ const int elapsed_time =
+ static_cast<int>(vpx_usec_timer_elapsed(&timer));
+ const int simd_elapsed_time =
+ static_cast<int>(vpx_usec_timer_elapsed(&simd_timer));
+ printf("%s c_time = %d \t simd_time = %d \t Gain = %f \n", title,
+ elapsed_time, simd_elapsed_time,
+ ((float)elapsed_time / simd_elapsed_time));
+ }
+ }
+ }
+}
+
// This quantizer compares the AC coefficients to the quantization step size to
// determine if further multiplication operations are needed.
// Based on vp9_quantize_fp_sse2().
@@ -254,45 +389,6 @@ void quantize_fp_32x32_nz_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan, 1);
}
-void GenerateHelperArrays(ACMRandom *rnd, int16_t *zbin, int16_t *round,
- int16_t *quant, int16_t *quant_shift,
- int16_t *dequant, int16_t *round_fp,
- int16_t *quant_fp) {
- // Max when q == 0. Otherwise, it is 48 for Y and 42 for U/V.
- const int max_qrounding_factor_fp = 64;
-
- for (int j = 0; j < 2; j++) {
- // The range is 4 to 1828 in the VP9 tables.
- const int qlookup = rnd->RandRange(1825) + 4;
- round_fp[j] = (max_qrounding_factor_fp * qlookup) >> 7;
- quant_fp[j] = (1 << 16) / qlookup;
-
- // Values determined by deconstructing vp9_init_quantizer().
- // zbin may be up to 1143 for 8 and 10 bit Y values, or 1200 for 12 bit Y
- // values or U/V values of any bit depth. This is because y_delta is not
- // factored into the vp9_ac_quant() call.
- zbin[j] = rnd->RandRange(1200);
-
- // round may be up to 685 for Y values or 914 for U/V.
- round[j] = rnd->RandRange(914);
- // quant ranges from 1 to -32703
- quant[j] = static_cast<int>(rnd->RandRange(32704)) - 32703;
- // quant_shift goes up to 1 << 16.
- quant_shift[j] = rnd->RandRange(16384);
- // dequant maxes out at 1828 for all cases.
- dequant[j] = rnd->RandRange(1828);
- }
- for (int j = 2; j < 8; j++) {
- zbin[j] = zbin[1];
- round_fp[j] = round_fp[1];
- quant_fp[j] = quant_fp[1];
- round[j] = round[1];
- quant[j] = quant[1];
- quant_shift[j] = quant_shift[1];
- dequant[j] = dequant[1];
- }
-}
-
TEST_P(VP9QuantizeTest, OperationCheck) {
ACMRandom rnd(ACMRandom::DeterministicSeed());
ASSERT_TRUE(coeff_.Init());
@@ -403,60 +499,9 @@ TEST_P(VP9QuantizeTest, EOBCheck) {
}
}
-TEST_P(VP9QuantizeTest, DISABLED_Speed) {
- ACMRandom rnd(ACMRandom::DeterministicSeed());
- ASSERT_TRUE(coeff_.Init());
- ASSERT_TRUE(qcoeff_.Init());
- ASSERT_TRUE(dqcoeff_.Init());
- TX_SIZE starting_sz, ending_sz;
-
- if (max_size_ == 16) {
- starting_sz = TX_4X4;
- ending_sz = TX_16X16;
- } else {
- starting_sz = TX_32X32;
- ending_sz = TX_32X32;
- }
-
- for (TX_SIZE sz = starting_sz; sz <= ending_sz; ++sz) {
- // zbin > coeff, zbin < coeff.
- for (int i = 0; i < 2; ++i) {
- // TX_TYPE defines the scan order. That is not relevant to the speed test.
- // Pick the first one.
- const TX_TYPE tx_type = DCT_DCT;
- count_ = (4 << sz) * (4 << sz);
- scan_ = &vp9_scan_orders[sz][tx_type];
-
- GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
- quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
- quant_fp_ptr_);
-
- if (i == 0) {
- // When |coeff values| are less than zbin the results are 0.
- int threshold = 100;
- if (max_size_ == 32) {
- // For 32x32, the threshold is halved. Double it to keep the values
- // from clearing it.
- threshold = 200;
- }
- for (int j = 0; j < 8; ++j) zbin_ptr_[j] = threshold;
- coeff_.Set(&rnd, -99, 99);
- } else if (i == 1) {
- for (int j = 0; j < 8; ++j) zbin_ptr_[j] = 50;
- coeff_.Set(&rnd, -500, 500);
- }
+TEST_P(VP9QuantizeTest, DISABLED_Speed) { Speed(false); }
- RunNTimes(10000000 / count_);
- const char *type =
- (i == 0) ? "Bypass calculations " : "Full calculations ";
- char block_size[16];
- snprintf(block_size, sizeof(block_size), "%dx%d", 4 << sz, 4 << sz);
- char title[100];
- snprintf(title, sizeof(title), "%25s %8s ", type, block_size);
- PrintMedian(title);
- }
- }
-}
+TEST_P(VP9QuantizeTest, DISABLED_SpeedMedian) { Speed(true); }
using std::make_tuple;
@@ -467,6 +512,8 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(
make_tuple(&vpx_quantize_b_sse2, &vpx_quantize_b_c, VPX_BITS_8, 16,
false),
+ make_tuple(&QuantFPWrapper<vp9_quantize_fp_sse2>,
+ &QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8, 16, true),
make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c,
VPX_BITS_8, 16, false),
make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c,
@@ -492,7 +539,6 @@ INSTANTIATE_TEST_SUITE_P(
#endif // HAVE_SSE2
#if HAVE_SSSE3
-#if VPX_ARCH_X86_64
INSTANTIATE_TEST_SUITE_P(
SSSE3, VP9QuantizeTest,
::testing::Values(make_tuple(&vpx_quantize_b_ssse3, &vpx_quantize_b_c,
@@ -506,16 +552,6 @@ INSTANTIATE_TEST_SUITE_P(
make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_ssse3>,
&QuantFPWrapper<quantize_fp_32x32_nz_c>,
VPX_BITS_8, 32, true)));
-#else
-INSTANTIATE_TEST_SUITE_P(
- SSSE3, VP9QuantizeTest,
- ::testing::Values(make_tuple(&vpx_quantize_b_ssse3, &vpx_quantize_b_c,
- VPX_BITS_8, 16, false),
- make_tuple(&vpx_quantize_b_32x32_ssse3,
- &vpx_quantize_b_32x32_c, VPX_BITS_8, 32,
- false)));
-
-#endif // VPX_ARCH_X86_64
#endif // HAVE_SSSE3
#if HAVE_AVX
@@ -529,14 +565,78 @@ INSTANTIATE_TEST_SUITE_P(AVX, VP9QuantizeTest,
#endif // HAVE_AVX
#if VPX_ARCH_X86_64 && HAVE_AVX2
+#if CONFIG_VP9_HIGHBITDEPTH
+INSTANTIATE_TEST_SUITE_P(
+ AVX2, VP9QuantizeTest,
+ ::testing::Values(
+ make_tuple(&QuantFPWrapper<vp9_quantize_fp_avx2>,
+ &QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8, 16, true),
+ make_tuple(&QuantFPWrapper<vp9_highbd_quantize_fp_avx2>,
+ &QuantFPWrapper<vp9_highbd_quantize_fp_c>, VPX_BITS_12, 16,
+ true),
+ make_tuple(&QuantFPWrapper<vp9_highbd_quantize_fp_32x32_avx2>,
+ &QuantFPWrapper<vp9_highbd_quantize_fp_32x32_c>, VPX_BITS_12,
+ 32, true),
+ make_tuple(&vpx_quantize_b_avx2, &vpx_quantize_b_c, VPX_BITS_8, 16,
+ false),
+ make_tuple(&vpx_highbd_quantize_b_avx2, &vpx_highbd_quantize_b_c,
+ VPX_BITS_8, 16, false),
+ make_tuple(&vpx_highbd_quantize_b_avx2, &vpx_highbd_quantize_b_c,
+ VPX_BITS_10, 16, false),
+ make_tuple(&vpx_highbd_quantize_b_avx2, &vpx_highbd_quantize_b_c,
+ VPX_BITS_12, 16, false),
+ make_tuple(&vpx_quantize_b_32x32_avx2, &vpx_quantize_b_32x32_c,
+ VPX_BITS_8, 32, false),
+ make_tuple(&vpx_highbd_quantize_b_32x32_avx2,
+ &vpx_highbd_quantize_b_32x32_c, VPX_BITS_8, 32, false),
+ make_tuple(&vpx_highbd_quantize_b_32x32_avx2,
+ &vpx_highbd_quantize_b_32x32_c, VPX_BITS_10, 32, false),
+ make_tuple(&vpx_highbd_quantize_b_32x32_avx2,
+ &vpx_highbd_quantize_b_32x32_c, VPX_BITS_12, 32, false)));
+#else
INSTANTIATE_TEST_SUITE_P(
AVX2, VP9QuantizeTest,
::testing::Values(make_tuple(&QuantFPWrapper<vp9_quantize_fp_avx2>,
&QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8,
- 16, true)));
+ 16, true),
+ make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_avx2>,
+ &QuantFPWrapper<quantize_fp_32x32_nz_c>,
+ VPX_BITS_8, 32, true),
+ make_tuple(&vpx_quantize_b_avx2, &vpx_quantize_b_c,
+ VPX_BITS_8, 16, false),
+ make_tuple(&vpx_quantize_b_32x32_avx2,
+ &vpx_quantize_b_32x32_c, VPX_BITS_8, 32,
+ false)));
+#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // HAVE_AVX2
#if HAVE_NEON
+#if CONFIG_VP9_HIGHBITDEPTH
+INSTANTIATE_TEST_SUITE_P(
+ NEON, VP9QuantizeTest,
+ ::testing::Values(
+ make_tuple(&vpx_quantize_b_neon, &vpx_quantize_b_c, VPX_BITS_8, 16,
+ false),
+ make_tuple(&vpx_highbd_quantize_b_neon, &vpx_highbd_quantize_b_c,
+ VPX_BITS_8, 16, false),
+ make_tuple(&vpx_highbd_quantize_b_neon, &vpx_highbd_quantize_b_c,
+ VPX_BITS_10, 16, false),
+ make_tuple(&vpx_highbd_quantize_b_neon, &vpx_highbd_quantize_b_c,
+ VPX_BITS_12, 16, false),
+ make_tuple(&vpx_quantize_b_32x32_neon, &vpx_quantize_b_32x32_c,
+ VPX_BITS_8, 32, false),
+ make_tuple(&vpx_highbd_quantize_b_32x32_neon,
+ &vpx_highbd_quantize_b_32x32_c, VPX_BITS_8, 32, false),
+ make_tuple(&vpx_highbd_quantize_b_32x32_neon,
+ &vpx_highbd_quantize_b_32x32_c, VPX_BITS_10, 32, false),
+ make_tuple(&vpx_highbd_quantize_b_32x32_neon,
+ &vpx_highbd_quantize_b_32x32_c, VPX_BITS_12, 32, false),
+ make_tuple(&QuantFPWrapper<vp9_quantize_fp_neon>,
+ &QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8, 16, true),
+ make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_neon>,
+ &QuantFPWrapper<vp9_quantize_fp_32x32_c>, VPX_BITS_8, 32,
+ true)));
+#else
INSTANTIATE_TEST_SUITE_P(
NEON, VP9QuantizeTest,
::testing::Values(make_tuple(&vpx_quantize_b_neon, &vpx_quantize_b_c,
@@ -550,6 +650,7 @@ INSTANTIATE_TEST_SUITE_P(
make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_neon>,
&QuantFPWrapper<vp9_quantize_fp_32x32_c>,
VPX_BITS_8, 32, true)));
+#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // HAVE_NEON
#if HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH