Diffstat (limited to 'libvpx/vp9')
-rw-r--r--  libvpx/vp9/common/arm/neon/vp9_highbd_iht16x16_add_neon.c  445
-rw-r--r--  libvpx/vp9/common/arm/neon/vp9_highbd_iht4x4_add_neon.c  181
-rw-r--r--  libvpx/vp9/common/arm/neon/vp9_highbd_iht8x8_add_neon.c  345
-rw-r--r--  libvpx/vp9/common/arm/neon/vp9_iht16x16_add_neon.c  279
-rw-r--r--  libvpx/vp9/common/arm/neon/vp9_iht4x4_add_neon.c  229
-rw-r--r--  libvpx/vp9/common/arm/neon/vp9_iht8x8_add_neon.c  546
-rw-r--r--  libvpx/vp9/common/arm/neon/vp9_iht_neon.h  272
-rw-r--r--  libvpx/vp9/common/ppc/vp9_idct_vsx.c  115
-rw-r--r--  libvpx/vp9/common/vp9_alloccommon.h  6
-rw-r--r--  libvpx/vp9/common/vp9_blockd.h  11
-rw-r--r--  libvpx/vp9/common/vp9_common.h  6
-rw-r--r--  libvpx/vp9/common/vp9_common_data.c  2
-rw-r--r--  libvpx/vp9/common/vp9_common_data.h  6
-rw-r--r--  libvpx/vp9/common/vp9_entropy.c  2
-rw-r--r--  libvpx/vp9/common/vp9_entropy.h  7
-rw-r--r--  libvpx/vp9/common/vp9_entropymode.c  3
-rw-r--r--  libvpx/vp9/common/vp9_entropymode.h  6
-rw-r--r--  libvpx/vp9/common/vp9_entropymv.c  4
-rw-r--r--  libvpx/vp9/common/vp9_entropymv.h  10
-rw-r--r--  libvpx/vp9/common/vp9_enums.h  6
-rw-r--r--  libvpx/vp9/common/vp9_filter.c  18
-rw-r--r--  libvpx/vp9/common/vp9_filter.h  9
-rw-r--r--  libvpx/vp9/common/vp9_frame_buffers.h  6
-rw-r--r--  libvpx/vp9/common/vp9_idct.h  6
-rw-r--r--  libvpx/vp9/common/vp9_loopfilter.c  24
-rw-r--r--  libvpx/vp9/common/vp9_loopfilter.h  10
-rw-r--r--  libvpx/vp9/common/vp9_mfqe.h  6
-rw-r--r--  libvpx/vp9/common/vp9_mv.h  6
-rw-r--r--  libvpx/vp9/common/vp9_mvref_common.h  6
-rw-r--r--  libvpx/vp9/common/vp9_onyxc_int.h  24
-rw-r--r--  libvpx/vp9/common/vp9_postproc.c  4
-rw-r--r--  libvpx/vp9/common/vp9_postproc.h  8
-rw-r--r--  libvpx/vp9/common/vp9_ppflags.h  6
-rw-r--r--  libvpx/vp9/common/vp9_pred_common.c  31
-rw-r--r--  libvpx/vp9/common/vp9_pred_common.h  16
-rw-r--r--  libvpx/vp9/common/vp9_quant_common.h  6
-rw-r--r--  libvpx/vp9/common/vp9_reconinter.h  12
-rw-r--r--  libvpx/vp9/common/vp9_reconintra.h  6
-rw-r--r--  libvpx/vp9/common/vp9_rtcd_defs.pl  28
-rw-r--r--  libvpx/vp9/common/vp9_scale.h  8
-rw-r--r--  libvpx/vp9/common/vp9_scan.h  6
-rw-r--r--  libvpx/vp9/common/vp9_seg_common.h  6
-rw-r--r--  libvpx/vp9/common/vp9_thread_common.c  214
-rw-r--r--  libvpx/vp9/common/vp9_thread_common.h  29
-rw-r--r--  libvpx/vp9/common/vp9_tile_common.h  6
-rw-r--r--  libvpx/vp9/common/x86/vp9_highbd_iht16x16_add_sse4.c  419
-rw-r--r--  libvpx/vp9/common/x86/vp9_highbd_iht4x4_add_sse4.c  131
-rw-r--r--  libvpx/vp9/common/x86/vp9_highbd_iht8x8_add_sse4.c  255
-rw-r--r--  libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c  40
-rw-r--r--  libvpx/vp9/decoder/vp9_decodeframe.c  557
-rw-r--r--  libvpx/vp9/decoder/vp9_decodeframe.h  6
-rw-r--r--  libvpx/vp9/decoder/vp9_decodemv.h  6
-rw-r--r--  libvpx/vp9/decoder/vp9_decoder.c  122
-rw-r--r--  libvpx/vp9/decoder/vp9_decoder.h  30
-rw-r--r--  libvpx/vp9/decoder/vp9_detokenize.h  6
-rw-r--r--  libvpx/vp9/decoder/vp9_dsubexp.h  6
-rw-r--r--  libvpx/vp9/encoder/arm/neon/vp9_dct_neon.c  8
-rw-r--r--  libvpx/vp9/encoder/arm/neon/vp9_quantize_neon.c  26
-rw-r--r--  libvpx/vp9/encoder/mips/msa/vp9_fdct_msa.h  6
-rw-r--r--  libvpx/vp9/encoder/ppc/vp9_quantize_vsx.c  292
-rw-r--r--  libvpx/vp9/encoder/vp9_alt_ref_aq.h  6
-rw-r--r--  libvpx/vp9/encoder/vp9_aq_360.h  6
-rw-r--r--  libvpx/vp9/encoder/vp9_aq_complexity.h  6
-rw-r--r--  libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c  133
-rw-r--r--  libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h  10
-rw-r--r--  libvpx/vp9/encoder/vp9_aq_variance.c  35
-rw-r--r--  libvpx/vp9/encoder/vp9_aq_variance.h  10
-rw-r--r--  libvpx/vp9/encoder/vp9_bitstream.c  65
-rw-r--r--  libvpx/vp9/encoder/vp9_bitstream.h  14
-rw-r--r--  libvpx/vp9/encoder/vp9_block.h  20
-rw-r--r--  libvpx/vp9/encoder/vp9_blockiness.c  1
-rw-r--r--  libvpx/vp9/encoder/vp9_blockiness.h  26
-rw-r--r--  libvpx/vp9/encoder/vp9_context_tree.c  28
-rw-r--r--  libvpx/vp9/encoder/vp9_context_tree.h  9
-rw-r--r--  libvpx/vp9/encoder/vp9_cost.h  6
-rw-r--r--  libvpx/vp9/encoder/vp9_denoiser.c  224
-rw-r--r--  libvpx/vp9/encoder/vp9_denoiser.h  30
-rw-r--r--  libvpx/vp9/encoder/vp9_encodeframe.c  1540
-rw-r--r--  libvpx/vp9/encoder/vp9_encodeframe.h  6
-rw-r--r--  libvpx/vp9/encoder/vp9_encodemb.c  97
-rw-r--r--  libvpx/vp9/encoder/vp9_encodemb.h  6
-rw-r--r--  libvpx/vp9/encoder/vp9_encodemv.h  8
-rw-r--r--  libvpx/vp9/encoder/vp9_encoder.c  2130
-rw-r--r--  libvpx/vp9/encoder/vp9_encoder.h  198
-rw-r--r--  libvpx/vp9/encoder/vp9_ethread.c  69
-rw-r--r--  libvpx/vp9/encoder/vp9_ethread.h  10
-rw-r--r--  libvpx/vp9/encoder/vp9_extend.h  6
-rw-r--r--  libvpx/vp9/encoder/vp9_firstpass.c  941
-rw-r--r--  libvpx/vp9/encoder/vp9_firstpass.h  40
-rw-r--r--  libvpx/vp9/encoder/vp9_job_queue.h  6
-rw-r--r--  libvpx/vp9/encoder/vp9_lookahead.h  10
-rw-r--r--  libvpx/vp9/encoder/vp9_mbgraph.c  5
-rw-r--r--  libvpx/vp9/encoder/vp9_mbgraph.h  10
-rw-r--r--  libvpx/vp9/encoder/vp9_mcomp.c  664
-rw-r--r--  libvpx/vp9/encoder/vp9_mcomp.h  46
-rw-r--r--  libvpx/vp9/encoder/vp9_multi_thread.c  50
-rw-r--r--  libvpx/vp9/encoder/vp9_multi_thread.h  9
-rw-r--r--  libvpx/vp9/encoder/vp9_noise_estimate.c  10
-rw-r--r--  libvpx/vp9/encoder/vp9_noise_estimate.h  6
-rw-r--r--  libvpx/vp9/encoder/vp9_partition_models.h  1172
-rw-r--r--  libvpx/vp9/encoder/vp9_picklpf.c  10
-rw-r--r--  libvpx/vp9/encoder/vp9_picklpf.h  6
-rw-r--r--  libvpx/vp9/encoder/vp9_pickmode.c  785
-rw-r--r--  libvpx/vp9/encoder/vp9_pickmode.h  6
-rw-r--r--  libvpx/vp9/encoder/vp9_quantize.c  18
-rw-r--r--  libvpx/vp9/encoder/vp9_quantize.h  6
-rw-r--r--  libvpx/vp9/encoder/vp9_ratectrl.c  1166
-rw-r--r--  libvpx/vp9/encoder/vp9_ratectrl.h  42
-rw-r--r--  libvpx/vp9/encoder/vp9_rd.c  127
-rw-r--r--  libvpx/vp9/encoder/vp9_rd.h  22
-rw-r--r--  libvpx/vp9/encoder/vp9_rdopt.c  313
-rw-r--r--  libvpx/vp9/encoder/vp9_rdopt.h  6
-rw-r--r--  libvpx/vp9/encoder/vp9_resize.c  12
-rw-r--r--  libvpx/vp9/encoder/vp9_resize.h  6
-rw-r--r--  libvpx/vp9/encoder/vp9_segmentation.c  13
-rw-r--r--  libvpx/vp9/encoder/vp9_segmentation.h  8
-rw-r--r--  libvpx/vp9/encoder/vp9_skin_detection.h  6
-rw-r--r--  libvpx/vp9/encoder/vp9_speed_features.c  260
-rw-r--r--  libvpx/vp9/encoder/vp9_speed_features.h  125
-rw-r--r--  libvpx/vp9/encoder/vp9_subexp.h  6
-rw-r--r--  libvpx/vp9/encoder/vp9_svc_layercontext.c  728
-rw-r--r--  libvpx/vp9/encoder/vp9_svc_layercontext.h  126
-rw-r--r--  libvpx/vp9/encoder/vp9_temporal_filter.c  645
-rw-r--r--  libvpx/vp9/encoder/vp9_temporal_filter.h  21
-rw-r--r--  libvpx/vp9/encoder/vp9_tokenize.h  6
-rw-r--r--  libvpx/vp9/encoder/vp9_treewriter.h  6
-rw-r--r--  libvpx/vp9/encoder/x86/temporal_filter_sse4.c  2
-rw-r--r--  libvpx/vp9/encoder/x86/vp9_dct_intrin_sse2.c  59
-rw-r--r--  libvpx/vp9/encoder/x86/vp9_dct_ssse3.c  24
-rw-r--r--  libvpx/vp9/encoder/x86/vp9_highbd_block_error_intrin_sse2.c  17
-rw-r--r--  libvpx/vp9/encoder/x86/vp9_quantize_avx2.c  139
-rw-r--r--  libvpx/vp9/encoder/x86/vp9_quantize_sse2.c  16
-rw-r--r--  libvpx/vp9/vp9_common.mk  39
-rw-r--r--  libvpx/vp9/vp9_cx_iface.c  269
-rw-r--r--  libvpx/vp9/vp9_dx_iface.c  39
-rw-r--r--  libvpx/vp9/vp9_dx_iface.h  8
-rw-r--r--  libvpx/vp9/vp9_iface_common.h  12
-rw-r--r--  libvpx/vp9/vp9cx.mk  5
138 files changed, 13738 insertions, 3926 deletions
diff --git a/libvpx/vp9/common/arm/neon/vp9_highbd_iht16x16_add_neon.c b/libvpx/vp9/common/arm/neon/vp9_highbd_iht16x16_add_neon.c
new file mode 100644
index 000000000..057d2e9c0
--- /dev/null
+++ b/libvpx/vp9/common/arm/neon/vp9_highbd_iht16x16_add_neon.c
@@ -0,0 +1,445 @@
+/*
+ * Copyright (c) 2018 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+#include "./vpx_dsp_rtcd.h"
+#include "vp9/common/vp9_enums.h"
+#include "vp9/common/arm/neon/vp9_iht_neon.h"
+#include "vpx_dsp/arm/highbd_idct_neon.h"
+#include "vpx_dsp/arm/idct_neon.h"
+#include "vpx_dsp/arm/transpose_neon.h"
+#include "vpx_dsp/inv_txfm.h"
+
+// Use macros to make sure the argument lane is passed in as a constant
+// integer.
+
+#define vmull_lane_s32_dual(in, c, lane, out) \
+ do { \
+ out[0].val[0] = vmull_lane_s32(vget_low_s32(in.val[0]), c, lane); \
+ out[0].val[1] = vmull_lane_s32(vget_low_s32(in.val[1]), c, lane); \
+ out[1].val[0] = vmull_lane_s32(vget_high_s32(in.val[0]), c, lane); \
+ out[1].val[1] = vmull_lane_s32(vget_high_s32(in.val[1]), c, lane); \
+ } while (0)
+
+#define vmlal_lane_s32_dual(in, c, lane, out) \
+ do { \
+ out[0].val[0] = \
+ vmlal_lane_s32(out[0].val[0], vget_low_s32(in.val[0]), c, lane); \
+ out[0].val[1] = \
+ vmlal_lane_s32(out[0].val[1], vget_low_s32(in.val[1]), c, lane); \
+ out[1].val[0] = \
+ vmlal_lane_s32(out[1].val[0], vget_high_s32(in.val[0]), c, lane); \
+ out[1].val[1] = \
+ vmlal_lane_s32(out[1].val[1], vget_high_s32(in.val[1]), c, lane); \
+ } while (0)
+
+#define vmlsl_lane_s32_dual(in, c, lane, out) \
+ do { \
+ out[0].val[0] = \
+ vmlsl_lane_s32(out[0].val[0], vget_low_s32(in.val[0]), c, lane); \
+ out[0].val[1] = \
+ vmlsl_lane_s32(out[0].val[1], vget_low_s32(in.val[1]), c, lane); \
+ out[1].val[0] = \
+ vmlsl_lane_s32(out[1].val[0], vget_high_s32(in.val[0]), c, lane); \
+ out[1].val[1] = \
+ vmlsl_lane_s32(out[1].val[1], vget_high_s32(in.val[1]), c, lane); \
+ } while (0)
+
+static INLINE int32x4x2_t
+highbd_dct_const_round_shift_low_8(const int64x2x2_t *const in) {
+ int32x4x2_t out;
+ out.val[0] = vcombine_s32(vrshrn_n_s64(in[0].val[0], DCT_CONST_BITS),
+ vrshrn_n_s64(in[1].val[0], DCT_CONST_BITS));
+ out.val[1] = vcombine_s32(vrshrn_n_s64(in[0].val[1], DCT_CONST_BITS),
+ vrshrn_n_s64(in[1].val[1], DCT_CONST_BITS));
+ return out;
+}
+
+#define highbd_iadst_half_butterfly(in, c, lane, out) \
+ do { \
+ int64x2x2_t t[2]; \
+ vmull_lane_s32_dual(in, c, lane, t); \
+ out = highbd_dct_const_round_shift_low_8(t); \
+ } while (0)
+
+#define highbd_iadst_butterfly(in0, in1, c, lane0, lane1, s0, s1) \
+ do { \
+ vmull_lane_s32_dual(in0, c, lane0, s0); \
+ vmull_lane_s32_dual(in0, c, lane1, s1); \
+ vmlal_lane_s32_dual(in1, c, lane1, s0); \
+ vmlsl_lane_s32_dual(in1, c, lane0, s1); \
+ } while (0)
+
+static INLINE int32x4x2_t vaddq_s32_dual(const int32x4x2_t in0,
+ const int32x4x2_t in1) {
+ int32x4x2_t out;
+ out.val[0] = vaddq_s32(in0.val[0], in1.val[0]);
+ out.val[1] = vaddq_s32(in0.val[1], in1.val[1]);
+ return out;
+}
+
+static INLINE int64x2x2_t vaddq_s64_dual(const int64x2x2_t in0,
+ const int64x2x2_t in1) {
+ int64x2x2_t out;
+ out.val[0] = vaddq_s64(in0.val[0], in1.val[0]);
+ out.val[1] = vaddq_s64(in0.val[1], in1.val[1]);
+ return out;
+}
+
+static INLINE int32x4x2_t vsubq_s32_dual(const int32x4x2_t in0,
+ const int32x4x2_t in1) {
+ int32x4x2_t out;
+ out.val[0] = vsubq_s32(in0.val[0], in1.val[0]);
+ out.val[1] = vsubq_s32(in0.val[1], in1.val[1]);
+ return out;
+}
+
+static INLINE int64x2x2_t vsubq_s64_dual(const int64x2x2_t in0,
+ const int64x2x2_t in1) {
+ int64x2x2_t out;
+ out.val[0] = vsubq_s64(in0.val[0], in1.val[0]);
+ out.val[1] = vsubq_s64(in0.val[1], in1.val[1]);
+ return out;
+}
+
+static INLINE int32x4x2_t vcombine_s32_dual(const int32x2x2_t in0,
+ const int32x2x2_t in1) {
+ int32x4x2_t out;
+ out.val[0] = vcombine_s32(in0.val[0], in1.val[0]);
+ out.val[1] = vcombine_s32(in0.val[1], in1.val[1]);
+ return out;
+}
+
+static INLINE int32x4x2_t highbd_add_dct_const_round_shift_low_8(
+ const int64x2x2_t *const in0, const int64x2x2_t *const in1) {
+ const int64x2x2_t sum_lo = vaddq_s64_dual(in0[0], in1[0]);
+ const int64x2x2_t sum_hi = vaddq_s64_dual(in0[1], in1[1]);
+ int32x2x2_t out_lo, out_hi;
+
+ out_lo.val[0] = vrshrn_n_s64(sum_lo.val[0], DCT_CONST_BITS);
+ out_lo.val[1] = vrshrn_n_s64(sum_lo.val[1], DCT_CONST_BITS);
+ out_hi.val[0] = vrshrn_n_s64(sum_hi.val[0], DCT_CONST_BITS);
+ out_hi.val[1] = vrshrn_n_s64(sum_hi.val[1], DCT_CONST_BITS);
+ return vcombine_s32_dual(out_lo, out_hi);
+}
+
+static INLINE int32x4x2_t highbd_sub_dct_const_round_shift_low_8(
+ const int64x2x2_t *const in0, const int64x2x2_t *const in1) {
+ const int64x2x2_t sub_lo = vsubq_s64_dual(in0[0], in1[0]);
+ const int64x2x2_t sub_hi = vsubq_s64_dual(in0[1], in1[1]);
+ int32x2x2_t out_lo, out_hi;
+
+ out_lo.val[0] = vrshrn_n_s64(sub_lo.val[0], DCT_CONST_BITS);
+ out_lo.val[1] = vrshrn_n_s64(sub_lo.val[1], DCT_CONST_BITS);
+ out_hi.val[0] = vrshrn_n_s64(sub_hi.val[0], DCT_CONST_BITS);
+ out_hi.val[1] = vrshrn_n_s64(sub_hi.val[1], DCT_CONST_BITS);
+ return vcombine_s32_dual(out_lo, out_hi);
+}
+
+static INLINE int32x4x2_t vnegq_s32_dual(const int32x4x2_t in) {
+ int32x4x2_t out;
+ out.val[0] = vnegq_s32(in.val[0]);
+ out.val[1] = vnegq_s32(in.val[1]);
+ return out;
+}
+
+void vpx_highbd_iadst16_neon(const int32_t *input, int32_t *output,
+ uint16_t *dest, const int stride, const int bd) {
+ const int32x4_t c_1_31_5_27 =
+ create_s32x4_neon(cospi_1_64, cospi_31_64, cospi_5_64, cospi_27_64);
+ const int32x4_t c_9_23_13_19 =
+ create_s32x4_neon(cospi_9_64, cospi_23_64, cospi_13_64, cospi_19_64);
+ const int32x4_t c_17_15_21_11 =
+ create_s32x4_neon(cospi_17_64, cospi_15_64, cospi_21_64, cospi_11_64);
+ const int32x4_t c_25_7_29_3 =
+ create_s32x4_neon(cospi_25_64, cospi_7_64, cospi_29_64, cospi_3_64);
+ const int32x4_t c_4_28_20_12 =
+ create_s32x4_neon(cospi_4_64, cospi_28_64, cospi_20_64, cospi_12_64);
+ const int32x4_t c_16_n16_8_24 =
+ create_s32x4_neon(cospi_16_64, -cospi_16_64, cospi_8_64, cospi_24_64);
+ int32x4x2_t in[16], out[16];
+ int32x4x2_t x[16], t[12];
+ int64x2x2_t s0[2], s1[2], s2[2], s3[2], s4[2], s5[2], s6[2], s7[2];
+ int64x2x2_t s8[2], s9[2], s10[2], s11[2], s12[2], s13[2], s14[2], s15[2];
+
+ // Load input (16x8)
+ in[0].val[0] = vld1q_s32(input);
+ in[0].val[1] = vld1q_s32(input + 4);
+ input += 8;
+ in[8].val[0] = vld1q_s32(input);
+ in[8].val[1] = vld1q_s32(input + 4);
+ input += 8;
+ in[1].val[0] = vld1q_s32(input);
+ in[1].val[1] = vld1q_s32(input + 4);
+ input += 8;
+ in[9].val[0] = vld1q_s32(input);
+ in[9].val[1] = vld1q_s32(input + 4);
+ input += 8;
+ in[2].val[0] = vld1q_s32(input);
+ in[2].val[1] = vld1q_s32(input + 4);
+ input += 8;
+ in[10].val[0] = vld1q_s32(input);
+ in[10].val[1] = vld1q_s32(input + 4);
+ input += 8;
+ in[3].val[0] = vld1q_s32(input);
+ in[3].val[1] = vld1q_s32(input + 4);
+ input += 8;
+ in[11].val[0] = vld1q_s32(input);
+ in[11].val[1] = vld1q_s32(input + 4);
+ input += 8;
+ in[4].val[0] = vld1q_s32(input);
+ in[4].val[1] = vld1q_s32(input + 4);
+ input += 8;
+ in[12].val[0] = vld1q_s32(input);
+ in[12].val[1] = vld1q_s32(input + 4);
+ input += 8;
+ in[5].val[0] = vld1q_s32(input);
+ in[5].val[1] = vld1q_s32(input + 4);
+ input += 8;
+ in[13].val[0] = vld1q_s32(input);
+ in[13].val[1] = vld1q_s32(input + 4);
+ input += 8;
+ in[6].val[0] = vld1q_s32(input);
+ in[6].val[1] = vld1q_s32(input + 4);
+ input += 8;
+ in[14].val[0] = vld1q_s32(input);
+ in[14].val[1] = vld1q_s32(input + 4);
+ input += 8;
+ in[7].val[0] = vld1q_s32(input);
+ in[7].val[1] = vld1q_s32(input + 4);
+ input += 8;
+ in[15].val[0] = vld1q_s32(input);
+ in[15].val[1] = vld1q_s32(input + 4);
+
+ // Transpose
+ transpose_s32_8x8(&in[0], &in[1], &in[2], &in[3], &in[4], &in[5], &in[6],
+ &in[7]);
+ transpose_s32_8x8(&in[8], &in[9], &in[10], &in[11], &in[12], &in[13], &in[14],
+ &in[15]);
+
+ x[0] = in[15];
+ x[1] = in[0];
+ x[2] = in[13];
+ x[3] = in[2];
+ x[4] = in[11];
+ x[5] = in[4];
+ x[6] = in[9];
+ x[7] = in[6];
+ x[8] = in[7];
+ x[9] = in[8];
+ x[10] = in[5];
+ x[11] = in[10];
+ x[12] = in[3];
+ x[13] = in[12];
+ x[14] = in[1];
+ x[15] = in[14];
+
+ // stage 1
+ highbd_iadst_butterfly(x[0], x[1], vget_low_s32(c_1_31_5_27), 0, 1, s0, s1);
+ highbd_iadst_butterfly(x[2], x[3], vget_high_s32(c_1_31_5_27), 0, 1, s2, s3);
+ highbd_iadst_butterfly(x[4], x[5], vget_low_s32(c_9_23_13_19), 0, 1, s4, s5);
+ highbd_iadst_butterfly(x[6], x[7], vget_high_s32(c_9_23_13_19), 0, 1, s6, s7);
+ highbd_iadst_butterfly(x[8], x[9], vget_low_s32(c_17_15_21_11), 0, 1, s8, s9);
+ highbd_iadst_butterfly(x[10], x[11], vget_high_s32(c_17_15_21_11), 0, 1, s10,
+ s11);
+ highbd_iadst_butterfly(x[12], x[13], vget_low_s32(c_25_7_29_3), 0, 1, s12,
+ s13);
+ highbd_iadst_butterfly(x[14], x[15], vget_high_s32(c_25_7_29_3), 0, 1, s14,
+ s15);
+
+ x[0] = highbd_add_dct_const_round_shift_low_8(s0, s8);
+ x[1] = highbd_add_dct_const_round_shift_low_8(s1, s9);
+ x[2] = highbd_add_dct_const_round_shift_low_8(s2, s10);
+ x[3] = highbd_add_dct_const_round_shift_low_8(s3, s11);
+ x[4] = highbd_add_dct_const_round_shift_low_8(s4, s12);
+ x[5] = highbd_add_dct_const_round_shift_low_8(s5, s13);
+ x[6] = highbd_add_dct_const_round_shift_low_8(s6, s14);
+ x[7] = highbd_add_dct_const_round_shift_low_8(s7, s15);
+ x[8] = highbd_sub_dct_const_round_shift_low_8(s0, s8);
+ x[9] = highbd_sub_dct_const_round_shift_low_8(s1, s9);
+ x[10] = highbd_sub_dct_const_round_shift_low_8(s2, s10);
+ x[11] = highbd_sub_dct_const_round_shift_low_8(s3, s11);
+ x[12] = highbd_sub_dct_const_round_shift_low_8(s4, s12);
+ x[13] = highbd_sub_dct_const_round_shift_low_8(s5, s13);
+ x[14] = highbd_sub_dct_const_round_shift_low_8(s6, s14);
+ x[15] = highbd_sub_dct_const_round_shift_low_8(s7, s15);
+
+ // stage 2
+ t[0] = x[0];
+ t[1] = x[1];
+ t[2] = x[2];
+ t[3] = x[3];
+ t[4] = x[4];
+ t[5] = x[5];
+ t[6] = x[6];
+ t[7] = x[7];
+ highbd_iadst_butterfly(x[8], x[9], vget_low_s32(c_4_28_20_12), 0, 1, s8, s9);
+ highbd_iadst_butterfly(x[10], x[11], vget_high_s32(c_4_28_20_12), 0, 1, s10,
+ s11);
+ highbd_iadst_butterfly(x[13], x[12], vget_low_s32(c_4_28_20_12), 1, 0, s13,
+ s12);
+ highbd_iadst_butterfly(x[15], x[14], vget_high_s32(c_4_28_20_12), 1, 0, s15,
+ s14);
+
+ x[0] = vaddq_s32_dual(t[0], t[4]);
+ x[1] = vaddq_s32_dual(t[1], t[5]);
+ x[2] = vaddq_s32_dual(t[2], t[6]);
+ x[3] = vaddq_s32_dual(t[3], t[7]);
+ x[4] = vsubq_s32_dual(t[0], t[4]);
+ x[5] = vsubq_s32_dual(t[1], t[5]);
+ x[6] = vsubq_s32_dual(t[2], t[6]);
+ x[7] = vsubq_s32_dual(t[3], t[7]);
+ x[8] = highbd_add_dct_const_round_shift_low_8(s8, s12);
+ x[9] = highbd_add_dct_const_round_shift_low_8(s9, s13);
+ x[10] = highbd_add_dct_const_round_shift_low_8(s10, s14);
+ x[11] = highbd_add_dct_const_round_shift_low_8(s11, s15);
+ x[12] = highbd_sub_dct_const_round_shift_low_8(s8, s12);
+ x[13] = highbd_sub_dct_const_round_shift_low_8(s9, s13);
+ x[14] = highbd_sub_dct_const_round_shift_low_8(s10, s14);
+ x[15] = highbd_sub_dct_const_round_shift_low_8(s11, s15);
+
+ // stage 3
+ t[0] = x[0];
+ t[1] = x[1];
+ t[2] = x[2];
+ t[3] = x[3];
+ highbd_iadst_butterfly(x[4], x[5], vget_high_s32(c_16_n16_8_24), 0, 1, s4,
+ s5);
+ highbd_iadst_butterfly(x[7], x[6], vget_high_s32(c_16_n16_8_24), 1, 0, s7,
+ s6);
+ t[8] = x[8];
+ t[9] = x[9];
+ t[10] = x[10];
+ t[11] = x[11];
+ highbd_iadst_butterfly(x[12], x[13], vget_high_s32(c_16_n16_8_24), 0, 1, s12,
+ s13);
+ highbd_iadst_butterfly(x[15], x[14], vget_high_s32(c_16_n16_8_24), 1, 0, s15,
+ s14);
+
+ x[0] = vaddq_s32_dual(t[0], t[2]);
+ x[1] = vaddq_s32_dual(t[1], t[3]);
+ x[2] = vsubq_s32_dual(t[0], t[2]);
+ x[3] = vsubq_s32_dual(t[1], t[3]);
+ x[4] = highbd_add_dct_const_round_shift_low_8(s4, s6);
+ x[5] = highbd_add_dct_const_round_shift_low_8(s5, s7);
+ x[6] = highbd_sub_dct_const_round_shift_low_8(s4, s6);
+ x[7] = highbd_sub_dct_const_round_shift_low_8(s5, s7);
+ x[8] = vaddq_s32_dual(t[8], t[10]);
+ x[9] = vaddq_s32_dual(t[9], t[11]);
+ x[10] = vsubq_s32_dual(t[8], t[10]);
+ x[11] = vsubq_s32_dual(t[9], t[11]);
+ x[12] = highbd_add_dct_const_round_shift_low_8(s12, s14);
+ x[13] = highbd_add_dct_const_round_shift_low_8(s13, s15);
+ x[14] = highbd_sub_dct_const_round_shift_low_8(s12, s14);
+ x[15] = highbd_sub_dct_const_round_shift_low_8(s13, s15);
+
+ // stage 4
+ {
+ const int32x4x2_t sum = vaddq_s32_dual(x[2], x[3]);
+ const int32x4x2_t sub = vsubq_s32_dual(x[2], x[3]);
+ highbd_iadst_half_butterfly(sum, vget_low_s32(c_16_n16_8_24), 1, x[2]);
+ highbd_iadst_half_butterfly(sub, vget_low_s32(c_16_n16_8_24), 0, x[3]);
+ }
+ {
+ const int32x4x2_t sum = vaddq_s32_dual(x[7], x[6]);
+ const int32x4x2_t sub = vsubq_s32_dual(x[7], x[6]);
+ highbd_iadst_half_butterfly(sum, vget_low_s32(c_16_n16_8_24), 0, x[6]);
+ highbd_iadst_half_butterfly(sub, vget_low_s32(c_16_n16_8_24), 0, x[7]);
+ }
+ {
+ const int32x4x2_t sum = vaddq_s32_dual(x[11], x[10]);
+ const int32x4x2_t sub = vsubq_s32_dual(x[11], x[10]);
+ highbd_iadst_half_butterfly(sum, vget_low_s32(c_16_n16_8_24), 0, x[10]);
+ highbd_iadst_half_butterfly(sub, vget_low_s32(c_16_n16_8_24), 0, x[11]);
+ }
+ {
+ const int32x4x2_t sum = vaddq_s32_dual(x[14], x[15]);
+ const int32x4x2_t sub = vsubq_s32_dual(x[14], x[15]);
+ highbd_iadst_half_butterfly(sum, vget_low_s32(c_16_n16_8_24), 1, x[14]);
+ highbd_iadst_half_butterfly(sub, vget_low_s32(c_16_n16_8_24), 0, x[15]);
+ }
+
+ out[0] = x[0];
+ out[1] = vnegq_s32_dual(x[8]);
+ out[2] = x[12];
+ out[3] = vnegq_s32_dual(x[4]);
+ out[4] = x[6];
+ out[5] = x[14];
+ out[6] = x[10];
+ out[7] = x[2];
+ out[8] = x[3];
+ out[9] = x[11];
+ out[10] = x[15];
+ out[11] = x[7];
+ out[12] = x[5];
+ out[13] = vnegq_s32_dual(x[13]);
+ out[14] = x[9];
+ out[15] = vnegq_s32_dual(x[1]);
+
+ if (output) {
+ highbd_idct16x16_store_pass1(out, output);
+ } else {
+ highbd_idct16x16_add_store(out, dest, stride, bd);
+ }
+}
+
+typedef void (*highbd_iht_1d)(const int32_t *input, int32_t *output,
+ uint16_t *dest, const int stride, const int bd);
+
+typedef struct {
+ highbd_iht_1d cols, rows; // vertical and horizontal
+} highbd_iht_2d;
+
+void vp9_highbd_iht16x16_256_add_neon(const tran_low_t *input, uint16_t *dest,
+ int stride, int tx_type, int bd) {
+ if (bd == 8) {
+ static const iht_2d IHT_16[] = {
+ { vpx_idct16x16_256_add_half1d,
+ vpx_idct16x16_256_add_half1d }, // DCT_DCT = 0
+ { vpx_iadst16x16_256_add_half1d,
+ vpx_idct16x16_256_add_half1d }, // ADST_DCT = 1
+ { vpx_idct16x16_256_add_half1d,
+ vpx_iadst16x16_256_add_half1d }, // DCT_ADST = 2
+ { vpx_iadst16x16_256_add_half1d,
+ vpx_iadst16x16_256_add_half1d } // ADST_ADST = 3
+ };
+ const iht_2d ht = IHT_16[tx_type];
+ int16_t row_output[16 * 16];
+
+ // pass 1
+ ht.rows(input, row_output, dest, stride, 1); // upper 8 rows
+ ht.rows(input + 8 * 16, row_output + 8, dest, stride, 1); // lower 8 rows
+
+ // pass 2
+ ht.cols(row_output, NULL, dest, stride, 1); // left 8 columns
+ ht.cols(row_output + 16 * 8, NULL, dest + 8, stride, 1); // right 8 columns
+ } else {
+ static const highbd_iht_2d IHT_16[] = {
+ { vpx_highbd_idct16x16_256_add_half1d,
+ vpx_highbd_idct16x16_256_add_half1d }, // DCT_DCT = 0
+ { vpx_highbd_iadst16_neon,
+ vpx_highbd_idct16x16_256_add_half1d }, // ADST_DCT = 1
+ { vpx_highbd_idct16x16_256_add_half1d,
+ vpx_highbd_iadst16_neon }, // DCT_ADST = 2
+ { vpx_highbd_iadst16_neon, vpx_highbd_iadst16_neon } // ADST_ADST = 3
+ };
+ const highbd_iht_2d ht = IHT_16[tx_type];
+ int32_t row_output[16 * 16];
+
+ // pass 1
+ ht.rows(input, row_output, dest, stride, bd); // upper 8 rows
+ ht.rows(input + 8 * 16, row_output + 8, dest, stride, bd); // lower 8 rows
+
+ // pass 2
+ ht.cols(row_output, NULL, dest, stride, bd); // left 8 columns
+ ht.cols(row_output + 8 * 16, NULL, dest + 8, stride,
+ bd); // right 8 columns
+ }
+}
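
A note on the patterns in the file above: the vmull/vmlal/vmlsl lane intrinsics require the lane index to be a compile-time constant, which is why the dual-register wrappers are macros rather than functions, and every highbd_*_dct_const_round_shift_low_8 helper ends in the same rounding shift that vrshrn_n_s64(x, DCT_CONST_BITS) performs per lane. A minimal scalar sketch of that shift, assuming the DCT_CONST_BITS value of 14 from vpx_dsp/txfm_common.h (names here are illustrative, not part of the patch):

#include <stdint.h>

#define DCT_CONST_BITS 14

/* Per-lane behavior of vrshrn_n_s64(x, DCT_CONST_BITS): add half the
   divisor for round-to-nearest, shift down, then narrow to 32 bits. */
static int32_t round_shift_sketch(int64_t x) {
  return (int32_t)((x + ((int64_t)1 << (DCT_CONST_BITS - 1))) >>
                   DCT_CONST_BITS);
}

/* What highbd_add_dct_const_round_shift_low_8 computes for one lane pair;
   the sub variant is identical with a - b. */
static int32_t add_round_shift_sketch(int64_t a, int64_t b) {
  return round_shift_sketch(a + b);
}
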
diff --git a/libvpx/vp9/common/arm/neon/vp9_highbd_iht4x4_add_neon.c b/libvpx/vp9/common/arm/neon/vp9_highbd_iht4x4_add_neon.c
new file mode 100644
index 000000000..52c4f1937
--- /dev/null
+++ b/libvpx/vp9/common/arm/neon/vp9_highbd_iht4x4_add_neon.c
@@ -0,0 +1,181 @@
+/*
+ * Copyright (c) 2018 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+#include <assert.h>
+
+#include "./vp9_rtcd.h"
+#include "./vpx_config.h"
+#include "vp9/common/vp9_common.h"
+#include "vp9/common/arm/neon/vp9_iht_neon.h"
+#include "vpx_dsp/arm/highbd_idct_neon.h"
+#include "vpx_dsp/arm/idct_neon.h"
+#include "vpx_dsp/arm/mem_neon.h"
+#include "vpx_dsp/txfm_common.h"
+
+static INLINE void highbd_iadst4(int32x4_t *const io) {
+ const int32_t sinpis[4] = { sinpi_1_9, sinpi_2_9, sinpi_3_9, sinpi_4_9 };
+ const int32x4_t sinpi = vld1q_s32(sinpis);
+ int64x2x2_t s[7], t[4];
+ int32x4_t s7;
+
+ s[0].val[0] = vmull_lane_s32(vget_low_s32(io[0]), vget_low_s32(sinpi), 0);
+ s[0].val[1] = vmull_lane_s32(vget_high_s32(io[0]), vget_low_s32(sinpi), 0);
+ s[1].val[0] = vmull_lane_s32(vget_low_s32(io[0]), vget_low_s32(sinpi), 1);
+ s[1].val[1] = vmull_lane_s32(vget_high_s32(io[0]), vget_low_s32(sinpi), 1);
+ s[2].val[0] = vmull_lane_s32(vget_low_s32(io[1]), vget_high_s32(sinpi), 0);
+ s[2].val[1] = vmull_lane_s32(vget_high_s32(io[1]), vget_high_s32(sinpi), 0);
+ s[3].val[0] = vmull_lane_s32(vget_low_s32(io[2]), vget_high_s32(sinpi), 1);
+ s[3].val[1] = vmull_lane_s32(vget_high_s32(io[2]), vget_high_s32(sinpi), 1);
+ s[4].val[0] = vmull_lane_s32(vget_low_s32(io[2]), vget_low_s32(sinpi), 0);
+ s[4].val[1] = vmull_lane_s32(vget_high_s32(io[2]), vget_low_s32(sinpi), 0);
+ s[5].val[0] = vmull_lane_s32(vget_low_s32(io[3]), vget_low_s32(sinpi), 1);
+ s[5].val[1] = vmull_lane_s32(vget_high_s32(io[3]), vget_low_s32(sinpi), 1);
+ s[6].val[0] = vmull_lane_s32(vget_low_s32(io[3]), vget_high_s32(sinpi), 1);
+ s[6].val[1] = vmull_lane_s32(vget_high_s32(io[3]), vget_high_s32(sinpi), 1);
+ s7 = vsubq_s32(io[0], io[2]);
+ s7 = vaddq_s32(s7, io[3]);
+
+ s[0].val[0] = vaddq_s64(s[0].val[0], s[3].val[0]);
+ s[0].val[1] = vaddq_s64(s[0].val[1], s[3].val[1]);
+ s[0].val[0] = vaddq_s64(s[0].val[0], s[5].val[0]);
+ s[0].val[1] = vaddq_s64(s[0].val[1], s[5].val[1]);
+ s[1].val[0] = vsubq_s64(s[1].val[0], s[4].val[0]);
+ s[1].val[1] = vsubq_s64(s[1].val[1], s[4].val[1]);
+ s[1].val[0] = vsubq_s64(s[1].val[0], s[6].val[0]);
+ s[1].val[1] = vsubq_s64(s[1].val[1], s[6].val[1]);
+ s[3] = s[2];
+ s[2].val[0] = vmull_lane_s32(vget_low_s32(s7), vget_high_s32(sinpi), 0);
+ s[2].val[1] = vmull_lane_s32(vget_high_s32(s7), vget_high_s32(sinpi), 0);
+
+ t[0].val[0] = vaddq_s64(s[0].val[0], s[3].val[0]);
+ t[0].val[1] = vaddq_s64(s[0].val[1], s[3].val[1]);
+ t[1].val[0] = vaddq_s64(s[1].val[0], s[3].val[0]);
+ t[1].val[1] = vaddq_s64(s[1].val[1], s[3].val[1]);
+ t[2] = s[2];
+ t[3].val[0] = vaddq_s64(s[0].val[0], s[1].val[0]);
+ t[3].val[1] = vaddq_s64(s[0].val[1], s[1].val[1]);
+ t[3].val[0] = vsubq_s64(t[3].val[0], s[3].val[0]);
+ t[3].val[1] = vsubq_s64(t[3].val[1], s[3].val[1]);
+ io[0] = vcombine_s32(vrshrn_n_s64(t[0].val[0], DCT_CONST_BITS),
+ vrshrn_n_s64(t[0].val[1], DCT_CONST_BITS));
+ io[1] = vcombine_s32(vrshrn_n_s64(t[1].val[0], DCT_CONST_BITS),
+ vrshrn_n_s64(t[1].val[1], DCT_CONST_BITS));
+ io[2] = vcombine_s32(vrshrn_n_s64(t[2].val[0], DCT_CONST_BITS),
+ vrshrn_n_s64(t[2].val[1], DCT_CONST_BITS));
+ io[3] = vcombine_s32(vrshrn_n_s64(t[3].val[0], DCT_CONST_BITS),
+ vrshrn_n_s64(t[3].val[1], DCT_CONST_BITS));
+}
+
+void vp9_highbd_iht4x4_16_add_neon(const tran_low_t *input, uint16_t *dest,
+ int stride, int tx_type, int bd) {
+ const int16x8_t max = vdupq_n_s16((1 << bd) - 1);
+ int16x8_t a[2];
+ int32x4_t c[4];
+
+ c[0] = vld1q_s32(input);
+ c[1] = vld1q_s32(input + 4);
+ c[2] = vld1q_s32(input + 8);
+ c[3] = vld1q_s32(input + 12);
+
+ if (bd == 8) {
+ a[0] = vcombine_s16(vmovn_s32(c[0]), vmovn_s32(c[1]));
+ a[1] = vcombine_s16(vmovn_s32(c[2]), vmovn_s32(c[3]));
+ transpose_s16_4x4q(&a[0], &a[1]);
+
+ switch (tx_type) {
+ case DCT_DCT:
+ idct4x4_16_kernel_bd8(a);
+ a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1]));
+ transpose_s16_4x4q(&a[0], &a[1]);
+ idct4x4_16_kernel_bd8(a);
+ a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1]));
+ break;
+
+ case ADST_DCT:
+ idct4x4_16_kernel_bd8(a);
+ a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1]));
+ transpose_s16_4x4q(&a[0], &a[1]);
+ iadst4(a);
+ break;
+
+ case DCT_ADST:
+ iadst4(a);
+ transpose_s16_4x4q(&a[0], &a[1]);
+ idct4x4_16_kernel_bd8(a);
+ a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1]));
+ break;
+
+ default:
+ assert(tx_type == ADST_ADST);
+ iadst4(a);
+ transpose_s16_4x4q(&a[0], &a[1]);
+ iadst4(a);
+ break;
+ }
+ a[0] = vrshrq_n_s16(a[0], 4);
+ a[1] = vrshrq_n_s16(a[1], 4);
+ } else {
+ switch (tx_type) {
+ case DCT_DCT: {
+ const int32x4_t cospis = vld1q_s32(kCospi32);
+
+ if (bd == 10) {
+ idct4x4_16_kernel_bd10(cospis, c);
+ idct4x4_16_kernel_bd10(cospis, c);
+ } else {
+ idct4x4_16_kernel_bd12(cospis, c);
+ idct4x4_16_kernel_bd12(cospis, c);
+ }
+ break;
+ }
+
+ case ADST_DCT: {
+ const int32x4_t cospis = vld1q_s32(kCospi32);
+
+ if (bd == 10) {
+ idct4x4_16_kernel_bd10(cospis, c);
+ } else {
+ idct4x4_16_kernel_bd12(cospis, c);
+ }
+ transpose_s32_4x4(&c[0], &c[1], &c[2], &c[3]);
+ highbd_iadst4(c);
+ break;
+ }
+
+ case DCT_ADST: {
+ const int32x4_t cospis = vld1q_s32(kCospi32);
+
+ transpose_s32_4x4(&c[0], &c[1], &c[2], &c[3]);
+ highbd_iadst4(c);
+ if (bd == 10) {
+ idct4x4_16_kernel_bd10(cospis, c);
+ } else {
+ idct4x4_16_kernel_bd12(cospis, c);
+ }
+ break;
+ }
+
+ default: {
+ assert(tx_type == ADST_ADST);
+ transpose_s32_4x4(&c[0], &c[1], &c[2], &c[3]);
+ highbd_iadst4(c);
+ transpose_s32_4x4(&c[0], &c[1], &c[2], &c[3]);
+ highbd_iadst4(c);
+ break;
+ }
+ }
+ a[0] = vcombine_s16(vqrshrn_n_s32(c[0], 4), vqrshrn_n_s32(c[1], 4));
+ a[1] = vcombine_s16(vqrshrn_n_s32(c[2], 4), vqrshrn_n_s32(c[3], 4));
+ }
+
+ highbd_idct4x4_1_add_kernel1(&dest, stride, a[0], max);
+ highbd_idct4x4_1_add_kernel1(&dest, stride, a[1], max);
+}
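
highbd_iadst4 above keeps 64-bit intermediates through the whole 4-point inverse ADST, only rounding and narrowing at the end. A scalar sketch of the same dataflow, following the structure of iadst4_c in vpx_dsp/inv_txfm.c (round_shift_sketch is the rounding shift sketched earlier; constant values as in vpx_dsp/txfm_common.h; names are illustrative):

/* sinpi_1_9 .. sinpi_4_9 */
static const int32_t kSinPi[4] = { 5283, 9929, 13377, 15212 };

/* Scalar sketch of the 4-point inverse ADST vectorized by highbd_iadst4. */
static void iadst4_sketch(const int32_t in[4], int32_t out[4]) {
  const int64_t s0 = (int64_t)kSinPi[0] * in[0] + (int64_t)kSinPi[3] * in[2] +
                     (int64_t)kSinPi[1] * in[3];
  const int64_t s1 = (int64_t)kSinPi[1] * in[0] - (int64_t)kSinPi[0] * in[2] -
                     (int64_t)kSinPi[3] * in[3];
  const int64_t s2 = (int64_t)kSinPi[2] * (in[0] - in[2] + in[3]);
  const int64_t s3 = (int64_t)kSinPi[2] * in[1];

  out[0] = round_shift_sketch(s0 + s3);
  out[1] = round_shift_sketch(s1 + s3);
  out[2] = round_shift_sketch(s2);
  out[3] = round_shift_sketch(s0 + s1 - s3);
}
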
diff --git a/libvpx/vp9/common/arm/neon/vp9_highbd_iht8x8_add_neon.c b/libvpx/vp9/common/arm/neon/vp9_highbd_iht8x8_add_neon.c
new file mode 100644
index 000000000..2232c6841
--- /dev/null
+++ b/libvpx/vp9/common/arm/neon/vp9_highbd_iht8x8_add_neon.c
@@ -0,0 +1,345 @@
+/*
+ * Copyright (c) 2018 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+
+#include "./vpx_dsp_rtcd.h"
+#include "vp9/common/vp9_enums.h"
+#include "vp9/common/arm/neon/vp9_iht_neon.h"
+#include "vpx_dsp/arm/highbd_idct_neon.h"
+#include "vpx_dsp/arm/idct_neon.h"
+#include "vpx_dsp/arm/transpose_neon.h"
+#include "vpx_dsp/inv_txfm.h"
+
+static INLINE void highbd_iadst_half_butterfly_neon(int32x4_t *const x,
+ const int32x2_t c) {
+ const int32x4_t sum = vaddq_s32(x[0], x[1]);
+ const int32x4_t sub = vsubq_s32(x[0], x[1]);
+ const int64x2_t t0_lo = vmull_lane_s32(vget_low_s32(sum), c, 0);
+ const int64x2_t t1_lo = vmull_lane_s32(vget_low_s32(sub), c, 0);
+ const int64x2_t t0_hi = vmull_lane_s32(vget_high_s32(sum), c, 0);
+ const int64x2_t t1_hi = vmull_lane_s32(vget_high_s32(sub), c, 0);
+ const int32x2_t out0_lo = vrshrn_n_s64(t0_lo, DCT_CONST_BITS);
+ const int32x2_t out1_lo = vrshrn_n_s64(t1_lo, DCT_CONST_BITS);
+ const int32x2_t out0_hi = vrshrn_n_s64(t0_hi, DCT_CONST_BITS);
+ const int32x2_t out1_hi = vrshrn_n_s64(t1_hi, DCT_CONST_BITS);
+
+ x[0] = vcombine_s32(out0_lo, out0_hi);
+ x[1] = vcombine_s32(out1_lo, out1_hi);
+}
+
+static INLINE void highbd_iadst_butterfly_lane_0_1_neon(const int32x4_t in0,
+ const int32x4_t in1,
+ const int32x2_t c,
+ int64x2_t *const s0,
+ int64x2_t *const s1) {
+ const int64x2_t t0_lo = vmull_lane_s32(vget_low_s32(in0), c, 0);
+ const int64x2_t t1_lo = vmull_lane_s32(vget_low_s32(in0), c, 1);
+ const int64x2_t t0_hi = vmull_lane_s32(vget_high_s32(in0), c, 0);
+ const int64x2_t t1_hi = vmull_lane_s32(vget_high_s32(in0), c, 1);
+
+ s0[0] = vmlal_lane_s32(t0_lo, vget_low_s32(in1), c, 1);
+ s1[0] = vmlsl_lane_s32(t1_lo, vget_low_s32(in1), c, 0);
+ s0[1] = vmlal_lane_s32(t0_hi, vget_high_s32(in1), c, 1);
+ s1[1] = vmlsl_lane_s32(t1_hi, vget_high_s32(in1), c, 0);
+}
+
+static INLINE void highbd_iadst_butterfly_lane_1_0_neon(const int32x4_t in0,
+ const int32x4_t in1,
+ const int32x2_t c,
+ int64x2_t *const s0,
+ int64x2_t *const s1) {
+ const int64x2_t t0_lo = vmull_lane_s32(vget_low_s32(in0), c, 1);
+ const int64x2_t t1_lo = vmull_lane_s32(vget_low_s32(in0), c, 0);
+ const int64x2_t t0_hi = vmull_lane_s32(vget_high_s32(in0), c, 1);
+ const int64x2_t t1_hi = vmull_lane_s32(vget_high_s32(in0), c, 0);
+
+ s0[0] = vmlal_lane_s32(t0_lo, vget_low_s32(in1), c, 0);
+ s1[0] = vmlsl_lane_s32(t1_lo, vget_low_s32(in1), c, 1);
+ s0[1] = vmlal_lane_s32(t0_hi, vget_high_s32(in1), c, 0);
+ s1[1] = vmlsl_lane_s32(t1_hi, vget_high_s32(in1), c, 1);
+}
+
+static INLINE int32x4_t highbd_add_dct_const_round_shift_low_8(
+ const int64x2_t *const in0, const int64x2_t *const in1) {
+ const int64x2_t sum_lo = vaddq_s64(in0[0], in1[0]);
+ const int64x2_t sum_hi = vaddq_s64(in0[1], in1[1]);
+ const int32x2_t out_lo = vrshrn_n_s64(sum_lo, DCT_CONST_BITS);
+ const int32x2_t out_hi = vrshrn_n_s64(sum_hi, DCT_CONST_BITS);
+ return vcombine_s32(out_lo, out_hi);
+}
+
+static INLINE int32x4_t highbd_sub_dct_const_round_shift_low_8(
+ const int64x2_t *const in0, const int64x2_t *const in1) {
+ const int64x2_t sub_lo = vsubq_s64(in0[0], in1[0]);
+ const int64x2_t sub_hi = vsubq_s64(in0[1], in1[1]);
+ const int32x2_t out_lo = vrshrn_n_s64(sub_lo, DCT_CONST_BITS);
+ const int32x2_t out_hi = vrshrn_n_s64(sub_hi, DCT_CONST_BITS);
+ return vcombine_s32(out_lo, out_hi);
+}
+
+static INLINE void highbd_iadst8(int32x4_t *const io0, int32x4_t *const io1,
+ int32x4_t *const io2, int32x4_t *const io3,
+ int32x4_t *const io4, int32x4_t *const io5,
+ int32x4_t *const io6, int32x4_t *const io7) {
+ const int32x4_t c0 =
+ create_s32x4_neon(cospi_2_64, cospi_30_64, cospi_10_64, cospi_22_64);
+ const int32x4_t c1 =
+ create_s32x4_neon(cospi_18_64, cospi_14_64, cospi_26_64, cospi_6_64);
+ const int32x4_t c2 =
+ create_s32x4_neon(cospi_16_64, 0, cospi_8_64, cospi_24_64);
+ int32x4_t x[8], t[4];
+ int64x2_t s[8][2];
+
+ x[0] = *io7;
+ x[1] = *io0;
+ x[2] = *io5;
+ x[3] = *io2;
+ x[4] = *io3;
+ x[5] = *io4;
+ x[6] = *io1;
+ x[7] = *io6;
+
+ // stage 1
+ highbd_iadst_butterfly_lane_0_1_neon(x[0], x[1], vget_low_s32(c0), s[0],
+ s[1]);
+ highbd_iadst_butterfly_lane_0_1_neon(x[2], x[3], vget_high_s32(c0), s[2],
+ s[3]);
+ highbd_iadst_butterfly_lane_0_1_neon(x[4], x[5], vget_low_s32(c1), s[4],
+ s[5]);
+ highbd_iadst_butterfly_lane_0_1_neon(x[6], x[7], vget_high_s32(c1), s[6],
+ s[7]);
+
+ x[0] = highbd_add_dct_const_round_shift_low_8(s[0], s[4]);
+ x[1] = highbd_add_dct_const_round_shift_low_8(s[1], s[5]);
+ x[2] = highbd_add_dct_const_round_shift_low_8(s[2], s[6]);
+ x[3] = highbd_add_dct_const_round_shift_low_8(s[3], s[7]);
+ x[4] = highbd_sub_dct_const_round_shift_low_8(s[0], s[4]);
+ x[5] = highbd_sub_dct_const_round_shift_low_8(s[1], s[5]);
+ x[6] = highbd_sub_dct_const_round_shift_low_8(s[2], s[6]);
+ x[7] = highbd_sub_dct_const_round_shift_low_8(s[3], s[7]);
+
+ // stage 2
+ t[0] = x[0];
+ t[1] = x[1];
+ t[2] = x[2];
+ t[3] = x[3];
+ highbd_iadst_butterfly_lane_0_1_neon(x[4], x[5], vget_high_s32(c2), s[4],
+ s[5]);
+ highbd_iadst_butterfly_lane_1_0_neon(x[7], x[6], vget_high_s32(c2), s[7],
+ s[6]);
+
+ x[0] = vaddq_s32(t[0], t[2]);
+ x[1] = vaddq_s32(t[1], t[3]);
+ x[2] = vsubq_s32(t[0], t[2]);
+ x[3] = vsubq_s32(t[1], t[3]);
+ x[4] = highbd_add_dct_const_round_shift_low_8(s[4], s[6]);
+ x[5] = highbd_add_dct_const_round_shift_low_8(s[5], s[7]);
+ x[6] = highbd_sub_dct_const_round_shift_low_8(s[4], s[6]);
+ x[7] = highbd_sub_dct_const_round_shift_low_8(s[5], s[7]);
+
+ // stage 3
+ highbd_iadst_half_butterfly_neon(x + 2, vget_low_s32(c2));
+ highbd_iadst_half_butterfly_neon(x + 6, vget_low_s32(c2));
+
+ *io0 = x[0];
+ *io1 = vnegq_s32(x[4]);
+ *io2 = x[6];
+ *io3 = vnegq_s32(x[2]);
+ *io4 = x[3];
+ *io5 = vnegq_s32(x[7]);
+ *io6 = x[5];
+ *io7 = vnegq_s32(x[1]);
+}
+
+void vp9_highbd_iht8x8_64_add_neon(const tran_low_t *input, uint16_t *dest,
+ int stride, int tx_type, int bd) {
+ int32x4_t a[16];
+ int16x8_t c[8];
+
+ a[0] = vld1q_s32(input);
+ a[1] = vld1q_s32(input + 4);
+ a[2] = vld1q_s32(input + 8);
+ a[3] = vld1q_s32(input + 12);
+ a[4] = vld1q_s32(input + 16);
+ a[5] = vld1q_s32(input + 20);
+ a[6] = vld1q_s32(input + 24);
+ a[7] = vld1q_s32(input + 28);
+ a[8] = vld1q_s32(input + 32);
+ a[9] = vld1q_s32(input + 36);
+ a[10] = vld1q_s32(input + 40);
+ a[11] = vld1q_s32(input + 44);
+ a[12] = vld1q_s32(input + 48);
+ a[13] = vld1q_s32(input + 52);
+ a[14] = vld1q_s32(input + 56);
+ a[15] = vld1q_s32(input + 60);
+
+ if (bd == 8) {
+ c[0] = vcombine_s16(vmovn_s32(a[0]), vmovn_s32(a[1]));
+ c[1] = vcombine_s16(vmovn_s32(a[2]), vmovn_s32(a[3]));
+ c[2] = vcombine_s16(vmovn_s32(a[4]), vmovn_s32(a[5]));
+ c[3] = vcombine_s16(vmovn_s32(a[6]), vmovn_s32(a[7]));
+ c[4] = vcombine_s16(vmovn_s32(a[8]), vmovn_s32(a[9]));
+ c[5] = vcombine_s16(vmovn_s32(a[10]), vmovn_s32(a[11]));
+ c[6] = vcombine_s16(vmovn_s32(a[12]), vmovn_s32(a[13]));
+ c[7] = vcombine_s16(vmovn_s32(a[14]), vmovn_s32(a[15]));
+
+ switch (tx_type) {
+ case DCT_DCT: {
+ const int16x8_t cospis = vld1q_s16(kCospi);
+ const int16x4_t cospis0 = vget_low_s16(cospis); // cospi 0, 8, 16, 24
+ const int16x4_t cospis1 = vget_high_s16(cospis); // cospi 4, 12, 20, 28
+
+ idct8x8_64_1d_bd8(cospis0, cospis1, c);
+ idct8x8_64_1d_bd8(cospis0, cospis1, c);
+ break;
+ }
+
+ case ADST_DCT: {
+ const int16x8_t cospis = vld1q_s16(kCospi);
+ const int16x4_t cospis0 = vget_low_s16(cospis); // cospi 0, 8, 16, 24
+ const int16x4_t cospis1 = vget_high_s16(cospis); // cospi 4, 12, 20, 28
+
+ idct8x8_64_1d_bd8(cospis0, cospis1, c);
+ transpose_s16_8x8(&c[0], &c[1], &c[2], &c[3], &c[4], &c[5], &c[6],
+ &c[7]);
+ iadst8(c);
+ break;
+ }
+
+ case DCT_ADST: {
+ const int16x8_t cospis = vld1q_s16(kCospi);
+ const int16x4_t cospis0 = vget_low_s16(cospis); // cospi 0, 8, 16, 24
+ const int16x4_t cospis1 = vget_high_s16(cospis); // cospi 4, 12, 20, 28
+
+ transpose_s16_8x8(&c[0], &c[1], &c[2], &c[3], &c[4], &c[5], &c[6],
+ &c[7]);
+ iadst8(c);
+ idct8x8_64_1d_bd8(cospis0, cospis1, c);
+ break;
+ }
+
+ default: {
+ transpose_s16_8x8(&c[0], &c[1], &c[2], &c[3], &c[4], &c[5], &c[6],
+ &c[7]);
+ iadst8(c);
+ transpose_s16_8x8(&c[0], &c[1], &c[2], &c[3], &c[4], &c[5], &c[6],
+ &c[7]);
+ iadst8(c);
+ break;
+ }
+ }
+
+ c[0] = vrshrq_n_s16(c[0], 5);
+ c[1] = vrshrq_n_s16(c[1], 5);
+ c[2] = vrshrq_n_s16(c[2], 5);
+ c[3] = vrshrq_n_s16(c[3], 5);
+ c[4] = vrshrq_n_s16(c[4], 5);
+ c[5] = vrshrq_n_s16(c[5], 5);
+ c[6] = vrshrq_n_s16(c[6], 5);
+ c[7] = vrshrq_n_s16(c[7], 5);
+ } else {
+ switch (tx_type) {
+ case DCT_DCT: {
+ const int32x4_t cospis0 = vld1q_s32(kCospi32); // cospi 0, 8, 16, 24
+ const int32x4_t cospis1 =
+ vld1q_s32(kCospi32 + 4); // cospi 4, 12, 20, 28
+
+ if (bd == 10) {
+ idct8x8_64_half1d_bd10(cospis0, cospis1, &a[0], &a[1], &a[2], &a[3],
+ &a[4], &a[5], &a[6], &a[7]);
+ idct8x8_64_half1d_bd10(cospis0, cospis1, &a[8], &a[9], &a[10], &a[11],
+ &a[12], &a[13], &a[14], &a[15]);
+ idct8x8_64_half1d_bd10(cospis0, cospis1, &a[0], &a[8], &a[1], &a[9],
+ &a[2], &a[10], &a[3], &a[11]);
+ idct8x8_64_half1d_bd10(cospis0, cospis1, &a[4], &a[12], &a[5], &a[13],
+ &a[6], &a[14], &a[7], &a[15]);
+ } else {
+ idct8x8_64_half1d_bd12(cospis0, cospis1, &a[0], &a[1], &a[2], &a[3],
+ &a[4], &a[5], &a[6], &a[7]);
+ idct8x8_64_half1d_bd12(cospis0, cospis1, &a[8], &a[9], &a[10], &a[11],
+ &a[12], &a[13], &a[14], &a[15]);
+ idct8x8_64_half1d_bd12(cospis0, cospis1, &a[0], &a[8], &a[1], &a[9],
+ &a[2], &a[10], &a[3], &a[11]);
+ idct8x8_64_half1d_bd12(cospis0, cospis1, &a[4], &a[12], &a[5], &a[13],
+ &a[6], &a[14], &a[7], &a[15]);
+ }
+ break;
+ }
+
+ case ADST_DCT: {
+ const int32x4_t cospis0 = vld1q_s32(kCospi32); // cospi 0, 8, 16, 24
+ const int32x4_t cospis1 =
+ vld1q_s32(kCospi32 + 4); // cospi 4, 12, 20, 28
+
+ idct8x8_64_half1d_bd12(cospis0, cospis1, &a[0], &a[1], &a[2], &a[3],
+ &a[4], &a[5], &a[6], &a[7]);
+ idct8x8_64_half1d_bd12(cospis0, cospis1, &a[8], &a[9], &a[10], &a[11],
+ &a[12], &a[13], &a[14], &a[15]);
+ transpose_s32_8x4(&a[0], &a[8], &a[1], &a[9], &a[2], &a[10], &a[3],
+ &a[11]);
+ highbd_iadst8(&a[0], &a[8], &a[1], &a[9], &a[2], &a[10], &a[3], &a[11]);
+ transpose_s32_8x4(&a[4], &a[12], &a[5], &a[13], &a[6], &a[14], &a[7],
+ &a[15]);
+ highbd_iadst8(&a[4], &a[12], &a[5], &a[13], &a[6], &a[14], &a[7],
+ &a[15]);
+ break;
+ }
+
+ case DCT_ADST: {
+ const int32x4_t cospis0 = vld1q_s32(kCospi32); // cospi 0, 8, 16, 24
+ const int32x4_t cospis1 =
+ vld1q_s32(kCospi32 + 4); // cospi 4, 12, 20, 28
+
+ transpose_s32_8x4(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6],
+ &a[7]);
+ highbd_iadst8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]);
+ transpose_s32_8x4(&a[8], &a[9], &a[10], &a[11], &a[12], &a[13], &a[14],
+ &a[15]);
+ highbd_iadst8(&a[8], &a[9], &a[10], &a[11], &a[12], &a[13], &a[14],
+ &a[15]);
+ idct8x8_64_half1d_bd12(cospis0, cospis1, &a[0], &a[8], &a[1], &a[9],
+ &a[2], &a[10], &a[3], &a[11]);
+ idct8x8_64_half1d_bd12(cospis0, cospis1, &a[4], &a[12], &a[5], &a[13],
+ &a[6], &a[14], &a[7], &a[15]);
+ break;
+ }
+
+ default: {
+ assert(tx_type == ADST_ADST);
+ transpose_s32_8x4(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6],
+ &a[7]);
+ highbd_iadst8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]);
+ transpose_s32_8x4(&a[8], &a[9], &a[10], &a[11], &a[12], &a[13], &a[14],
+ &a[15]);
+ highbd_iadst8(&a[8], &a[9], &a[10], &a[11], &a[12], &a[13], &a[14],
+ &a[15]);
+ transpose_s32_8x4(&a[0], &a[8], &a[1], &a[9], &a[2], &a[10], &a[3],
+ &a[11]);
+ highbd_iadst8(&a[0], &a[8], &a[1], &a[9], &a[2], &a[10], &a[3], &a[11]);
+ transpose_s32_8x4(&a[4], &a[12], &a[5], &a[13], &a[6], &a[14], &a[7],
+ &a[15]);
+ highbd_iadst8(&a[4], &a[12], &a[5], &a[13], &a[6], &a[14], &a[7],
+ &a[15]);
+ break;
+ }
+ }
+
+ c[0] = vcombine_s16(vrshrn_n_s32(a[0], 5), vrshrn_n_s32(a[4], 5));
+ c[1] = vcombine_s16(vrshrn_n_s32(a[8], 5), vrshrn_n_s32(a[12], 5));
+ c[2] = vcombine_s16(vrshrn_n_s32(a[1], 5), vrshrn_n_s32(a[5], 5));
+ c[3] = vcombine_s16(vrshrn_n_s32(a[9], 5), vrshrn_n_s32(a[13], 5));
+ c[4] = vcombine_s16(vrshrn_n_s32(a[2], 5), vrshrn_n_s32(a[6], 5));
+ c[5] = vcombine_s16(vrshrn_n_s32(a[10], 5), vrshrn_n_s32(a[14], 5));
+ c[6] = vcombine_s16(vrshrn_n_s32(a[3], 5), vrshrn_n_s32(a[7], 5));
+ c[7] = vcombine_s16(vrshrn_n_s32(a[11], 5), vrshrn_n_s32(a[15], 5));
+ }
+ highbd_add8x8(c, dest, stride, bd);
+}
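
The lane_0_1/lane_1_0 butterflies in the file above are plane rotations by a (c0, c1) cospi pair, computed at 64-bit precision. A scalar sketch of what one lane of highbd_iadst_butterfly_lane_0_1_neon produces (illustrative names; the lane_1_0 variant swaps which constant multiplies which term):

/* One lane of highbd_iadst_butterfly_lane_0_1_neon: rotate (in0, in1) by
   the cospi pair (c0, c1); results are rounded and narrowed afterwards by
   the add/sub round-shift helpers. */
static void iadst_butterfly_sketch(int32_t in0, int32_t in1, int32_t c0,
                                   int32_t c1, int64_t *s0, int64_t *s1) {
  *s0 = (int64_t)in0 * c0 + (int64_t)in1 * c1;
  *s1 = (int64_t)in0 * c1 - (int64_t)in1 * c0;
}
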
diff --git a/libvpx/vp9/common/arm/neon/vp9_iht16x16_add_neon.c b/libvpx/vp9/common/arm/neon/vp9_iht16x16_add_neon.c
new file mode 100644
index 000000000..db72ff116
--- /dev/null
+++ b/libvpx/vp9/common/arm/neon/vp9_iht16x16_add_neon.c
@@ -0,0 +1,279 @@
+/*
+ * Copyright (c) 2018 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <arm_neon.h>
+#include <assert.h>
+
+#include "./vp9_rtcd.h"
+#include "./vpx_config.h"
+#include "vp9/common/vp9_common.h"
+#include "vp9/common/arm/neon/vp9_iht_neon.h"
+#include "vpx_dsp/arm/idct_neon.h"
+#include "vpx_dsp/arm/mem_neon.h"
+#include "vpx_dsp/arm/transpose_neon.h"
+
+void vpx_iadst16x16_256_add_half1d(const void *const input, int16_t *output,
+ void *const dest, const int stride,
+ const int highbd_flag) {
+ int16x8_t in[16], out[16];
+ const int16x4_t c_1_31_5_27 =
+ create_s16x4_neon(cospi_1_64, cospi_31_64, cospi_5_64, cospi_27_64);
+ const int16x4_t c_9_23_13_19 =
+ create_s16x4_neon(cospi_9_64, cospi_23_64, cospi_13_64, cospi_19_64);
+ const int16x4_t c_17_15_21_11 =
+ create_s16x4_neon(cospi_17_64, cospi_15_64, cospi_21_64, cospi_11_64);
+ const int16x4_t c_25_7_29_3 =
+ create_s16x4_neon(cospi_25_64, cospi_7_64, cospi_29_64, cospi_3_64);
+ const int16x4_t c_4_28_20_12 =
+ create_s16x4_neon(cospi_4_64, cospi_28_64, cospi_20_64, cospi_12_64);
+ const int16x4_t c_16_n16_8_24 =
+ create_s16x4_neon(cospi_16_64, -cospi_16_64, cospi_8_64, cospi_24_64);
+ int16x8_t x[16], t[12];
+ int32x4_t s0[2], s1[2], s2[2], s3[2], s4[2], s5[2], s6[2], s7[2];
+ int32x4_t s8[2], s9[2], s10[2], s11[2], s12[2], s13[2], s14[2], s15[2];
+
+ // Load input (16x8)
+ if (output) {
+ const tran_low_t *inputT = (const tran_low_t *)input;
+ in[0] = load_tran_low_to_s16q(inputT);
+ inputT += 8;
+ in[8] = load_tran_low_to_s16q(inputT);
+ inputT += 8;
+ in[1] = load_tran_low_to_s16q(inputT);
+ inputT += 8;
+ in[9] = load_tran_low_to_s16q(inputT);
+ inputT += 8;
+ in[2] = load_tran_low_to_s16q(inputT);
+ inputT += 8;
+ in[10] = load_tran_low_to_s16q(inputT);
+ inputT += 8;
+ in[3] = load_tran_low_to_s16q(inputT);
+ inputT += 8;
+ in[11] = load_tran_low_to_s16q(inputT);
+ inputT += 8;
+ in[4] = load_tran_low_to_s16q(inputT);
+ inputT += 8;
+ in[12] = load_tran_low_to_s16q(inputT);
+ inputT += 8;
+ in[5] = load_tran_low_to_s16q(inputT);
+ inputT += 8;
+ in[13] = load_tran_low_to_s16q(inputT);
+ inputT += 8;
+ in[6] = load_tran_low_to_s16q(inputT);
+ inputT += 8;
+ in[14] = load_tran_low_to_s16q(inputT);
+ inputT += 8;
+ in[7] = load_tran_low_to_s16q(inputT);
+ inputT += 8;
+ in[15] = load_tran_low_to_s16q(inputT);
+ } else {
+ const int16_t *inputT = (const int16_t *)input;
+ in[0] = vld1q_s16(inputT);
+ inputT += 8;
+ in[8] = vld1q_s16(inputT);
+ inputT += 8;
+ in[1] = vld1q_s16(inputT);
+ inputT += 8;
+ in[9] = vld1q_s16(inputT);
+ inputT += 8;
+ in[2] = vld1q_s16(inputT);
+ inputT += 8;
+ in[10] = vld1q_s16(inputT);
+ inputT += 8;
+ in[3] = vld1q_s16(inputT);
+ inputT += 8;
+ in[11] = vld1q_s16(inputT);
+ inputT += 8;
+ in[4] = vld1q_s16(inputT);
+ inputT += 8;
+ in[12] = vld1q_s16(inputT);
+ inputT += 8;
+ in[5] = vld1q_s16(inputT);
+ inputT += 8;
+ in[13] = vld1q_s16(inputT);
+ inputT += 8;
+ in[6] = vld1q_s16(inputT);
+ inputT += 8;
+ in[14] = vld1q_s16(inputT);
+ inputT += 8;
+ in[7] = vld1q_s16(inputT);
+ inputT += 8;
+ in[15] = vld1q_s16(inputT);
+ }
+
+ // Transpose
+ transpose_s16_8x8(&in[0], &in[1], &in[2], &in[3], &in[4], &in[5], &in[6],
+ &in[7]);
+ transpose_s16_8x8(&in[8], &in[9], &in[10], &in[11], &in[12], &in[13], &in[14],
+ &in[15]);
+
+ x[0] = in[15];
+ x[1] = in[0];
+ x[2] = in[13];
+ x[3] = in[2];
+ x[4] = in[11];
+ x[5] = in[4];
+ x[6] = in[9];
+ x[7] = in[6];
+ x[8] = in[7];
+ x[9] = in[8];
+ x[10] = in[5];
+ x[11] = in[10];
+ x[12] = in[3];
+ x[13] = in[12];
+ x[14] = in[1];
+ x[15] = in[14];
+
+ // stage 1
+ iadst_butterfly_lane_0_1_neon(x[0], x[1], c_1_31_5_27, s0, s1);
+ iadst_butterfly_lane_2_3_neon(x[2], x[3], c_1_31_5_27, s2, s3);
+ iadst_butterfly_lane_0_1_neon(x[4], x[5], c_9_23_13_19, s4, s5);
+ iadst_butterfly_lane_2_3_neon(x[6], x[7], c_9_23_13_19, s6, s7);
+ iadst_butterfly_lane_0_1_neon(x[8], x[9], c_17_15_21_11, s8, s9);
+ iadst_butterfly_lane_2_3_neon(x[10], x[11], c_17_15_21_11, s10, s11);
+ iadst_butterfly_lane_0_1_neon(x[12], x[13], c_25_7_29_3, s12, s13);
+ iadst_butterfly_lane_2_3_neon(x[14], x[15], c_25_7_29_3, s14, s15);
+
+ x[0] = add_dct_const_round_shift_low_8(s0, s8);
+ x[1] = add_dct_const_round_shift_low_8(s1, s9);
+ x[2] = add_dct_const_round_shift_low_8(s2, s10);
+ x[3] = add_dct_const_round_shift_low_8(s3, s11);
+ x[4] = add_dct_const_round_shift_low_8(s4, s12);
+ x[5] = add_dct_const_round_shift_low_8(s5, s13);
+ x[6] = add_dct_const_round_shift_low_8(s6, s14);
+ x[7] = add_dct_const_round_shift_low_8(s7, s15);
+ x[8] = sub_dct_const_round_shift_low_8(s0, s8);
+ x[9] = sub_dct_const_round_shift_low_8(s1, s9);
+ x[10] = sub_dct_const_round_shift_low_8(s2, s10);
+ x[11] = sub_dct_const_round_shift_low_8(s3, s11);
+ x[12] = sub_dct_const_round_shift_low_8(s4, s12);
+ x[13] = sub_dct_const_round_shift_low_8(s5, s13);
+ x[14] = sub_dct_const_round_shift_low_8(s6, s14);
+ x[15] = sub_dct_const_round_shift_low_8(s7, s15);
+
+ // stage 2
+ t[0] = x[0];
+ t[1] = x[1];
+ t[2] = x[2];
+ t[3] = x[3];
+ t[4] = x[4];
+ t[5] = x[5];
+ t[6] = x[6];
+ t[7] = x[7];
+ iadst_butterfly_lane_0_1_neon(x[8], x[9], c_4_28_20_12, s8, s9);
+ iadst_butterfly_lane_2_3_neon(x[10], x[11], c_4_28_20_12, s10, s11);
+ iadst_butterfly_lane_1_0_neon(x[13], x[12], c_4_28_20_12, s13, s12);
+ iadst_butterfly_lane_3_2_neon(x[15], x[14], c_4_28_20_12, s15, s14);
+
+ x[0] = vaddq_s16(t[0], t[4]);
+ x[1] = vaddq_s16(t[1], t[5]);
+ x[2] = vaddq_s16(t[2], t[6]);
+ x[3] = vaddq_s16(t[3], t[7]);
+ x[4] = vsubq_s16(t[0], t[4]);
+ x[5] = vsubq_s16(t[1], t[5]);
+ x[6] = vsubq_s16(t[2], t[6]);
+ x[7] = vsubq_s16(t[3], t[7]);
+ x[8] = add_dct_const_round_shift_low_8(s8, s12);
+ x[9] = add_dct_const_round_shift_low_8(s9, s13);
+ x[10] = add_dct_const_round_shift_low_8(s10, s14);
+ x[11] = add_dct_const_round_shift_low_8(s11, s15);
+ x[12] = sub_dct_const_round_shift_low_8(s8, s12);
+ x[13] = sub_dct_const_round_shift_low_8(s9, s13);
+ x[14] = sub_dct_const_round_shift_low_8(s10, s14);
+ x[15] = sub_dct_const_round_shift_low_8(s11, s15);
+
+ // stage 3
+ t[0] = x[0];
+ t[1] = x[1];
+ t[2] = x[2];
+ t[3] = x[3];
+ iadst_butterfly_lane_2_3_neon(x[4], x[5], c_16_n16_8_24, s4, s5);
+ iadst_butterfly_lane_3_2_neon(x[7], x[6], c_16_n16_8_24, s7, s6);
+ t[8] = x[8];
+ t[9] = x[9];
+ t[10] = x[10];
+ t[11] = x[11];
+ iadst_butterfly_lane_2_3_neon(x[12], x[13], c_16_n16_8_24, s12, s13);
+ iadst_butterfly_lane_3_2_neon(x[15], x[14], c_16_n16_8_24, s15, s14);
+
+ x[0] = vaddq_s16(t[0], t[2]);
+ x[1] = vaddq_s16(t[1], t[3]);
+ x[2] = vsubq_s16(t[0], t[2]);
+ x[3] = vsubq_s16(t[1], t[3]);
+ x[4] = add_dct_const_round_shift_low_8(s4, s6);
+ x[5] = add_dct_const_round_shift_low_8(s5, s7);
+ x[6] = sub_dct_const_round_shift_low_8(s4, s6);
+ x[7] = sub_dct_const_round_shift_low_8(s5, s7);
+ x[8] = vaddq_s16(t[8], t[10]);
+ x[9] = vaddq_s16(t[9], t[11]);
+ x[10] = vsubq_s16(t[8], t[10]);
+ x[11] = vsubq_s16(t[9], t[11]);
+ x[12] = add_dct_const_round_shift_low_8(s12, s14);
+ x[13] = add_dct_const_round_shift_low_8(s13, s15);
+ x[14] = sub_dct_const_round_shift_low_8(s12, s14);
+ x[15] = sub_dct_const_round_shift_low_8(s13, s15);
+
+ // stage 4
+ iadst_half_butterfly_neg_neon(&x[3], &x[2], c_16_n16_8_24);
+ iadst_half_butterfly_pos_neon(&x[7], &x[6], c_16_n16_8_24);
+ iadst_half_butterfly_pos_neon(&x[11], &x[10], c_16_n16_8_24);
+ iadst_half_butterfly_neg_neon(&x[15], &x[14], c_16_n16_8_24);
+
+ out[0] = x[0];
+ out[1] = vnegq_s16(x[8]);
+ out[2] = x[12];
+ out[3] = vnegq_s16(x[4]);
+ out[4] = x[6];
+ out[5] = x[14];
+ out[6] = x[10];
+ out[7] = x[2];
+ out[8] = x[3];
+ out[9] = x[11];
+ out[10] = x[15];
+ out[11] = x[7];
+ out[12] = x[5];
+ out[13] = vnegq_s16(x[13]);
+ out[14] = x[9];
+ out[15] = vnegq_s16(x[1]);
+
+ if (output) {
+ idct16x16_store_pass1(out, output);
+ } else {
+ if (highbd_flag) {
+ idct16x16_add_store_bd8(out, dest, stride);
+ } else {
+ idct16x16_add_store(out, dest, stride);
+ }
+ }
+}
+
+void vp9_iht16x16_256_add_neon(const tran_low_t *input, uint8_t *dest,
+ int stride, int tx_type) {
+ static const iht_2d IHT_16[] = {
+ { vpx_idct16x16_256_add_half1d,
+ vpx_idct16x16_256_add_half1d }, // DCT_DCT = 0
+ { vpx_iadst16x16_256_add_half1d,
+ vpx_idct16x16_256_add_half1d }, // ADST_DCT = 1
+ { vpx_idct16x16_256_add_half1d,
+ vpx_iadst16x16_256_add_half1d }, // DCT_ADST = 2
+ { vpx_iadst16x16_256_add_half1d,
+ vpx_iadst16x16_256_add_half1d } // ADST_ADST = 3
+ };
+ const iht_2d ht = IHT_16[tx_type];
+ int16_t row_output[16 * 16];
+
+ // pass 1
+ ht.rows(input, row_output, dest, stride, 0); // upper 8 rows
+ ht.rows(input + 8 * 16, row_output + 8, dest, stride, 0); // lower 8 rows
+
+ // pass 2
+ ht.cols(row_output, NULL, dest, stride, 0); // left 8 columns
+ ht.cols(row_output + 16 * 8, NULL, dest + 8, stride, 0); // right 8 columns
+}
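
The 16x16 hybrid transform above runs as two half-width passes per direction: pass 1 writes row-transformed coefficients into a 16x16 scratch buffer (output != NULL), and pass 2 transforms the columns and adds the result into dest (output == NULL). A hedged usage sketch, with caller-side names that are illustrative rather than from the patch:

/* Illustrative caller: invert a 16x16 ADST_ADST coefficient block and add
   the reconstruction into an 8-bit frame. `coeffs`, `frame`, and
   `frame_stride` are assumed to be supplied by the surrounding decoder;
   tran_low_t and ADST_ADST come from the vp9 headers included above. */
static void reconstruct_block_sketch(const tran_low_t *coeffs, uint8_t *frame,
                                     int frame_stride) {
  vp9_iht16x16_256_add_neon(coeffs, frame, frame_stride, ADST_ADST);
}
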
diff --git a/libvpx/vp9/common/arm/neon/vp9_iht4x4_add_neon.c b/libvpx/vp9/common/arm/neon/vp9_iht4x4_add_neon.c
index 025254c3f..4f0a90f21 100644
--- a/libvpx/vp9/common/arm/neon/vp9_iht4x4_add_neon.c
+++ b/libvpx/vp9/common/arm/neon/vp9_iht4x4_add_neon.c
@@ -14,206 +14,63 @@
#include "./vp9_rtcd.h"
#include "./vpx_config.h"
#include "vp9/common/vp9_common.h"
+#include "vp9/common/arm/neon/vp9_iht_neon.h"
+#include "vpx_dsp/arm/idct_neon.h"
+#include "vpx_dsp/arm/mem_neon.h"
#include "vpx_dsp/txfm_common.h"
-static INLINE void TRANSPOSE4X4(int16x8_t *q8s16, int16x8_t *q9s16) {
- int32x4_t q8s32, q9s32;
- int16x4x2_t d0x2s16, d1x2s16;
- int32x4x2_t q0x2s32;
-
- d0x2s16 = vtrn_s16(vget_low_s16(*q8s16), vget_high_s16(*q8s16));
- d1x2s16 = vtrn_s16(vget_low_s16(*q9s16), vget_high_s16(*q9s16));
-
- q8s32 = vreinterpretq_s32_s16(vcombine_s16(d0x2s16.val[0], d0x2s16.val[1]));
- q9s32 = vreinterpretq_s32_s16(vcombine_s16(d1x2s16.val[0], d1x2s16.val[1]));
- q0x2s32 = vtrnq_s32(q8s32, q9s32);
-
- *q8s16 = vreinterpretq_s16_s32(q0x2s32.val[0]);
- *q9s16 = vreinterpretq_s16_s32(q0x2s32.val[1]);
-}
-
-static INLINE void GENERATE_COSINE_CONSTANTS(int16x4_t *d0s16, int16x4_t *d1s16,
- int16x4_t *d2s16) {
- *d0s16 = vdup_n_s16(cospi_8_64);
- *d1s16 = vdup_n_s16(cospi_16_64);
- *d2s16 = vdup_n_s16(cospi_24_64);
-}
-
-static INLINE void GENERATE_SINE_CONSTANTS(int16x4_t *d3s16, int16x4_t *d4s16,
- int16x4_t *d5s16, int16x8_t *q3s16) {
- *d3s16 = vdup_n_s16(sinpi_1_9);
- *d4s16 = vdup_n_s16(sinpi_2_9);
- *q3s16 = vdupq_n_s16(sinpi_3_9);
- *d5s16 = vdup_n_s16(sinpi_4_9);
-}
-
-static INLINE void IDCT4x4_1D(int16x4_t *d0s16, int16x4_t *d1s16,
- int16x4_t *d2s16, int16x8_t *q8s16,
- int16x8_t *q9s16) {
- int16x4_t d16s16, d17s16, d18s16, d19s16, d23s16, d24s16;
- int16x4_t d26s16, d27s16, d28s16, d29s16;
- int32x4_t q10s32, q13s32, q14s32, q15s32;
- int16x8_t q13s16, q14s16;
-
- d16s16 = vget_low_s16(*q8s16);
- d17s16 = vget_high_s16(*q8s16);
- d18s16 = vget_low_s16(*q9s16);
- d19s16 = vget_high_s16(*q9s16);
-
- d23s16 = vadd_s16(d16s16, d18s16);
- d24s16 = vsub_s16(d16s16, d18s16);
-
- q15s32 = vmull_s16(d17s16, *d2s16);
- q10s32 = vmull_s16(d17s16, *d0s16);
- q13s32 = vmull_s16(d23s16, *d1s16);
- q14s32 = vmull_s16(d24s16, *d1s16);
- q15s32 = vmlsl_s16(q15s32, d19s16, *d0s16);
- q10s32 = vmlal_s16(q10s32, d19s16, *d2s16);
-
- d26s16 = vrshrn_n_s32(q13s32, 14);
- d27s16 = vrshrn_n_s32(q14s32, 14);
- d29s16 = vrshrn_n_s32(q15s32, 14);
- d28s16 = vrshrn_n_s32(q10s32, 14);
-
- q13s16 = vcombine_s16(d26s16, d27s16);
- q14s16 = vcombine_s16(d28s16, d29s16);
- *q8s16 = vaddq_s16(q13s16, q14s16);
- *q9s16 = vsubq_s16(q13s16, q14s16);
- *q9s16 = vcombine_s16(vget_high_s16(*q9s16), vget_low_s16(*q9s16)); // vswp
-}
-
-static INLINE void IADST4x4_1D(int16x4_t *d3s16, int16x4_t *d4s16,
- int16x4_t *d5s16, int16x8_t *q3s16,
- int16x8_t *q8s16, int16x8_t *q9s16) {
- int16x4_t d6s16, d16s16, d17s16, d18s16, d19s16;
- int32x4_t q8s32, q9s32, q10s32, q11s32, q12s32, q13s32, q14s32, q15s32;
-
- d6s16 = vget_low_s16(*q3s16);
-
- d16s16 = vget_low_s16(*q8s16);
- d17s16 = vget_high_s16(*q8s16);
- d18s16 = vget_low_s16(*q9s16);
- d19s16 = vget_high_s16(*q9s16);
-
- q10s32 = vmull_s16(*d3s16, d16s16);
- q11s32 = vmull_s16(*d4s16, d16s16);
- q12s32 = vmull_s16(d6s16, d17s16);
- q13s32 = vmull_s16(*d5s16, d18s16);
- q14s32 = vmull_s16(*d3s16, d18s16);
- q15s32 = vmovl_s16(d16s16);
- q15s32 = vaddw_s16(q15s32, d19s16);
- q8s32 = vmull_s16(*d4s16, d19s16);
- q15s32 = vsubw_s16(q15s32, d18s16);
- q9s32 = vmull_s16(*d5s16, d19s16);
-
- q10s32 = vaddq_s32(q10s32, q13s32);
- q10s32 = vaddq_s32(q10s32, q8s32);
- q11s32 = vsubq_s32(q11s32, q14s32);
- q8s32 = vdupq_n_s32(sinpi_3_9);
- q11s32 = vsubq_s32(q11s32, q9s32);
- q15s32 = vmulq_s32(q15s32, q8s32);
-
- q13s32 = vaddq_s32(q10s32, q12s32);
- q10s32 = vaddq_s32(q10s32, q11s32);
- q14s32 = vaddq_s32(q11s32, q12s32);
- q10s32 = vsubq_s32(q10s32, q12s32);
-
- d16s16 = vrshrn_n_s32(q13s32, 14);
- d17s16 = vrshrn_n_s32(q14s32, 14);
- d18s16 = vrshrn_n_s32(q15s32, 14);
- d19s16 = vrshrn_n_s32(q10s32, 14);
-
- *q8s16 = vcombine_s16(d16s16, d17s16);
- *q9s16 = vcombine_s16(d18s16, d19s16);
-}
-
void vp9_iht4x4_16_add_neon(const tran_low_t *input, uint8_t *dest, int stride,
int tx_type) {
- uint8x8_t d26u8, d27u8;
- int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16;
- uint32x2_t d26u32, d27u32;
- int16x8_t q3s16, q8s16, q9s16;
- uint16x8_t q8u16, q9u16;
-
- d26u32 = d27u32 = vdup_n_u32(0);
+ int16x8_t a[2];
+ uint8x8_t s[2], d[2];
+ uint16x8_t sum[2];
- q8s16 = vld1q_s16(input);
- q9s16 = vld1q_s16(input + 8);
+ assert(!((intptr_t)dest % sizeof(uint32_t)));
+ assert(!(stride % sizeof(uint32_t)));
- TRANSPOSE4X4(&q8s16, &q9s16);
+ a[0] = load_tran_low_to_s16q(input);
+ a[1] = load_tran_low_to_s16q(input + 8);
+ transpose_s16_4x4q(&a[0], &a[1]);
switch (tx_type) {
- case 0: // idct_idct is not supported. Fall back to C
- vp9_iht4x4_16_add_c(input, dest, stride, tx_type);
- return;
- case 1: // iadst_idct
- // generate constants
- GENERATE_COSINE_CONSTANTS(&d0s16, &d1s16, &d2s16);
- GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16);
-
- // first transform rows
- IDCT4x4_1D(&d0s16, &d1s16, &d2s16, &q8s16, &q9s16);
-
- // transpose the matrix
- TRANSPOSE4X4(&q8s16, &q9s16);
-
- // then transform columns
- IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16);
+ case DCT_DCT:
+ idct4x4_16_kernel_bd8(a);
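+      // idct4x4_16_kernel_bd8() returns the two rows held in a[1] in swapped
+      // order; the vcombine() below restores row order before the next step.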
+ a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1]));
+ transpose_s16_4x4q(&a[0], &a[1]);
+ idct4x4_16_kernel_bd8(a);
+ a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1]));
break;
- case 2: // idct_iadst
-      // generate constants
- GENERATE_COSINE_CONSTANTS(&d0s16, &d1s16, &d2s16);
- GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16);
- // first transform rows
- IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16);
-
- // transpose the matrix
- TRANSPOSE4X4(&q8s16, &q9s16);
-
- // then transform columns
- IDCT4x4_1D(&d0s16, &d1s16, &d2s16, &q8s16, &q9s16);
+ case ADST_DCT:
+ idct4x4_16_kernel_bd8(a);
+ a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1]));
+ transpose_s16_4x4q(&a[0], &a[1]);
+ iadst4(a);
break;
- case 3: // iadst_iadst
- // generate constants
- GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16);
-
- // first transform rows
- IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16);
- // transpose the matrix
- TRANSPOSE4X4(&q8s16, &q9s16);
-
- // then transform columns
- IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16);
+ case DCT_ADST:
+ iadst4(a);
+ transpose_s16_4x4q(&a[0], &a[1]);
+ idct4x4_16_kernel_bd8(a);
+ a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1]));
break;
- default: // iadst_idct
- assert(0);
+
+ default:
+ assert(tx_type == ADST_ADST);
+ iadst4(a);
+ transpose_s16_4x4q(&a[0], &a[1]);
+ iadst4(a);
break;
}
- q8s16 = vrshrq_n_s16(q8s16, 4);
- q9s16 = vrshrq_n_s16(q9s16, 4);
-
- d26u32 = vld1_lane_u32((const uint32_t *)dest, d26u32, 0);
- dest += stride;
- d26u32 = vld1_lane_u32((const uint32_t *)dest, d26u32, 1);
- dest += stride;
- d27u32 = vld1_lane_u32((const uint32_t *)dest, d27u32, 0);
- dest += stride;
- d27u32 = vld1_lane_u32((const uint32_t *)dest, d27u32, 1);
-
- q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), vreinterpret_u8_u32(d26u32));
- q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), vreinterpret_u8_u32(d27u32));
-
- d26u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16));
- d27u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16));
-
- vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d27u8), 1);
- dest -= stride;
- vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d27u8), 0);
- dest -= stride;
- vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d26u8), 1);
- dest -= stride;
- vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d26u8), 0);
+ a[0] = vrshrq_n_s16(a[0], 4);
+ a[1] = vrshrq_n_s16(a[1], 4);
+ s[0] = load_u8(dest, stride);
+ s[1] = load_u8(dest + 2 * stride, stride);
+ sum[0] = vaddw_u8(vreinterpretq_u16_s16(a[0]), s[0]);
+ sum[1] = vaddw_u8(vreinterpretq_u16_s16(a[1]), s[1]);
+ d[0] = vqmovun_s16(vreinterpretq_s16_u16(sum[0]));
+ d[1] = vqmovun_s16(vreinterpretq_s16_u16(sum[1]));
+ store_u8(dest, stride, d[0]);
+ store_u8(dest + 2 * stride, stride, d[1]);
}
diff --git a/libvpx/vp9/common/arm/neon/vp9_iht8x8_add_neon.c b/libvpx/vp9/common/arm/neon/vp9_iht8x8_add_neon.c
index 1c739861c..46ee632e0 100644
--- a/libvpx/vp9/common/arm/neon/vp9_iht8x8_add_neon.c
+++ b/libvpx/vp9/common/arm/neon/vp9_iht8x8_add_neon.c
@@ -14,527 +14,55 @@
#include "./vp9_rtcd.h"
#include "./vpx_config.h"
#include "vp9/common/vp9_common.h"
+#include "vp9/common/arm/neon/vp9_iht_neon.h"
+#include "vpx_dsp/arm/idct_neon.h"
+#include "vpx_dsp/arm/mem_neon.h"
#include "vpx_dsp/arm/transpose_neon.h"
-static int16_t cospi_2_64 = 16305;
-static int16_t cospi_4_64 = 16069;
-static int16_t cospi_6_64 = 15679;
-static int16_t cospi_8_64 = 15137;
-static int16_t cospi_10_64 = 14449;
-static int16_t cospi_12_64 = 13623;
-static int16_t cospi_14_64 = 12665;
-static int16_t cospi_16_64 = 11585;
-static int16_t cospi_18_64 = 10394;
-static int16_t cospi_20_64 = 9102;
-static int16_t cospi_22_64 = 7723;
-static int16_t cospi_24_64 = 6270;
-static int16_t cospi_26_64 = 4756;
-static int16_t cospi_28_64 = 3196;
-static int16_t cospi_30_64 = 1606;
-
-static INLINE void IDCT8x8_1D(int16x8_t *q8s16, int16x8_t *q9s16,
- int16x8_t *q10s16, int16x8_t *q11s16,
- int16x8_t *q12s16, int16x8_t *q13s16,
- int16x8_t *q14s16, int16x8_t *q15s16) {
- int16x4_t d0s16, d1s16, d2s16, d3s16;
- int16x4_t d8s16, d9s16, d10s16, d11s16, d12s16, d13s16, d14s16, d15s16;
- int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16;
- int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16;
- int16x8_t q0s16, q1s16, q2s16, q3s16, q4s16, q5s16, q6s16, q7s16;
- int32x4_t q2s32, q3s32, q5s32, q6s32, q8s32, q9s32;
- int32x4_t q10s32, q11s32, q12s32, q13s32, q15s32;
-
- d0s16 = vdup_n_s16(cospi_28_64);
- d1s16 = vdup_n_s16(cospi_4_64);
- d2s16 = vdup_n_s16(cospi_12_64);
- d3s16 = vdup_n_s16(cospi_20_64);
-
- d16s16 = vget_low_s16(*q8s16);
- d17s16 = vget_high_s16(*q8s16);
- d18s16 = vget_low_s16(*q9s16);
- d19s16 = vget_high_s16(*q9s16);
- d20s16 = vget_low_s16(*q10s16);
- d21s16 = vget_high_s16(*q10s16);
- d22s16 = vget_low_s16(*q11s16);
- d23s16 = vget_high_s16(*q11s16);
- d24s16 = vget_low_s16(*q12s16);
- d25s16 = vget_high_s16(*q12s16);
- d26s16 = vget_low_s16(*q13s16);
- d27s16 = vget_high_s16(*q13s16);
- d28s16 = vget_low_s16(*q14s16);
- d29s16 = vget_high_s16(*q14s16);
- d30s16 = vget_low_s16(*q15s16);
- d31s16 = vget_high_s16(*q15s16);
-
- q2s32 = vmull_s16(d18s16, d0s16);
- q3s32 = vmull_s16(d19s16, d0s16);
- q5s32 = vmull_s16(d26s16, d2s16);
- q6s32 = vmull_s16(d27s16, d2s16);
-
- q2s32 = vmlsl_s16(q2s32, d30s16, d1s16);
- q3s32 = vmlsl_s16(q3s32, d31s16, d1s16);
- q5s32 = vmlsl_s16(q5s32, d22s16, d3s16);
- q6s32 = vmlsl_s16(q6s32, d23s16, d3s16);
-
- d8s16 = vrshrn_n_s32(q2s32, 14);
- d9s16 = vrshrn_n_s32(q3s32, 14);
- d10s16 = vrshrn_n_s32(q5s32, 14);
- d11s16 = vrshrn_n_s32(q6s32, 14);
- q4s16 = vcombine_s16(d8s16, d9s16);
- q5s16 = vcombine_s16(d10s16, d11s16);
-
- q2s32 = vmull_s16(d18s16, d1s16);
- q3s32 = vmull_s16(d19s16, d1s16);
- q9s32 = vmull_s16(d26s16, d3s16);
- q13s32 = vmull_s16(d27s16, d3s16);
-
- q2s32 = vmlal_s16(q2s32, d30s16, d0s16);
- q3s32 = vmlal_s16(q3s32, d31s16, d0s16);
- q9s32 = vmlal_s16(q9s32, d22s16, d2s16);
- q13s32 = vmlal_s16(q13s32, d23s16, d2s16);
-
- d14s16 = vrshrn_n_s32(q2s32, 14);
- d15s16 = vrshrn_n_s32(q3s32, 14);
- d12s16 = vrshrn_n_s32(q9s32, 14);
- d13s16 = vrshrn_n_s32(q13s32, 14);
- q6s16 = vcombine_s16(d12s16, d13s16);
- q7s16 = vcombine_s16(d14s16, d15s16);
-
- d0s16 = vdup_n_s16(cospi_16_64);
-
- q2s32 = vmull_s16(d16s16, d0s16);
- q3s32 = vmull_s16(d17s16, d0s16);
- q13s32 = vmull_s16(d16s16, d0s16);
- q15s32 = vmull_s16(d17s16, d0s16);
-
- q2s32 = vmlal_s16(q2s32, d24s16, d0s16);
- q3s32 = vmlal_s16(q3s32, d25s16, d0s16);
- q13s32 = vmlsl_s16(q13s32, d24s16, d0s16);
- q15s32 = vmlsl_s16(q15s32, d25s16, d0s16);
-
- d0s16 = vdup_n_s16(cospi_24_64);
- d1s16 = vdup_n_s16(cospi_8_64);
-
- d18s16 = vrshrn_n_s32(q2s32, 14);
- d19s16 = vrshrn_n_s32(q3s32, 14);
- d22s16 = vrshrn_n_s32(q13s32, 14);
- d23s16 = vrshrn_n_s32(q15s32, 14);
- *q9s16 = vcombine_s16(d18s16, d19s16);
- *q11s16 = vcombine_s16(d22s16, d23s16);
-
- q2s32 = vmull_s16(d20s16, d0s16);
- q3s32 = vmull_s16(d21s16, d0s16);
- q8s32 = vmull_s16(d20s16, d1s16);
- q12s32 = vmull_s16(d21s16, d1s16);
-
- q2s32 = vmlsl_s16(q2s32, d28s16, d1s16);
- q3s32 = vmlsl_s16(q3s32, d29s16, d1s16);
- q8s32 = vmlal_s16(q8s32, d28s16, d0s16);
- q12s32 = vmlal_s16(q12s32, d29s16, d0s16);
-
- d26s16 = vrshrn_n_s32(q2s32, 14);
- d27s16 = vrshrn_n_s32(q3s32, 14);
- d30s16 = vrshrn_n_s32(q8s32, 14);
- d31s16 = vrshrn_n_s32(q12s32, 14);
- *q13s16 = vcombine_s16(d26s16, d27s16);
- *q15s16 = vcombine_s16(d30s16, d31s16);
-
- q0s16 = vaddq_s16(*q9s16, *q15s16);
- q1s16 = vaddq_s16(*q11s16, *q13s16);
- q2s16 = vsubq_s16(*q11s16, *q13s16);
- q3s16 = vsubq_s16(*q9s16, *q15s16);
-
- *q13s16 = vsubq_s16(q4s16, q5s16);
- q4s16 = vaddq_s16(q4s16, q5s16);
- *q14s16 = vsubq_s16(q7s16, q6s16);
- q7s16 = vaddq_s16(q7s16, q6s16);
- d26s16 = vget_low_s16(*q13s16);
- d27s16 = vget_high_s16(*q13s16);
- d28s16 = vget_low_s16(*q14s16);
- d29s16 = vget_high_s16(*q14s16);
-
- d16s16 = vdup_n_s16(cospi_16_64);
-
- q9s32 = vmull_s16(d28s16, d16s16);
- q10s32 = vmull_s16(d29s16, d16s16);
- q11s32 = vmull_s16(d28s16, d16s16);
- q12s32 = vmull_s16(d29s16, d16s16);
-
- q9s32 = vmlsl_s16(q9s32, d26s16, d16s16);
- q10s32 = vmlsl_s16(q10s32, d27s16, d16s16);
- q11s32 = vmlal_s16(q11s32, d26s16, d16s16);
- q12s32 = vmlal_s16(q12s32, d27s16, d16s16);
-
- d10s16 = vrshrn_n_s32(q9s32, 14);
- d11s16 = vrshrn_n_s32(q10s32, 14);
- d12s16 = vrshrn_n_s32(q11s32, 14);
- d13s16 = vrshrn_n_s32(q12s32, 14);
- q5s16 = vcombine_s16(d10s16, d11s16);
- q6s16 = vcombine_s16(d12s16, d13s16);
-
- *q8s16 = vaddq_s16(q0s16, q7s16);
- *q9s16 = vaddq_s16(q1s16, q6s16);
- *q10s16 = vaddq_s16(q2s16, q5s16);
- *q11s16 = vaddq_s16(q3s16, q4s16);
- *q12s16 = vsubq_s16(q3s16, q4s16);
- *q13s16 = vsubq_s16(q2s16, q5s16);
- *q14s16 = vsubq_s16(q1s16, q6s16);
- *q15s16 = vsubq_s16(q0s16, q7s16);
-}
-
-static INLINE void IADST8X8_1D(int16x8_t *q8s16, int16x8_t *q9s16,
- int16x8_t *q10s16, int16x8_t *q11s16,
- int16x8_t *q12s16, int16x8_t *q13s16,
- int16x8_t *q14s16, int16x8_t *q15s16) {
- int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16, d6s16, d7s16;
- int16x4_t d8s16, d9s16, d10s16, d11s16, d12s16, d13s16, d14s16, d15s16;
- int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16;
- int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16;
- int16x8_t q2s16, q4s16, q5s16, q6s16;
- int32x4_t q0s32, q1s32, q2s32, q3s32, q4s32, q5s32, q6s32, q7s32, q8s32;
- int32x4_t q9s32, q10s32, q11s32, q12s32, q13s32, q14s32, q15s32;
-
- d16s16 = vget_low_s16(*q8s16);
- d17s16 = vget_high_s16(*q8s16);
- d18s16 = vget_low_s16(*q9s16);
- d19s16 = vget_high_s16(*q9s16);
- d20s16 = vget_low_s16(*q10s16);
- d21s16 = vget_high_s16(*q10s16);
- d22s16 = vget_low_s16(*q11s16);
- d23s16 = vget_high_s16(*q11s16);
- d24s16 = vget_low_s16(*q12s16);
- d25s16 = vget_high_s16(*q12s16);
- d26s16 = vget_low_s16(*q13s16);
- d27s16 = vget_high_s16(*q13s16);
- d28s16 = vget_low_s16(*q14s16);
- d29s16 = vget_high_s16(*q14s16);
- d30s16 = vget_low_s16(*q15s16);
- d31s16 = vget_high_s16(*q15s16);
-
- d14s16 = vdup_n_s16(cospi_2_64);
- d15s16 = vdup_n_s16(cospi_30_64);
-
- q1s32 = vmull_s16(d30s16, d14s16);
- q2s32 = vmull_s16(d31s16, d14s16);
- q3s32 = vmull_s16(d30s16, d15s16);
- q4s32 = vmull_s16(d31s16, d15s16);
-
- d30s16 = vdup_n_s16(cospi_18_64);
- d31s16 = vdup_n_s16(cospi_14_64);
-
- q1s32 = vmlal_s16(q1s32, d16s16, d15s16);
- q2s32 = vmlal_s16(q2s32, d17s16, d15s16);
- q3s32 = vmlsl_s16(q3s32, d16s16, d14s16);
- q4s32 = vmlsl_s16(q4s32, d17s16, d14s16);
-
- q5s32 = vmull_s16(d22s16, d30s16);
- q6s32 = vmull_s16(d23s16, d30s16);
- q7s32 = vmull_s16(d22s16, d31s16);
- q8s32 = vmull_s16(d23s16, d31s16);
-
- q5s32 = vmlal_s16(q5s32, d24s16, d31s16);
- q6s32 = vmlal_s16(q6s32, d25s16, d31s16);
- q7s32 = vmlsl_s16(q7s32, d24s16, d30s16);
- q8s32 = vmlsl_s16(q8s32, d25s16, d30s16);
-
- q11s32 = vaddq_s32(q1s32, q5s32);
- q12s32 = vaddq_s32(q2s32, q6s32);
- q1s32 = vsubq_s32(q1s32, q5s32);
- q2s32 = vsubq_s32(q2s32, q6s32);
-
- d22s16 = vrshrn_n_s32(q11s32, 14);
- d23s16 = vrshrn_n_s32(q12s32, 14);
- *q11s16 = vcombine_s16(d22s16, d23s16);
-
- q12s32 = vaddq_s32(q3s32, q7s32);
- q15s32 = vaddq_s32(q4s32, q8s32);
- q3s32 = vsubq_s32(q3s32, q7s32);
- q4s32 = vsubq_s32(q4s32, q8s32);
-
- d2s16 = vrshrn_n_s32(q1s32, 14);
- d3s16 = vrshrn_n_s32(q2s32, 14);
- d24s16 = vrshrn_n_s32(q12s32, 14);
- d25s16 = vrshrn_n_s32(q15s32, 14);
- d6s16 = vrshrn_n_s32(q3s32, 14);
- d7s16 = vrshrn_n_s32(q4s32, 14);
- *q12s16 = vcombine_s16(d24s16, d25s16);
-
- d0s16 = vdup_n_s16(cospi_10_64);
- d1s16 = vdup_n_s16(cospi_22_64);
- q4s32 = vmull_s16(d26s16, d0s16);
- q5s32 = vmull_s16(d27s16, d0s16);
- q2s32 = vmull_s16(d26s16, d1s16);
- q6s32 = vmull_s16(d27s16, d1s16);
-
- d30s16 = vdup_n_s16(cospi_26_64);
- d31s16 = vdup_n_s16(cospi_6_64);
-
- q4s32 = vmlal_s16(q4s32, d20s16, d1s16);
- q5s32 = vmlal_s16(q5s32, d21s16, d1s16);
- q2s32 = vmlsl_s16(q2s32, d20s16, d0s16);
- q6s32 = vmlsl_s16(q6s32, d21s16, d0s16);
-
- q0s32 = vmull_s16(d18s16, d30s16);
- q13s32 = vmull_s16(d19s16, d30s16);
-
- q0s32 = vmlal_s16(q0s32, d28s16, d31s16);
- q13s32 = vmlal_s16(q13s32, d29s16, d31s16);
-
- q10s32 = vmull_s16(d18s16, d31s16);
- q9s32 = vmull_s16(d19s16, d31s16);
-
- q10s32 = vmlsl_s16(q10s32, d28s16, d30s16);
- q9s32 = vmlsl_s16(q9s32, d29s16, d30s16);
-
- q14s32 = vaddq_s32(q2s32, q10s32);
- q15s32 = vaddq_s32(q6s32, q9s32);
- q2s32 = vsubq_s32(q2s32, q10s32);
- q6s32 = vsubq_s32(q6s32, q9s32);
-
- d28s16 = vrshrn_n_s32(q14s32, 14);
- d29s16 = vrshrn_n_s32(q15s32, 14);
- d4s16 = vrshrn_n_s32(q2s32, 14);
- d5s16 = vrshrn_n_s32(q6s32, 14);
- *q14s16 = vcombine_s16(d28s16, d29s16);
-
- q9s32 = vaddq_s32(q4s32, q0s32);
- q10s32 = vaddq_s32(q5s32, q13s32);
- q4s32 = vsubq_s32(q4s32, q0s32);
- q5s32 = vsubq_s32(q5s32, q13s32);
-
- d30s16 = vdup_n_s16(cospi_8_64);
- d31s16 = vdup_n_s16(cospi_24_64);
-
- d18s16 = vrshrn_n_s32(q9s32, 14);
- d19s16 = vrshrn_n_s32(q10s32, 14);
- d8s16 = vrshrn_n_s32(q4s32, 14);
- d9s16 = vrshrn_n_s32(q5s32, 14);
- *q9s16 = vcombine_s16(d18s16, d19s16);
-
- q5s32 = vmull_s16(d2s16, d30s16);
- q6s32 = vmull_s16(d3s16, d30s16);
- q7s32 = vmull_s16(d2s16, d31s16);
- q0s32 = vmull_s16(d3s16, d31s16);
-
- q5s32 = vmlal_s16(q5s32, d6s16, d31s16);
- q6s32 = vmlal_s16(q6s32, d7s16, d31s16);
- q7s32 = vmlsl_s16(q7s32, d6s16, d30s16);
- q0s32 = vmlsl_s16(q0s32, d7s16, d30s16);
-
- q1s32 = vmull_s16(d4s16, d30s16);
- q3s32 = vmull_s16(d5s16, d30s16);
- q10s32 = vmull_s16(d4s16, d31s16);
- q2s32 = vmull_s16(d5s16, d31s16);
-
- q1s32 = vmlsl_s16(q1s32, d8s16, d31s16);
- q3s32 = vmlsl_s16(q3s32, d9s16, d31s16);
- q10s32 = vmlal_s16(q10s32, d8s16, d30s16);
- q2s32 = vmlal_s16(q2s32, d9s16, d30s16);
-
- *q8s16 = vaddq_s16(*q11s16, *q9s16);
- *q11s16 = vsubq_s16(*q11s16, *q9s16);
- q4s16 = vaddq_s16(*q12s16, *q14s16);
- *q12s16 = vsubq_s16(*q12s16, *q14s16);
-
- q14s32 = vaddq_s32(q5s32, q1s32);
- q15s32 = vaddq_s32(q6s32, q3s32);
- q5s32 = vsubq_s32(q5s32, q1s32);
- q6s32 = vsubq_s32(q6s32, q3s32);
-
- d18s16 = vrshrn_n_s32(q14s32, 14);
- d19s16 = vrshrn_n_s32(q15s32, 14);
- d10s16 = vrshrn_n_s32(q5s32, 14);
- d11s16 = vrshrn_n_s32(q6s32, 14);
- *q9s16 = vcombine_s16(d18s16, d19s16);
-
- q1s32 = vaddq_s32(q7s32, q10s32);
- q3s32 = vaddq_s32(q0s32, q2s32);
- q7s32 = vsubq_s32(q7s32, q10s32);
- q0s32 = vsubq_s32(q0s32, q2s32);
-
- d28s16 = vrshrn_n_s32(q1s32, 14);
- d29s16 = vrshrn_n_s32(q3s32, 14);
- d14s16 = vrshrn_n_s32(q7s32, 14);
- d15s16 = vrshrn_n_s32(q0s32, 14);
- *q14s16 = vcombine_s16(d28s16, d29s16);
-
- d30s16 = vdup_n_s16(cospi_16_64);
-
- d22s16 = vget_low_s16(*q11s16);
- d23s16 = vget_high_s16(*q11s16);
- q2s32 = vmull_s16(d22s16, d30s16);
- q3s32 = vmull_s16(d23s16, d30s16);
- q13s32 = vmull_s16(d22s16, d30s16);
- q1s32 = vmull_s16(d23s16, d30s16);
-
- d24s16 = vget_low_s16(*q12s16);
- d25s16 = vget_high_s16(*q12s16);
- q2s32 = vmlal_s16(q2s32, d24s16, d30s16);
- q3s32 = vmlal_s16(q3s32, d25s16, d30s16);
- q13s32 = vmlsl_s16(q13s32, d24s16, d30s16);
- q1s32 = vmlsl_s16(q1s32, d25s16, d30s16);
-
- d4s16 = vrshrn_n_s32(q2s32, 14);
- d5s16 = vrshrn_n_s32(q3s32, 14);
- d24s16 = vrshrn_n_s32(q13s32, 14);
- d25s16 = vrshrn_n_s32(q1s32, 14);
- q2s16 = vcombine_s16(d4s16, d5s16);
- *q12s16 = vcombine_s16(d24s16, d25s16);
-
- q13s32 = vmull_s16(d10s16, d30s16);
- q1s32 = vmull_s16(d11s16, d30s16);
- q11s32 = vmull_s16(d10s16, d30s16);
- q0s32 = vmull_s16(d11s16, d30s16);
-
- q13s32 = vmlal_s16(q13s32, d14s16, d30s16);
- q1s32 = vmlal_s16(q1s32, d15s16, d30s16);
- q11s32 = vmlsl_s16(q11s32, d14s16, d30s16);
- q0s32 = vmlsl_s16(q0s32, d15s16, d30s16);
-
- d20s16 = vrshrn_n_s32(q13s32, 14);
- d21s16 = vrshrn_n_s32(q1s32, 14);
- d12s16 = vrshrn_n_s32(q11s32, 14);
- d13s16 = vrshrn_n_s32(q0s32, 14);
- *q10s16 = vcombine_s16(d20s16, d21s16);
- q6s16 = vcombine_s16(d12s16, d13s16);
-
- q5s16 = vdupq_n_s16(0);
-
- *q9s16 = vsubq_s16(q5s16, *q9s16);
- *q11s16 = vsubq_s16(q5s16, q2s16);
- *q13s16 = vsubq_s16(q5s16, q6s16);
- *q15s16 = vsubq_s16(q5s16, q4s16);
-}
-
void vp9_iht8x8_64_add_neon(const tran_low_t *input, uint8_t *dest, int stride,
int tx_type) {
- int i;
- uint8_t *d1, *d2;
- uint8x8_t d0u8, d1u8, d2u8, d3u8;
- uint64x1_t d0u64, d1u64, d2u64, d3u64;
- int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16;
- uint16x8_t q8u16, q9u16, q10u16, q11u16;
-
- q8s16 = vld1q_s16(input);
- q9s16 = vld1q_s16(input + 8);
- q10s16 = vld1q_s16(input + 8 * 2);
- q11s16 = vld1q_s16(input + 8 * 3);
- q12s16 = vld1q_s16(input + 8 * 4);
- q13s16 = vld1q_s16(input + 8 * 5);
- q14s16 = vld1q_s16(input + 8 * 6);
- q15s16 = vld1q_s16(input + 8 * 7);
-
- transpose_s16_8x8(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
- &q15s16);
+ const int16x8_t cospis = vld1q_s16(kCospi);
+ const int16x4_t cospis0 = vget_low_s16(cospis); // cospi 0, 8, 16, 24
+ const int16x4_t cospis1 = vget_high_s16(cospis); // cospi 4, 12, 20, 28
+ int16x8_t a[8];
+
+ a[0] = load_tran_low_to_s16q(input + 0 * 8);
+ a[1] = load_tran_low_to_s16q(input + 1 * 8);
+ a[2] = load_tran_low_to_s16q(input + 2 * 8);
+ a[3] = load_tran_low_to_s16q(input + 3 * 8);
+ a[4] = load_tran_low_to_s16q(input + 4 * 8);
+ a[5] = load_tran_low_to_s16q(input + 5 * 8);
+ a[6] = load_tran_low_to_s16q(input + 6 * 8);
+ a[7] = load_tran_low_to_s16q(input + 7 * 8);
+
+ transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]);
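+
+  // Each case below is a row transform, a transpose, then a column transform;
+  // the shared rounding and add to dest happen in idct8x8_add8x8_neon().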
switch (tx_type) {
- case 0: // idct_idct is not supported. Fall back to C
- vp9_iht8x8_64_add_c(input, dest, stride, tx_type);
- return;
- case 1: // iadst_idct
- // generate IDCT constants
- // GENERATE_IDCT_CONSTANTS
-
- // first transform rows
- IDCT8x8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
- &q15s16);
-
- // transpose the matrix
- transpose_s16_8x8(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16,
- &q14s16, &q15s16);
-
- // generate IADST constants
- // GENERATE_IADST_CONSTANTS
-
- // then transform columns
- IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
- &q15s16);
+ case DCT_DCT:
+ idct8x8_64_1d_bd8_kernel(cospis0, cospis1, a);
+ transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]);
+ idct8x8_64_1d_bd8_kernel(cospis0, cospis1, a);
break;
- case 2: // idct_iadst
- // generate IADST constants
- // GENERATE_IADST_CONSTANTS
-
- // first transform rows
- IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
- &q15s16);
-
- // transpose the matrix
- transpose_s16_8x8(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16,
- &q14s16, &q15s16);
- // generate IDCT constants
- // GENERATE_IDCT_CONSTANTS
-
- // then transform columns
- IDCT8x8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
- &q15s16);
+ case ADST_DCT:
+ idct8x8_64_1d_bd8_kernel(cospis0, cospis1, a);
+ transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]);
+ iadst8(a);
break;
- case 3: // iadst_iadst
- // generate IADST constants
- // GENERATE_IADST_CONSTANTS
-
- // first transform rows
- IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
- &q15s16);
-
- // transpose the matrix
- transpose_s16_8x8(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16,
- &q14s16, &q15s16);
- // then transform columns
- IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, &q12s16, &q13s16, &q14s16,
- &q15s16);
+ case DCT_ADST:
+ iadst8(a);
+ transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]);
+ idct8x8_64_1d_bd8_kernel(cospis0, cospis1, a);
break;
- default: // iadst_idct
- assert(0);
+
+ default:
+ assert(tx_type == ADST_ADST);
+ iadst8(a);
+ transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]);
+ iadst8(a);
break;
}
- q8s16 = vrshrq_n_s16(q8s16, 5);
- q9s16 = vrshrq_n_s16(q9s16, 5);
- q10s16 = vrshrq_n_s16(q10s16, 5);
- q11s16 = vrshrq_n_s16(q11s16, 5);
- q12s16 = vrshrq_n_s16(q12s16, 5);
- q13s16 = vrshrq_n_s16(q13s16, 5);
- q14s16 = vrshrq_n_s16(q14s16, 5);
- q15s16 = vrshrq_n_s16(q15s16, 5);
-
- for (d1 = d2 = dest, i = 0; i < 2; i++) {
- if (i != 0) {
- q8s16 = q12s16;
- q9s16 = q13s16;
- q10s16 = q14s16;
- q11s16 = q15s16;
- }
-
- d0u64 = vld1_u64((uint64_t *)d1);
- d1 += stride;
- d1u64 = vld1_u64((uint64_t *)d1);
- d1 += stride;
- d2u64 = vld1_u64((uint64_t *)d1);
- d1 += stride;
- d3u64 = vld1_u64((uint64_t *)d1);
- d1 += stride;
-
- q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), vreinterpret_u8_u64(d0u64));
- q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), vreinterpret_u8_u64(d1u64));
- q10u16 =
- vaddw_u8(vreinterpretq_u16_s16(q10s16), vreinterpret_u8_u64(d2u64));
- q11u16 =
- vaddw_u8(vreinterpretq_u16_s16(q11s16), vreinterpret_u8_u64(d3u64));
-
- d0u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16));
- d1u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16));
- d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q10u16));
- d3u8 = vqmovun_s16(vreinterpretq_s16_u16(q11u16));
-
- vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d0u8));
- d2 += stride;
- vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d1u8));
- d2 += stride;
- vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d2u8));
- d2 += stride;
- vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d3u8));
- d2 += stride;
- }
+ idct8x8_add8x8_neon(a, dest, stride);
}
diff --git a/libvpx/vp9/common/arm/neon/vp9_iht_neon.h b/libvpx/vp9/common/arm/neon/vp9_iht_neon.h
new file mode 100644
index 000000000..c64822e27
--- /dev/null
+++ b/libvpx/vp9/common/arm/neon/vp9_iht_neon.h
@@ -0,0 +1,272 @@
+/*
+ * Copyright (c) 2018 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_VP9_COMMON_ARM_NEON_VP9_IHT_NEON_H_
+#define VPX_VP9_COMMON_ARM_NEON_VP9_IHT_NEON_H_
+
+#include <arm_neon.h>
+
+#include "./vp9_rtcd.h"
+#include "./vpx_config.h"
+#include "vp9/common/vp9_common.h"
+#include "vpx_dsp/arm/idct_neon.h"
+#include "vpx_dsp/arm/mem_neon.h"
+#include "vpx_dsp/txfm_common.h"
+
+static INLINE void iadst4(int16x8_t *const io) {
+ const int32x4_t c3 = vdupq_n_s32(sinpi_3_9);
+ int16x4_t x[4];
+ int32x4_t s[8], output[4];
+ const int16x4_t c =
+ create_s16x4_neon(sinpi_1_9, sinpi_2_9, sinpi_3_9, sinpi_4_9);
+
+ x[0] = vget_low_s16(io[0]);
+ x[1] = vget_low_s16(io[1]);
+ x[2] = vget_high_s16(io[0]);
+ x[3] = vget_high_s16(io[1]);
+
+ s[0] = vmull_lane_s16(x[0], c, 0);
+ s[1] = vmull_lane_s16(x[0], c, 1);
+ s[2] = vmull_lane_s16(x[1], c, 2);
+ s[3] = vmull_lane_s16(x[2], c, 3);
+ s[4] = vmull_lane_s16(x[2], c, 0);
+ s[5] = vmull_lane_s16(x[3], c, 1);
+ s[6] = vmull_lane_s16(x[3], c, 3);
+ s[7] = vaddl_s16(x[0], x[3]);
+ s[7] = vsubw_s16(s[7], x[2]);
+
+ s[0] = vaddq_s32(s[0], s[3]);
+ s[0] = vaddq_s32(s[0], s[5]);
+ s[1] = vsubq_s32(s[1], s[4]);
+ s[1] = vsubq_s32(s[1], s[6]);
+ s[3] = s[2];
+ s[2] = vmulq_s32(c3, s[7]);
+
+ output[0] = vaddq_s32(s[0], s[3]);
+ output[1] = vaddq_s32(s[1], s[3]);
+ output[2] = s[2];
+ output[3] = vaddq_s32(s[0], s[1]);
+ output[3] = vsubq_s32(output[3], s[3]);
+ dct_const_round_shift_low_8_dual(output, &io[0], &io[1]);
+}
+
+static INLINE void iadst_half_butterfly_neon(int16x8_t *const x,
+ const int16x4_t c) {
+ // Don't add/sub before multiply, which will overflow in iadst8.
+ const int32x4_t x0_lo = vmull_lane_s16(vget_low_s16(x[0]), c, 0);
+ const int32x4_t x0_hi = vmull_lane_s16(vget_high_s16(x[0]), c, 0);
+ const int32x4_t x1_lo = vmull_lane_s16(vget_low_s16(x[1]), c, 0);
+ const int32x4_t x1_hi = vmull_lane_s16(vget_high_s16(x[1]), c, 0);
+ int32x4_t t0[2], t1[2];
+
+ t0[0] = vaddq_s32(x0_lo, x1_lo);
+ t0[1] = vaddq_s32(x0_hi, x1_hi);
+ t1[0] = vsubq_s32(x0_lo, x1_lo);
+ t1[1] = vsubq_s32(x0_hi, x1_hi);
+ x[0] = dct_const_round_shift_low_8(t0);
+ x[1] = dct_const_round_shift_low_8(t1);
+}
+
+static INLINE void iadst_half_butterfly_neg_neon(int16x8_t *const x0,
+ int16x8_t *const x1,
+ const int16x4_t c) {
+ // Don't add/sub before multiply, which will overflow in iadst8.
+ const int32x4_t x0_lo = vmull_lane_s16(vget_low_s16(*x0), c, 1);
+ const int32x4_t x0_hi = vmull_lane_s16(vget_high_s16(*x0), c, 1);
+ const int32x4_t x1_lo = vmull_lane_s16(vget_low_s16(*x1), c, 1);
+ const int32x4_t x1_hi = vmull_lane_s16(vget_high_s16(*x1), c, 1);
+ int32x4_t t0[2], t1[2];
+
+ t0[0] = vaddq_s32(x0_lo, x1_lo);
+ t0[1] = vaddq_s32(x0_hi, x1_hi);
+ t1[0] = vsubq_s32(x0_lo, x1_lo);
+ t1[1] = vsubq_s32(x0_hi, x1_hi);
+ *x1 = dct_const_round_shift_low_8(t0);
+ *x0 = dct_const_round_shift_low_8(t1);
+}
+
+static INLINE void iadst_half_butterfly_pos_neon(int16x8_t *const x0,
+ int16x8_t *const x1,
+ const int16x4_t c) {
+ // Don't add/sub before multiply, which will overflow in iadst8.
+ const int32x4_t x0_lo = vmull_lane_s16(vget_low_s16(*x0), c, 0);
+ const int32x4_t x0_hi = vmull_lane_s16(vget_high_s16(*x0), c, 0);
+ const int32x4_t x1_lo = vmull_lane_s16(vget_low_s16(*x1), c, 0);
+ const int32x4_t x1_hi = vmull_lane_s16(vget_high_s16(*x1), c, 0);
+ int32x4_t t0[2], t1[2];
+
+ t0[0] = vaddq_s32(x0_lo, x1_lo);
+ t0[1] = vaddq_s32(x0_hi, x1_hi);
+ t1[0] = vsubq_s32(x0_lo, x1_lo);
+ t1[1] = vsubq_s32(x0_hi, x1_hi);
+ *x1 = dct_const_round_shift_low_8(t0);
+ *x0 = dct_const_round_shift_low_8(t1);
+}
+
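+// Each iadst_butterfly_lane_<i>_<j>_neon() below computes, in 32-bit
+// intermediates,
+//   s0 = in0 * c[i] + in1 * c[j]
+//   s1 = in0 * c[j] - in1 * c[i]
+// The caller narrows the results back to 16 bits with
+// add_dct_const_round_shift_low_8() / sub_dct_const_round_shift_low_8().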
+static INLINE void iadst_butterfly_lane_0_1_neon(const int16x8_t in0,
+ const int16x8_t in1,
+ const int16x4_t c,
+ int32x4_t *const s0,
+ int32x4_t *const s1) {
+ s0[0] = vmull_lane_s16(vget_low_s16(in0), c, 0);
+ s0[1] = vmull_lane_s16(vget_high_s16(in0), c, 0);
+ s1[0] = vmull_lane_s16(vget_low_s16(in0), c, 1);
+ s1[1] = vmull_lane_s16(vget_high_s16(in0), c, 1);
+
+ s0[0] = vmlal_lane_s16(s0[0], vget_low_s16(in1), c, 1);
+ s0[1] = vmlal_lane_s16(s0[1], vget_high_s16(in1), c, 1);
+ s1[0] = vmlsl_lane_s16(s1[0], vget_low_s16(in1), c, 0);
+ s1[1] = vmlsl_lane_s16(s1[1], vget_high_s16(in1), c, 0);
+}
+
+static INLINE void iadst_butterfly_lane_2_3_neon(const int16x8_t in0,
+ const int16x8_t in1,
+ const int16x4_t c,
+ int32x4_t *const s0,
+ int32x4_t *const s1) {
+ s0[0] = vmull_lane_s16(vget_low_s16(in0), c, 2);
+ s0[1] = vmull_lane_s16(vget_high_s16(in0), c, 2);
+ s1[0] = vmull_lane_s16(vget_low_s16(in0), c, 3);
+ s1[1] = vmull_lane_s16(vget_high_s16(in0), c, 3);
+
+ s0[0] = vmlal_lane_s16(s0[0], vget_low_s16(in1), c, 3);
+ s0[1] = vmlal_lane_s16(s0[1], vget_high_s16(in1), c, 3);
+ s1[0] = vmlsl_lane_s16(s1[0], vget_low_s16(in1), c, 2);
+ s1[1] = vmlsl_lane_s16(s1[1], vget_high_s16(in1), c, 2);
+}
+
+static INLINE void iadst_butterfly_lane_1_0_neon(const int16x8_t in0,
+ const int16x8_t in1,
+ const int16x4_t c,
+ int32x4_t *const s0,
+ int32x4_t *const s1) {
+ s0[0] = vmull_lane_s16(vget_low_s16(in0), c, 1);
+ s0[1] = vmull_lane_s16(vget_high_s16(in0), c, 1);
+ s1[0] = vmull_lane_s16(vget_low_s16(in0), c, 0);
+ s1[1] = vmull_lane_s16(vget_high_s16(in0), c, 0);
+
+ s0[0] = vmlal_lane_s16(s0[0], vget_low_s16(in1), c, 0);
+ s0[1] = vmlal_lane_s16(s0[1], vget_high_s16(in1), c, 0);
+ s1[0] = vmlsl_lane_s16(s1[0], vget_low_s16(in1), c, 1);
+ s1[1] = vmlsl_lane_s16(s1[1], vget_high_s16(in1), c, 1);
+}
+
+static INLINE void iadst_butterfly_lane_3_2_neon(const int16x8_t in0,
+ const int16x8_t in1,
+ const int16x4_t c,
+ int32x4_t *const s0,
+ int32x4_t *const s1) {
+ s0[0] = vmull_lane_s16(vget_low_s16(in0), c, 3);
+ s0[1] = vmull_lane_s16(vget_high_s16(in0), c, 3);
+ s1[0] = vmull_lane_s16(vget_low_s16(in0), c, 2);
+ s1[1] = vmull_lane_s16(vget_high_s16(in0), c, 2);
+
+ s0[0] = vmlal_lane_s16(s0[0], vget_low_s16(in1), c, 2);
+ s0[1] = vmlal_lane_s16(s0[1], vget_high_s16(in1), c, 2);
+ s1[0] = vmlsl_lane_s16(s1[0], vget_low_s16(in1), c, 3);
+ s1[1] = vmlsl_lane_s16(s1[1], vget_high_s16(in1), c, 3);
+}
+
+static INLINE int16x8_t add_dct_const_round_shift_low_8(
+ const int32x4_t *const in0, const int32x4_t *const in1) {
+ int32x4_t sum[2];
+
+ sum[0] = vaddq_s32(in0[0], in1[0]);
+ sum[1] = vaddq_s32(in0[1], in1[1]);
+ return dct_const_round_shift_low_8(sum);
+}
+
+static INLINE int16x8_t sub_dct_const_round_shift_low_8(
+ const int32x4_t *const in0, const int32x4_t *const in1) {
+ int32x4_t sum[2];
+
+ sum[0] = vsubq_s32(in0[0], in1[0]);
+ sum[1] = vsubq_s32(in0[1], in1[1]);
+ return dct_const_round_shift_low_8(sum);
+}
+
+static INLINE void iadst8(int16x8_t *const io) {
+ const int16x4_t c0 =
+ create_s16x4_neon(cospi_2_64, cospi_30_64, cospi_10_64, cospi_22_64);
+ const int16x4_t c1 =
+ create_s16x4_neon(cospi_18_64, cospi_14_64, cospi_26_64, cospi_6_64);
+ const int16x4_t c2 =
+ create_s16x4_neon(cospi_16_64, 0, cospi_8_64, cospi_24_64);
+ int16x8_t x[8], t[4];
+ int32x4_t s0[2], s1[2], s2[2], s3[2], s4[2], s5[2], s6[2], s7[2];
+
+ x[0] = io[7];
+ x[1] = io[0];
+ x[2] = io[5];
+ x[3] = io[2];
+ x[4] = io[3];
+ x[5] = io[4];
+ x[6] = io[1];
+ x[7] = io[6];
+
+ // stage 1
+ iadst_butterfly_lane_0_1_neon(x[0], x[1], c0, s0, s1);
+ iadst_butterfly_lane_2_3_neon(x[2], x[3], c0, s2, s3);
+ iadst_butterfly_lane_0_1_neon(x[4], x[5], c1, s4, s5);
+ iadst_butterfly_lane_2_3_neon(x[6], x[7], c1, s6, s7);
+
+ x[0] = add_dct_const_round_shift_low_8(s0, s4);
+ x[1] = add_dct_const_round_shift_low_8(s1, s5);
+ x[2] = add_dct_const_round_shift_low_8(s2, s6);
+ x[3] = add_dct_const_round_shift_low_8(s3, s7);
+ x[4] = sub_dct_const_round_shift_low_8(s0, s4);
+ x[5] = sub_dct_const_round_shift_low_8(s1, s5);
+ x[6] = sub_dct_const_round_shift_low_8(s2, s6);
+ x[7] = sub_dct_const_round_shift_low_8(s3, s7);
+
+ // stage 2
+ t[0] = x[0];
+ t[1] = x[1];
+ t[2] = x[2];
+ t[3] = x[3];
+ iadst_butterfly_lane_2_3_neon(x[4], x[5], c2, s4, s5);
+ iadst_butterfly_lane_3_2_neon(x[7], x[6], c2, s7, s6);
+
+ x[0] = vaddq_s16(t[0], t[2]);
+ x[1] = vaddq_s16(t[1], t[3]);
+ x[2] = vsubq_s16(t[0], t[2]);
+ x[3] = vsubq_s16(t[1], t[3]);
+ x[4] = add_dct_const_round_shift_low_8(s4, s6);
+ x[5] = add_dct_const_round_shift_low_8(s5, s7);
+ x[6] = sub_dct_const_round_shift_low_8(s4, s6);
+ x[7] = sub_dct_const_round_shift_low_8(s5, s7);
+
+ // stage 3
+ iadst_half_butterfly_neon(x + 2, c2);
+ iadst_half_butterfly_neon(x + 6, c2);
+
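+  // Output reordering: the odd outputs (io[1], io[3], io[5], io[7]) are
+  // negated, matching the sign flips of the reference iadst8.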
+ io[0] = x[0];
+ io[1] = vnegq_s16(x[4]);
+ io[2] = x[6];
+ io[3] = vnegq_s16(x[2]);
+ io[4] = x[3];
+ io[5] = vnegq_s16(x[7]);
+ io[6] = x[5];
+ io[7] = vnegq_s16(x[1]);
+}
+
+void vpx_iadst16x16_256_add_half1d(const void *const input, int16_t *output,
+ void *const dest, const int stride,
+ const int highbd_flag);
+
+typedef void (*iht_1d)(const void *const input, int16_t *output,
+ void *const dest, const int stride,
+ const int highbd_flag);
+
+typedef struct {
+ iht_1d cols, rows; // vertical and horizontal
+} iht_2d;
+
+#endif // VPX_VP9_COMMON_ARM_NEON_VP9_IHT_NEON_H_
diff --git a/libvpx/vp9/common/ppc/vp9_idct_vsx.c b/libvpx/vp9/common/ppc/vp9_idct_vsx.c
new file mode 100644
index 000000000..1b2a93edb
--- /dev/null
+++ b/libvpx/vp9/common/ppc/vp9_idct_vsx.c
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2018 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+
+#include "vpx_dsp/vpx_dsp_common.h"
+#include "vpx_dsp/ppc/inv_txfm_vsx.h"
+#include "vpx_dsp/ppc/bitdepth_conversion_vsx.h"
+
+#include "vp9/common/vp9_enums.h"
+
+void vp9_iht4x4_16_add_vsx(const tran_low_t *input, uint8_t *dest, int stride,
+ int tx_type) {
+ int16x8_t in[2], out[2];
+
+ in[0] = load_tran_low(0, input);
+ in[1] = load_tran_low(8 * sizeof(*input), input);
+
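+  // The row pass writes to out[] and the column pass back to in[]; the
+  // rounded result is added to dest by vpx_round_store4x4_vsx() below.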
+ switch (tx_type) {
+ case DCT_DCT:
+ vpx_idct4_vsx(in, out);
+ vpx_idct4_vsx(out, in);
+ break;
+ case ADST_DCT:
+ vpx_idct4_vsx(in, out);
+ vp9_iadst4_vsx(out, in);
+ break;
+ case DCT_ADST:
+ vp9_iadst4_vsx(in, out);
+ vpx_idct4_vsx(out, in);
+ break;
+ default:
+ assert(tx_type == ADST_ADST);
+ vp9_iadst4_vsx(in, out);
+ vp9_iadst4_vsx(out, in);
+ break;
+ }
+
+ vpx_round_store4x4_vsx(in, out, dest, stride);
+}
+
+void vp9_iht8x8_64_add_vsx(const tran_low_t *input, uint8_t *dest, int stride,
+ int tx_type) {
+ int16x8_t in[8], out[8];
+
+ // load input data
+ in[0] = load_tran_low(0, input);
+ in[1] = load_tran_low(8 * sizeof(*input), input);
+ in[2] = load_tran_low(2 * 8 * sizeof(*input), input);
+ in[3] = load_tran_low(3 * 8 * sizeof(*input), input);
+ in[4] = load_tran_low(4 * 8 * sizeof(*input), input);
+ in[5] = load_tran_low(5 * 8 * sizeof(*input), input);
+ in[6] = load_tran_low(6 * 8 * sizeof(*input), input);
+ in[7] = load_tran_low(7 * 8 * sizeof(*input), input);
+
+ switch (tx_type) {
+ case DCT_DCT:
+ vpx_idct8_vsx(in, out);
+ vpx_idct8_vsx(out, in);
+ break;
+ case ADST_DCT:
+ vpx_idct8_vsx(in, out);
+ vp9_iadst8_vsx(out, in);
+ break;
+ case DCT_ADST:
+ vp9_iadst8_vsx(in, out);
+ vpx_idct8_vsx(out, in);
+ break;
+ default:
+ assert(tx_type == ADST_ADST);
+ vp9_iadst8_vsx(in, out);
+ vp9_iadst8_vsx(out, in);
+ break;
+ }
+
+ vpx_round_store8x8_vsx(in, dest, stride);
+}
+
+void vp9_iht16x16_256_add_vsx(const tran_low_t *input, uint8_t *dest,
+ int stride, int tx_type) {
+ int16x8_t in0[16], in1[16];
+
+ LOAD_INPUT16(load_tran_low, input, 0, 8 * sizeof(*input), in0);
+ LOAD_INPUT16(load_tran_low, input, 8 * 8 * 2 * sizeof(*input),
+ 8 * sizeof(*input), in1);
+
+ switch (tx_type) {
+ case DCT_DCT:
+ vpx_idct16_vsx(in0, in1);
+ vpx_idct16_vsx(in0, in1);
+ break;
+ case ADST_DCT:
+ vpx_idct16_vsx(in0, in1);
+ vpx_iadst16_vsx(in0, in1);
+ break;
+ case DCT_ADST:
+ vpx_iadst16_vsx(in0, in1);
+ vpx_idct16_vsx(in0, in1);
+ break;
+ default:
+ assert(tx_type == ADST_ADST);
+ vpx_iadst16_vsx(in0, in1);
+ vpx_iadst16_vsx(in0, in1);
+ break;
+ }
+
+ vpx_round_store16x16_vsx(in0, in1, dest, stride);
+}
diff --git a/libvpx/vp9/common/vp9_alloccommon.h b/libvpx/vp9/common/vp9_alloccommon.h
index a3a163857..5faa4f2be 100644
--- a/libvpx/vp9/common/vp9_alloccommon.h
+++ b/libvpx/vp9/common/vp9_alloccommon.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_COMMON_VP9_ALLOCCOMMON_H_
-#define VP9_COMMON_VP9_ALLOCCOMMON_H_
+#ifndef VPX_VP9_COMMON_VP9_ALLOCCOMMON_H_
+#define VPX_VP9_COMMON_VP9_ALLOCCOMMON_H_
#define INVALID_IDX -1 // Invalid buffer index.
@@ -41,4 +41,4 @@ void vp9_swap_current_and_last_seg_map(struct VP9Common *cm);
} // extern "C"
#endif
-#endif // VP9_COMMON_VP9_ALLOCCOMMON_H_
+#endif // VPX_VP9_COMMON_VP9_ALLOCCOMMON_H_
diff --git a/libvpx/vp9/common/vp9_blockd.h b/libvpx/vp9/common/vp9_blockd.h
index 780b29208..f0887157e 100644
--- a/libvpx/vp9/common/vp9_blockd.h
+++ b/libvpx/vp9/common/vp9_blockd.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_COMMON_VP9_BLOCKD_H_
-#define VP9_COMMON_VP9_BLOCKD_H_
+#ifndef VPX_VP9_COMMON_VP9_BLOCKD_H_
+#define VPX_VP9_COMMON_VP9_BLOCKD_H_
#include "./vpx_config.h"
@@ -60,6 +60,7 @@ typedef struct {
#define GOLDEN_FRAME 2
#define ALTREF_FRAME 3
#define MAX_REF_FRAMES 4
+
typedef int8_t MV_REFERENCE_FRAME;
// This structure now relates to 8x8 block regions.
@@ -130,6 +131,8 @@ struct macroblockd_plane {
// encoder
const int16_t *dequant;
+
+ int *eob;
};
#define BLOCK_OFFSET(x, i) ((x) + (i)*16)
@@ -193,6 +196,8 @@ typedef struct macroblockd {
int corrupted;
struct vpx_internal_error_info *error_info;
+
+ PARTITION_TYPE *partition;
} MACROBLOCKD;
static INLINE PLANE_TYPE get_plane_type(int plane) {
@@ -285,4 +290,4 @@ void vp9_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd,
} // extern "C"
#endif
-#endif // VP9_COMMON_VP9_BLOCKD_H_
+#endif // VPX_VP9_COMMON_VP9_BLOCKD_H_
diff --git a/libvpx/vp9/common/vp9_common.h b/libvpx/vp9/common/vp9_common.h
index 666c3beaf..ae8dad38e 100644
--- a/libvpx/vp9/common/vp9_common.h
+++ b/libvpx/vp9/common/vp9_common.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_COMMON_VP9_COMMON_H_
-#define VP9_COMMON_VP9_COMMON_H_
+#ifndef VPX_VP9_COMMON_VP9_COMMON_H_
+#define VPX_VP9_COMMON_VP9_COMMON_H_
/* Interface header for common constant data structures and lookup tables */
@@ -75,4 +75,4 @@ static INLINE int get_unsigned_bits(unsigned int num_values) {
} // extern "C"
#endif
-#endif // VP9_COMMON_VP9_COMMON_H_
+#endif // VPX_VP9_COMMON_VP9_COMMON_H_
diff --git a/libvpx/vp9/common/vp9_common_data.c b/libvpx/vp9/common/vp9_common_data.c
index 4a1083322..809d7317c 100644
--- a/libvpx/vp9/common/vp9_common_data.c
+++ b/libvpx/vp9/common/vp9_common_data.c
@@ -28,7 +28,7 @@ const uint8_t num_8x8_blocks_wide_lookup[BLOCK_SIZES] = { 1, 1, 1, 1, 1, 2, 2,
const uint8_t num_8x8_blocks_high_lookup[BLOCK_SIZES] = { 1, 1, 1, 1, 2, 1, 2,
4, 2, 4, 8, 4, 8 };
-// VPXMIN(3, VPXMIN(b_width_log2(bsize), b_height_log2(bsize)))
+// VPXMIN(3, VPXMIN(b_width_log2_lookup(bsize), b_height_log2_lookup(bsize)))
const uint8_t size_group_lookup[BLOCK_SIZES] = { 0, 0, 0, 1, 1, 1, 2,
2, 2, 3, 3, 3, 3 };
diff --git a/libvpx/vp9/common/vp9_common_data.h b/libvpx/vp9/common/vp9_common_data.h
index 5c6a7e8ff..a533c5f05 100644
--- a/libvpx/vp9/common/vp9_common_data.h
+++ b/libvpx/vp9/common/vp9_common_data.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_COMMON_VP9_COMMON_DATA_H_
-#define VP9_COMMON_VP9_COMMON_DATA_H_
+#ifndef VPX_VP9_COMMON_VP9_COMMON_DATA_H_
+#define VPX_VP9_COMMON_VP9_COMMON_DATA_H_
#include "vp9/common/vp9_enums.h"
#include "vpx/vpx_integer.h"
@@ -42,4 +42,4 @@ extern const uint8_t need_top_left[INTRA_MODES];
} // extern "C"
#endif
-#endif // VP9_COMMON_VP9_COMMON_DATA_H_
+#endif // VPX_VP9_COMMON_VP9_COMMON_DATA_H_
diff --git a/libvpx/vp9/common/vp9_entropy.c b/libvpx/vp9/common/vp9_entropy.c
index a575bda72..430b917b8 100644
--- a/libvpx/vp9/common/vp9_entropy.c
+++ b/libvpx/vp9/common/vp9_entropy.c
@@ -42,6 +42,7 @@ const vpx_prob vp9_cat6_prob_high12[] = { 255, 255, 255, 255, 254, 254,
177, 153, 140, 133, 130, 129 };
#endif
+/* clang-format off */
const uint8_t vp9_coefband_trans_8x8plus[1024] = {
0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5,
// beyond MAXBAND_INDEX+1 all values are filled as 5
@@ -85,6 +86,7 @@ const uint8_t vp9_coefband_trans_8x8plus[1024] = {
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
};
+/* clang-format on */
const uint8_t vp9_coefband_trans_4x4[16] = {
0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5,
diff --git a/libvpx/vp9/common/vp9_entropy.h b/libvpx/vp9/common/vp9_entropy.h
index 1da491166..d026651df 100644
--- a/libvpx/vp9/common/vp9_entropy.h
+++ b/libvpx/vp9/common/vp9_entropy.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_COMMON_VP9_ENTROPY_H_
-#define VP9_COMMON_VP9_ENTROPY_H_
+#ifndef VPX_VP9_COMMON_VP9_ENTROPY_H_
+#define VPX_VP9_COMMON_VP9_ENTROPY_H_
#include "vpx/vpx_integer.h"
#include "vpx_dsp/prob.h"
@@ -137,7 +137,6 @@ static INLINE const uint8_t *get_band_translate(TX_SIZE tx_size) {
// 128 lists of probabilities are stored for the following ONE node probs:
// 1, 3, 5, 7, ..., 253, 255
// In between probabilities are interpolated linearly
-
#define COEFF_PROB_MODELS 255
#define UNCONSTRAINED_NODES 3
@@ -195,4 +194,4 @@ static INLINE int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a,
} // extern "C"
#endif
-#endif // VP9_COMMON_VP9_ENTROPY_H_
+#endif // VPX_VP9_COMMON_VP9_ENTROPY_H_
diff --git a/libvpx/vp9/common/vp9_entropymode.c b/libvpx/vp9/common/vp9_entropymode.c
index 47cd63e94..48cad3318 100644
--- a/libvpx/vp9/common/vp9_entropymode.c
+++ b/libvpx/vp9/common/vp9_entropymode.c
@@ -186,16 +186,19 @@ const vpx_prob vp9_kf_partition_probs[PARTITION_CONTEXTS][PARTITION_TYPES - 1] =
{ 93, 24, 99 }, // a split, l not split
{ 85, 119, 44 }, // l split, a not split
{ 62, 59, 67 }, // a/l both split
+
// 16x16 -> 8x8
{ 149, 53, 53 }, // a/l both not split
{ 94, 20, 48 }, // a split, l not split
{ 83, 53, 24 }, // l split, a not split
{ 52, 18, 18 }, // a/l both split
+
// 32x32 -> 16x16
{ 150, 40, 39 }, // a/l both not split
{ 78, 12, 26 }, // a split, l not split
{ 67, 33, 11 }, // l split, a not split
{ 24, 7, 5 }, // a/l both split
+
// 64x64 -> 32x32
{ 174, 35, 49 }, // a/l both not split
{ 68, 11, 27 }, // a split, l not split
diff --git a/libvpx/vp9/common/vp9_entropymode.h b/libvpx/vp9/common/vp9_entropymode.h
index 0ee663fe8..a756c8d0b 100644
--- a/libvpx/vp9/common/vp9_entropymode.h
+++ b/libvpx/vp9/common/vp9_entropymode.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_COMMON_VP9_ENTROPYMODE_H_
-#define VP9_COMMON_VP9_ENTROPYMODE_H_
+#ifndef VPX_VP9_COMMON_VP9_ENTROPYMODE_H_
+#define VPX_VP9_COMMON_VP9_ENTROPYMODE_H_
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_entropymv.h"
@@ -104,4 +104,4 @@ void tx_counts_to_branch_counts_8x8(const unsigned int *tx_count_8x8p,
} // extern "C"
#endif
-#endif // VP9_COMMON_VP9_ENTROPYMODE_H_
+#endif // VPX_VP9_COMMON_VP9_ENTROPYMODE_H_
diff --git a/libvpx/vp9/common/vp9_entropymv.c b/libvpx/vp9/common/vp9_entropymv.c
index a18a290cf..b6f052d08 100644
--- a/libvpx/vp9/common/vp9_entropymv.c
+++ b/libvpx/vp9/common/vp9_entropymv.c
@@ -22,9 +22,7 @@ const vpx_tree_index vp9_mv_class_tree[TREE_SIZE(MV_CLASSES)] = {
18, -MV_CLASS_7, -MV_CLASS_8, -MV_CLASS_9, -MV_CLASS_10,
};
-const vpx_tree_index vp9_mv_class0_tree[TREE_SIZE(CLASS0_SIZE)] = {
- -0, -1,
-};
+const vpx_tree_index vp9_mv_class0_tree[TREE_SIZE(CLASS0_SIZE)] = { -0, -1 };
const vpx_tree_index vp9_mv_fp_tree[TREE_SIZE(MV_FP_SIZE)] = { -0, 2, -1,
4, -2, -3 };
diff --git a/libvpx/vp9/common/vp9_entropymv.h b/libvpx/vp9/common/vp9_entropymv.h
index e2fe37a32..ee9d37973 100644
--- a/libvpx/vp9/common/vp9_entropymv.h
+++ b/libvpx/vp9/common/vp9_entropymv.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_COMMON_VP9_ENTROPYMV_H_
-#define VP9_COMMON_VP9_ENTROPYMV_H_
+#ifndef VPX_VP9_COMMON_VP9_ENTROPYMV_H_
+#define VPX_VP9_COMMON_VP9_ENTROPYMV_H_
#include "./vpx_config.h"
@@ -25,7 +25,7 @@ struct VP9Common;
void vp9_init_mv_probs(struct VP9Common *cm);
-void vp9_adapt_mv_probs(struct VP9Common *cm, int usehp);
+void vp9_adapt_mv_probs(struct VP9Common *cm, int allow_hp);
static INLINE int use_mv_hp(const MV *ref) {
const int kMvRefThresh = 64; // threshold for use of high-precision 1/8 mv
@@ -127,10 +127,10 @@ typedef struct {
nmv_component_counts comps[2];
} nmv_context_counts;
-void vp9_inc_mv(const MV *mv, nmv_context_counts *mvctx);
+void vp9_inc_mv(const MV *mv, nmv_context_counts *counts);
#ifdef __cplusplus
} // extern "C"
#endif
-#endif // VP9_COMMON_VP9_ENTROPYMV_H_
+#endif // VPX_VP9_COMMON_VP9_ENTROPYMV_H_
diff --git a/libvpx/vp9/common/vp9_enums.h b/libvpx/vp9/common/vp9_enums.h
index 056b298b3..bc665534d 100644
--- a/libvpx/vp9/common/vp9_enums.h
+++ b/libvpx/vp9/common/vp9_enums.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_COMMON_VP9_ENUMS_H_
-#define VP9_COMMON_VP9_ENUMS_H_
+#ifndef VPX_VP9_COMMON_VP9_ENUMS_H_
+#define VPX_VP9_COMMON_VP9_ENUMS_H_
#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
@@ -140,4 +140,4 @@ typedef uint8_t PREDICTION_MODE;
} // extern "C"
#endif
-#endif // VP9_COMMON_VP9_ENUMS_H_
+#endif // VPX_VP9_COMMON_VP9_ENUMS_H_
diff --git a/libvpx/vp9/common/vp9_filter.c b/libvpx/vp9/common/vp9_filter.c
index 6c43af8ce..adbda6c82 100644
--- a/libvpx/vp9/common/vp9_filter.c
+++ b/libvpx/vp9/common/vp9_filter.c
@@ -63,6 +63,20 @@ DECLARE_ALIGNED(256, static const InterpKernel,
{ 0, -3, 2, 41, 63, 29, -2, -2 }, { 0, -3, 1, 38, 64, 32, -1, -3 }
};
-const InterpKernel *vp9_filter_kernels[4] = {
- sub_pel_filters_8, sub_pel_filters_8lp, sub_pel_filters_8s, bilinear_filters
+// 4-tap filter
+DECLARE_ALIGNED(256, static const InterpKernel,
+ sub_pel_filters_4[SUBPEL_SHIFTS]) = {
+ { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, -4, 126, 8, -2, 0, 0 },
+ { 0, 0, -6, 120, 18, -4, 0, 0 }, { 0, 0, -8, 114, 28, -6, 0, 0 },
+ { 0, 0, -10, 108, 36, -6, 0, 0 }, { 0, 0, -12, 102, 46, -8, 0, 0 },
+ { 0, 0, -12, 94, 56, -10, 0, 0 }, { 0, 0, -12, 84, 66, -10, 0, 0 },
+ { 0, 0, -12, 76, 76, -12, 0, 0 }, { 0, 0, -10, 66, 84, -12, 0, 0 },
+ { 0, 0, -10, 56, 94, -12, 0, 0 }, { 0, 0, -8, 46, 102, -12, 0, 0 },
+ { 0, 0, -6, 36, 108, -10, 0, 0 }, { 0, 0, -6, 28, 114, -8, 0, 0 },
+ { 0, 0, -4, 18, 120, -6, 0, 0 }, { 0, 0, -2, 8, 126, -4, 0, 0 }
+};
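+
+// Like the filters above, each 4-tap kernel sums to 128 (unity gain at the
+// codec's 7-bit filter precision).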
+
+const InterpKernel *vp9_filter_kernels[5] = {
+ sub_pel_filters_8, sub_pel_filters_8lp, sub_pel_filters_8s, bilinear_filters,
+ sub_pel_filters_4
};
diff --git a/libvpx/vp9/common/vp9_filter.h b/libvpx/vp9/common/vp9_filter.h
index 9d2b8e1db..0382c88e7 100644
--- a/libvpx/vp9/common/vp9_filter.h
+++ b/libvpx/vp9/common/vp9_filter.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_COMMON_VP9_FILTER_H_
-#define VP9_COMMON_VP9_FILTER_H_
+#ifndef VPX_VP9_COMMON_VP9_FILTER_H_
+#define VPX_VP9_COMMON_VP9_FILTER_H_
#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
@@ -25,6 +25,7 @@ extern "C" {
#define EIGHTTAP_SHARP 2
#define SWITCHABLE_FILTERS 3 /* Number of switchable filters */
#define BILINEAR 3
+#define FOURTAP 4
// The codec can operate in four possible inter prediction filter modes:
// 8-tap, 8-tap-smooth, 8-tap-sharp, and switching between the three.
#define SWITCHABLE_FILTER_CONTEXTS (SWITCHABLE_FILTERS + 1)
@@ -32,10 +33,10 @@ extern "C" {
typedef uint8_t INTERP_FILTER;
-extern const InterpKernel *vp9_filter_kernels[4];
+extern const InterpKernel *vp9_filter_kernels[5];
#ifdef __cplusplus
} // extern "C"
#endif
-#endif // VP9_COMMON_VP9_FILTER_H_
+#endif // VPX_VP9_COMMON_VP9_FILTER_H_
diff --git a/libvpx/vp9/common/vp9_frame_buffers.h b/libvpx/vp9/common/vp9_frame_buffers.h
index e2cfe61b6..11be838c0 100644
--- a/libvpx/vp9/common/vp9_frame_buffers.h
+++ b/libvpx/vp9/common/vp9_frame_buffers.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_COMMON_VP9_FRAME_BUFFERS_H_
-#define VP9_COMMON_VP9_FRAME_BUFFERS_H_
+#ifndef VPX_VP9_COMMON_VP9_FRAME_BUFFERS_H_
+#define VPX_VP9_COMMON_VP9_FRAME_BUFFERS_H_
#include "vpx/vpx_frame_buffer.h"
#include "vpx/vpx_integer.h"
@@ -50,4 +50,4 @@ int vp9_release_frame_buffer(void *cb_priv, vpx_codec_frame_buffer_t *fb);
} // extern "C"
#endif
-#endif // VP9_COMMON_VP9_FRAME_BUFFERS_H_
+#endif // VPX_VP9_COMMON_VP9_FRAME_BUFFERS_H_
diff --git a/libvpx/vp9/common/vp9_idct.h b/libvpx/vp9/common/vp9_idct.h
index 3e83b8402..94eeaf599 100644
--- a/libvpx/vp9/common/vp9_idct.h
+++ b/libvpx/vp9/common/vp9_idct.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_COMMON_VP9_IDCT_H_
-#define VP9_COMMON_VP9_IDCT_H_
+#ifndef VPX_VP9_COMMON_VP9_IDCT_H_
+#define VPX_VP9_COMMON_VP9_IDCT_H_
#include <assert.h>
@@ -78,4 +78,4 @@ void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input,
} // extern "C"
#endif
-#endif // VP9_COMMON_VP9_IDCT_H_
+#endif // VPX_VP9_COMMON_VP9_IDCT_H_
diff --git a/libvpx/vp9/common/vp9_loopfilter.c b/libvpx/vp9/common/vp9_loopfilter.c
index c7c343aed..95d6029f3 100644
--- a/libvpx/vp9/common/vp9_loopfilter.c
+++ b/libvpx/vp9/common/vp9_loopfilter.c
@@ -880,12 +880,12 @@ void vp9_adjust_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
// This function sets up the bit masks for the entire 64x64 region represented
// by mi_row, mi_col.
void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
- MODE_INFO **mi, const int mode_info_stride,
+ MODE_INFO **mi8x8, const int mode_info_stride,
LOOP_FILTER_MASK *lfm) {
int idx_32, idx_16, idx_8;
const loop_filter_info_n *const lfi_n = &cm->lf_info;
- MODE_INFO **mip = mi;
- MODE_INFO **mip2 = mi;
+ MODE_INFO **mip = mi8x8;
+ MODE_INFO **mip2 = mi8x8;
// These are offsets to the next mi in the 64x64 block. It is what gets
// added to the mi ptr as we go through each loop. It helps us to avoid
@@ -1087,13 +1087,19 @@ void vp9_filter_block_plane_non420(VP9_COMMON *cm,
const int row_step_stride = cm->mi_stride * row_step;
struct buf_2d *const dst = &plane->dst;
uint8_t *const dst0 = dst->buf;
- unsigned int mask_16x16[MI_BLOCK_SIZE] = { 0 };
- unsigned int mask_8x8[MI_BLOCK_SIZE] = { 0 };
- unsigned int mask_4x4[MI_BLOCK_SIZE] = { 0 };
- unsigned int mask_4x4_int[MI_BLOCK_SIZE] = { 0 };
+ unsigned int mask_16x16[MI_BLOCK_SIZE];
+ unsigned int mask_8x8[MI_BLOCK_SIZE];
+ unsigned int mask_4x4[MI_BLOCK_SIZE];
+ unsigned int mask_4x4_int[MI_BLOCK_SIZE];
uint8_t lfl[MI_BLOCK_SIZE * MI_BLOCK_SIZE];
int r, c;
+ vp9_zero(mask_16x16);
+ vp9_zero(mask_8x8);
+ vp9_zero(mask_4x4);
+ vp9_zero(mask_4x4_int);
+ vp9_zero(lfl);
+
for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += row_step) {
unsigned int mask_16x16_c = 0;
unsigned int mask_8x8_c = 0;
@@ -1174,7 +1180,7 @@ void vp9_filter_block_plane_non420(VP9_COMMON *cm,
}
// Disable filtering on the leftmost column
- border_mask = ~(mi_col == 0);
+ border_mask = ~(mi_col == 0 ? 1 : 0);
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) {
highbd_filter_selectively_vert(
@@ -1330,6 +1336,8 @@ void vp9_filter_block_plane_ss11(VP9_COMMON *const cm,
uint16_t mask_4x4 = lfm->left_uv[TX_4X4];
uint16_t mask_4x4_int = lfm->int_4x4_uv;
+ vp9_zero(lfl_uv);
+
assert(plane->subsampling_x == 1 && plane->subsampling_y == 1);
// Vertical pass: do 2 rows at one time
diff --git a/libvpx/vp9/common/vp9_loopfilter.h b/libvpx/vp9/common/vp9_loopfilter.h
index 481a6cdc6..39648a72c 100644
--- a/libvpx/vp9/common/vp9_loopfilter.h
+++ b/libvpx/vp9/common/vp9_loopfilter.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_COMMON_VP9_LOOPFILTER_H_
-#define VP9_COMMON_VP9_LOOPFILTER_H_
+#ifndef VPX_VP9_COMMON_VP9_LOOPFILTER_H_
+#define VPX_VP9_COMMON_VP9_LOOPFILTER_H_
#include "vpx_ports/mem.h"
#include "./vpx_config.h"
@@ -97,7 +97,7 @@ struct VP9LfSyncData;
// This function sets up the bit masks for the entire 64x64 region represented
// by mi_row, mi_col.
void vp9_setup_mask(struct VP9Common *const cm, const int mi_row,
- const int mi_col, MODE_INFO **mi_8x8,
+ const int mi_col, MODE_INFO **mi8x8,
const int mode_info_stride, LOOP_FILTER_MASK *lfm);
void vp9_filter_block_plane_ss00(struct VP9Common *const cm,
@@ -120,7 +120,7 @@ void vp9_loop_filter_init(struct VP9Common *cm);
void vp9_loop_filter_frame_init(struct VP9Common *cm, int default_filt_lvl);
void vp9_loop_filter_frame(YV12_BUFFER_CONFIG *frame, struct VP9Common *cm,
- struct macroblockd *mbd, int filter_level,
+ struct macroblockd *xd, int frame_filter_level,
int y_only, int partial_frame);
// Get the superblock lfm for a given mi_row, mi_col.
@@ -157,4 +157,4 @@ int vp9_loop_filter_worker(void *arg1, void *unused);
} // extern "C"
#endif
-#endif // VP9_COMMON_VP9_LOOPFILTER_H_
+#endif // VPX_VP9_COMMON_VP9_LOOPFILTER_H_
diff --git a/libvpx/vp9/common/vp9_mfqe.h b/libvpx/vp9/common/vp9_mfqe.h
index dfff8c23d..f53e1c2f9 100644
--- a/libvpx/vp9/common/vp9_mfqe.h
+++ b/libvpx/vp9/common/vp9_mfqe.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_COMMON_VP9_MFQE_H_
-#define VP9_COMMON_VP9_MFQE_H_
+#ifndef VPX_VP9_COMMON_VP9_MFQE_H_
+#define VPX_VP9_COMMON_VP9_MFQE_H_
#ifdef __cplusplus
extern "C" {
@@ -28,4 +28,4 @@ void vp9_mfqe(struct VP9Common *cm);
} // extern "C"
#endif
-#endif // VP9_COMMON_VP9_MFQE_H_
+#endif // VPX_VP9_COMMON_VP9_MFQE_H_
diff --git a/libvpx/vp9/common/vp9_mv.h b/libvpx/vp9/common/vp9_mv.h
index 4c8eac721..14dde7dd0 100644
--- a/libvpx/vp9/common/vp9_mv.h
+++ b/libvpx/vp9/common/vp9_mv.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_COMMON_VP9_MV_H_
-#define VP9_COMMON_VP9_MV_H_
+#ifndef VPX_VP9_COMMON_VP9_MV_H_
+#define VPX_VP9_COMMON_VP9_MV_H_
#include "vpx/vpx_integer.h"
@@ -52,4 +52,4 @@ static INLINE void clamp_mv(MV *mv, int min_col, int max_col, int min_row,
} // extern "C"
#endif
-#endif // VP9_COMMON_VP9_MV_H_
+#endif // VPX_VP9_COMMON_VP9_MV_H_
diff --git a/libvpx/vp9/common/vp9_mvref_common.h b/libvpx/vp9/common/vp9_mvref_common.h
index 2b2c1ba9e..ebe5fdad1 100644
--- a/libvpx/vp9/common/vp9_mvref_common.h
+++ b/libvpx/vp9/common/vp9_mvref_common.h
@@ -7,8 +7,8 @@
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_COMMON_VP9_MVREF_COMMON_H_
-#define VP9_COMMON_VP9_MVREF_COMMON_H_
+#ifndef VPX_VP9_COMMON_VP9_MVREF_COMMON_H_
+#define VPX_VP9_COMMON_VP9_MVREF_COMMON_H_
#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/common/vp9_blockd.h"
@@ -320,4 +320,4 @@ void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd, int block,
} // extern "C"
#endif
-#endif // VP9_COMMON_VP9_MVREF_COMMON_H_
+#endif // VPX_VP9_COMMON_VP9_MVREF_COMMON_H_
diff --git a/libvpx/vp9/common/vp9_onyxc_int.h b/libvpx/vp9/common/vp9_onyxc_int.h
index 1d96d92c2..662b8ef5e 100644
--- a/libvpx/vp9/common/vp9_onyxc_int.h
+++ b/libvpx/vp9/common/vp9_onyxc_int.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_COMMON_VP9_ONYXC_INT_H_
-#define VP9_COMMON_VP9_ONYXC_INT_H_
+#ifndef VPX_VP9_COMMON_VP9_ONYXC_INT_H_
+#define VPX_VP9_COMMON_VP9_ONYXC_INT_H_
#include "./vpx_config.h"
#include "vpx/internal/vpx_codec_internal.h"
@@ -37,10 +37,9 @@ extern "C" {
#define REF_FRAMES_LOG2 3
#define REF_FRAMES (1 << REF_FRAMES_LOG2)
-// 1 scratch frame for the new frame, 3 for scaled references on the encoder.
-// TODO(jkoleszar): These 3 extra references could probably come from the
-// normal reference pool.
-#define FRAME_BUFFERS (REF_FRAMES + 4)
+// 1 scratch frame for the new frame, REFS_PER_FRAME for scaled references on
+// the encoder.
+#define FRAME_BUFFERS (REF_FRAMES + 1 + REFS_PER_FRAME)
#define FRAME_CONTEXTS_LOG2 2
#define FRAME_CONTEXTS (1 << FRAME_CONTEXTS_LOG2)
@@ -70,6 +69,7 @@ typedef struct {
int mi_rows;
int mi_cols;
uint8_t released;
+ int frame_index;
vpx_codec_frame_buffer_t raw_frame_buffer;
YV12_BUFFER_CONFIG buf;
} RefCntBuffer;
@@ -128,6 +128,8 @@ typedef struct VP9Common {
int new_fb_idx;
+ int cur_show_frame_fb_idx;
+
#if CONFIG_VP9_POSTPROC
YV12_BUFFER_CONFIG post_proc_buffer;
YV12_BUFFER_CONFIG post_proc_buffer_int;
@@ -256,8 +258,16 @@ typedef struct VP9Common {
PARTITION_CONTEXT *above_seg_context;
ENTROPY_CONTEXT *above_context;
int above_context_alloc_cols;
+
+ int lf_row;
} VP9_COMMON;
+static INLINE YV12_BUFFER_CONFIG *get_buf_frame(VP9_COMMON *cm, int index) {
+ if (index < 0 || index >= FRAME_BUFFERS) return NULL;
+ if (cm->error.error_code != VPX_CODEC_OK) return NULL;
+ return &cm->buffer_pool->frame_bufs[index].buf;
+}
+
static INLINE YV12_BUFFER_CONFIG *get_ref_frame(VP9_COMMON *cm, int index) {
if (index < 0 || index >= REF_FRAMES) return NULL;
if (cm->ref_frame_map[index] < 0) return NULL;
@@ -405,4 +415,4 @@ static INLINE int partition_plane_context(const MACROBLOCKD *xd, int mi_row,
} // extern "C"
#endif
-#endif // VP9_COMMON_VP9_ONYXC_INT_H_
+#endif // VPX_VP9_COMMON_VP9_ONYXC_INT_H_
diff --git a/libvpx/vp9/common/vp9_postproc.c b/libvpx/vp9/common/vp9_postproc.c
index dfc315eea..5373b0218 100644
--- a/libvpx/vp9/common/vp9_postproc.c
+++ b/libvpx/vp9/common/vp9_postproc.c
@@ -293,7 +293,7 @@ static void swap_mi_and_prev_mi(VP9_COMMON *cm) {
}
int vp9_post_proc_frame(struct VP9Common *cm, YV12_BUFFER_CONFIG *dest,
- vp9_ppflags_t *ppflags) {
+ vp9_ppflags_t *ppflags, int unscaled_width) {
const int q = VPXMIN(105, cm->lf.filter_level * 2);
const int flags = ppflags->post_proc_flag;
YV12_BUFFER_CONFIG *const ppbuf = &cm->post_proc_buffer;
@@ -359,7 +359,7 @@ int vp9_post_proc_frame(struct VP9Common *cm, YV12_BUFFER_CONFIG *dest,
if (flags & (VP9D_DEMACROBLOCK | VP9D_DEBLOCK)) {
if (!cm->postproc_state.limits) {
cm->postproc_state.limits =
- vpx_calloc(cm->width, sizeof(*cm->postproc_state.limits));
+ vpx_calloc(unscaled_width, sizeof(*cm->postproc_state.limits));
}
}
diff --git a/libvpx/vp9/common/vp9_postproc.h b/libvpx/vp9/common/vp9_postproc.h
index 605909411..67efc1b4e 100644
--- a/libvpx/vp9/common/vp9_postproc.h
+++ b/libvpx/vp9/common/vp9_postproc.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_COMMON_VP9_POSTPROC_H_
-#define VP9_COMMON_VP9_POSTPROC_H_
+#ifndef VPX_VP9_COMMON_VP9_POSTPROC_H_
+#define VPX_VP9_COMMON_VP9_POSTPROC_H_
#include "vpx_ports/mem.h"
#include "vpx_scale/yv12config.h"
@@ -38,7 +38,7 @@ struct VP9Common;
#define MFQE_PRECISION 4
int vp9_post_proc_frame(struct VP9Common *cm, YV12_BUFFER_CONFIG *dest,
- vp9_ppflags_t *flags);
+ vp9_ppflags_t *ppflags, int unscaled_width);
void vp9_denoise(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q,
uint8_t *limits);
@@ -50,4 +50,4 @@ void vp9_deblock(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q,
} // extern "C"
#endif
-#endif // VP9_COMMON_VP9_POSTPROC_H_
+#endif // VPX_VP9_COMMON_VP9_POSTPROC_H_
diff --git a/libvpx/vp9/common/vp9_ppflags.h b/libvpx/vp9/common/vp9_ppflags.h
index b8b647bf1..a0e301762 100644
--- a/libvpx/vp9/common/vp9_ppflags.h
+++ b/libvpx/vp9/common/vp9_ppflags.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_COMMON_VP9_PPFLAGS_H_
-#define VP9_COMMON_VP9_PPFLAGS_H_
+#ifndef VPX_VP9_COMMON_VP9_PPFLAGS_H_
+#define VPX_VP9_COMMON_VP9_PPFLAGS_H_
#ifdef __cplusplus
extern "C" {
@@ -33,4 +33,4 @@ typedef struct {
} // extern "C"
#endif
-#endif // VP9_COMMON_VP9_PPFLAGS_H_
+#endif // VPX_VP9_COMMON_VP9_PPFLAGS_H_
diff --git a/libvpx/vp9/common/vp9_pred_common.c b/libvpx/vp9/common/vp9_pred_common.c
index a7ddc0b95..375cb4d76 100644
--- a/libvpx/vp9/common/vp9_pred_common.c
+++ b/libvpx/vp9/common/vp9_pred_common.c
@@ -13,6 +13,32 @@
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_seg_common.h"
+int vp9_compound_reference_allowed(const VP9_COMMON *cm) {
+ int i;
+ for (i = 1; i < REFS_PER_FRAME; ++i)
+ if (cm->ref_frame_sign_bias[i + 1] != cm->ref_frame_sign_bias[1]) return 1;
+
+ return 0;
+}
+
+void vp9_setup_compound_reference_mode(VP9_COMMON *cm) {
+ if (cm->ref_frame_sign_bias[LAST_FRAME] ==
+ cm->ref_frame_sign_bias[GOLDEN_FRAME]) {
+ cm->comp_fixed_ref = ALTREF_FRAME;
+ cm->comp_var_ref[0] = LAST_FRAME;
+ cm->comp_var_ref[1] = GOLDEN_FRAME;
+ } else if (cm->ref_frame_sign_bias[LAST_FRAME] ==
+ cm->ref_frame_sign_bias[ALTREF_FRAME]) {
+ cm->comp_fixed_ref = GOLDEN_FRAME;
+ cm->comp_var_ref[0] = LAST_FRAME;
+ cm->comp_var_ref[1] = ALTREF_FRAME;
+ } else {
+ cm->comp_fixed_ref = LAST_FRAME;
+ cm->comp_var_ref[0] = GOLDEN_FRAME;
+ cm->comp_var_ref[1] = ALTREF_FRAME;
+ }
+}
+
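
These helpers move verbatim out of vp9_decodeframe.c (see the matching
deletion at the end of this patch) so the encoder can call them too.
ref_frame_sign_bias[] is indexed by reference frame, with LAST_FRAME = 1, so
the loop compares GOLDEN and ALTREF against LAST; compound prediction needs at
least one reference on each temporal side. A worked example under the usual
arrangement:

    /* Typical case: LAST and GOLDEN are past references (sign bias 0),
     * ALTREF points to the future (sign bias 1). */
    const int sign_bias[4] = { 0 /* INTRA */, 0 /* LAST */, 0 /* GOLDEN */,
                               1 /* ALTREF */ };
    /* vp9_compound_reference_allowed() returns 1 here (ALTREF differs
     * from LAST); vp9_setup_compound_reference_mode() then fixes ALTREF
     * and lets {LAST, GOLDEN} be the variable pair. */
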
int vp9_get_reference_mode_context(const VP9_COMMON *cm,
const MACROBLOCKD *xd) {
int ctx;
@@ -229,9 +255,8 @@ int vp9_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) {
else
pred_context = 4 * (edge_mi->ref_frame[0] == GOLDEN_FRAME);
} else {
- pred_context = 1 +
- 2 * (edge_mi->ref_frame[0] == GOLDEN_FRAME ||
- edge_mi->ref_frame[1] == GOLDEN_FRAME);
+ pred_context = 1 + 2 * (edge_mi->ref_frame[0] == GOLDEN_FRAME ||
+ edge_mi->ref_frame[1] == GOLDEN_FRAME);
}
} else { // inter/inter
const int above_has_second = has_second_ref(above_mi);
diff --git a/libvpx/vp9/common/vp9_pred_common.h b/libvpx/vp9/common/vp9_pred_common.h
index 8400bd70f..ee5966935 100644
--- a/libvpx/vp9/common/vp9_pred_common.h
+++ b/libvpx/vp9/common/vp9_pred_common.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_COMMON_VP9_PRED_COMMON_H_
-#define VP9_COMMON_VP9_PRED_COMMON_H_
+#ifndef VPX_VP9_COMMON_VP9_PRED_COMMON_H_
+#define VPX_VP9_COMMON_VP9_PRED_COMMON_H_
#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_onyxc_int.h"
@@ -145,6 +145,10 @@ static INLINE vpx_prob vp9_get_pred_prob_single_ref_p2(const VP9_COMMON *cm,
return cm->fc->single_ref_prob[vp9_get_pred_context_single_ref_p2(xd)][1];
}
+int vp9_compound_reference_allowed(const VP9_COMMON *cm);
+
+void vp9_setup_compound_reference_mode(VP9_COMMON *cm);
+
// Returns a context number for the given MB prediction signal
// The mode info data structure has a one element border above and to the
// left of the entries corresponding to real blocks.
@@ -176,12 +180,6 @@ static INLINE const vpx_prob *get_tx_probs(TX_SIZE max_tx_size, int ctx,
}
}
-static INLINE const vpx_prob *get_tx_probs2(TX_SIZE max_tx_size,
- const MACROBLOCKD *xd,
- const struct tx_probs *tx_probs) {
- return get_tx_probs(max_tx_size, get_tx_size_context(xd), tx_probs);
-}
-
static INLINE unsigned int *get_tx_counts(TX_SIZE max_tx_size, int ctx,
struct tx_counts *tx_counts) {
switch (max_tx_size) {
@@ -196,4 +194,4 @@ static INLINE unsigned int *get_tx_counts(TX_SIZE max_tx_size, int ctx,
} // extern "C"
#endif
-#endif // VP9_COMMON_VP9_PRED_COMMON_H_
+#endif // VPX_VP9_COMMON_VP9_PRED_COMMON_H_
diff --git a/libvpx/vp9/common/vp9_quant_common.h b/libvpx/vp9/common/vp9_quant_common.h
index 4bae4a896..ec8b9f4c6 100644
--- a/libvpx/vp9/common/vp9_quant_common.h
+++ b/libvpx/vp9/common/vp9_quant_common.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_COMMON_VP9_QUANT_COMMON_H_
-#define VP9_COMMON_VP9_QUANT_COMMON_H_
+#ifndef VPX_VP9_COMMON_VP9_QUANT_COMMON_H_
+#define VPX_VP9_COMMON_VP9_QUANT_COMMON_H_
#include "vpx/vpx_codec.h"
#include "vp9/common/vp9_seg_common.h"
@@ -33,4 +33,4 @@ int vp9_get_qindex(const struct segmentation *seg, int segment_id,
} // extern "C"
#endif
-#endif // VP9_COMMON_VP9_QUANT_COMMON_H_
+#endif // VPX_VP9_COMMON_VP9_QUANT_COMMON_H_
diff --git a/libvpx/vp9/common/vp9_reconinter.h b/libvpx/vp9/common/vp9_reconinter.h
index bb9291a26..992e30c34 100644
--- a/libvpx/vp9/common/vp9_reconinter.h
+++ b/libvpx/vp9/common/vp9_reconinter.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_COMMON_VP9_RECONINTER_H_
-#define VP9_COMMON_VP9_RECONINTER_H_
+#ifndef VPX_VP9_COMMON_VP9_RECONINTER_H_
+#define VPX_VP9_COMMON_VP9_RECONINTER_H_
#include "vp9/common/vp9_filter.h"
#include "vp9/common/vp9_onyxc_int.h"
@@ -61,15 +61,15 @@ void vp9_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col,
BLOCK_SIZE bsize);
void vp9_build_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
- int dst_stride, const MV *mv_q3,
+ int dst_stride, const MV *src_mv,
const struct scale_factors *sf, int w, int h,
- int do_avg, const InterpKernel *kernel,
+ int ref, const InterpKernel *kernel,
enum mv_precision precision, int x, int y);
#if CONFIG_VP9_HIGHBITDEPTH
void vp9_highbd_build_inter_predictor(
const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride,
- const MV *mv_q3, const struct scale_factors *sf, int w, int h, int do_avg,
+ const MV *src_mv, const struct scale_factors *sf, int w, int h, int ref,
const InterpKernel *kernel, enum mv_precision precision, int x, int y,
int bd);
#endif
@@ -103,4 +103,4 @@ void vp9_setup_pre_planes(MACROBLOCKD *xd, int idx,
} // extern "C"
#endif
-#endif // VP9_COMMON_VP9_RECONINTER_H_
+#endif // VPX_VP9_COMMON_VP9_RECONINTER_H_
diff --git a/libvpx/vp9/common/vp9_reconintra.h b/libvpx/vp9/common/vp9_reconintra.h
index 78e41c881..426a35ebf 100644
--- a/libvpx/vp9/common/vp9_reconintra.h
+++ b/libvpx/vp9/common/vp9_reconintra.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_COMMON_VP9_RECONINTRA_H_
-#define VP9_COMMON_VP9_RECONINTRA_H_
+#ifndef VPX_VP9_COMMON_VP9_RECONINTRA_H_
+#define VPX_VP9_COMMON_VP9_RECONINTRA_H_
#include "vpx/vpx_integer.h"
#include "vp9/common/vp9_blockd.h"
@@ -28,4 +28,4 @@ void vp9_predict_intra_block(const MACROBLOCKD *xd, int bwl_in, TX_SIZE tx_size,
} // extern "C"
#endif
-#endif // VP9_COMMON_VP9_RECONINTRA_H_
+#endif // VPX_VP9_COMMON_VP9_RECONINTRA_H_
diff --git a/libvpx/vp9/common/vp9_rtcd_defs.pl b/libvpx/vp9/common/vp9_rtcd_defs.pl
index 22b67ecac..8bb68cfdf 100644
--- a/libvpx/vp9/common/vp9_rtcd_defs.pl
+++ b/libvpx/vp9/common/vp9_rtcd_defs.pl
@@ -62,18 +62,18 @@ add_proto qw/void vp9_iht4x4_16_add/, "const tran_low_t *input, uint8_t *dest, i
add_proto qw/void vp9_iht8x8_64_add/, "const tran_low_t *input, uint8_t *dest, int stride, int tx_type";
-add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *output, int pitch, int tx_type";
+add_proto qw/void vp9_iht16x16_256_add/, "const tran_low_t *input, uint8_t *dest, int stride, int tx_type";
if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") {
# Note that there are more specializations appended when
# CONFIG_VP9_HIGHBITDEPTH is off.
- specialize qw/vp9_iht4x4_16_add sse2/;
- specialize qw/vp9_iht8x8_64_add sse2/;
- specialize qw/vp9_iht16x16_256_add sse2/;
+ specialize qw/vp9_iht4x4_16_add neon sse2 vsx/;
+ specialize qw/vp9_iht8x8_64_add neon sse2 vsx/;
+ specialize qw/vp9_iht16x16_256_add neon sse2 vsx/;
if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") ne "yes") {
# Note that these specializations are appended to the above ones.
- specialize qw/vp9_iht4x4_16_add neon dspr2 msa/;
- specialize qw/vp9_iht8x8_64_add neon dspr2 msa/;
+ specialize qw/vp9_iht4x4_16_add dspr2 msa/;
+ specialize qw/vp9_iht8x8_64_add dspr2 msa/;
specialize qw/vp9_iht16x16_256_add dspr2 msa/;
}
}
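
For readers unfamiliar with the RTCD scheme: each specialize line lists the
SIMD flavours that the generated vp9_rtcd.h may substitute for the C reference
at runtime. Roughly, the generator emits declarations of this shape (a sketch,
not the generator's literal output):

    void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest,
                             int stride, int tx_type);
    void vp9_iht4x4_16_add_neon(const tran_low_t *input, uint8_t *dest,
                                int stride, int tx_type);
    /* Bound once in vp9_rtcd() after CPU feature detection: */
    extern void (*vp9_iht4x4_16_add)(const tran_low_t *input, uint8_t *dest,
                                     int stride, int tx_type);
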
@@ -100,7 +100,13 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vp9_highbd_iht8x8_64_add/, "const tran_low_t *input, uint16_t *dest, int stride, int tx_type, int bd";
- add_proto qw/void vp9_highbd_iht16x16_256_add/, "const tran_low_t *input, uint16_t *output, int pitch, int tx_type, int bd";
+ add_proto qw/void vp9_highbd_iht16x16_256_add/, "const tran_low_t *input, uint16_t *dest, int stride, int tx_type, int bd";
+
+ if (vpx_config("CONFIG_EMULATE_HARDWARE") ne "yes") {
+ specialize qw/vp9_highbd_iht4x4_16_add neon sse4_1/;
+ specialize qw/vp9_highbd_iht8x8_64_add neon sse4_1/;
+ specialize qw/vp9_highbd_iht16x16_256_add neon sse4_1/;
+ }
}
#
@@ -123,10 +129,10 @@ add_proto qw/int64_t vp9_block_error/, "const tran_low_t *coeff, const tran_low_
add_proto qw/int64_t vp9_block_error_fp/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size";
add_proto qw/void vp9_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
-specialize qw/vp9_quantize_fp neon sse2/, "$ssse3_x86_64";
+specialize qw/vp9_quantize_fp neon sse2 avx2 vsx/, "$ssse3_x86_64";
add_proto qw/void vp9_quantize_fp_32x32/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
-specialize qw/vp9_quantize_fp_32x32 neon/, "$ssse3_x86_64";
+specialize qw/vp9_quantize_fp_32x32 neon vsx/, "$ssse3_x86_64";
add_proto qw/void vp9_fdct8x8_quant/, "const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
@@ -135,7 +141,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vp9_block_error_fp avx2 sse2/;
- specialize qw/vp9_fdct8x8_quant neon ssse3/;
+ specialize qw/vp9_fdct8x8_quant sse2 ssse3 neon/;
add_proto qw/int64_t vp9_highbd_block_error/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd";
specialize qw/vp9_highbd_block_error sse2/;
@@ -199,7 +205,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
add_proto qw/void vp9_highbd_fwht4x4/, "const int16_t *input, tran_low_t *output, int stride";
- add_proto qw/void vp9_highbd_temporal_filter_apply/, "const uint8_t *frame1, unsigned int stride, const uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int filter_weight, uint32_t *accumulator, uint16_t *count";
+ add_proto qw/void vp9_highbd_temporal_filter_apply/, "const uint8_t *frame1, unsigned int stride, const uint8_t *frame2, unsigned int block_width, unsigned int block_height, int strength, int *blk_fw, int use_32x32, uint32_t *accumulator, uint16_t *count";
}
# End vp9_high encoder functions
diff --git a/libvpx/vp9/common/vp9_scale.h b/libvpx/vp9/common/vp9_scale.h
index ada8dbaad..aaafdf867 100644
--- a/libvpx/vp9/common/vp9_scale.h
+++ b/libvpx/vp9/common/vp9_scale.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_COMMON_VP9_SCALE_H_
-#define VP9_COMMON_VP9_SCALE_H_
+#ifndef VPX_VP9_COMMON_VP9_SCALE_H_
+#define VPX_VP9_COMMON_VP9_SCALE_H_
#include "vp9/common/vp9_mv.h"
#include "vpx_dsp/vpx_convolve.h"
@@ -42,7 +42,7 @@ MV32 vp9_scale_mv(const MV *mv, int x, int y, const struct scale_factors *sf);
#if CONFIG_VP9_HIGHBITDEPTH
void vp9_setup_scale_factors_for_frame(struct scale_factors *sf, int other_w,
int other_h, int this_w, int this_h,
- int use_high);
+ int use_highbd);
#else
void vp9_setup_scale_factors_for_frame(struct scale_factors *sf, int other_w,
int other_h, int this_w, int this_h);
@@ -68,4 +68,4 @@ static INLINE int valid_ref_frame_size(int ref_width, int ref_height,
} // extern "C"
#endif
-#endif // VP9_COMMON_VP9_SCALE_H_
+#endif // VPX_VP9_COMMON_VP9_SCALE_H_
diff --git a/libvpx/vp9/common/vp9_scan.h b/libvpx/vp9/common/vp9_scan.h
index b3520e7dc..72a9a5ec4 100644
--- a/libvpx/vp9/common/vp9_scan.h
+++ b/libvpx/vp9/common/vp9_scan.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_COMMON_VP9_SCAN_H_
-#define VP9_COMMON_VP9_SCAN_H_
+#ifndef VPX_VP9_COMMON_VP9_SCAN_H_
+#define VPX_VP9_COMMON_VP9_SCAN_H_
#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"
@@ -55,4 +55,4 @@ static INLINE const scan_order *get_scan(const MACROBLOCKD *xd, TX_SIZE tx_size,
} // extern "C"
#endif
-#endif // VP9_COMMON_VP9_SCAN_H_
+#endif // VPX_VP9_COMMON_VP9_SCAN_H_
diff --git a/libvpx/vp9/common/vp9_seg_common.h b/libvpx/vp9/common/vp9_seg_common.h
index b9bf75d58..b63e4f499 100644
--- a/libvpx/vp9/common/vp9_seg_common.h
+++ b/libvpx/vp9/common/vp9_seg_common.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_COMMON_VP9_SEG_COMMON_H_
-#define VP9_COMMON_VP9_SEG_COMMON_H_
+#ifndef VPX_VP9_COMMON_VP9_SEG_COMMON_H_
+#define VPX_VP9_COMMON_VP9_SEG_COMMON_H_
#include "vpx_dsp/prob.h"
@@ -78,4 +78,4 @@ extern const vpx_tree_index vp9_segment_tree[TREE_SIZE(MAX_SEGMENTS)];
} // extern "C"
#endif
-#endif // VP9_COMMON_VP9_SEG_COMMON_H_
+#endif // VPX_VP9_COMMON_VP9_SEG_COMMON_H_
diff --git a/libvpx/vp9/common/vp9_thread_common.c b/libvpx/vp9/common/vp9_thread_common.c
index 8d44e91f2..b008ed5cf 100644
--- a/libvpx/vp9/common/vp9_thread_common.c
+++ b/libvpx/vp9/common/vp9_thread_common.c
@@ -8,6 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include <limits.h>
#include "./vpx_config.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
@@ -38,11 +39,11 @@ static INLINE void sync_read(VP9LfSync *const lf_sync, int r, int c) {
const int nsync = lf_sync->sync_range;
if (r && !(c & (nsync - 1))) {
- pthread_mutex_t *const mutex = &lf_sync->mutex_[r - 1];
+ pthread_mutex_t *const mutex = &lf_sync->mutex[r - 1];
mutex_lock(mutex);
while (c > lf_sync->cur_sb_col[r - 1] - nsync) {
- pthread_cond_wait(&lf_sync->cond_[r - 1], mutex);
+ pthread_cond_wait(&lf_sync->cond[r - 1], mutex);
}
pthread_mutex_unlock(mutex);
}
@@ -69,12 +70,12 @@ static INLINE void sync_write(VP9LfSync *const lf_sync, int r, int c,
}
if (sig) {
- mutex_lock(&lf_sync->mutex_[r]);
+ mutex_lock(&lf_sync->mutex[r]);
lf_sync->cur_sb_col[r] = cur;
- pthread_cond_signal(&lf_sync->cond_[r]);
- pthread_mutex_unlock(&lf_sync->mutex_[r]);
+ pthread_cond_signal(&lf_sync->cond[r]);
+ pthread_mutex_unlock(&lf_sync->mutex[r]);
}
#else
(void)lf_sync;
@@ -91,6 +92,7 @@ static INLINE void thread_loop_filter_rows(
int y_only, VP9LfSync *const lf_sync) {
const int num_planes = y_only ? 1 : MAX_MB_PLANE;
const int sb_cols = mi_cols_aligned_to_sb(cm->mi_cols) >> MI_BLOCK_SIZE_LOG2;
+ const int num_active_workers = VPXMIN(lf_sync->num_workers, lf_sync->rows);
int mi_row, mi_col;
enum lf_path path;
if (y_only)
@@ -103,7 +105,7 @@ static INLINE void thread_loop_filter_rows(
path = LF_PATH_SLOW;
for (mi_row = start; mi_row < stop;
- mi_row += lf_sync->num_workers * MI_BLOCK_SIZE) {
+ mi_row += num_active_workers * MI_BLOCK_SIZE) {
MODE_INFO **const mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
LOOP_FILTER_MASK *lfm = get_lfm(&cm->lf, mi_row, 0);
@@ -157,10 +159,12 @@ static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, VP9_COMMON *cm,
const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
// Number of superblock rows and cols
const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
- // Decoder may allocate more threads than number of tiles based on user's
- // input.
- const int tile_cols = 1 << cm->log2_tile_cols;
- const int num_workers = VPXMIN(nworkers, tile_cols);
+ const int num_tile_cols = 1 << cm->log2_tile_cols;
+ // Limit the number of workers to prevent changes in frame dimensions from
+ // causing incorrect sync calculations when sb_rows < threads/tile_cols.
+  // Further restrict them by the number of tile columns, should the user
+  // request more, as this implementation doesn't scale well beyond that.
+ const int num_workers = VPXMIN(nworkers, VPXMIN(num_tile_cols, sb_rows));
int i;
if (!lf_sync->sync_range || sb_rows != lf_sync->rows ||
@@ -231,6 +235,28 @@ void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, VP9_COMMON *cm,
workers, num_workers, lf_sync);
}
+void vp9_lpf_mt_init(VP9LfSync *lf_sync, VP9_COMMON *cm, int frame_filter_level,
+ int num_workers) {
+ const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
+
+ if (!frame_filter_level) return;
+
+ if (!lf_sync->sync_range || sb_rows != lf_sync->rows ||
+ num_workers > lf_sync->num_workers) {
+ vp9_loop_filter_dealloc(lf_sync);
+ vp9_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width, num_workers);
+ }
+
+ // Initialize cur_sb_col to -1 for all SB rows.
+ memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows);
+
+ lf_sync->corrupted = 0;
+
+ memset(lf_sync->num_tiles_done, 0,
+ sizeof(*lf_sync->num_tiles_done) * sb_rows);
+ cm->lf_row = 0;
+}
+
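
vp9_lpf_mt_init() factors the per-frame synchronization setup out of
loop_filter_rows_mt() so the row-based path added below (get_next_row() and
vp9_loopfilter_rows()) can reuse it. A minimal sketch of per-frame use,
assuming the caller has already capped num_workers:

    /* Once per frame, before handing rows to loopfilter workers. */
    vp9_lpf_mt_init(lf_sync, cm, cm->lf.filter_level, num_workers);
    /* Now cur_sb_col[] is -1 everywhere, num_tiles_done[] is zeroed,
     * corrupted is cleared, and cm->lf_row rewinds to row 0. */
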
// Set up nsync by width.
static INLINE int get_sync_range(int width) {
// nsync numbers are picked by testing. For example, for 4k
@@ -253,19 +279,38 @@ void vp9_loop_filter_alloc(VP9LfSync *lf_sync, VP9_COMMON *cm, int rows,
{
int i;
- CHECK_MEM_ERROR(cm, lf_sync->mutex_,
- vpx_malloc(sizeof(*lf_sync->mutex_) * rows));
- if (lf_sync->mutex_) {
+ CHECK_MEM_ERROR(cm, lf_sync->mutex,
+ vpx_malloc(sizeof(*lf_sync->mutex) * rows));
+ if (lf_sync->mutex) {
for (i = 0; i < rows; ++i) {
- pthread_mutex_init(&lf_sync->mutex_[i], NULL);
+ pthread_mutex_init(&lf_sync->mutex[i], NULL);
}
}
- CHECK_MEM_ERROR(cm, lf_sync->cond_,
- vpx_malloc(sizeof(*lf_sync->cond_) * rows));
- if (lf_sync->cond_) {
+ CHECK_MEM_ERROR(cm, lf_sync->cond,
+ vpx_malloc(sizeof(*lf_sync->cond) * rows));
+ if (lf_sync->cond) {
for (i = 0; i < rows; ++i) {
- pthread_cond_init(&lf_sync->cond_[i], NULL);
+ pthread_cond_init(&lf_sync->cond[i], NULL);
+ }
+ }
+ pthread_mutex_init(&lf_sync->lf_mutex, NULL);
+
+ CHECK_MEM_ERROR(cm, lf_sync->recon_done_mutex,
+ vpx_malloc(sizeof(*lf_sync->recon_done_mutex) * rows));
+ if (lf_sync->recon_done_mutex) {
+ int i;
+ for (i = 0; i < rows; ++i) {
+ pthread_mutex_init(&lf_sync->recon_done_mutex[i], NULL);
+ }
+ }
+
+ CHECK_MEM_ERROR(cm, lf_sync->recon_done_cond,
+ vpx_malloc(sizeof(*lf_sync->recon_done_cond) * rows));
+ if (lf_sync->recon_done_cond) {
+ int i;
+ for (i = 0; i < rows; ++i) {
+ pthread_cond_init(&lf_sync->recon_done_cond[i], NULL);
}
}
}
@@ -278,6 +323,11 @@ void vp9_loop_filter_alloc(VP9LfSync *lf_sync, VP9_COMMON *cm, int rows,
CHECK_MEM_ERROR(cm, lf_sync->cur_sb_col,
vpx_malloc(sizeof(*lf_sync->cur_sb_col) * rows));
+ CHECK_MEM_ERROR(cm, lf_sync->num_tiles_done,
+ vpx_malloc(sizeof(*lf_sync->num_tiles_done) *
+ mi_cols_aligned_to_sb(cm->mi_rows) >>
+ MI_BLOCK_SIZE_LOG2));
+
// Set up nsync.
lf_sync->sync_range = get_sync_range(width);
}
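
One subtlety in the num_tiles_done allocation above: '*' binds tighter than
'>>', so the size expression is (sizeof(int) * aligned_mi_rows) >>
MI_BLOCK_SIZE_LOG2. Because mi_cols_aligned_to_sb() returns a multiple of
MI_BLOCK_SIZE (8), that equals sizeof(int) * sb_rows, so the allocation is
exactly the intended size; parenthesizing the shift would still read more
clearly. A check of the arithmetic:

    #include <assert.h>
    static void check_num_tiles_done_size(int aligned /* multiple of 8 */) {
      /* (s * 8k) >> 3 == s * k for any element size s. */
      assert(((sizeof(int) * aligned) >> 3) == sizeof(int) * (aligned >> 3));
    }
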
@@ -288,27 +338,143 @@ void vp9_loop_filter_dealloc(VP9LfSync *lf_sync) {
#if CONFIG_MULTITHREAD
int i;
- if (lf_sync->mutex_ != NULL) {
+ if (lf_sync->mutex != NULL) {
+ for (i = 0; i < lf_sync->rows; ++i) {
+ pthread_mutex_destroy(&lf_sync->mutex[i]);
+ }
+ vpx_free(lf_sync->mutex);
+ }
+ if (lf_sync->cond != NULL) {
for (i = 0; i < lf_sync->rows; ++i) {
- pthread_mutex_destroy(&lf_sync->mutex_[i]);
+ pthread_cond_destroy(&lf_sync->cond[i]);
}
- vpx_free(lf_sync->mutex_);
+ vpx_free(lf_sync->cond);
}
- if (lf_sync->cond_ != NULL) {
+ if (lf_sync->recon_done_mutex != NULL) {
+ int i;
+ for (i = 0; i < lf_sync->rows; ++i) {
+ pthread_mutex_destroy(&lf_sync->recon_done_mutex[i]);
+ }
+ vpx_free(lf_sync->recon_done_mutex);
+ }
+
+ pthread_mutex_destroy(&lf_sync->lf_mutex);
+ if (lf_sync->recon_done_cond != NULL) {
+ int i;
for (i = 0; i < lf_sync->rows; ++i) {
- pthread_cond_destroy(&lf_sync->cond_[i]);
+ pthread_cond_destroy(&lf_sync->recon_done_cond[i]);
}
- vpx_free(lf_sync->cond_);
+ vpx_free(lf_sync->recon_done_cond);
}
#endif // CONFIG_MULTITHREAD
+
vpx_free(lf_sync->lfdata);
vpx_free(lf_sync->cur_sb_col);
+ vpx_free(lf_sync->num_tiles_done);
// clear the structure as the source of this call may be a resize in which
// case this call will be followed by an _alloc() which may fail.
vp9_zero(*lf_sync);
}
}
+static int get_next_row(VP9_COMMON *cm, VP9LfSync *lf_sync) {
+ int return_val = -1;
+ int cur_row;
+ const int max_rows = cm->mi_rows;
+
+#if CONFIG_MULTITHREAD
+ const int tile_cols = 1 << cm->log2_tile_cols;
+
+ pthread_mutex_lock(&lf_sync->lf_mutex);
+ if (cm->lf_row < max_rows) {
+ cur_row = cm->lf_row >> MI_BLOCK_SIZE_LOG2;
+ return_val = cm->lf_row;
+ cm->lf_row += MI_BLOCK_SIZE;
+ if (cm->lf_row < max_rows) {
+ /* If this is not the last row, make sure the next row is also decoded.
+      * This is because intra prediction has to happen before the loop
+      * filter. */
+ cur_row += 1;
+ }
+ }
+ pthread_mutex_unlock(&lf_sync->lf_mutex);
+
+ if (return_val == -1) return return_val;
+
+ pthread_mutex_lock(&lf_sync->recon_done_mutex[cur_row]);
+ if (lf_sync->num_tiles_done[cur_row] < tile_cols) {
+ pthread_cond_wait(&lf_sync->recon_done_cond[cur_row],
+ &lf_sync->recon_done_mutex[cur_row]);
+ }
+ pthread_mutex_unlock(&lf_sync->recon_done_mutex[cur_row]);
+ pthread_mutex_lock(&lf_sync->lf_mutex);
+ if (lf_sync->corrupted) {
+ int row = return_val >> MI_BLOCK_SIZE_LOG2;
+ pthread_mutex_lock(&lf_sync->mutex[row]);
+ lf_sync->cur_sb_col[row] = INT_MAX;
+ pthread_cond_signal(&lf_sync->cond[row]);
+ pthread_mutex_unlock(&lf_sync->mutex[row]);
+ return_val = -1;
+ }
+ pthread_mutex_unlock(&lf_sync->lf_mutex);
+#else
+ (void)lf_sync;
+ if (cm->lf_row < max_rows) {
+ cur_row = cm->lf_row >> MI_BLOCK_SIZE_LOG2;
+ return_val = cm->lf_row;
+ cm->lf_row += MI_BLOCK_SIZE;
+ if (cm->lf_row < max_rows) {
+ /* If this is not the last row, make sure the next row is also decoded.
+      * This is because intra prediction has to happen before the loop
+      * filter. */
+ cur_row += 1;
+ }
+ }
+#endif // CONFIG_MULTITHREAD
+
+ return return_val;
+}
+
+void vp9_loopfilter_rows(LFWorkerData *lf_data, VP9LfSync *lf_sync) {
+ int mi_row;
+ VP9_COMMON *cm = lf_data->cm;
+
+ while ((mi_row = get_next_row(cm, lf_sync)) != -1 && mi_row < cm->mi_rows) {
+ lf_data->start = mi_row;
+ lf_data->stop = mi_row + MI_BLOCK_SIZE;
+
+ thread_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes,
+ lf_data->start, lf_data->stop, lf_data->y_only,
+ lf_sync);
+ }
+}
+
+void vp9_set_row(VP9LfSync *lf_sync, int num_tiles, int row, int is_last_row,
+ int corrupted) {
+#if CONFIG_MULTITHREAD
+ pthread_mutex_lock(&lf_sync->lf_mutex);
+ lf_sync->corrupted |= corrupted;
+ pthread_mutex_unlock(&lf_sync->lf_mutex);
+ pthread_mutex_lock(&lf_sync->recon_done_mutex[row]);
+ lf_sync->num_tiles_done[row] += 1;
+ if (num_tiles == lf_sync->num_tiles_done[row]) {
+ if (is_last_row) {
+      /* The last two rows wait on the last row to be done, so we have to
+       * broadcast the signal in this case.
+ */
+ pthread_cond_broadcast(&lf_sync->recon_done_cond[row]);
+ } else {
+ pthread_cond_signal(&lf_sync->recon_done_cond[row]);
+ }
+ }
+ pthread_mutex_unlock(&lf_sync->recon_done_mutex[row]);
+#else
+ (void)lf_sync;
+ (void)num_tiles;
+ (void)row;
+ (void)is_last_row;
+ (void)corrupted;
+#endif // CONFIG_MULTITHREAD
+}
+
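
Taken together, vp9_set_row() and get_next_row() form a producer/consumer
handshake: each tile worker reports a reconstructed superblock row, and a
loopfilter worker blocks until every tile column of the row after the one it
is about to filter has been reported, since intra prediction must read the
unfiltered pixels first. A sketch of the intended call pattern; the worker
loop is an assumption, not code from this patch:

    /* Reconstruction side, one worker per tile column (sketch). */
    void recon_rows(VP9_COMMON *cm, VP9LfSync *lf_sync, int tile_cols) {
      int mi_row;
      for (mi_row = 0; mi_row < cm->mi_rows; mi_row += MI_BLOCK_SIZE) {
        const int sb_row = mi_row >> MI_BLOCK_SIZE_LOG2;
        const int is_last = mi_row + MI_BLOCK_SIZE >= cm->mi_rows;
        /* ... decode one superblock row of this tile column ... */
        vp9_set_row(lf_sync, tile_cols, sb_row, is_last, 0 /* corrupted */);
      }
    }
    /* Loopfilter side: vp9_loopfilter_rows(lf_data, lf_sync) above. */
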
// Accumulate frame counts.
void vp9_accumulate_frame_counts(FRAME_COUNTS *accum,
const FRAME_COUNTS *counts, int is_dec) {
diff --git a/libvpx/vp9/common/vp9_thread_common.h b/libvpx/vp9/common/vp9_thread_common.h
index 0f7c3ff74..b97e9ee13 100644
--- a/libvpx/vp9/common/vp9_thread_common.h
+++ b/libvpx/vp9/common/vp9_thread_common.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_COMMON_VP9_THREAD_COMMON_H_
-#define VP9_COMMON_VP9_THREAD_COMMON_H_
+#ifndef VPX_VP9_COMMON_VP9_THREAD_COMMON_H_
+#define VPX_VP9_COMMON_VP9_THREAD_COMMON_H_
#include "./vpx_config.h"
#include "vp9/common/vp9_loopfilter.h"
#include "vpx_util/vpx_thread.h"
@@ -24,8 +24,8 @@ struct FRAME_COUNTS;
// Loopfilter row synchronization
typedef struct VP9LfSyncData {
#if CONFIG_MULTITHREAD
- pthread_mutex_t *mutex_;
- pthread_cond_t *cond_;
+ pthread_mutex_t *mutex;
+ pthread_cond_t *cond;
#endif
// Allocate memory to store the loop-filtered superblock index in each row.
int *cur_sb_col;
@@ -37,6 +37,14 @@ typedef struct VP9LfSyncData {
// Row-based parallel loopfilter data
LFWorkerData *lfdata;
int num_workers;
+
+#if CONFIG_MULTITHREAD
+ pthread_mutex_t lf_mutex;
+ pthread_mutex_t *recon_done_mutex;
+ pthread_cond_t *recon_done_cond;
+#endif
+ int *num_tiles_done;
+ int corrupted;
} VP9LfSync;
// Allocate memory for loopfilter row synchronization.
@@ -53,6 +61,17 @@ void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, struct VP9Common *cm,
int partial_frame, VPxWorker *workers,
int num_workers, VP9LfSync *lf_sync);
+// Multi-threaded loopfilter initialization
+void vp9_lpf_mt_init(VP9LfSync *lf_sync, struct VP9Common *cm,
+ int frame_filter_level, int num_workers);
+
+void vp9_loopfilter_rows(LFWorkerData *lf_data, VP9LfSync *lf_sync);
+
+void vp9_set_row(VP9LfSync *lf_sync, int num_tiles, int row, int is_last_row,
+ int corrupted);
+
+void vp9_set_last_decoded_row(struct VP9Common *cm, int tile_col, int mi_row);
+
void vp9_accumulate_frame_counts(struct FRAME_COUNTS *accum,
const struct FRAME_COUNTS *counts, int is_dec);
@@ -60,4 +79,4 @@ void vp9_accumulate_frame_counts(struct FRAME_COUNTS *accum,
} // extern "C"
#endif
-#endif // VP9_COMMON_VP9_THREAD_COMMON_H_
+#endif // VPX_VP9_COMMON_VP9_THREAD_COMMON_H_
diff --git a/libvpx/vp9/common/vp9_tile_common.h b/libvpx/vp9/common/vp9_tile_common.h
index 1b11c2680..4ccf0a3d5 100644
--- a/libvpx/vp9/common/vp9_tile_common.h
+++ b/libvpx/vp9/common/vp9_tile_common.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_COMMON_VP9_TILE_COMMON_H_
-#define VP9_COMMON_VP9_TILE_COMMON_H_
+#ifndef VPX_VP9_COMMON_VP9_TILE_COMMON_H_
+#define VPX_VP9_COMMON_VP9_TILE_COMMON_H_
#ifdef __cplusplus
extern "C" {
@@ -37,4 +37,4 @@ void vp9_get_tile_n_bits(int mi_cols, int *min_log2_tile_cols,
} // extern "C"
#endif
-#endif // VP9_COMMON_VP9_TILE_COMMON_H_
+#endif // VPX_VP9_COMMON_VP9_TILE_COMMON_H_
diff --git a/libvpx/vp9/common/x86/vp9_highbd_iht16x16_add_sse4.c b/libvpx/vp9/common/x86/vp9_highbd_iht16x16_add_sse4.c
new file mode 100644
index 000000000..57b79a732
--- /dev/null
+++ b/libvpx/vp9/common/x86/vp9_highbd_iht16x16_add_sse4.c
@@ -0,0 +1,419 @@
+/*
+ * Copyright (c) 2018 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vp9_rtcd.h"
+#include "vp9/common/vp9_idct.h"
+#include "vpx_dsp/x86/highbd_inv_txfm_sse4.h"
+#include "vpx_dsp/x86/inv_txfm_sse2.h"
+#include "vpx_dsp/x86/transpose_sse2.h"
+#include "vpx_dsp/x86/txfm_common_sse2.h"
+
+static INLINE void highbd_iadst_half_butterfly_sse4_1(const __m128i in,
+ const int c,
+ __m128i *const s) {
+ const __m128i pair_c = pair_set_epi32(4 * c, 0);
+ __m128i x[2];
+
+ extend_64bit(in, x);
+ s[0] = _mm_mul_epi32(pair_c, x[0]);
+ s[1] = _mm_mul_epi32(pair_c, x[1]);
+}
+
+static INLINE void highbd_iadst_butterfly_sse4_1(const __m128i in0,
+ const __m128i in1,
+ const int c0, const int c1,
+ __m128i *const s0,
+ __m128i *const s1) {
+ const __m128i pair_c0 = pair_set_epi32(4 * c0, 0);
+ const __m128i pair_c1 = pair_set_epi32(4 * c1, 0);
+ __m128i t00[2], t01[2], t10[2], t11[2];
+ __m128i x0[2], x1[2];
+
+ extend_64bit(in0, x0);
+ extend_64bit(in1, x1);
+ t00[0] = _mm_mul_epi32(pair_c0, x0[0]);
+ t00[1] = _mm_mul_epi32(pair_c0, x0[1]);
+ t01[0] = _mm_mul_epi32(pair_c0, x1[0]);
+ t01[1] = _mm_mul_epi32(pair_c0, x1[1]);
+ t10[0] = _mm_mul_epi32(pair_c1, x0[0]);
+ t10[1] = _mm_mul_epi32(pair_c1, x0[1]);
+ t11[0] = _mm_mul_epi32(pair_c1, x1[0]);
+ t11[1] = _mm_mul_epi32(pair_c1, x1[1]);
+
+ s0[0] = _mm_add_epi64(t00[0], t11[0]);
+ s0[1] = _mm_add_epi64(t00[1], t11[1]);
+ s1[0] = _mm_sub_epi64(t10[0], t01[0]);
+ s1[1] = _mm_sub_epi64(t10[1], t01[1]);
+}
+
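
highbd_iadst_butterfly_sse4_1() is a standard rotation butterfly kept at
64-bit precision; in scalar terms, ignoring the fixed-point scaling folded
into pair_set_epi32() and the later dct_const_round_shift_64bit():

    /* Scalar model of the vector butterfly (sketch; scaling and rounding
     * omitted).  Products stay 64-bit to avoid overflow at 10/12 bpp. */
    static void iadst_butterfly_model(int32_t in0, int32_t in1, int c0,
                                      int c1, int64_t *s0, int64_t *s1) {
      *s0 = (int64_t)c0 * in0 + (int64_t)c1 * in1; /* t00 + t11 */
      *s1 = (int64_t)c1 * in0 - (int64_t)c0 * in1; /* t10 - t01 */
    }
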
+static void highbd_iadst16_4col_sse4_1(__m128i *const io /*io[16]*/) {
+ __m128i s0[2], s1[2], s2[2], s3[2], s4[2], s5[2], s6[2], s7[2], s8[2], s9[2],
+ s10[2], s11[2], s12[2], s13[2], s14[2], s15[2];
+ __m128i x0[2], x1[2], x2[2], x3[2], x4[2], x5[2], x6[2], x7[2], x8[2], x9[2],
+ x10[2], x11[2], x12[2], x13[2], x14[2], x15[2];
+
+ // stage 1
+ highbd_iadst_butterfly_sse4_1(io[15], io[0], cospi_1_64, cospi_31_64, s0, s1);
+ highbd_iadst_butterfly_sse4_1(io[13], io[2], cospi_5_64, cospi_27_64, s2, s3);
+ highbd_iadst_butterfly_sse4_1(io[11], io[4], cospi_9_64, cospi_23_64, s4, s5);
+ highbd_iadst_butterfly_sse4_1(io[9], io[6], cospi_13_64, cospi_19_64, s6, s7);
+ highbd_iadst_butterfly_sse4_1(io[7], io[8], cospi_17_64, cospi_15_64, s8, s9);
+ highbd_iadst_butterfly_sse4_1(io[5], io[10], cospi_21_64, cospi_11_64, s10,
+ s11);
+ highbd_iadst_butterfly_sse4_1(io[3], io[12], cospi_25_64, cospi_7_64, s12,
+ s13);
+ highbd_iadst_butterfly_sse4_1(io[1], io[14], cospi_29_64, cospi_3_64, s14,
+ s15);
+
+ x0[0] = _mm_add_epi64(s0[0], s8[0]);
+ x0[1] = _mm_add_epi64(s0[1], s8[1]);
+ x1[0] = _mm_add_epi64(s1[0], s9[0]);
+ x1[1] = _mm_add_epi64(s1[1], s9[1]);
+ x2[0] = _mm_add_epi64(s2[0], s10[0]);
+ x2[1] = _mm_add_epi64(s2[1], s10[1]);
+ x3[0] = _mm_add_epi64(s3[0], s11[0]);
+ x3[1] = _mm_add_epi64(s3[1], s11[1]);
+ x4[0] = _mm_add_epi64(s4[0], s12[0]);
+ x4[1] = _mm_add_epi64(s4[1], s12[1]);
+ x5[0] = _mm_add_epi64(s5[0], s13[0]);
+ x5[1] = _mm_add_epi64(s5[1], s13[1]);
+ x6[0] = _mm_add_epi64(s6[0], s14[0]);
+ x6[1] = _mm_add_epi64(s6[1], s14[1]);
+ x7[0] = _mm_add_epi64(s7[0], s15[0]);
+ x7[1] = _mm_add_epi64(s7[1], s15[1]);
+ x8[0] = _mm_sub_epi64(s0[0], s8[0]);
+ x8[1] = _mm_sub_epi64(s0[1], s8[1]);
+ x9[0] = _mm_sub_epi64(s1[0], s9[0]);
+ x9[1] = _mm_sub_epi64(s1[1], s9[1]);
+ x10[0] = _mm_sub_epi64(s2[0], s10[0]);
+ x10[1] = _mm_sub_epi64(s2[1], s10[1]);
+ x11[0] = _mm_sub_epi64(s3[0], s11[0]);
+ x11[1] = _mm_sub_epi64(s3[1], s11[1]);
+ x12[0] = _mm_sub_epi64(s4[0], s12[0]);
+ x12[1] = _mm_sub_epi64(s4[1], s12[1]);
+ x13[0] = _mm_sub_epi64(s5[0], s13[0]);
+ x13[1] = _mm_sub_epi64(s5[1], s13[1]);
+ x14[0] = _mm_sub_epi64(s6[0], s14[0]);
+ x14[1] = _mm_sub_epi64(s6[1], s14[1]);
+ x15[0] = _mm_sub_epi64(s7[0], s15[0]);
+ x15[1] = _mm_sub_epi64(s7[1], s15[1]);
+
+ x0[0] = dct_const_round_shift_64bit(x0[0]);
+ x0[1] = dct_const_round_shift_64bit(x0[1]);
+ x1[0] = dct_const_round_shift_64bit(x1[0]);
+ x1[1] = dct_const_round_shift_64bit(x1[1]);
+ x2[0] = dct_const_round_shift_64bit(x2[0]);
+ x2[1] = dct_const_round_shift_64bit(x2[1]);
+ x3[0] = dct_const_round_shift_64bit(x3[0]);
+ x3[1] = dct_const_round_shift_64bit(x3[1]);
+ x4[0] = dct_const_round_shift_64bit(x4[0]);
+ x4[1] = dct_const_round_shift_64bit(x4[1]);
+ x5[0] = dct_const_round_shift_64bit(x5[0]);
+ x5[1] = dct_const_round_shift_64bit(x5[1]);
+ x6[0] = dct_const_round_shift_64bit(x6[0]);
+ x6[1] = dct_const_round_shift_64bit(x6[1]);
+ x7[0] = dct_const_round_shift_64bit(x7[0]);
+ x7[1] = dct_const_round_shift_64bit(x7[1]);
+ x8[0] = dct_const_round_shift_64bit(x8[0]);
+ x8[1] = dct_const_round_shift_64bit(x8[1]);
+ x9[0] = dct_const_round_shift_64bit(x9[0]);
+ x9[1] = dct_const_round_shift_64bit(x9[1]);
+ x10[0] = dct_const_round_shift_64bit(x10[0]);
+ x10[1] = dct_const_round_shift_64bit(x10[1]);
+ x11[0] = dct_const_round_shift_64bit(x11[0]);
+ x11[1] = dct_const_round_shift_64bit(x11[1]);
+ x12[0] = dct_const_round_shift_64bit(x12[0]);
+ x12[1] = dct_const_round_shift_64bit(x12[1]);
+ x13[0] = dct_const_round_shift_64bit(x13[0]);
+ x13[1] = dct_const_round_shift_64bit(x13[1]);
+ x14[0] = dct_const_round_shift_64bit(x14[0]);
+ x14[1] = dct_const_round_shift_64bit(x14[1]);
+ x15[0] = dct_const_round_shift_64bit(x15[0]);
+ x15[1] = dct_const_round_shift_64bit(x15[1]);
+ x0[0] = pack_4(x0[0], x0[1]);
+ x1[0] = pack_4(x1[0], x1[1]);
+ x2[0] = pack_4(x2[0], x2[1]);
+ x3[0] = pack_4(x3[0], x3[1]);
+ x4[0] = pack_4(x4[0], x4[1]);
+ x5[0] = pack_4(x5[0], x5[1]);
+ x6[0] = pack_4(x6[0], x6[1]);
+ x7[0] = pack_4(x7[0], x7[1]);
+ x8[0] = pack_4(x8[0], x8[1]);
+ x9[0] = pack_4(x9[0], x9[1]);
+ x10[0] = pack_4(x10[0], x10[1]);
+ x11[0] = pack_4(x11[0], x11[1]);
+ x12[0] = pack_4(x12[0], x12[1]);
+ x13[0] = pack_4(x13[0], x13[1]);
+ x14[0] = pack_4(x14[0], x14[1]);
+ x15[0] = pack_4(x15[0], x15[1]);
+
+ // stage 2
+ s0[0] = x0[0];
+ s1[0] = x1[0];
+ s2[0] = x2[0];
+ s3[0] = x3[0];
+ s4[0] = x4[0];
+ s5[0] = x5[0];
+ s6[0] = x6[0];
+ s7[0] = x7[0];
+ x0[0] = _mm_add_epi32(s0[0], s4[0]);
+ x1[0] = _mm_add_epi32(s1[0], s5[0]);
+ x2[0] = _mm_add_epi32(s2[0], s6[0]);
+ x3[0] = _mm_add_epi32(s3[0], s7[0]);
+ x4[0] = _mm_sub_epi32(s0[0], s4[0]);
+ x5[0] = _mm_sub_epi32(s1[0], s5[0]);
+ x6[0] = _mm_sub_epi32(s2[0], s6[0]);
+ x7[0] = _mm_sub_epi32(s3[0], s7[0]);
+
+ highbd_iadst_butterfly_sse4_1(x8[0], x9[0], cospi_4_64, cospi_28_64, s8, s9);
+ highbd_iadst_butterfly_sse4_1(x10[0], x11[0], cospi_20_64, cospi_12_64, s10,
+ s11);
+ highbd_iadst_butterfly_sse4_1(x13[0], x12[0], cospi_28_64, cospi_4_64, s13,
+ s12);
+ highbd_iadst_butterfly_sse4_1(x15[0], x14[0], cospi_12_64, cospi_20_64, s15,
+ s14);
+
+ x8[0] = _mm_add_epi64(s8[0], s12[0]);
+ x8[1] = _mm_add_epi64(s8[1], s12[1]);
+ x9[0] = _mm_add_epi64(s9[0], s13[0]);
+ x9[1] = _mm_add_epi64(s9[1], s13[1]);
+ x10[0] = _mm_add_epi64(s10[0], s14[0]);
+ x10[1] = _mm_add_epi64(s10[1], s14[1]);
+ x11[0] = _mm_add_epi64(s11[0], s15[0]);
+ x11[1] = _mm_add_epi64(s11[1], s15[1]);
+ x12[0] = _mm_sub_epi64(s8[0], s12[0]);
+ x12[1] = _mm_sub_epi64(s8[1], s12[1]);
+ x13[0] = _mm_sub_epi64(s9[0], s13[0]);
+ x13[1] = _mm_sub_epi64(s9[1], s13[1]);
+ x14[0] = _mm_sub_epi64(s10[0], s14[0]);
+ x14[1] = _mm_sub_epi64(s10[1], s14[1]);
+ x15[0] = _mm_sub_epi64(s11[0], s15[0]);
+ x15[1] = _mm_sub_epi64(s11[1], s15[1]);
+ x8[0] = dct_const_round_shift_64bit(x8[0]);
+ x8[1] = dct_const_round_shift_64bit(x8[1]);
+ x9[0] = dct_const_round_shift_64bit(x9[0]);
+ x9[1] = dct_const_round_shift_64bit(x9[1]);
+ x10[0] = dct_const_round_shift_64bit(x10[0]);
+ x10[1] = dct_const_round_shift_64bit(x10[1]);
+ x11[0] = dct_const_round_shift_64bit(x11[0]);
+ x11[1] = dct_const_round_shift_64bit(x11[1]);
+ x12[0] = dct_const_round_shift_64bit(x12[0]);
+ x12[1] = dct_const_round_shift_64bit(x12[1]);
+ x13[0] = dct_const_round_shift_64bit(x13[0]);
+ x13[1] = dct_const_round_shift_64bit(x13[1]);
+ x14[0] = dct_const_round_shift_64bit(x14[0]);
+ x14[1] = dct_const_round_shift_64bit(x14[1]);
+ x15[0] = dct_const_round_shift_64bit(x15[0]);
+ x15[1] = dct_const_round_shift_64bit(x15[1]);
+ x8[0] = pack_4(x8[0], x8[1]);
+ x9[0] = pack_4(x9[0], x9[1]);
+ x10[0] = pack_4(x10[0], x10[1]);
+ x11[0] = pack_4(x11[0], x11[1]);
+ x12[0] = pack_4(x12[0], x12[1]);
+ x13[0] = pack_4(x13[0], x13[1]);
+ x14[0] = pack_4(x14[0], x14[1]);
+ x15[0] = pack_4(x15[0], x15[1]);
+
+ // stage 3
+ s0[0] = x0[0];
+ s1[0] = x1[0];
+ s2[0] = x2[0];
+ s3[0] = x3[0];
+ highbd_iadst_butterfly_sse4_1(x4[0], x5[0], cospi_8_64, cospi_24_64, s4, s5);
+ highbd_iadst_butterfly_sse4_1(x7[0], x6[0], cospi_24_64, cospi_8_64, s7, s6);
+ s8[0] = x8[0];
+ s9[0] = x9[0];
+ s10[0] = x10[0];
+ s11[0] = x11[0];
+ highbd_iadst_butterfly_sse4_1(x12[0], x13[0], cospi_8_64, cospi_24_64, s12,
+ s13);
+ highbd_iadst_butterfly_sse4_1(x15[0], x14[0], cospi_24_64, cospi_8_64, s15,
+ s14);
+
+ x0[0] = _mm_add_epi32(s0[0], s2[0]);
+ x1[0] = _mm_add_epi32(s1[0], s3[0]);
+ x2[0] = _mm_sub_epi32(s0[0], s2[0]);
+ x3[0] = _mm_sub_epi32(s1[0], s3[0]);
+ x4[0] = _mm_add_epi64(s4[0], s6[0]);
+ x4[1] = _mm_add_epi64(s4[1], s6[1]);
+ x5[0] = _mm_add_epi64(s5[0], s7[0]);
+ x5[1] = _mm_add_epi64(s5[1], s7[1]);
+ x6[0] = _mm_sub_epi64(s4[0], s6[0]);
+ x6[1] = _mm_sub_epi64(s4[1], s6[1]);
+ x7[0] = _mm_sub_epi64(s5[0], s7[0]);
+ x7[1] = _mm_sub_epi64(s5[1], s7[1]);
+ x4[0] = dct_const_round_shift_64bit(x4[0]);
+ x4[1] = dct_const_round_shift_64bit(x4[1]);
+ x5[0] = dct_const_round_shift_64bit(x5[0]);
+ x5[1] = dct_const_round_shift_64bit(x5[1]);
+ x6[0] = dct_const_round_shift_64bit(x6[0]);
+ x6[1] = dct_const_round_shift_64bit(x6[1]);
+ x7[0] = dct_const_round_shift_64bit(x7[0]);
+ x7[1] = dct_const_round_shift_64bit(x7[1]);
+ x4[0] = pack_4(x4[0], x4[1]);
+ x5[0] = pack_4(x5[0], x5[1]);
+ x6[0] = pack_4(x6[0], x6[1]);
+ x7[0] = pack_4(x7[0], x7[1]);
+ x8[0] = _mm_add_epi32(s8[0], s10[0]);
+ x9[0] = _mm_add_epi32(s9[0], s11[0]);
+ x10[0] = _mm_sub_epi32(s8[0], s10[0]);
+ x11[0] = _mm_sub_epi32(s9[0], s11[0]);
+ x12[0] = _mm_add_epi64(s12[0], s14[0]);
+ x12[1] = _mm_add_epi64(s12[1], s14[1]);
+ x13[0] = _mm_add_epi64(s13[0], s15[0]);
+ x13[1] = _mm_add_epi64(s13[1], s15[1]);
+ x14[0] = _mm_sub_epi64(s12[0], s14[0]);
+ x14[1] = _mm_sub_epi64(s12[1], s14[1]);
+ x15[0] = _mm_sub_epi64(s13[0], s15[0]);
+ x15[1] = _mm_sub_epi64(s13[1], s15[1]);
+ x12[0] = dct_const_round_shift_64bit(x12[0]);
+ x12[1] = dct_const_round_shift_64bit(x12[1]);
+ x13[0] = dct_const_round_shift_64bit(x13[0]);
+ x13[1] = dct_const_round_shift_64bit(x13[1]);
+ x14[0] = dct_const_round_shift_64bit(x14[0]);
+ x14[1] = dct_const_round_shift_64bit(x14[1]);
+ x15[0] = dct_const_round_shift_64bit(x15[0]);
+ x15[1] = dct_const_round_shift_64bit(x15[1]);
+ x12[0] = pack_4(x12[0], x12[1]);
+ x13[0] = pack_4(x13[0], x13[1]);
+ x14[0] = pack_4(x14[0], x14[1]);
+ x15[0] = pack_4(x15[0], x15[1]);
+
+ // stage 4
+ s2[0] = _mm_add_epi32(x2[0], x3[0]);
+ s3[0] = _mm_sub_epi32(x2[0], x3[0]);
+ s6[0] = _mm_add_epi32(x7[0], x6[0]);
+ s7[0] = _mm_sub_epi32(x7[0], x6[0]);
+ s10[0] = _mm_add_epi32(x11[0], x10[0]);
+ s11[0] = _mm_sub_epi32(x11[0], x10[0]);
+ s14[0] = _mm_add_epi32(x14[0], x15[0]);
+ s15[0] = _mm_sub_epi32(x14[0], x15[0]);
+ highbd_iadst_half_butterfly_sse4_1(s2[0], -cospi_16_64, s2);
+ highbd_iadst_half_butterfly_sse4_1(s3[0], cospi_16_64, s3);
+ highbd_iadst_half_butterfly_sse4_1(s6[0], cospi_16_64, s6);
+ highbd_iadst_half_butterfly_sse4_1(s7[0], cospi_16_64, s7);
+ highbd_iadst_half_butterfly_sse4_1(s10[0], cospi_16_64, s10);
+ highbd_iadst_half_butterfly_sse4_1(s11[0], cospi_16_64, s11);
+ highbd_iadst_half_butterfly_sse4_1(s14[0], -cospi_16_64, s14);
+ highbd_iadst_half_butterfly_sse4_1(s15[0], cospi_16_64, s15);
+
+ x2[0] = dct_const_round_shift_64bit(s2[0]);
+ x2[1] = dct_const_round_shift_64bit(s2[1]);
+ x3[0] = dct_const_round_shift_64bit(s3[0]);
+ x3[1] = dct_const_round_shift_64bit(s3[1]);
+ x6[0] = dct_const_round_shift_64bit(s6[0]);
+ x6[1] = dct_const_round_shift_64bit(s6[1]);
+ x7[0] = dct_const_round_shift_64bit(s7[0]);
+ x7[1] = dct_const_round_shift_64bit(s7[1]);
+ x10[0] = dct_const_round_shift_64bit(s10[0]);
+ x10[1] = dct_const_round_shift_64bit(s10[1]);
+ x11[0] = dct_const_round_shift_64bit(s11[0]);
+ x11[1] = dct_const_round_shift_64bit(s11[1]);
+ x14[0] = dct_const_round_shift_64bit(s14[0]);
+ x14[1] = dct_const_round_shift_64bit(s14[1]);
+ x15[0] = dct_const_round_shift_64bit(s15[0]);
+ x15[1] = dct_const_round_shift_64bit(s15[1]);
+ x2[0] = pack_4(x2[0], x2[1]);
+ x3[0] = pack_4(x3[0], x3[1]);
+ x6[0] = pack_4(x6[0], x6[1]);
+ x7[0] = pack_4(x7[0], x7[1]);
+ x10[0] = pack_4(x10[0], x10[1]);
+ x11[0] = pack_4(x11[0], x11[1]);
+ x14[0] = pack_4(x14[0], x14[1]);
+ x15[0] = pack_4(x15[0], x15[1]);
+
+ io[0] = x0[0];
+ io[1] = _mm_sub_epi32(_mm_setzero_si128(), x8[0]);
+ io[2] = x12[0];
+ io[3] = _mm_sub_epi32(_mm_setzero_si128(), x4[0]);
+ io[4] = x6[0];
+ io[5] = x14[0];
+ io[6] = x10[0];
+ io[7] = x2[0];
+ io[8] = x3[0];
+ io[9] = x11[0];
+ io[10] = x15[0];
+ io[11] = x7[0];
+ io[12] = x5[0];
+ io[13] = _mm_sub_epi32(_mm_setzero_si128(), x13[0]);
+ io[14] = x9[0];
+ io[15] = _mm_sub_epi32(_mm_setzero_si128(), x1[0]);
+}
+
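
The permuted, partially negated stores at the end of
highbd_iadst16_4col_sse4_1() implement the scalar iadst16 output stage;
written out for reference:

    /* Final ordering (sketch), matching the scalar iadst16:
     * out = {  x0,  -x8, x12,  -x4,  x6,  x14, x10,  x2,
     *          x3,  x11, x15,   x7,  x5, -x13,  x9, -x1 } */
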
+void vp9_highbd_iht16x16_256_add_sse4_1(const tran_low_t *input, uint16_t *dest,
+ int stride, int tx_type, int bd) {
+ int i;
+ __m128i out[16], *in;
+
+ if (bd == 8) {
+ __m128i l[16], r[16];
+
+ in = l;
+ for (i = 0; i < 2; i++) {
+ highbd_load_pack_transpose_32bit_8x8(&input[0], 16, &in[0]);
+ highbd_load_pack_transpose_32bit_8x8(&input[8], 16, &in[8]);
+ if (tx_type == DCT_DCT || tx_type == ADST_DCT) {
+ idct16_8col(in, in);
+ } else {
+ vpx_iadst16_8col_sse2(in);
+ }
+ in = r;
+ input += 128;
+ }
+
+ for (i = 0; i < 16; i += 8) {
+ int j;
+ transpose_16bit_8x8(l + i, out);
+ transpose_16bit_8x8(r + i, out + 8);
+ if (tx_type == DCT_DCT || tx_type == DCT_ADST) {
+ idct16_8col(out, out);
+ } else {
+ vpx_iadst16_8col_sse2(out);
+ }
+
+ for (j = 0; j < 16; ++j) {
+ highbd_write_buffer_8(dest + j * stride, out[j], bd);
+ }
+ dest += 8;
+ }
+ } else {
+ __m128i all[4][16];
+
+ for (i = 0; i < 4; i++) {
+ in = all[i];
+ highbd_load_transpose_32bit_8x4(&input[0], 16, &in[0]);
+ highbd_load_transpose_32bit_8x4(&input[8], 16, &in[8]);
+ if (tx_type == DCT_DCT || tx_type == ADST_DCT) {
+ vpx_highbd_idct16_4col_sse4_1(in);
+ } else {
+ highbd_iadst16_4col_sse4_1(in);
+ }
+ input += 4 * 16;
+ }
+
+ for (i = 0; i < 16; i += 4) {
+ int j;
+ transpose_32bit_4x4(all[0] + i, out + 0);
+ transpose_32bit_4x4(all[1] + i, out + 4);
+ transpose_32bit_4x4(all[2] + i, out + 8);
+ transpose_32bit_4x4(all[3] + i, out + 12);
+ if (tx_type == DCT_DCT || tx_type == DCT_ADST) {
+ vpx_highbd_idct16_4col_sse4_1(out);
+ } else {
+ highbd_iadst16_4col_sse4_1(out);
+ }
+
+ for (j = 0; j < 16; ++j) {
+ highbd_write_buffer_4(dest + j * stride, out[j], bd);
+ }
+ dest += 4;
+ }
+ }
+}
diff --git a/libvpx/vp9/common/x86/vp9_highbd_iht4x4_add_sse4.c b/libvpx/vp9/common/x86/vp9_highbd_iht4x4_add_sse4.c
new file mode 100644
index 000000000..af158536f
--- /dev/null
+++ b/libvpx/vp9/common/x86/vp9_highbd_iht4x4_add_sse4.c
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2018 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vp9_rtcd.h"
+#include "vp9/common/vp9_idct.h"
+#include "vpx_dsp/x86/highbd_inv_txfm_sse4.h"
+#include "vpx_dsp/x86/inv_txfm_sse2.h"
+#include "vpx_dsp/x86/transpose_sse2.h"
+#include "vpx_dsp/x86/txfm_common_sse2.h"
+
+static INLINE void highbd_iadst4_sse4_1(__m128i *const io) {
+ const __m128i pair_c1 = pair_set_epi32(4 * sinpi_1_9, 0);
+ const __m128i pair_c2 = pair_set_epi32(4 * sinpi_2_9, 0);
+ const __m128i pair_c3 = pair_set_epi32(4 * sinpi_3_9, 0);
+ const __m128i pair_c4 = pair_set_epi32(4 * sinpi_4_9, 0);
+ __m128i s0[2], s1[2], s2[2], s3[2], s4[2], s5[2], s6[2], t0[2], t1[2], t2[2];
+ __m128i temp[2];
+
+ transpose_32bit_4x4(io, io);
+
+ extend_64bit(io[0], temp);
+ s0[0] = _mm_mul_epi32(pair_c1, temp[0]);
+ s0[1] = _mm_mul_epi32(pair_c1, temp[1]);
+ s1[0] = _mm_mul_epi32(pair_c2, temp[0]);
+ s1[1] = _mm_mul_epi32(pair_c2, temp[1]);
+
+ extend_64bit(io[1], temp);
+ s2[0] = _mm_mul_epi32(pair_c3, temp[0]);
+ s2[1] = _mm_mul_epi32(pair_c3, temp[1]);
+
+ extend_64bit(io[2], temp);
+ s3[0] = _mm_mul_epi32(pair_c4, temp[0]);
+ s3[1] = _mm_mul_epi32(pair_c4, temp[1]);
+ s4[0] = _mm_mul_epi32(pair_c1, temp[0]);
+ s4[1] = _mm_mul_epi32(pair_c1, temp[1]);
+
+ extend_64bit(io[3], temp);
+ s5[0] = _mm_mul_epi32(pair_c2, temp[0]);
+ s5[1] = _mm_mul_epi32(pair_c2, temp[1]);
+ s6[0] = _mm_mul_epi32(pair_c4, temp[0]);
+ s6[1] = _mm_mul_epi32(pair_c4, temp[1]);
+
+ t0[0] = _mm_add_epi64(s0[0], s3[0]);
+ t0[1] = _mm_add_epi64(s0[1], s3[1]);
+ t0[0] = _mm_add_epi64(t0[0], s5[0]);
+ t0[1] = _mm_add_epi64(t0[1], s5[1]);
+ t1[0] = _mm_sub_epi64(s1[0], s4[0]);
+ t1[1] = _mm_sub_epi64(s1[1], s4[1]);
+ t1[0] = _mm_sub_epi64(t1[0], s6[0]);
+ t1[1] = _mm_sub_epi64(t1[1], s6[1]);
+ temp[0] = _mm_sub_epi32(io[0], io[2]);
+ temp[0] = _mm_add_epi32(temp[0], io[3]);
+ extend_64bit(temp[0], temp);
+ t2[0] = _mm_mul_epi32(pair_c3, temp[0]);
+ t2[1] = _mm_mul_epi32(pair_c3, temp[1]);
+
+ s0[0] = _mm_add_epi64(t0[0], s2[0]);
+ s0[1] = _mm_add_epi64(t0[1], s2[1]);
+ s1[0] = _mm_add_epi64(t1[0], s2[0]);
+ s1[1] = _mm_add_epi64(t1[1], s2[1]);
+ s3[0] = _mm_add_epi64(t0[0], t1[0]);
+ s3[1] = _mm_add_epi64(t0[1], t1[1]);
+ s3[0] = _mm_sub_epi64(s3[0], s2[0]);
+ s3[1] = _mm_sub_epi64(s3[1], s2[1]);
+
+ s0[0] = dct_const_round_shift_64bit(s0[0]);
+ s0[1] = dct_const_round_shift_64bit(s0[1]);
+ s1[0] = dct_const_round_shift_64bit(s1[0]);
+ s1[1] = dct_const_round_shift_64bit(s1[1]);
+ s2[0] = dct_const_round_shift_64bit(t2[0]);
+ s2[1] = dct_const_round_shift_64bit(t2[1]);
+ s3[0] = dct_const_round_shift_64bit(s3[0]);
+ s3[1] = dct_const_round_shift_64bit(s3[1]);
+ io[0] = pack_4(s0[0], s0[1]);
+ io[1] = pack_4(s1[0], s1[1]);
+ io[2] = pack_4(s2[0], s2[1]);
+ io[3] = pack_4(s3[0], s3[1]);
+}
+
+void vp9_highbd_iht4x4_16_add_sse4_1(const tran_low_t *input, uint16_t *dest,
+ int stride, int tx_type, int bd) {
+ __m128i io[4];
+
+ io[0] = _mm_load_si128((const __m128i *)(input + 0));
+ io[1] = _mm_load_si128((const __m128i *)(input + 4));
+ io[2] = _mm_load_si128((const __m128i *)(input + 8));
+ io[3] = _mm_load_si128((const __m128i *)(input + 12));
+
+ if (bd == 8) {
+ __m128i io_short[2];
+
+ io_short[0] = _mm_packs_epi32(io[0], io[1]);
+ io_short[1] = _mm_packs_epi32(io[2], io[3]);
+ if (tx_type == DCT_DCT || tx_type == ADST_DCT) {
+ idct4_sse2(io_short);
+ } else {
+ iadst4_sse2(io_short);
+ }
+ if (tx_type == DCT_DCT || tx_type == DCT_ADST) {
+ idct4_sse2(io_short);
+ } else {
+ iadst4_sse2(io_short);
+ }
+ io_short[0] = _mm_add_epi16(io_short[0], _mm_set1_epi16(8));
+ io_short[1] = _mm_add_epi16(io_short[1], _mm_set1_epi16(8));
+ io[0] = _mm_srai_epi16(io_short[0], 4);
+ io[1] = _mm_srai_epi16(io_short[1], 4);
+ } else {
+ if (tx_type == DCT_DCT || tx_type == ADST_DCT) {
+ highbd_idct4_sse4_1(io);
+ } else {
+ highbd_iadst4_sse4_1(io);
+ }
+ if (tx_type == DCT_DCT || tx_type == DCT_ADST) {
+ highbd_idct4_sse4_1(io);
+ } else {
+ highbd_iadst4_sse4_1(io);
+ }
+ io[0] = wraplow_16bit_shift4(io[0], io[1], _mm_set1_epi32(8));
+ io[1] = wraplow_16bit_shift4(io[2], io[3], _mm_set1_epi32(8));
+ }
+
+ recon_and_store_4x4(io, dest, stride, bd);
+}
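
The two if/else pairs replace the tx_type switch used in the 8-bit SSE2 file
below and encode the same mapping: the suffix of the tx_type name selects the
first 1-D pass and the prefix selects the second. Spelled out:

    /* tx_type        first pass   second pass
     * DCT_DCT    ->  idct         idct
     * ADST_DCT   ->  idct         iadst
     * DCT_ADST   ->  iadst        idct
     * ADST_ADST  ->  iadst        iadst */
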
diff --git a/libvpx/vp9/common/x86/vp9_highbd_iht8x8_add_sse4.c b/libvpx/vp9/common/x86/vp9_highbd_iht8x8_add_sse4.c
new file mode 100644
index 000000000..7d949b6db
--- /dev/null
+++ b/libvpx/vp9/common/x86/vp9_highbd_iht8x8_add_sse4.c
@@ -0,0 +1,255 @@
+/*
+ * Copyright (c) 2018 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vp9_rtcd.h"
+#include "vp9/common/vp9_idct.h"
+#include "vpx_dsp/x86/highbd_inv_txfm_sse4.h"
+#include "vpx_dsp/x86/inv_txfm_sse2.h"
+#include "vpx_dsp/x86/transpose_sse2.h"
+#include "vpx_dsp/x86/txfm_common_sse2.h"
+
+static INLINE void highbd_iadst_half_butterfly_sse4_1(const __m128i in,
+ const int c,
+ __m128i *const s) {
+ const __m128i pair_c = pair_set_epi32(4 * c, 0);
+ __m128i x[2];
+
+ extend_64bit(in, x);
+ s[0] = _mm_mul_epi32(pair_c, x[0]);
+ s[1] = _mm_mul_epi32(pair_c, x[1]);
+}
+
+static INLINE void highbd_iadst_butterfly_sse4_1(const __m128i in0,
+ const __m128i in1,
+ const int c0, const int c1,
+ __m128i *const s0,
+ __m128i *const s1) {
+ const __m128i pair_c0 = pair_set_epi32(4 * c0, 0);
+ const __m128i pair_c1 = pair_set_epi32(4 * c1, 0);
+ __m128i t00[2], t01[2], t10[2], t11[2];
+ __m128i x0[2], x1[2];
+
+ extend_64bit(in0, x0);
+ extend_64bit(in1, x1);
+ t00[0] = _mm_mul_epi32(pair_c0, x0[0]);
+ t00[1] = _mm_mul_epi32(pair_c0, x0[1]);
+ t01[0] = _mm_mul_epi32(pair_c0, x1[0]);
+ t01[1] = _mm_mul_epi32(pair_c0, x1[1]);
+ t10[0] = _mm_mul_epi32(pair_c1, x0[0]);
+ t10[1] = _mm_mul_epi32(pair_c1, x0[1]);
+ t11[0] = _mm_mul_epi32(pair_c1, x1[0]);
+ t11[1] = _mm_mul_epi32(pair_c1, x1[1]);
+
+ s0[0] = _mm_add_epi64(t00[0], t11[0]);
+ s0[1] = _mm_add_epi64(t00[1], t11[1]);
+ s1[0] = _mm_sub_epi64(t10[0], t01[0]);
+ s1[1] = _mm_sub_epi64(t10[1], t01[1]);
+}
+
+static void highbd_iadst8_sse4_1(__m128i *const io) {
+ __m128i s0[2], s1[2], s2[2], s3[2], s4[2], s5[2], s6[2], s7[2];
+ __m128i x0[2], x1[2], x2[2], x3[2], x4[2], x5[2], x6[2], x7[2];
+
+ transpose_32bit_4x4x2(io, io);
+
+ // stage 1
+ highbd_iadst_butterfly_sse4_1(io[7], io[0], cospi_2_64, cospi_30_64, s0, s1);
+ highbd_iadst_butterfly_sse4_1(io[3], io[4], cospi_18_64, cospi_14_64, s4, s5);
+ x0[0] = _mm_add_epi64(s0[0], s4[0]);
+ x0[1] = _mm_add_epi64(s0[1], s4[1]);
+ x1[0] = _mm_add_epi64(s1[0], s5[0]);
+ x1[1] = _mm_add_epi64(s1[1], s5[1]);
+ x4[0] = _mm_sub_epi64(s0[0], s4[0]);
+ x4[1] = _mm_sub_epi64(s0[1], s4[1]);
+ x5[0] = _mm_sub_epi64(s1[0], s5[0]);
+ x5[1] = _mm_sub_epi64(s1[1], s5[1]);
+
+ highbd_iadst_butterfly_sse4_1(io[5], io[2], cospi_10_64, cospi_22_64, s2, s3);
+ highbd_iadst_butterfly_sse4_1(io[1], io[6], cospi_26_64, cospi_6_64, s6, s7);
+ x2[0] = _mm_add_epi64(s2[0], s6[0]);
+ x2[1] = _mm_add_epi64(s2[1], s6[1]);
+ x3[0] = _mm_add_epi64(s3[0], s7[0]);
+ x3[1] = _mm_add_epi64(s3[1], s7[1]);
+ x6[0] = _mm_sub_epi64(s2[0], s6[0]);
+ x6[1] = _mm_sub_epi64(s2[1], s6[1]);
+ x7[0] = _mm_sub_epi64(s3[0], s7[0]);
+ x7[1] = _mm_sub_epi64(s3[1], s7[1]);
+
+ x0[0] = dct_const_round_shift_64bit(x0[0]);
+ x0[1] = dct_const_round_shift_64bit(x0[1]);
+ x1[0] = dct_const_round_shift_64bit(x1[0]);
+ x1[1] = dct_const_round_shift_64bit(x1[1]);
+ x2[0] = dct_const_round_shift_64bit(x2[0]);
+ x2[1] = dct_const_round_shift_64bit(x2[1]);
+ x3[0] = dct_const_round_shift_64bit(x3[0]);
+ x3[1] = dct_const_round_shift_64bit(x3[1]);
+ x4[0] = dct_const_round_shift_64bit(x4[0]);
+ x4[1] = dct_const_round_shift_64bit(x4[1]);
+ x5[0] = dct_const_round_shift_64bit(x5[0]);
+ x5[1] = dct_const_round_shift_64bit(x5[1]);
+ x6[0] = dct_const_round_shift_64bit(x6[0]);
+ x6[1] = dct_const_round_shift_64bit(x6[1]);
+ x7[0] = dct_const_round_shift_64bit(x7[0]);
+ x7[1] = dct_const_round_shift_64bit(x7[1]);
+ s0[0] = pack_4(x0[0], x0[1]); // s0 = x0;
+ s1[0] = pack_4(x1[0], x1[1]); // s1 = x1;
+ s2[0] = pack_4(x2[0], x2[1]); // s2 = x2;
+ s3[0] = pack_4(x3[0], x3[1]); // s3 = x3;
+ x4[0] = pack_4(x4[0], x4[1]);
+ x5[0] = pack_4(x5[0], x5[1]);
+ x6[0] = pack_4(x6[0], x6[1]);
+ x7[0] = pack_4(x7[0], x7[1]);
+
+ // stage 2
+ x0[0] = _mm_add_epi32(s0[0], s2[0]);
+ x1[0] = _mm_add_epi32(s1[0], s3[0]);
+ x2[0] = _mm_sub_epi32(s0[0], s2[0]);
+ x3[0] = _mm_sub_epi32(s1[0], s3[0]);
+
+ highbd_iadst_butterfly_sse4_1(x4[0], x5[0], cospi_8_64, cospi_24_64, s4, s5);
+ highbd_iadst_butterfly_sse4_1(x7[0], x6[0], cospi_24_64, cospi_8_64, s7, s6);
+
+ x4[0] = _mm_add_epi64(s4[0], s6[0]);
+ x4[1] = _mm_add_epi64(s4[1], s6[1]);
+ x5[0] = _mm_add_epi64(s5[0], s7[0]);
+ x5[1] = _mm_add_epi64(s5[1], s7[1]);
+ x6[0] = _mm_sub_epi64(s4[0], s6[0]);
+ x6[1] = _mm_sub_epi64(s4[1], s6[1]);
+ x7[0] = _mm_sub_epi64(s5[0], s7[0]);
+ x7[1] = _mm_sub_epi64(s5[1], s7[1]);
+ x4[0] = dct_const_round_shift_64bit(x4[0]);
+ x4[1] = dct_const_round_shift_64bit(x4[1]);
+ x5[0] = dct_const_round_shift_64bit(x5[0]);
+ x5[1] = dct_const_round_shift_64bit(x5[1]);
+ x6[0] = dct_const_round_shift_64bit(x6[0]);
+ x6[1] = dct_const_round_shift_64bit(x6[1]);
+ x7[0] = dct_const_round_shift_64bit(x7[0]);
+ x7[1] = dct_const_round_shift_64bit(x7[1]);
+ x4[0] = pack_4(x4[0], x4[1]);
+ x5[0] = pack_4(x5[0], x5[1]);
+ x6[0] = pack_4(x6[0], x6[1]);
+ x7[0] = pack_4(x7[0], x7[1]);
+
+ // stage 3
+ s2[0] = _mm_add_epi32(x2[0], x3[0]);
+ s3[0] = _mm_sub_epi32(x2[0], x3[0]);
+ s6[0] = _mm_add_epi32(x6[0], x7[0]);
+ s7[0] = _mm_sub_epi32(x6[0], x7[0]);
+ highbd_iadst_half_butterfly_sse4_1(s2[0], cospi_16_64, s2);
+ highbd_iadst_half_butterfly_sse4_1(s3[0], cospi_16_64, s3);
+ highbd_iadst_half_butterfly_sse4_1(s6[0], cospi_16_64, s6);
+ highbd_iadst_half_butterfly_sse4_1(s7[0], cospi_16_64, s7);
+
+ x2[0] = dct_const_round_shift_64bit(s2[0]);
+ x2[1] = dct_const_round_shift_64bit(s2[1]);
+ x3[0] = dct_const_round_shift_64bit(s3[0]);
+ x3[1] = dct_const_round_shift_64bit(s3[1]);
+ x6[0] = dct_const_round_shift_64bit(s6[0]);
+ x6[1] = dct_const_round_shift_64bit(s6[1]);
+ x7[0] = dct_const_round_shift_64bit(s7[0]);
+ x7[1] = dct_const_round_shift_64bit(s7[1]);
+ x2[0] = pack_4(x2[0], x2[1]);
+ x3[0] = pack_4(x3[0], x3[1]);
+ x6[0] = pack_4(x6[0], x6[1]);
+ x7[0] = pack_4(x7[0], x7[1]);
+
+ io[0] = x0[0];
+ io[1] = _mm_sub_epi32(_mm_setzero_si128(), x4[0]);
+ io[2] = x6[0];
+ io[3] = _mm_sub_epi32(_mm_setzero_si128(), x2[0]);
+ io[4] = x3[0];
+ io[5] = _mm_sub_epi32(_mm_setzero_si128(), x7[0]);
+ io[6] = x5[0];
+ io[7] = _mm_sub_epi32(_mm_setzero_si128(), x1[0]);
+}
+
+void vp9_highbd_iht8x8_64_add_sse4_1(const tran_low_t *input, uint16_t *dest,
+ int stride, int tx_type, int bd) {
+ __m128i io[16];
+
+ io[0] = _mm_load_si128((const __m128i *)(input + 0 * 8 + 0));
+ io[4] = _mm_load_si128((const __m128i *)(input + 0 * 8 + 4));
+ io[1] = _mm_load_si128((const __m128i *)(input + 1 * 8 + 0));
+ io[5] = _mm_load_si128((const __m128i *)(input + 1 * 8 + 4));
+ io[2] = _mm_load_si128((const __m128i *)(input + 2 * 8 + 0));
+ io[6] = _mm_load_si128((const __m128i *)(input + 2 * 8 + 4));
+ io[3] = _mm_load_si128((const __m128i *)(input + 3 * 8 + 0));
+ io[7] = _mm_load_si128((const __m128i *)(input + 3 * 8 + 4));
+ io[8] = _mm_load_si128((const __m128i *)(input + 4 * 8 + 0));
+ io[12] = _mm_load_si128((const __m128i *)(input + 4 * 8 + 4));
+ io[9] = _mm_load_si128((const __m128i *)(input + 5 * 8 + 0));
+ io[13] = _mm_load_si128((const __m128i *)(input + 5 * 8 + 4));
+ io[10] = _mm_load_si128((const __m128i *)(input + 6 * 8 + 0));
+ io[14] = _mm_load_si128((const __m128i *)(input + 6 * 8 + 4));
+ io[11] = _mm_load_si128((const __m128i *)(input + 7 * 8 + 0));
+ io[15] = _mm_load_si128((const __m128i *)(input + 7 * 8 + 4));
+
+ if (bd == 8) {
+ __m128i io_short[8];
+
+ io_short[0] = _mm_packs_epi32(io[0], io[4]);
+ io_short[1] = _mm_packs_epi32(io[1], io[5]);
+ io_short[2] = _mm_packs_epi32(io[2], io[6]);
+ io_short[3] = _mm_packs_epi32(io[3], io[7]);
+ io_short[4] = _mm_packs_epi32(io[8], io[12]);
+ io_short[5] = _mm_packs_epi32(io[9], io[13]);
+ io_short[6] = _mm_packs_epi32(io[10], io[14]);
+ io_short[7] = _mm_packs_epi32(io[11], io[15]);
+
+ if (tx_type == DCT_DCT || tx_type == ADST_DCT) {
+ vpx_idct8_sse2(io_short);
+ } else {
+ iadst8_sse2(io_short);
+ }
+ if (tx_type == DCT_DCT || tx_type == DCT_ADST) {
+ vpx_idct8_sse2(io_short);
+ } else {
+ iadst8_sse2(io_short);
+ }
+ round_shift_8x8(io_short, io);
+ } else {
+ __m128i temp[4];
+
+ if (tx_type == DCT_DCT || tx_type == ADST_DCT) {
+ vpx_highbd_idct8x8_half1d_sse4_1(io);
+ vpx_highbd_idct8x8_half1d_sse4_1(&io[8]);
+ } else {
+ highbd_iadst8_sse4_1(io);
+ highbd_iadst8_sse4_1(&io[8]);
+ }
+
+ temp[0] = io[4];
+ temp[1] = io[5];
+ temp[2] = io[6];
+ temp[3] = io[7];
+ io[4] = io[8];
+ io[5] = io[9];
+ io[6] = io[10];
+ io[7] = io[11];
+
+ if (tx_type == DCT_DCT || tx_type == DCT_ADST) {
+ vpx_highbd_idct8x8_half1d_sse4_1(io);
+ io[8] = temp[0];
+ io[9] = temp[1];
+ io[10] = temp[2];
+ io[11] = temp[3];
+ vpx_highbd_idct8x8_half1d_sse4_1(&io[8]);
+ } else {
+ highbd_iadst8_sse4_1(io);
+ io[8] = temp[0];
+ io[9] = temp[1];
+ io[10] = temp[2];
+ io[11] = temp[3];
+ highbd_iadst8_sse4_1(&io[8]);
+ }
+ highbd_idct8x8_final_round(io);
+ }
+ recon_and_store_8x8(io, dest, stride, bd);
+}
diff --git a/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c b/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c
index 6996260e2..ad693718c 100644
--- a/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c
+++ b/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c
@@ -10,8 +10,6 @@
#include "./vp9_rtcd.h"
#include "vpx_dsp/x86/inv_txfm_sse2.h"
-#include "vpx_dsp/x86/txfm_common_sse2.h"
-#include "vpx_ports/mem.h"
void vp9_iht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
int tx_type) {
@@ -22,23 +20,23 @@ void vp9_iht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
in[1] = load_input_data8(input + 8);
switch (tx_type) {
- case 0: // DCT_DCT
+ case DCT_DCT:
idct4_sse2(in);
idct4_sse2(in);
break;
- case 1: // ADST_DCT
+ case ADST_DCT:
idct4_sse2(in);
iadst4_sse2(in);
break;
- case 2: // DCT_ADST
+ case DCT_ADST:
iadst4_sse2(in);
idct4_sse2(in);
break;
- case 3: // ADST_ADST
+ default:
+ assert(tx_type == ADST_ADST);
iadst4_sse2(in);
iadst4_sse2(in);
break;
- default: assert(0); break;
}
// Final round and shift
@@ -67,23 +65,23 @@ void vp9_iht8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int stride,
in[7] = load_input_data8(input + 8 * 7);
switch (tx_type) {
- case 0: // DCT_DCT
- idct8_sse2(in);
- idct8_sse2(in);
+ case DCT_DCT:
+ vpx_idct8_sse2(in);
+ vpx_idct8_sse2(in);
break;
- case 1: // ADST_DCT
- idct8_sse2(in);
+ case ADST_DCT:
+ vpx_idct8_sse2(in);
iadst8_sse2(in);
break;
- case 2: // DCT_ADST
+ case DCT_ADST:
iadst8_sse2(in);
- idct8_sse2(in);
+ vpx_idct8_sse2(in);
break;
- case 3: // ADST_ADST
+ default:
+ assert(tx_type == ADST_ADST);
iadst8_sse2(in);
iadst8_sse2(in);
break;
- default: assert(0); break;
}
// Final rounding and shift
@@ -201,23 +199,23 @@ void vp9_iht16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest,
load_buffer_8x16(input, in1);
switch (tx_type) {
- case 0: // DCT_DCT
+ case DCT_DCT:
idct16_sse2(in0, in1);
idct16_sse2(in0, in1);
break;
- case 1: // ADST_DCT
+ case ADST_DCT:
idct16_sse2(in0, in1);
iadst16_sse2(in0, in1);
break;
- case 2: // DCT_ADST
+ case DCT_ADST:
iadst16_sse2(in0, in1);
idct16_sse2(in0, in1);
break;
- case 3: // ADST_ADST
+ default:
+ assert(tx_type == ADST_ADST);
iadst16_sse2(in0, in1);
iadst16_sse2(in0, in1);
break;
- default: assert(0); break;
}
write_buffer_8x16(dest, in0, stride);
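
The numeric case labels in these switches are replaced by the TX_TYPE enum values they always stood for (the removed comments confirm DCT_DCT=0, ADST_DCT=1, DCT_ADST=2, ADST_ADST=3), and the unreachable default now asserts the one remaining value instead of asserting 0. As a sketch, the same 4x4 dispatch written table-driven, assuming the kernel declarations from vpx_dsp/x86/inv_txfm_sse2.h:

    /* First and second 1-D pass per TX_TYPE, in the order the switch
     * above applies them. */
    typedef void (*tx1d_fn)(__m128i *in);
    static const tx1d_fn iht4_pass[4][2] = {
      { idct4_sse2, idct4_sse2 },   /* DCT_DCT   */
      { idct4_sse2, iadst4_sse2 },  /* ADST_DCT  */
      { iadst4_sse2, idct4_sse2 },  /* DCT_ADST  */
      { iadst4_sse2, iadst4_sse2 }, /* ADST_ADST */
    };
    /* usage: iht4_pass[tx_type][0](in); iht4_pass[tx_type][1](in); */
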
diff --git a/libvpx/vp9/decoder/vp9_decodeframe.c b/libvpx/vp9/decoder/vp9_decodeframe.c
index 497a22459..c9c85053d 100644
--- a/libvpx/vp9/decoder/vp9_decodeframe.c
+++ b/libvpx/vp9/decoder/vp9_decodeframe.c
@@ -45,31 +45,11 @@
#define MAX_VP9_HEADER_SIZE 80
-static int is_compound_reference_allowed(const VP9_COMMON *cm) {
- int i;
- for (i = 1; i < REFS_PER_FRAME; ++i)
- if (cm->ref_frame_sign_bias[i + 1] != cm->ref_frame_sign_bias[1]) return 1;
-
- return 0;
-}
-
-static void setup_compound_reference_mode(VP9_COMMON *cm) {
- if (cm->ref_frame_sign_bias[LAST_FRAME] ==
- cm->ref_frame_sign_bias[GOLDEN_FRAME]) {
- cm->comp_fixed_ref = ALTREF_FRAME;
- cm->comp_var_ref[0] = LAST_FRAME;
- cm->comp_var_ref[1] = GOLDEN_FRAME;
- } else if (cm->ref_frame_sign_bias[LAST_FRAME] ==
- cm->ref_frame_sign_bias[ALTREF_FRAME]) {
- cm->comp_fixed_ref = GOLDEN_FRAME;
- cm->comp_var_ref[0] = LAST_FRAME;
- cm->comp_var_ref[1] = ALTREF_FRAME;
- } else {
- cm->comp_fixed_ref = LAST_FRAME;
- cm->comp_var_ref[0] = GOLDEN_FRAME;
- cm->comp_var_ref[1] = ALTREF_FRAME;
- }
-}
+typedef int (*predict_recon_func)(TileWorkerData *twd, MODE_INFO *const mi,
+ int plane, int row, int col, TX_SIZE tx_size);
+
+typedef void (*intra_recon_func)(TileWorkerData *twd, MODE_INFO *const mi,
+ int plane, int row, int col, TX_SIZE tx_size);
static int read_is_valid(const uint8_t *start, size_t len, const uint8_t *end) {
return len != 0 && len <= (size_t)(end - start);
@@ -118,7 +98,7 @@ static void read_inter_mode_probs(FRAME_CONTEXT *fc, vpx_reader *r) {
static REFERENCE_MODE read_frame_reference_mode(const VP9_COMMON *cm,
vpx_reader *r) {
- if (is_compound_reference_allowed(cm)) {
+ if (vp9_compound_reference_allowed(cm)) {
return vpx_read_bit(r)
? (vpx_read_bit(r) ? REFERENCE_MODE_SELECT : COMPOUND_REFERENCE)
: SINGLE_REFERENCE;
@@ -351,6 +331,59 @@ static void predict_and_reconstruct_intra_block(TileWorkerData *twd,
}
}
+static void parse_intra_block_row_mt(TileWorkerData *twd, MODE_INFO *const mi,
+ int plane, int row, int col,
+ TX_SIZE tx_size) {
+ MACROBLOCKD *const xd = &twd->xd;
+ PREDICTION_MODE mode = (plane == 0) ? mi->mode : mi->uv_mode;
+
+ if (mi->sb_type < BLOCK_8X8)
+ if (plane == 0) mode = xd->mi[0]->bmi[(row << 1) + col].as_mode;
+
+ if (!mi->skip) {
+ struct macroblockd_plane *const pd = &xd->plane[plane];
+ const TX_TYPE tx_type =
+ (plane || xd->lossless) ? DCT_DCT : intra_mode_to_tx_type_lookup[mode];
+ const scan_order *sc = (plane || xd->lossless)
+ ? &vp9_default_scan_orders[tx_size]
+ : &vp9_scan_orders[tx_size][tx_type];
+ *pd->eob = vp9_decode_block_tokens(twd, plane, sc, col, row, tx_size,
+ mi->segment_id);
+ /* Keep the alignment to 16 */
+ pd->dqcoeff += (16 << (tx_size << 1));
+ pd->eob++;
+ }
+}
+
+static void predict_and_reconstruct_intra_block_row_mt(TileWorkerData *twd,
+ MODE_INFO *const mi,
+ int plane, int row,
+ int col,
+ TX_SIZE tx_size) {
+ MACROBLOCKD *const xd = &twd->xd;
+ struct macroblockd_plane *const pd = &xd->plane[plane];
+ PREDICTION_MODE mode = (plane == 0) ? mi->mode : mi->uv_mode;
+ uint8_t *dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col];
+
+ if (mi->sb_type < BLOCK_8X8)
+ if (plane == 0) mode = xd->mi[0]->bmi[(row << 1) + col].as_mode;
+
+ vp9_predict_intra_block(xd, pd->n4_wl, tx_size, mode, dst, pd->dst.stride,
+ dst, pd->dst.stride, col, row, plane);
+
+ if (!mi->skip) {
+ const TX_TYPE tx_type =
+ (plane || xd->lossless) ? DCT_DCT : intra_mode_to_tx_type_lookup[mode];
+ if (*pd->eob > 0) {
+ inverse_transform_block_intra(xd, plane, tx_type, tx_size, dst,
+ pd->dst.stride, *pd->eob);
+ }
+ /* Keep the alignment to 16 */
+ pd->dqcoeff += (16 << (tx_size << 1));
+ pd->eob++;
+ }
+}
+
static int reconstruct_inter_block(TileWorkerData *twd, MODE_INFO *const mi,
int plane, int row, int col,
TX_SIZE tx_size) {
@@ -368,6 +401,41 @@ static int reconstruct_inter_block(TileWorkerData *twd, MODE_INFO *const mi,
return eob;
}
+static int parse_inter_block_row_mt(TileWorkerData *twd, MODE_INFO *const mi,
+ int plane, int row, int col,
+ TX_SIZE tx_size) {
+ MACROBLOCKD *const xd = &twd->xd;
+ struct macroblockd_plane *const pd = &xd->plane[plane];
+ const scan_order *sc = &vp9_default_scan_orders[tx_size];
+ const int eob = vp9_decode_block_tokens(twd, plane, sc, col, row, tx_size,
+ mi->segment_id);
+
+ *pd->eob = eob;
+ pd->dqcoeff += (16 << (tx_size << 1));
+ pd->eob++;
+
+ return eob;
+}
+
+static int reconstruct_inter_block_row_mt(TileWorkerData *twd,
+ MODE_INFO *const mi, int plane,
+ int row, int col, TX_SIZE tx_size) {
+ MACROBLOCKD *const xd = &twd->xd;
+ struct macroblockd_plane *const pd = &xd->plane[plane];
+ const int eob = *pd->eob;
+
+ (void)mi;
+ if (eob > 0) {
+ inverse_transform_block_inter(
+ xd, plane, tx_size, &pd->dst.buf[4 * row * pd->dst.stride + 4 * col],
+ pd->dst.stride, eob);
+ }
+ pd->dqcoeff += (16 << (tx_size << 1));
+ pd->eob++;
+
+ return eob;
+}
+
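
Each *_row_mt helper above advances the shared pd->dqcoeff and pd->eob pointers by exactly one transform block, so a later reconstruction pass can replay the same positions the parse pass wrote. The advance 16 << (tx_size << 1) is the coefficient count of one block; a sketch of that arithmetic, using the TX_SIZE values from vp9_enums.h:

    /* TX_4X4=0 -> 16, TX_8X8=1 -> 64, TX_16X16=2 -> 256, TX_32X32=3 -> 1024,
     * i.e. (4 << tx_size) * (4 << tx_size). */
    static int coeffs_per_tx_block(int tx_size) {
      return 16 << (tx_size << 1);
    }
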
static void build_mc_border(const uint8_t *src, int src_stride, uint8_t *dst,
int dst_stride, int x, int y, int b_w, int b_h,
int w, int h) {
@@ -715,6 +783,25 @@ static void set_plane_n4(MACROBLOCKD *const xd, int bw, int bh, int bwl,
}
}
+static MODE_INFO *set_offsets_recon(VP9_COMMON *const cm, MACROBLOCKD *const xd,
+ int mi_row, int mi_col, int bw, int bh,
+ int bwl, int bhl) {
+ const int offset = mi_row * cm->mi_stride + mi_col;
+ const TileInfo *const tile = &xd->tile;
+ xd->mi = cm->mi_grid_visible + offset;
+
+ set_plane_n4(xd, bw, bh, bwl, bhl);
+
+ set_skip_context(xd, mi_row, mi_col);
+
+ // Distance of Mb to the various image edges. These are specified to 8th pel
+ // as they are always compared to values that are in 1/8th pel units
+ set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols);
+
+ vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col);
+ return xd->mi[0];
+}
+
static MODE_INFO *set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd,
BLOCK_SIZE bsize, int mi_row, int mi_col, int bw,
int bh, int x_mis, int y_mis, int bwl, int bhl) {
@@ -744,6 +831,66 @@ static MODE_INFO *set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd,
return xd->mi[0];
}
+static INLINE int predict_recon_inter(MACROBLOCKD *xd, MODE_INFO *mi,
+ TileWorkerData *twd,
+ predict_recon_func func) {
+ int eobtotal = 0;
+ int plane;
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ const TX_SIZE tx_size = plane ? get_uv_tx_size(mi, pd) : mi->tx_size;
+ const int num_4x4_w = pd->n4_w;
+ const int num_4x4_h = pd->n4_h;
+ const int step = (1 << tx_size);
+ int row, col;
+ const int max_blocks_wide =
+ num_4x4_w + (xd->mb_to_right_edge >= 0
+ ? 0
+ : xd->mb_to_right_edge >> (5 + pd->subsampling_x));
+ const int max_blocks_high =
+ num_4x4_h + (xd->mb_to_bottom_edge >= 0
+ ? 0
+ : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
+
+ xd->max_blocks_wide = xd->mb_to_right_edge >= 0 ? 0 : max_blocks_wide;
+ xd->max_blocks_high = xd->mb_to_bottom_edge >= 0 ? 0 : max_blocks_high;
+
+ for (row = 0; row < max_blocks_high; row += step)
+ for (col = 0; col < max_blocks_wide; col += step)
+ eobtotal += func(twd, mi, plane, row, col, tx_size);
+ }
+ return eobtotal;
+}
+
+static INLINE void predict_recon_intra(MACROBLOCKD *xd, MODE_INFO *mi,
+ TileWorkerData *twd,
+ intra_recon_func func) {
+ int plane;
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ const TX_SIZE tx_size = plane ? get_uv_tx_size(mi, pd) : mi->tx_size;
+ const int num_4x4_w = pd->n4_w;
+ const int num_4x4_h = pd->n4_h;
+ const int step = (1 << tx_size);
+ int row, col;
+ const int max_blocks_wide =
+ num_4x4_w + (xd->mb_to_right_edge >= 0
+ ? 0
+ : xd->mb_to_right_edge >> (5 + pd->subsampling_x));
+ const int max_blocks_high =
+ num_4x4_h + (xd->mb_to_bottom_edge >= 0
+ ? 0
+ : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y));
+
+ xd->max_blocks_wide = xd->mb_to_right_edge >= 0 ? 0 : max_blocks_wide;
+ xd->max_blocks_high = xd->mb_to_bottom_edge >= 0 ? 0 : max_blocks_high;
+
+ for (row = 0; row < max_blocks_high; row += step)
+ for (col = 0; col < max_blocks_wide; col += step)
+ func(twd, mi, plane, row, col, tx_size);
+ }
+}
+
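
Both helpers above clip the block walk at the picture edge: mb_to_right_edge and mb_to_bottom_edge are kept in 1/8-pel units (see the 8th-pel comment in set_offsets_recon above), so >> 3 converts to pixels, a further >> 2 to 4x4 transform-block units, and the subsampling shift adjusts for chroma. A scalar restatement under those unit assumptions:

    /* 1/8-pel -> pixels (>>3) -> 4x4 blocks (>>2), plus chroma subsampling. */
    static int edge_in_4x4_blocks(int mb_to_edge_eighthpel, int subsampling) {
      return mb_to_edge_eighthpel >> (3 + 2 + subsampling); /* >> (5 + ss) */
    }
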
static void decode_block(TileWorkerData *twd, VP9Decoder *const pbi, int mi_row,
int mi_col, BLOCK_SIZE bsize, int bwl, int bhl) {
VP9_COMMON *const cm = &pbi->common;
@@ -844,6 +991,81 @@ static void decode_block(TileWorkerData *twd, VP9Decoder *const pbi, int mi_row,
}
}
+static void recon_block(TileWorkerData *twd, VP9Decoder *const pbi, int mi_row,
+ int mi_col, BLOCK_SIZE bsize, int bwl, int bhl) {
+ VP9_COMMON *const cm = &pbi->common;
+ const int bw = 1 << (bwl - 1);
+ const int bh = 1 << (bhl - 1);
+ MACROBLOCKD *const xd = &twd->xd;
+
+ MODE_INFO *mi = set_offsets_recon(cm, xd, mi_row, mi_col, bw, bh, bwl, bhl);
+
+ if (bsize >= BLOCK_8X8 && (cm->subsampling_x || cm->subsampling_y)) {
+ const BLOCK_SIZE uv_subsize =
+ ss_size_lookup[bsize][cm->subsampling_x][cm->subsampling_y];
+ if (uv_subsize == BLOCK_INVALID)
+ vpx_internal_error(xd->error_info, VPX_CODEC_CORRUPT_FRAME,
+ "Invalid block size.");
+ }
+
+ if (!is_inter_block(mi)) {
+ predict_recon_intra(xd, mi, twd,
+ predict_and_reconstruct_intra_block_row_mt);
+ } else {
+ // Prediction
+ dec_build_inter_predictors_sb(pbi, xd, mi_row, mi_col);
+
+ // Reconstruction
+ if (!mi->skip) {
+ predict_recon_inter(xd, mi, twd, reconstruct_inter_block_row_mt);
+ }
+ }
+
+ vp9_build_mask(cm, mi, mi_row, mi_col, bw, bh);
+}
+
+static void parse_block(TileWorkerData *twd, VP9Decoder *const pbi, int mi_row,
+ int mi_col, BLOCK_SIZE bsize, int bwl, int bhl) {
+ VP9_COMMON *const cm = &pbi->common;
+ const int less8x8 = bsize < BLOCK_8X8;
+ const int bw = 1 << (bwl - 1);
+ const int bh = 1 << (bhl - 1);
+ const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col);
+ const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row);
+ vpx_reader *r = &twd->bit_reader;
+ MACROBLOCKD *const xd = &twd->xd;
+
+ MODE_INFO *mi = set_offsets(cm, xd, bsize, mi_row, mi_col, bw, bh, x_mis,
+ y_mis, bwl, bhl);
+
+ if (bsize >= BLOCK_8X8 && (cm->subsampling_x || cm->subsampling_y)) {
+ const BLOCK_SIZE uv_subsize =
+ ss_size_lookup[bsize][cm->subsampling_x][cm->subsampling_y];
+ if (uv_subsize == BLOCK_INVALID)
+ vpx_internal_error(xd->error_info, VPX_CODEC_CORRUPT_FRAME,
+ "Invalid block size.");
+ }
+
+ vp9_read_mode_info(twd, pbi, mi_row, mi_col, x_mis, y_mis);
+
+ if (mi->skip) {
+ dec_reset_skip_context(xd);
+ }
+
+ if (!is_inter_block(mi)) {
+ predict_recon_intra(xd, mi, twd, parse_intra_block_row_mt);
+ } else {
+ if (!mi->skip) {
+ const int eobtotal =
+ predict_recon_inter(xd, mi, twd, parse_inter_block_row_mt);
+
+ if (!less8x8 && eobtotal == 0) mi->skip = 1; // skip loopfilter
+ }
+ }
+
+ xd->corrupted |= vpx_reader_has_error(r);
+}
+
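
parse_block mirrors decode_block but stops after entropy decoding: tokens land in the shared dqcoeff/eob buffers, and recon_block later turns them into pixels. That split is what lets row-multithreaded decoding run bitstream parsing and reconstruction as separate passes. A condensed sketch of the per-superblock call pattern (the real call site, including the buffer rewinds elided here, is in decode_tiles() below):

    static void decode_sb_row_mt_sketch(TileWorkerData *twd, VP9Decoder *pbi,
                                        int mi_row, int mi_col) {
      parse_partition(twd, pbi, mi_row, mi_col, BLOCK_64X64, 4); /* tokens */
      /* rewind xd->plane[].dqcoeff/.eob and xd->partition between passes */
      recon_partition(twd, pbi, mi_row, mi_col, BLOCK_64X64, 4); /* pixels */
    }
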
static INLINE int dec_partition_plane_context(TileWorkerData *twd, int mi_row,
int mi_col, int bsl) {
const PARTITION_CONTEXT *above_ctx = twd->xd.above_seg_context + mi_col;
@@ -950,6 +1172,118 @@ static void decode_partition(TileWorkerData *twd, VP9Decoder *const pbi,
dec_update_partition_context(twd, mi_row, mi_col, subsize, num_8x8_wh);
}
+static void recon_partition(TileWorkerData *twd, VP9Decoder *const pbi,
+ int mi_row, int mi_col, BLOCK_SIZE bsize,
+ int n4x4_l2) {
+ VP9_COMMON *const cm = &pbi->common;
+ const int n8x8_l2 = n4x4_l2 - 1;
+ const int num_8x8_wh = 1 << n8x8_l2;
+ const int hbs = num_8x8_wh >> 1;
+ PARTITION_TYPE partition;
+ BLOCK_SIZE subsize;
+ const int has_rows = (mi_row + hbs) < cm->mi_rows;
+ const int has_cols = (mi_col + hbs) < cm->mi_cols;
+ MACROBLOCKD *const xd = &twd->xd;
+
+ if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
+
+ partition = *xd->partition;
+ xd->partition++;
+
+ subsize = get_subsize(bsize, partition);
+ if (!hbs) {
+ // calculate bmode block dimensions (log 2)
+ xd->bmode_blocks_wl = 1 >> !!(partition & PARTITION_VERT);
+ xd->bmode_blocks_hl = 1 >> !!(partition & PARTITION_HORZ);
+ recon_block(twd, pbi, mi_row, mi_col, subsize, 1, 1);
+ } else {
+ switch (partition) {
+ case PARTITION_NONE:
+ recon_block(twd, pbi, mi_row, mi_col, subsize, n4x4_l2, n4x4_l2);
+ break;
+ case PARTITION_HORZ:
+ recon_block(twd, pbi, mi_row, mi_col, subsize, n4x4_l2, n8x8_l2);
+ if (has_rows)
+ recon_block(twd, pbi, mi_row + hbs, mi_col, subsize, n4x4_l2,
+ n8x8_l2);
+ break;
+ case PARTITION_VERT:
+ recon_block(twd, pbi, mi_row, mi_col, subsize, n8x8_l2, n4x4_l2);
+ if (has_cols)
+ recon_block(twd, pbi, mi_row, mi_col + hbs, subsize, n8x8_l2,
+ n4x4_l2);
+ break;
+ case PARTITION_SPLIT:
+ recon_partition(twd, pbi, mi_row, mi_col, subsize, n8x8_l2);
+ recon_partition(twd, pbi, mi_row, mi_col + hbs, subsize, n8x8_l2);
+ recon_partition(twd, pbi, mi_row + hbs, mi_col, subsize, n8x8_l2);
+ recon_partition(twd, pbi, mi_row + hbs, mi_col + hbs, subsize, n8x8_l2);
+ break;
+ default: assert(0 && "Invalid partition type");
+ }
+ }
+}
+
+static void parse_partition(TileWorkerData *twd, VP9Decoder *const pbi,
+ int mi_row, int mi_col, BLOCK_SIZE bsize,
+ int n4x4_l2) {
+ VP9_COMMON *const cm = &pbi->common;
+ const int n8x8_l2 = n4x4_l2 - 1;
+ const int num_8x8_wh = 1 << n8x8_l2;
+ const int hbs = num_8x8_wh >> 1;
+ PARTITION_TYPE partition;
+ BLOCK_SIZE subsize;
+ const int has_rows = (mi_row + hbs) < cm->mi_rows;
+ const int has_cols = (mi_col + hbs) < cm->mi_cols;
+ MACROBLOCKD *const xd = &twd->xd;
+
+ if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;
+
+ *xd->partition =
+ read_partition(twd, mi_row, mi_col, has_rows, has_cols, n8x8_l2);
+
+ partition = *xd->partition;
+ xd->partition++;
+
+ subsize = get_subsize(bsize, partition);
+ if (!hbs) {
+ // calculate bmode block dimensions (log 2)
+ xd->bmode_blocks_wl = 1 >> !!(partition & PARTITION_VERT);
+ xd->bmode_blocks_hl = 1 >> !!(partition & PARTITION_HORZ);
+ parse_block(twd, pbi, mi_row, mi_col, subsize, 1, 1);
+ } else {
+ switch (partition) {
+ case PARTITION_NONE:
+ parse_block(twd, pbi, mi_row, mi_col, subsize, n4x4_l2, n4x4_l2);
+ break;
+ case PARTITION_HORZ:
+ parse_block(twd, pbi, mi_row, mi_col, subsize, n4x4_l2, n8x8_l2);
+ if (has_rows)
+ parse_block(twd, pbi, mi_row + hbs, mi_col, subsize, n4x4_l2,
+ n8x8_l2);
+ break;
+ case PARTITION_VERT:
+ parse_block(twd, pbi, mi_row, mi_col, subsize, n8x8_l2, n4x4_l2);
+ if (has_cols)
+ parse_block(twd, pbi, mi_row, mi_col + hbs, subsize, n8x8_l2,
+ n4x4_l2);
+ break;
+ case PARTITION_SPLIT:
+ parse_partition(twd, pbi, mi_row, mi_col, subsize, n8x8_l2);
+ parse_partition(twd, pbi, mi_row, mi_col + hbs, subsize, n8x8_l2);
+ parse_partition(twd, pbi, mi_row + hbs, mi_col, subsize, n8x8_l2);
+ parse_partition(twd, pbi, mi_row + hbs, mi_col + hbs, subsize, n8x8_l2);
+ break;
+ default: assert(0 && "Invalid partition type");
+ }
+ }
+
+ // update partition context
+ if (bsize >= BLOCK_8X8 &&
+ (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT))
+ dec_update_partition_context(twd, mi_row, mi_col, subsize, num_8x8_wh);
+}
+
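
For sub-8x8 blocks, recon_partition and parse_partition derive the bmode block dimensions from the partition bits: with PARTITION_NONE=0, PARTITION_HORZ=1, PARTITION_VERT=2 and PARTITION_SPLIT=3 (vp9_enums.h), `1 >> !!(partition & PARTITION_VERT)` is the log2 width in 4x4 units. A sketch of the trick:

    /* NONE/HORZ keep the 8-px width (log2 = 1); VERT/SPLIT halve it to
     * 4 px (log2 = 0). Height works the same way with PARTITION_HORZ. */
    static int bmode_blocks_wide_log2(int partition) {
      return 1 >> !!(partition & 2 /* PARTITION_VERT */);
    }
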
static void setup_token_decoder(const uint8_t *data, const uint8_t *data_end,
size_t read_size,
struct vpx_internal_error_info *error_info,
@@ -1148,9 +1482,15 @@ static void resize_context_buffers(VP9_COMMON *cm, int width, int height) {
// Allocations in vp9_alloc_context_buffers() depend on individual
// dimensions as well as the overall size.
if (new_mi_cols > cm->mi_cols || new_mi_rows > cm->mi_rows) {
- if (vp9_alloc_context_buffers(cm, width, height))
+ if (vp9_alloc_context_buffers(cm, width, height)) {
+ // The cm->mi_* values have been cleared and any existing context
+ // buffers have been freed. Clear cm->width and cm->height to be
+ // consistent and to force a realloc next time.
+ cm->width = 0;
+ cm->height = 0;
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
"Failed to allocate context buffers");
+ }
} else {
vp9_set_mb_mi(cm, width, height);
}
@@ -1426,7 +1766,27 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, const uint8_t *data,
vp9_zero(tile_data->xd.left_seg_context);
for (mi_col = tile.mi_col_start; mi_col < tile.mi_col_end;
mi_col += MI_BLOCK_SIZE) {
- decode_partition(tile_data, pbi, mi_row, mi_col, BLOCK_64X64, 4);
+ if (pbi->row_mt == 1) {
+ int plane;
+ RowMTWorkerData *const row_mt_worker_data = pbi->row_mt_worker_data;
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ tile_data->xd.plane[plane].eob = row_mt_worker_data->eob[plane];
+ tile_data->xd.plane[plane].dqcoeff =
+ row_mt_worker_data->dqcoeff[plane];
+ }
+ tile_data->xd.partition = row_mt_worker_data->partition;
+ parse_partition(tile_data, pbi, mi_row, mi_col, BLOCK_64X64, 4);
+
+ for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
+ tile_data->xd.plane[plane].eob = row_mt_worker_data->eob[plane];
+ tile_data->xd.plane[plane].dqcoeff =
+ row_mt_worker_data->dqcoeff[plane];
+ }
+ tile_data->xd.partition = row_mt_worker_data->partition;
+ recon_partition(tile_data, pbi, mi_row, mi_col, BLOCK_64X64, 4);
+ } else {
+ decode_partition(tile_data, pbi, mi_row, mi_col, BLOCK_64X64, 4);
+ }
}
pbi->mb.corrupted |= tile_data->xd.corrupted;
if (pbi->mb.corrupted)
@@ -1471,6 +1831,25 @@ static const uint8_t *decode_tiles(VP9Decoder *pbi, const uint8_t *data,
return vpx_reader_find_end(&tile_data->bit_reader);
}
+static void set_rows_after_error(VP9LfSync *lf_sync, int start_row, int mi_rows,
+ int num_tiles_left, int total_num_tiles) {
+ do {
+ int mi_row;
+ const int aligned_rows = mi_cols_aligned_to_sb(mi_rows);
+ const int sb_rows = (aligned_rows >> MI_BLOCK_SIZE_LOG2);
+ const int corrupted = 1;
+ for (mi_row = start_row; mi_row < mi_rows; mi_row += MI_BLOCK_SIZE) {
+ const int is_last_row = (sb_rows - 1 == mi_row >> MI_BLOCK_SIZE_LOG2);
+ vp9_set_row(lf_sync, total_num_tiles, mi_row >> MI_BLOCK_SIZE_LOG2,
+ is_last_row, corrupted);
+ }
+      /* If there are multiple tiles, each subsequent tile starts marking row
+       * progress from row 0.
+       */
+ start_row = 0;
+ } while (num_tiles_left--);
+}
+
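
On a decode error, set_rows_after_error marks every remaining superblock row of every remaining tile column as done-and-corrupt, so loopfilter workers waiting in vp9_loopfilter_rows() are released instead of deadlocking. With 8x8-pixel MI units and 8-MI superblocks, the row math works out as in this sketch:

    /* e.g. a 1080-tall frame: mi_rows = 135, aligned to 136, sb_rows = 17. */
    static int sb_rows_from_mi_rows(int mi_rows) {
      const int aligned = (mi_rows + 7) & ~7; /* mi_cols_aligned_to_sb() */
      return aligned >> 3;                    /* MI_BLOCK_SIZE_LOG2 */
    }
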
// On entry 'tile_data->data_end' points to the end of the input frame, on exit
// it is updated to reflect the bitreader position of the final tile column if
// present in the tile buffer group or NULL otherwise.
@@ -1481,6 +1860,12 @@ static int tile_worker_hook(void *arg1, void *arg2) {
TileInfo *volatile tile = &tile_data->xd.tile;
const int final_col = (1 << pbi->common.log2_tile_cols) - 1;
const uint8_t *volatile bit_reader_end = NULL;
+ VP9_COMMON *cm = &pbi->common;
+
+ LFWorkerData *lf_data = tile_data->lf_data;
+ VP9LfSync *lf_sync = tile_data->lf_sync;
+
+ volatile int mi_row = 0;
volatile int n = tile_data->buf_start;
tile_data->error_info.setjmp = 1;
@@ -1488,14 +1873,26 @@ static int tile_worker_hook(void *arg1, void *arg2) {
tile_data->error_info.setjmp = 0;
tile_data->xd.corrupted = 1;
tile_data->data_end = NULL;
+ if (pbi->lpf_mt_opt && cm->lf.filter_level && !cm->skip_loop_filter) {
+ const int num_tiles_left = tile_data->buf_end - n;
+ const int mi_row_start = mi_row;
+ set_rows_after_error(lf_sync, mi_row_start, cm->mi_rows, num_tiles_left,
+ 1 << cm->log2_tile_cols);
+ }
return 0;
}
tile_data->xd.corrupted = 0;
do {
- int mi_row, mi_col;
+ int mi_col;
const TileBuffer *const buf = pbi->tile_buffers + n;
+
+    /* Initializing mi_row to 0 is safe because streams with more than one
+     * row of tiles are not handled here, so tile->mi_row_start is always 0.
+     */
+ assert(cm->log2_tile_rows == 0);
+ mi_row = 0;
vp9_zero(tile_data->dqcoeff);
vp9_tile_init(tile, &pbi->common, 0, buf->col);
setup_token_decoder(buf->data, tile_data->data_end, buf->size,
@@ -1513,6 +1910,14 @@ static int tile_worker_hook(void *arg1, void *arg2) {
mi_col += MI_BLOCK_SIZE) {
decode_partition(tile_data, pbi, mi_row, mi_col, BLOCK_64X64, 4);
}
+ if (pbi->lpf_mt_opt && cm->lf.filter_level && !cm->skip_loop_filter) {
+ const int aligned_rows = mi_cols_aligned_to_sb(cm->mi_rows);
+ const int sb_rows = (aligned_rows >> MI_BLOCK_SIZE_LOG2);
+ const int is_last_row = (sb_rows - 1 == mi_row >> MI_BLOCK_SIZE_LOG2);
+ vp9_set_row(lf_sync, 1 << cm->log2_tile_cols,
+ mi_row >> MI_BLOCK_SIZE_LOG2, is_last_row,
+ tile_data->xd.corrupted);
+ }
}
if (buf->col == final_col) {
@@ -1520,6 +1925,21 @@ static int tile_worker_hook(void *arg1, void *arg2) {
}
} while (!tile_data->xd.corrupted && ++n <= tile_data->buf_end);
+ if (pbi->lpf_mt_opt && n < tile_data->buf_end && cm->lf.filter_level &&
+ !cm->skip_loop_filter) {
+    /* n was not incremented in the tile loop, so increment it here before
+     * computing the number of tiles left.
+     */
+ ++n;
+ set_rows_after_error(lf_sync, 0, cm->mi_rows, tile_data->buf_end - n,
+ 1 << cm->log2_tile_cols);
+ }
+
+ if (pbi->lpf_mt_opt && !tile_data->xd.corrupted && cm->lf.filter_level &&
+ !cm->skip_loop_filter) {
+ vp9_loopfilter_rows(lf_data, lf_sync);
+ }
+
tile_data->data_end = bit_reader_end;
return !tile_data->xd.corrupted;
}
@@ -1536,6 +1956,8 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, const uint8_t *data,
VP9_COMMON *const cm = &pbi->common;
const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
const uint8_t *bit_reader_end = NULL;
+ VP9LfSync *lf_row_sync = &pbi->lf_row_sync;
+ YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm);
const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
const int tile_cols = 1 << cm->log2_tile_cols;
const int tile_rows = 1 << cm->log2_tile_rows;
@@ -1562,12 +1984,26 @@ static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, const uint8_t *data,
}
}
+ // Initialize LPF
+ if (pbi->lpf_mt_opt && cm->lf.filter_level && !cm->skip_loop_filter) {
+ vp9_lpf_mt_init(lf_row_sync, cm, cm->lf.filter_level,
+ pbi->num_tile_workers);
+ }
+
// Reset tile decoding hook
for (n = 0; n < num_workers; ++n) {
VPxWorker *const worker = &pbi->tile_workers[n];
TileWorkerData *const tile_data =
&pbi->tile_worker_data[n + pbi->total_tiles];
winterface->sync(worker);
+
+ if (pbi->lpf_mt_opt && cm->lf.filter_level && !cm->skip_loop_filter) {
+ tile_data->lf_sync = lf_row_sync;
+ tile_data->lf_data = &tile_data->lf_sync->lfdata[n];
+ vp9_loop_filter_data_reset(tile_data->lf_data, new_fb, cm, pbi->mb.plane);
+ tile_data->lf_data->y_only = 0;
+ }
+
tile_data->xd = pbi->mb;
tile_data->xd.counts =
cm->frame_parallel_decoding_mode ? NULL : &tile_data->counts;
@@ -1724,6 +2160,22 @@ static void read_bitdepth_colorspace_sampling(VP9_COMMON *cm,
}
}
+static INLINE void flush_all_fb_on_key(VP9_COMMON *cm) {
+ if (cm->frame_type == KEY_FRAME && cm->current_video_frame > 0) {
+ RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
+ BufferPool *const pool = cm->buffer_pool;
+ int i;
+ for (i = 0; i < FRAME_BUFFERS; ++i) {
+ if (i == cm->new_fb_idx) continue;
+ frame_bufs[i].ref_count = 0;
+ if (!frame_bufs[i].released) {
+ pool->release_fb_cb(pool->cb_priv, &frame_bufs[i].raw_frame_buffer);
+ frame_bufs[i].released = 1;
+ }
+ }
+ }
+}
+
static size_t read_uncompressed_header(VP9Decoder *pbi,
struct vpx_read_bit_buffer *rb) {
VP9_COMMON *const cm = &pbi->common;
@@ -1788,6 +2240,7 @@ static size_t read_uncompressed_header(VP9Decoder *pbi,
setup_frame_size(cm, rb);
if (pbi->need_resync) {
memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map));
+ flush_all_fb_on_key(cm);
pbi->need_resync = 0;
}
} else {
@@ -1911,6 +2364,28 @@ static size_t read_uncompressed_header(VP9Decoder *pbi,
setup_segmentation_dequant(cm);
setup_tile_info(cm, rb);
+ if (pbi->row_mt == 1) {
+ int num_sbs = 1;
+
+ if (pbi->row_mt_worker_data == NULL) {
+ CHECK_MEM_ERROR(cm, pbi->row_mt_worker_data,
+ vpx_calloc(1, sizeof(*pbi->row_mt_worker_data)));
+ }
+
+ if (pbi->max_threads > 1) {
+ const int aligned_cols = mi_cols_aligned_to_sb(cm->mi_cols);
+ const int sb_cols = aligned_cols >> MI_BLOCK_SIZE_LOG2;
+ const int aligned_rows = mi_cols_aligned_to_sb(cm->mi_rows);
+ const int sb_rows = aligned_rows >> MI_BLOCK_SIZE_LOG2;
+
+ num_sbs = sb_cols * sb_rows;
+ }
+
+ if (num_sbs > pbi->row_mt_worker_data->num_sbs) {
+ vp9_dec_free_row_mt_mem(pbi->row_mt_worker_data);
+ vp9_dec_alloc_row_mt_mem(pbi->row_mt_worker_data, cm, num_sbs);
+ }
+ }
sz = vpx_rb_read_literal(rb, 16);
if (sz == 0)
@@ -1953,7 +2428,7 @@ static int read_compressed_header(VP9Decoder *pbi, const uint8_t *data,
cm->reference_mode = read_frame_reference_mode(cm, &r);
if (cm->reference_mode != SINGLE_REFERENCE)
- setup_compound_reference_mode(cm);
+ vp9_setup_compound_reference_mode(cm);
read_frame_reference_mode_probs(cm, &r);
for (j = 0; j < BLOCK_SIZE_GROUPS; j++)
@@ -2072,17 +2547,19 @@ void vp9_decode_frame(VP9Decoder *pbi, const uint8_t *data,
if (pbi->max_threads > 1 && tile_rows == 1 && tile_cols > 1) {
// Multi-threaded tile decoder
*p_data_end = decode_tiles_mt(pbi, data + first_partition_size, data_end);
- if (!xd->corrupted) {
- if (!cm->skip_loop_filter) {
- // If multiple threads are used to decode tiles, then we use those
- // threads to do parallel loopfiltering.
- vp9_loop_filter_frame_mt(new_fb, cm, pbi->mb.plane, cm->lf.filter_level,
- 0, 0, pbi->tile_workers, pbi->num_tile_workers,
- &pbi->lf_row_sync);
+ if (!pbi->lpf_mt_opt) {
+ if (!xd->corrupted) {
+ if (!cm->skip_loop_filter) {
+ // If multiple threads are used to decode tiles, then we use those
+ // threads to do parallel loopfiltering.
+ vp9_loop_filter_frame_mt(new_fb, cm, pbi->mb.plane,
+ cm->lf.filter_level, 0, 0, pbi->tile_workers,
+ pbi->num_tile_workers, &pbi->lf_row_sync);
+ }
+ } else {
+ vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
+ "Decode failed. Frame data is corrupted.");
}
- } else {
- vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME,
- "Decode failed. Frame data is corrupted.");
}
} else {
*p_data_end = decode_tiles(pbi, data + first_partition_size, data_end);
diff --git a/libvpx/vp9/decoder/vp9_decodeframe.h b/libvpx/vp9/decoder/vp9_decodeframe.h
index 44717f546..ba95e7234 100644
--- a/libvpx/vp9/decoder/vp9_decodeframe.h
+++ b/libvpx/vp9/decoder/vp9_decodeframe.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_DECODER_VP9_DECODEFRAME_H_
-#define VP9_DECODER_VP9_DECODEFRAME_H_
+#ifndef VPX_VP9_DECODER_VP9_DECODEFRAME_H_
+#define VPX_VP9_DECODER_VP9_DECODEFRAME_H_
#ifdef __cplusplus
extern "C" {
@@ -32,4 +32,4 @@ void vp9_decode_frame(struct VP9Decoder *pbi, const uint8_t *data,
} // extern "C"
#endif
-#endif // VP9_DECODER_VP9_DECODEFRAME_H_
+#endif // VPX_VP9_DECODER_VP9_DECODEFRAME_H_
diff --git a/libvpx/vp9/decoder/vp9_decodemv.h b/libvpx/vp9/decoder/vp9_decodemv.h
index b460cb8fb..11b45ace0 100644
--- a/libvpx/vp9/decoder/vp9_decodemv.h
+++ b/libvpx/vp9/decoder/vp9_decodemv.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_DECODER_VP9_DECODEMV_H_
-#define VP9_DECODER_VP9_DECODEMV_H_
+#ifndef VPX_VP9_DECODER_VP9_DECODEMV_H_
+#define VPX_VP9_DECODER_VP9_DECODEMV_H_
#include "vpx_dsp/bitreader.h"
@@ -26,4 +26,4 @@ void vp9_read_mode_info(TileWorkerData *twd, VP9Decoder *const pbi, int mi_row,
} // extern "C"
#endif
-#endif // VP9_DECODER_VP9_DECODEMV_H_
+#endif // VPX_VP9_DECODER_VP9_DECODEMV_H_
diff --git a/libvpx/vp9/decoder/vp9_decoder.c b/libvpx/vp9/decoder/vp9_decoder.c
index a913fa560..7fde0b07f 100644
--- a/libvpx/vp9/decoder/vp9_decoder.c
+++ b/libvpx/vp9/decoder/vp9_decoder.c
@@ -55,6 +55,43 @@ static void vp9_dec_setup_mi(VP9_COMMON *cm) {
cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mi_grid_base));
}
+void vp9_dec_alloc_row_mt_mem(RowMTWorkerData *row_mt_worker_data,
+ VP9_COMMON *cm, int num_sbs) {
+ int plane;
+ const size_t dqcoeff_size = (num_sbs << DQCOEFFS_PER_SB_LOG2) *
+ sizeof(*row_mt_worker_data->dqcoeff[0]);
+ row_mt_worker_data->num_sbs = num_sbs;
+ for (plane = 0; plane < 3; ++plane) {
+ CHECK_MEM_ERROR(cm, row_mt_worker_data->dqcoeff[plane],
+ vpx_memalign(16, dqcoeff_size));
+ memset(row_mt_worker_data->dqcoeff[plane], 0, dqcoeff_size);
+ CHECK_MEM_ERROR(cm, row_mt_worker_data->eob[plane],
+ vpx_calloc(num_sbs << EOBS_PER_SB_LOG2,
+ sizeof(*row_mt_worker_data->eob[plane])));
+ }
+ CHECK_MEM_ERROR(cm, row_mt_worker_data->partition,
+ vpx_calloc(num_sbs * PARTITIONS_PER_SB,
+ sizeof(*row_mt_worker_data->partition)));
+ CHECK_MEM_ERROR(cm, row_mt_worker_data->recon_map,
+ vpx_calloc(num_sbs, sizeof(*row_mt_worker_data->recon_map)));
+}
+
+void vp9_dec_free_row_mt_mem(RowMTWorkerData *row_mt_worker_data) {
+ if (row_mt_worker_data != NULL) {
+ int plane;
+ for (plane = 0; plane < 3; ++plane) {
+ vpx_free(row_mt_worker_data->eob[plane]);
+ row_mt_worker_data->eob[plane] = NULL;
+ vpx_free(row_mt_worker_data->dqcoeff[plane]);
+ row_mt_worker_data->dqcoeff[plane] = NULL;
+ }
+ vpx_free(row_mt_worker_data->partition);
+ row_mt_worker_data->partition = NULL;
+ vpx_free(row_mt_worker_data->recon_map);
+ row_mt_worker_data->recon_map = NULL;
+ }
+}
+
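
The allocation sizes follow from a 64x64 superblock: 64 * 64 = 4096 dqcoeffs per plane (1 << DQCOEFFS_PER_SB_LOG2), (64/4)^2 = 256 4x4 blocks and hence eobs per plane (1 << EOBS_PER_SB_LOG2), and a full partition quadtree from 64x64 down to 8x8 holds 1 + 4 + 16 + 64 = 85 nodes (PARTITIONS_PER_SB). As compile-time arithmetic:

    /* Worst-case (no chroma subsampling) sizing for one 64x64 superblock. */
    enum {
      SB_DQCOEFFS   = 64 * 64,             /* 4096 == 1 << 12 */
      SB_EOBS       = (64 / 4) * (64 / 4), /*  256 == 1 << 8  */
      SB_PARTITIONS = 1 + 4 + 16 + 64      /*   85            */
    };
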
static int vp9_dec_alloc_mi(VP9_COMMON *cm, int mi_size) {
cm->mip = vpx_calloc(mi_size, sizeof(*cm->mip));
if (!cm->mip) return 1;
@@ -69,6 +106,7 @@ static void vp9_dec_free_mi(VP9_COMMON *cm) {
cm->mip = NULL;
vpx_free(cm->mi_grid_base);
cm->mi_grid_base = NULL;
+ cm->mi_alloc_size = 0;
}
VP9Decoder *vp9_decoder_create(BufferPool *const pool) {
@@ -139,6 +177,10 @@ void vp9_decoder_remove(VP9Decoder *pbi) {
vp9_loop_filter_dealloc(&pbi->lf_row_sync);
}
+ if (pbi->row_mt == 1) {
+ vp9_dec_free_row_mt_mem(pbi->row_mt_worker_data);
+ vpx_free(pbi->row_mt_worker_data);
+ }
vp9_remove_common(&pbi->common);
vpx_free(pbi);
}
@@ -260,6 +302,44 @@ static void swap_frame_buffers(VP9Decoder *pbi) {
cm->frame_refs[ref_index].idx = -1;
}
+static void release_fb_on_decoder_exit(VP9Decoder *pbi) {
+ const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
+ VP9_COMMON *volatile const cm = &pbi->common;
+ BufferPool *volatile const pool = cm->buffer_pool;
+ RefCntBuffer *volatile const frame_bufs = cm->buffer_pool->frame_bufs;
+ int i;
+
+ // Synchronize all threads immediately as a subsequent decode call may
+ // cause a resize invalidating some allocations.
+ winterface->sync(&pbi->lf_worker);
+ for (i = 0; i < pbi->num_tile_workers; ++i) {
+ winterface->sync(&pbi->tile_workers[i]);
+ }
+
+  // Release all the reference buffers if a worker thread is holding them.
+ if (pbi->hold_ref_buf == 1) {
+ int ref_index = 0, mask;
+ for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) {
+ const int old_idx = cm->ref_frame_map[ref_index];
+      // The current thread releases its hold on the reference frame.
+ decrease_ref_count(old_idx, frame_bufs, pool);
+
+ // Release the reference frame in reference map.
+ if (mask & 1) {
+ decrease_ref_count(old_idx, frame_bufs, pool);
+ }
+ ++ref_index;
+ }
+
+  // The current thread releases its hold on the remaining reference frames.
+ for (; ref_index < REF_FRAMES && !cm->show_existing_frame; ++ref_index) {
+ const int old_idx = cm->ref_frame_map[ref_index];
+ decrease_ref_count(old_idx, frame_bufs, pool);
+ }
+ pbi->hold_ref_buf = 0;
+ }
+}
+
int vp9_receive_compressed_data(VP9Decoder *pbi, size_t size,
const uint8_t **psource) {
VP9_COMMON *volatile const cm = &pbi->common;
@@ -297,6 +377,9 @@ int vp9_receive_compressed_data(VP9Decoder *pbi, size_t size,
// Find a free frame buffer. Return error if can not find any.
cm->new_fb_idx = get_free_fb(cm);
if (cm->new_fb_idx == INVALID_IDX) {
+ pbi->ready_for_new_data = 1;
+ release_fb_on_decoder_exit(pbi);
+ vpx_clear_system_state();
vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
"Unable to find free frame buffer");
return cm->error.error_code;
@@ -309,44 +392,11 @@ int vp9_receive_compressed_data(VP9Decoder *pbi, size_t size,
pbi->cur_buf = &frame_bufs[cm->new_fb_idx];
if (setjmp(cm->error.jmp)) {
- const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
- int i;
-
cm->error.setjmp = 0;
pbi->ready_for_new_data = 1;
-
- // Synchronize all threads immediately as a subsequent decode call may
- // cause a resize invalidating some allocations.
- winterface->sync(&pbi->lf_worker);
- for (i = 0; i < pbi->num_tile_workers; ++i) {
- winterface->sync(&pbi->tile_workers[i]);
- }
-
- // Release all the reference buffers if worker thread is holding them.
- if (pbi->hold_ref_buf == 1) {
- int ref_index = 0, mask;
- for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) {
- const int old_idx = cm->ref_frame_map[ref_index];
- // Current thread releases the holding of reference frame.
- decrease_ref_count(old_idx, frame_bufs, pool);
-
- // Release the reference frame in reference map.
- if (mask & 1) {
- decrease_ref_count(old_idx, frame_bufs, pool);
- }
- ++ref_index;
- }
-
- // Current thread releases the holding of reference frame.
- for (; ref_index < REF_FRAMES && !cm->show_existing_frame; ++ref_index) {
- const int old_idx = cm->ref_frame_map[ref_index];
- decrease_ref_count(old_idx, frame_bufs, pool);
- }
- pbi->hold_ref_buf = 0;
- }
+ release_fb_on_decoder_exit(pbi);
// Release current frame.
decrease_ref_count(cm->new_fb_idx, frame_bufs, pool);
-
vpx_clear_system_state();
return -1;
}
@@ -364,6 +414,8 @@ int vp9_receive_compressed_data(VP9Decoder *pbi, size_t size,
if (cm->seg.enabled) vp9_swap_current_and_last_seg_map(cm);
}
+ if (cm->show_frame) cm->cur_show_frame_fb_idx = cm->new_fb_idx;
+
// Update progress in frame parallel decode.
cm->last_width = cm->width;
cm->last_height = cm->height;
@@ -394,7 +446,7 @@ int vp9_get_raw_frame(VP9Decoder *pbi, YV12_BUFFER_CONFIG *sd,
#if CONFIG_VP9_POSTPROC
if (!cm->show_existing_frame) {
- ret = vp9_post_proc_frame(cm, sd, flags);
+ ret = vp9_post_proc_frame(cm, sd, flags, cm->width);
} else {
*sd = *cm->frame_to_show;
ret = 0;
diff --git a/libvpx/vp9/decoder/vp9_decoder.h b/libvpx/vp9/decoder/vp9_decoder.h
index 4b26c314d..9a582fffb 100644
--- a/libvpx/vp9/decoder/vp9_decoder.h
+++ b/libvpx/vp9/decoder/vp9_decoder.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_DECODER_VP9_DECODER_H_
-#define VP9_DECODER_VP9_DECODER_H_
+#ifndef VPX_VP9_DECODER_VP9_DECODER_H_
+#define VPX_VP9_DECODER_VP9_DECODER_H_
#include "./vpx_config.h"
@@ -26,6 +26,10 @@
extern "C" {
#endif
+#define EOBS_PER_SB_LOG2 8
+#define DQCOEFFS_PER_SB_LOG2 12
+#define PARTITIONS_PER_SB 85
+
typedef struct TileBuffer {
const uint8_t *data;
size_t size;
@@ -37,12 +41,22 @@ typedef struct TileWorkerData {
int buf_start, buf_end; // pbi->tile_buffers to decode, inclusive
vpx_reader bit_reader;
FRAME_COUNTS counts;
+ LFWorkerData *lf_data;
+ VP9LfSync *lf_sync;
DECLARE_ALIGNED(16, MACROBLOCKD, xd);
/* dqcoeff are shared by all the planes. So planes must be decoded serially */
DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]);
struct vpx_internal_error_info error_info;
} TileWorkerData;
+typedef struct RowMTWorkerData {
+ int num_sbs;
+ int *eob[MAX_MB_PLANE];
+ PARTITION_TYPE *partition;
+ tran_low_t *dqcoeff[MAX_MB_PLANE];
+ int8_t *recon_map;
+} RowMTWorkerData;
+
typedef struct VP9Decoder {
DECLARE_ALIGNED(16, MACROBLOCKD, mb);
@@ -72,10 +86,14 @@ typedef struct VP9Decoder {
int inv_tile_order;
int need_resync; // wait for key/intra-only frame.
int hold_ref_buf; // hold the reference buffer.
+
+ int row_mt;
+ int lpf_mt_opt;
+ RowMTWorkerData *row_mt_worker_data;
} VP9Decoder;
int vp9_receive_compressed_data(struct VP9Decoder *pbi, size_t size,
- const uint8_t **dest);
+ const uint8_t **psource);
int vp9_get_raw_frame(struct VP9Decoder *pbi, YV12_BUFFER_CONFIG *sd,
vp9_ppflags_t *flags);
@@ -109,6 +127,10 @@ struct VP9Decoder *vp9_decoder_create(BufferPool *const pool);
void vp9_decoder_remove(struct VP9Decoder *pbi);
+void vp9_dec_alloc_row_mt_mem(RowMTWorkerData *row_mt_worker_data,
+ VP9_COMMON *cm, int num_sbs);
+void vp9_dec_free_row_mt_mem(RowMTWorkerData *row_mt_worker_data);
+
static INLINE void decrease_ref_count(int idx, RefCntBuffer *const frame_bufs,
BufferPool *const pool) {
if (idx >= 0 && frame_bufs[idx].ref_count > 0) {
@@ -129,4 +151,4 @@ static INLINE void decrease_ref_count(int idx, RefCntBuffer *const frame_bufs,
} // extern "C"
#endif
-#endif // VP9_DECODER_VP9_DECODER_H_
+#endif // VPX_VP9_DECODER_VP9_DECODER_H_
diff --git a/libvpx/vp9/decoder/vp9_detokenize.h b/libvpx/vp9/decoder/vp9_detokenize.h
index 7b0d87601..a32052fff 100644
--- a/libvpx/vp9/decoder/vp9_detokenize.h
+++ b/libvpx/vp9/decoder/vp9_detokenize.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_DECODER_VP9_DETOKENIZE_H_
-#define VP9_DECODER_VP9_DETOKENIZE_H_
+#ifndef VPX_VP9_DECODER_VP9_DETOKENIZE_H_
+#define VPX_VP9_DECODER_VP9_DETOKENIZE_H_
#include "vpx_dsp/bitreader.h"
#include "vp9/decoder/vp9_decoder.h"
@@ -27,4 +27,4 @@ int vp9_decode_block_tokens(TileWorkerData *twd, int plane,
} // extern "C"
#endif
-#endif // VP9_DECODER_VP9_DETOKENIZE_H_
+#endif // VPX_VP9_DECODER_VP9_DETOKENIZE_H_
diff --git a/libvpx/vp9/decoder/vp9_dsubexp.h b/libvpx/vp9/decoder/vp9_dsubexp.h
index 5a8ec8300..b0c775073 100644
--- a/libvpx/vp9/decoder/vp9_dsubexp.h
+++ b/libvpx/vp9/decoder/vp9_dsubexp.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_DECODER_VP9_DSUBEXP_H_
-#define VP9_DECODER_VP9_DSUBEXP_H_
+#ifndef VPX_VP9_DECODER_VP9_DSUBEXP_H_
+#define VPX_VP9_DECODER_VP9_DSUBEXP_H_
#include "vpx_dsp/bitreader.h"
@@ -23,4 +23,4 @@ void vp9_diff_update_prob(vpx_reader *r, vpx_prob *p);
} // extern "C"
#endif
-#endif // VP9_DECODER_VP9_DSUBEXP_H_
+#endif // VPX_VP9_DECODER_VP9_DSUBEXP_H_
diff --git a/libvpx/vp9/encoder/arm/neon/vp9_dct_neon.c b/libvpx/vp9/encoder/arm/neon/vp9_dct_neon.c
index 513718e7c..f8dd0a6f7 100644
--- a/libvpx/vp9/encoder/arm/neon/vp9_dct_neon.c
+++ b/libvpx/vp9/encoder/arm/neon/vp9_dct_neon.c
@@ -23,13 +23,13 @@ void vp9_fdct8x8_quant_neon(const int16_t *input, int stride,
int skip_block, const int16_t *round_ptr,
const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
- uint16_t *eob_ptr, const int16_t *scan_ptr,
- const int16_t *iscan_ptr) {
+ uint16_t *eob_ptr, const int16_t *scan,
+ const int16_t *iscan) {
tran_low_t temp_buffer[64];
(void)coeff_ptr;
vpx_fdct8x8_neon(input, temp_buffer, stride);
vp9_quantize_fp_neon(temp_buffer, n_coeffs, skip_block, round_ptr, quant_ptr,
- qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan_ptr,
- iscan_ptr);
+ qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan,
+ iscan);
}
diff --git a/libvpx/vp9/encoder/arm/neon/vp9_quantize_neon.c b/libvpx/vp9/encoder/arm/neon/vp9_quantize_neon.c
index 97a09bdff..8b62b450c 100644
--- a/libvpx/vp9/encoder/arm/neon/vp9_quantize_neon.c
+++ b/libvpx/vp9/encoder/arm/neon/vp9_quantize_neon.c
@@ -97,6 +97,9 @@ void vp9_quantize_fp_neon(const tran_low_t *coeff_ptr, intptr_t count,
store_s16q_to_tran_low(qcoeff_ptr + i, v_qcoeff);
store_s16q_to_tran_low(dqcoeff_ptr + i, v_dqcoeff);
}
+#ifdef __aarch64__
+ *eob_ptr = vmaxvq_s16(v_eobmax_76543210);
+#else
{
const int16x4_t v_eobmax_3210 = vmax_s16(vget_low_s16(v_eobmax_76543210),
vget_high_s16(v_eobmax_76543210));
@@ -111,6 +114,7 @@ void vp9_quantize_fp_neon(const tran_low_t *coeff_ptr, intptr_t count,
*eob_ptr = (uint16_t)vget_lane_s16(v_eobmax_final, 0);
}
+#endif // __aarch64__
}
static INLINE int32x4_t extract_sign_bit(int32x4_t a) {
@@ -122,7 +126,7 @@ void vp9_quantize_fp_32x32_neon(const tran_low_t *coeff_ptr, intptr_t count,
const int16_t *quant_ptr,
tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
const int16_t *dequant_ptr, uint16_t *eob_ptr,
- const int16_t *scan, const int16_t *iscan_ptr) {
+ const int16_t *scan, const int16_t *iscan) {
const int16x8_t one = vdupq_n_s16(1);
const int16x8_t neg_one = vdupq_n_s16(-1);
@@ -134,8 +138,8 @@ void vp9_quantize_fp_32x32_neon(const tran_low_t *coeff_ptr, intptr_t count,
const int16x8_t dequant_thresh = vshrq_n_s16(vld1q_s16(dequant_ptr), 2);
// Process dc and the first seven ac coeffs.
- const uint16x8_t iscan =
- vreinterpretq_u16_s16(vaddq_s16(vld1q_s16(iscan_ptr), one));
+ const uint16x8_t v_iscan =
+ vreinterpretq_u16_s16(vaddq_s16(vld1q_s16(iscan), one));
const int16x8_t coeff = load_tran_low_to_s16q(coeff_ptr);
const int16x8_t coeff_sign = vshrq_n_s16(coeff, 15);
const int16x8_t coeff_abs = vabsq_s16(coeff);
@@ -169,12 +173,12 @@ void vp9_quantize_fp_32x32_neon(const tran_low_t *coeff_ptr, intptr_t count,
dqcoeff = vcombine_s16(vshrn_n_s32(dqcoeff_0, 1), vshrn_n_s32(dqcoeff_1, 1));
- eob_max = vandq_u16(vtstq_s16(qcoeff, neg_one), iscan);
+ eob_max = vandq_u16(vtstq_s16(qcoeff, neg_one), v_iscan);
store_s16q_to_tran_low(qcoeff_ptr, qcoeff);
store_s16q_to_tran_low(dqcoeff_ptr, dqcoeff);
- iscan_ptr += 8;
+ iscan += 8;
coeff_ptr += 8;
qcoeff_ptr += 8;
dqcoeff_ptr += 8;
@@ -188,8 +192,8 @@ void vp9_quantize_fp_32x32_neon(const tran_low_t *coeff_ptr, intptr_t count,
// Process the rest of the ac coeffs.
for (i = 8; i < 32 * 32; i += 8) {
- const uint16x8_t iscan =
- vreinterpretq_u16_s16(vaddq_s16(vld1q_s16(iscan_ptr), one));
+ const uint16x8_t v_iscan =
+ vreinterpretq_u16_s16(vaddq_s16(vld1q_s16(iscan), one));
const int16x8_t coeff = load_tran_low_to_s16q(coeff_ptr);
const int16x8_t coeff_sign = vshrq_n_s16(coeff, 15);
const int16x8_t coeff_abs = vabsq_s16(coeff);
@@ -215,17 +219,20 @@ void vp9_quantize_fp_32x32_neon(const tran_low_t *coeff_ptr, intptr_t count,
vcombine_s16(vshrn_n_s32(dqcoeff_0, 1), vshrn_n_s32(dqcoeff_1, 1));
eob_max =
- vmaxq_u16(eob_max, vandq_u16(vtstq_s16(qcoeff, neg_one), iscan));
+ vmaxq_u16(eob_max, vandq_u16(vtstq_s16(qcoeff, neg_one), v_iscan));
store_s16q_to_tran_low(qcoeff_ptr, qcoeff);
store_s16q_to_tran_low(dqcoeff_ptr, dqcoeff);
- iscan_ptr += 8;
+ iscan += 8;
coeff_ptr += 8;
qcoeff_ptr += 8;
dqcoeff_ptr += 8;
}
+#ifdef __aarch64__
+ *eob_ptr = vmaxvq_u16(eob_max);
+#else
{
const uint16x4_t eob_max_0 =
vmax_u16(vget_low_u16(eob_max), vget_high_u16(eob_max));
@@ -233,5 +240,6 @@ void vp9_quantize_fp_32x32_neon(const tran_low_t *coeff_ptr, intptr_t count,
const uint16x4_t eob_max_2 = vpmax_u16(eob_max_1, eob_max_1);
vst1_lane_u16(eob_ptr, eob_max_2, 0);
}
+#endif // __aarch64__
}
}
diff --git a/libvpx/vp9/encoder/mips/msa/vp9_fdct_msa.h b/libvpx/vp9/encoder/mips/msa/vp9_fdct_msa.h
index 794bec70b..fa1af2fc5 100644
--- a/libvpx/vp9/encoder/mips/msa/vp9_fdct_msa.h
+++ b/libvpx/vp9/encoder/mips/msa/vp9_fdct_msa.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_ENCODER_MIPS_MSA_VP9_FDCT_MSA_H_
-#define VP9_ENCODER_MIPS_MSA_VP9_FDCT_MSA_H_
+#ifndef VPX_VP9_ENCODER_MIPS_MSA_VP9_FDCT_MSA_H_
+#define VPX_VP9_ENCODER_MIPS_MSA_VP9_FDCT_MSA_H_
#include "vpx_dsp/mips/fwd_txfm_msa.h"
#include "vpx_dsp/mips/txfm_macros_msa.h"
@@ -113,4 +113,4 @@
PCKEV_H4_SH(in0_r_m, in0_r_m, in1_r_m, in1_r_m, s2_m, s2_m, s3_m, s3_m, \
out0, out1, out2, out3); \
}
-#endif /* VP9_ENCODER_MIPS_MSA_VP9_FDCT_MSA_H_ */
+#endif // VPX_VP9_ENCODER_MIPS_MSA_VP9_FDCT_MSA_H_
diff --git a/libvpx/vp9/encoder/ppc/vp9_quantize_vsx.c b/libvpx/vp9/encoder/ppc/vp9_quantize_vsx.c
new file mode 100644
index 000000000..4f88b8fff
--- /dev/null
+++ b/libvpx/vp9/encoder/ppc/vp9_quantize_vsx.c
@@ -0,0 +1,292 @@
+/*
+ * Copyright (c) 2018 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vpx_config.h"
+
+#include "./vp9_rtcd.h"
+#include "vpx_dsp/ppc/types_vsx.h"
+
+// Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit
+// integers, and return the high 16 bits of the intermediate integers.
+// (a * b) >> 16
+// Note: Because this is done in 2 operations, a and b cannot both be INT16_MIN
+static INLINE int16x8_t vec_mulhi(int16x8_t a, int16x8_t b) {
+ // madds does ((A * B) >> 15) + C, we need >> 16, so we perform an extra right
+ // shift.
+ return vec_sra(vec_madds(a, b, vec_zeros_s16), vec_ones_u16);
+}
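
vec_madds computes saturate(((a * b) >> 15) + c); with c = 0, one extra arithmetic shift gives (a * b) >> 16, and the saturation in the intermediate step is exactly why a == b == INT16_MIN (product 2^30) must be excluded. A scalar reference:

    #include <stdint.h>

    /* ((a*b) >> 15) >> 1 == (a*b) >> 16 for all int16 pairs except
     * a == b == INT16_MIN, where the >> 15 step saturates at 32767. */
    static int16_t mulhi_ref(int16_t a, int16_t b) {
      return (int16_t)(((int32_t)a * b) >> 16);
    }
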
+
+// Negate 16-bit integers in a when the corresponding signed 16-bit
+// integer in b is negative.
+static INLINE int16x8_t vec_sign(int16x8_t a, int16x8_t b) {
+ const int16x8_t mask = vec_sra(b, vec_shift_sign_s16);
+ return vec_xor(vec_add(a, mask), mask);
+}
+
+// Reduce the packed 16-bit integers in a to their maximum value and
+// broadcast that maximum to every element of the returned vector.
+static INLINE int16x8_t vec_max_across(int16x8_t a) {
+ a = vec_max(a, vec_perm(a, a, vec_perm64));
+ a = vec_max(a, vec_perm(a, a, vec_perm32));
+ return vec_max(a, vec_perm(a, a, vec_perm16));
+}
+
+void vp9_quantize_fp_vsx(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ int skip_block, const int16_t *round_ptr,
+ const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
+ uint16_t *eob_ptr, const int16_t *scan,
+ const int16_t *iscan) {
+ int16x8_t qcoeff0, qcoeff1, dqcoeff0, dqcoeff1, eob;
+ bool16x8_t zero_coeff0, zero_coeff1;
+
+ int16x8_t round = vec_vsx_ld(0, round_ptr);
+ int16x8_t quant = vec_vsx_ld(0, quant_ptr);
+ int16x8_t dequant = vec_vsx_ld(0, dequant_ptr);
+ int16x8_t coeff0 = vec_vsx_ld(0, coeff_ptr);
+ int16x8_t coeff1 = vec_vsx_ld(16, coeff_ptr);
+ int16x8_t scan0 = vec_vsx_ld(0, iscan);
+ int16x8_t scan1 = vec_vsx_ld(16, iscan);
+
+ (void)scan;
+ (void)skip_block;
+ assert(!skip_block);
+
+ // First set of 8 coeff starts with DC + 7 AC
+ qcoeff0 = vec_mulhi(vec_vaddshs(vec_abs(coeff0), round), quant);
+ zero_coeff0 = vec_cmpeq(qcoeff0, vec_zeros_s16);
+ qcoeff0 = vec_sign(qcoeff0, coeff0);
+ vec_vsx_st(qcoeff0, 0, qcoeff_ptr);
+
+ dqcoeff0 = vec_mladd(qcoeff0, dequant, vec_zeros_s16);
+ vec_vsx_st(dqcoeff0, 0, dqcoeff_ptr);
+
+ // Remove DC value from round and quant
+ round = vec_splat(round, 1);
+ quant = vec_splat(quant, 1);
+
+ // Remove DC value from dequant
+ dequant = vec_splat(dequant, 1);
+
+  // Second set of 8 coeffs is all AC.
+ qcoeff1 = vec_mulhi(vec_vaddshs(vec_abs(coeff1), round), quant);
+ zero_coeff1 = vec_cmpeq(qcoeff1, vec_zeros_s16);
+ qcoeff1 = vec_sign(qcoeff1, coeff1);
+ vec_vsx_st(qcoeff1, 16, qcoeff_ptr);
+
+ dqcoeff1 = vec_mladd(qcoeff1, dequant, vec_zeros_s16);
+ vec_vsx_st(dqcoeff1, 16, dqcoeff_ptr);
+
+ eob = vec_max(vec_or(scan0, zero_coeff0), vec_or(scan1, zero_coeff1));
+
+ // We quantize 16 coeff up front (enough for a 4x4) and process 24 coeff per
+ // loop iteration.
+ // for 8x8: 16 + 2 x 24 = 64
+ // for 16x16: 16 + 10 x 24 = 256
+ if (n_coeffs > 16) {
+ int16x8_t coeff2, qcoeff2, dqcoeff2, eob2, scan2;
+ bool16x8_t zero_coeff2;
+
+ int index = 16;
+ int off0 = 32;
+ int off1 = 48;
+ int off2 = 64;
+
+ do {
+ coeff0 = vec_vsx_ld(off0, coeff_ptr);
+ coeff1 = vec_vsx_ld(off1, coeff_ptr);
+ coeff2 = vec_vsx_ld(off2, coeff_ptr);
+ scan0 = vec_vsx_ld(off0, iscan);
+ scan1 = vec_vsx_ld(off1, iscan);
+ scan2 = vec_vsx_ld(off2, iscan);
+
+ qcoeff0 = vec_mulhi(vec_vaddshs(vec_abs(coeff0), round), quant);
+ zero_coeff0 = vec_cmpeq(qcoeff0, vec_zeros_s16);
+ qcoeff0 = vec_sign(qcoeff0, coeff0);
+ vec_vsx_st(qcoeff0, off0, qcoeff_ptr);
+ dqcoeff0 = vec_mladd(qcoeff0, dequant, vec_zeros_s16);
+ vec_vsx_st(dqcoeff0, off0, dqcoeff_ptr);
+
+ qcoeff1 = vec_mulhi(vec_vaddshs(vec_abs(coeff1), round), quant);
+ zero_coeff1 = vec_cmpeq(qcoeff1, vec_zeros_s16);
+ qcoeff1 = vec_sign(qcoeff1, coeff1);
+ vec_vsx_st(qcoeff1, off1, qcoeff_ptr);
+ dqcoeff1 = vec_mladd(qcoeff1, dequant, vec_zeros_s16);
+ vec_vsx_st(dqcoeff1, off1, dqcoeff_ptr);
+
+ qcoeff2 = vec_mulhi(vec_vaddshs(vec_abs(coeff2), round), quant);
+ zero_coeff2 = vec_cmpeq(qcoeff2, vec_zeros_s16);
+ qcoeff2 = vec_sign(qcoeff2, coeff2);
+ vec_vsx_st(qcoeff2, off2, qcoeff_ptr);
+ dqcoeff2 = vec_mladd(qcoeff2, dequant, vec_zeros_s16);
+ vec_vsx_st(dqcoeff2, off2, dqcoeff_ptr);
+
+ eob = vec_max(eob, vec_or(scan0, zero_coeff0));
+ eob2 = vec_max(vec_or(scan1, zero_coeff1), vec_or(scan2, zero_coeff2));
+ eob = vec_max(eob, eob2);
+
+ index += 24;
+ off0 += 48;
+ off1 += 48;
+ off2 += 48;
+ } while (index < n_coeffs);
+ }
+
+ eob = vec_max_across(eob);
+ *eob_ptr = eob[0] + 1;
+}
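
The eob search above is branch-free: lanes whose qcoeff is zero were OR'd with an all-ones compare mask and so hold -1, the running vec_max keeps the largest iscan index of any nonzero coefficient, and the final +1 turns that index into a count (0 when everything quantized away). A scalar model:

    #include <stdint.h>

    static uint16_t eob_ref(const int16_t *qcoeff, const int16_t *iscan,
                            int n) {
      int i;
      int16_t eob = -1;
      for (i = 0; i < n; i++) {
        const int16_t v = (qcoeff[i] == 0) ? -1 : iscan[i];
        if (v > eob) eob = v;
      }
      return (uint16_t)(eob + 1);
    }
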
+
+// Sets each 32-bit integer to 1 when the corresponding value in a is
+// negative, and to 0 otherwise.
+static INLINE int32x4_t vec_is_neg(int32x4_t a) {
+ return vec_sr(a, vec_shift_sign_s32);
+}
+
+// Dequantization function used for 32x32 blocks. Quantized coeffs of 32x32
+// blocks can be twice as large as for other block sizes, so a 16-bit
+// vec_mladd would overflow; the multiply is widened to 32 bits instead.
+static INLINE int16x8_t dequantize_coeff_32(int16x8_t qcoeff,
+ int16x8_t dequant) {
+ int32x4_t dqcoeffe = vec_mule(qcoeff, dequant);
+ int32x4_t dqcoeffo = vec_mulo(qcoeff, dequant);
+  // Add 1 to negative values so the arithmetic shift rounds towards zero,
+  // matching the division in the C reference code.
+ dqcoeffe = vec_add(dqcoeffe, vec_is_neg(dqcoeffe));
+ dqcoeffo = vec_add(dqcoeffo, vec_is_neg(dqcoeffo));
+ dqcoeffe = vec_sra(dqcoeffe, vec_ones_u32);
+ dqcoeffo = vec_sra(dqcoeffo, vec_ones_u32);
+ return (int16x8_t)vec_perm(dqcoeffe, dqcoeffo, vec_perm_odd_even_pack);
+}
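
The two vec_add(..., vec_is_neg(...)) lines bias negative products by +1 before the arithmetic shift, so the halving truncates towards zero exactly like the `/ 2` of the C reference quantizer. A scalar reference:

    #include <stdint.h>

    /* (x + (x < 0)) >> 1 == x / 2 (truncation); a bare x >> 1 would floor,
     * e.g. -3 >> 1 == -2 while -3 / 2 == -1. */
    static int32_t halve_trunc(int32_t x) {
      return (x + (x < 0)) >> 1;
    }
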
+
+void vp9_quantize_fp_32x32_vsx(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ int skip_block, const int16_t *round_ptr,
+ const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr,
+ const int16_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan) {
+ // In stage 1, we quantize 16 coeffs (DC + 15 AC)
+ // In stage 2, we loop 42 times and quantize 24 coeffs per iteration
+ // (32 * 32 - 16) / 24 = 42
+ int num_itr = 42;
+ // Offsets are in bytes, 16 coeffs = 32 bytes
+ int off0 = 32;
+ int off1 = 48;
+ int off2 = 64;
+
+ int16x8_t qcoeff0, qcoeff1, dqcoeff0, dqcoeff1, eob;
+ bool16x8_t mask0, mask1, zero_coeff0, zero_coeff1;
+
+ int16x8_t round = vec_vsx_ld(0, round_ptr);
+ int16x8_t quant = vec_vsx_ld(0, quant_ptr);
+ int16x8_t dequant = vec_vsx_ld(0, dequant_ptr);
+ int16x8_t coeff0 = vec_vsx_ld(0, coeff_ptr);
+ int16x8_t coeff1 = vec_vsx_ld(16, coeff_ptr);
+ int16x8_t scan0 = vec_vsx_ld(0, iscan);
+ int16x8_t scan1 = vec_vsx_ld(16, iscan);
+ int16x8_t thres = vec_sra(dequant, vec_splats((uint16_t)2));
+ int16x8_t abs_coeff0 = vec_abs(coeff0);
+ int16x8_t abs_coeff1 = vec_abs(coeff1);
+
+ (void)scan;
+ (void)skip_block;
+ (void)n_coeffs;
+ assert(!skip_block);
+
+ mask0 = vec_cmpge(abs_coeff0, thres);
+ round = vec_sra(vec_add(round, vec_ones_s16), vec_ones_u16);
+ // First set of 8 coeff starts with DC + 7 AC
+ qcoeff0 = vec_madds(vec_vaddshs(abs_coeff0, round), quant, vec_zeros_s16);
+ qcoeff0 = vec_and(qcoeff0, mask0);
+ zero_coeff0 = vec_cmpeq(qcoeff0, vec_zeros_s16);
+ qcoeff0 = vec_sign(qcoeff0, coeff0);
+ vec_vsx_st(qcoeff0, 0, qcoeff_ptr);
+
+ dqcoeff0 = dequantize_coeff_32(qcoeff0, dequant);
+ vec_vsx_st(dqcoeff0, 0, dqcoeff_ptr);
+
+ // Remove DC value from thres, round, quant and dequant
+ thres = vec_splat(thres, 1);
+ round = vec_splat(round, 1);
+ quant = vec_splat(quant, 1);
+ dequant = vec_splat(dequant, 1);
+
+ mask1 = vec_cmpge(abs_coeff1, thres);
+
+  // Second set of 8 coeffs is all AC.
+ qcoeff1 =
+ vec_madds(vec_vaddshs(vec_abs(coeff1), round), quant, vec_zeros_s16);
+ qcoeff1 = vec_and(qcoeff1, mask1);
+ zero_coeff1 = vec_cmpeq(qcoeff1, vec_zeros_s16);
+ qcoeff1 = vec_sign(qcoeff1, coeff1);
+ vec_vsx_st(qcoeff1, 16, qcoeff_ptr);
+
+ dqcoeff1 = dequantize_coeff_32(qcoeff1, dequant);
+ vec_vsx_st(dqcoeff1, 16, dqcoeff_ptr);
+
+ eob = vec_max(vec_or(scan0, zero_coeff0), vec_or(scan1, zero_coeff1));
+
+ do {
+ int16x8_t coeff2, abs_coeff2, qcoeff2, dqcoeff2, eob2, scan2;
+ bool16x8_t zero_coeff2, mask2;
+ coeff0 = vec_vsx_ld(off0, coeff_ptr);
+ coeff1 = vec_vsx_ld(off1, coeff_ptr);
+ coeff2 = vec_vsx_ld(off2, coeff_ptr);
+ scan0 = vec_vsx_ld(off0, iscan);
+ scan1 = vec_vsx_ld(off1, iscan);
+ scan2 = vec_vsx_ld(off2, iscan);
+
+ abs_coeff0 = vec_abs(coeff0);
+ abs_coeff1 = vec_abs(coeff1);
+ abs_coeff2 = vec_abs(coeff2);
+
+ qcoeff0 = vec_madds(vec_vaddshs(abs_coeff0, round), quant, vec_zeros_s16);
+ qcoeff1 = vec_madds(vec_vaddshs(abs_coeff1, round), quant, vec_zeros_s16);
+ qcoeff2 = vec_madds(vec_vaddshs(abs_coeff2, round), quant, vec_zeros_s16);
+
+ mask0 = vec_cmpge(abs_coeff0, thres);
+ mask1 = vec_cmpge(abs_coeff1, thres);
+ mask2 = vec_cmpge(abs_coeff2, thres);
+
+ qcoeff0 = vec_and(qcoeff0, mask0);
+ qcoeff1 = vec_and(qcoeff1, mask1);
+ qcoeff2 = vec_and(qcoeff2, mask2);
+
+ zero_coeff0 = vec_cmpeq(qcoeff0, vec_zeros_s16);
+ zero_coeff1 = vec_cmpeq(qcoeff1, vec_zeros_s16);
+ zero_coeff2 = vec_cmpeq(qcoeff2, vec_zeros_s16);
+
+ qcoeff0 = vec_sign(qcoeff0, coeff0);
+ qcoeff1 = vec_sign(qcoeff1, coeff1);
+ qcoeff2 = vec_sign(qcoeff2, coeff2);
+
+ vec_vsx_st(qcoeff0, off0, qcoeff_ptr);
+ vec_vsx_st(qcoeff1, off1, qcoeff_ptr);
+ vec_vsx_st(qcoeff2, off2, qcoeff_ptr);
+
+ dqcoeff0 = dequantize_coeff_32(qcoeff0, dequant);
+ dqcoeff1 = dequantize_coeff_32(qcoeff1, dequant);
+ dqcoeff2 = dequantize_coeff_32(qcoeff2, dequant);
+
+ vec_vsx_st(dqcoeff0, off0, dqcoeff_ptr);
+ vec_vsx_st(dqcoeff1, off1, dqcoeff_ptr);
+ vec_vsx_st(dqcoeff2, off2, dqcoeff_ptr);
+
+ eob = vec_max(eob, vec_or(scan0, zero_coeff0));
+ eob2 = vec_max(vec_or(scan1, zero_coeff1), vec_or(scan2, zero_coeff2));
+ eob = vec_max(eob, eob2);
+
+ off0 += 48;
+ off1 += 48;
+ off2 += 48;
+ num_itr--;
+ } while (num_itr != 0);
+
+ eob = vec_max_across(eob);
+ *eob_ptr = eob[0] + 1;
+}
diff --git a/libvpx/vp9/encoder/vp9_alt_ref_aq.h b/libvpx/vp9/encoder/vp9_alt_ref_aq.h
index e508cb44a..22a657e03 100644
--- a/libvpx/vp9/encoder/vp9_alt_ref_aq.h
+++ b/libvpx/vp9/encoder/vp9_alt_ref_aq.h
@@ -15,8 +15,8 @@
* for altref frames. Go to alt_ref_aq_private.h for implmentation details.
*/
-#ifndef VP9_ENCODER_VP9_ALT_REF_AQ_H_
-#define VP9_ENCODER_VP9_ALT_REF_AQ_H_
+#ifndef VPX_VP9_ENCODER_VP9_ALT_REF_AQ_H_
+#define VPX_VP9_ENCODER_VP9_ALT_REF_AQ_H_
#include "vpx/vpx_integer.h"
@@ -124,4 +124,4 @@ void vp9_alt_ref_aq_destroy(struct ALT_REF_AQ *const self);
} // extern "C"
#endif
-#endif // VP9_ENCODER_VP9_ALT_REF_AQ_H_
+#endif // VPX_VP9_ENCODER_VP9_ALT_REF_AQ_H_
diff --git a/libvpx/vp9/encoder/vp9_aq_360.h b/libvpx/vp9/encoder/vp9_aq_360.h
index b1b56561d..749d3c198 100644
--- a/libvpx/vp9/encoder/vp9_aq_360.h
+++ b/libvpx/vp9/encoder/vp9_aq_360.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_ENCODER_VP9_AQ_360_H_
-#define VP9_ENCODER_VP9_AQ_360_H_
+#ifndef VPX_VP9_ENCODER_VP9_AQ_360_H_
+#define VPX_VP9_ENCODER_VP9_AQ_360_H_
#include "vp9/encoder/vp9_encoder.h"
@@ -24,4 +24,4 @@ void vp9_360aq_frame_setup(VP9_COMP *cpi);
} // extern "C"
#endif
-#endif // VP9_ENCODER_VP9_AQ_VARIANCE_H_
+#endif // VPX_VP9_ENCODER_VP9_AQ_360_H_
diff --git a/libvpx/vp9/encoder/vp9_aq_complexity.h b/libvpx/vp9/encoder/vp9_aq_complexity.h
index a00d34e70..d3cb34c01 100644
--- a/libvpx/vp9/encoder/vp9_aq_complexity.h
+++ b/libvpx/vp9/encoder/vp9_aq_complexity.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_ENCODER_VP9_AQ_COMPLEXITY_H_
-#define VP9_ENCODER_VP9_AQ_COMPLEXITY_H_
+#ifndef VPX_VP9_ENCODER_VP9_AQ_COMPLEXITY_H_
+#define VPX_VP9_ENCODER_VP9_AQ_COMPLEXITY_H_
#ifdef __cplusplus
extern "C" {
@@ -33,4 +33,4 @@ void vp9_setup_in_frame_q_adj(struct VP9_COMP *cpi);
} // extern "C"
#endif
-#endif // VP9_ENCODER_VP9_AQ_COMPLEXITY_H_
+#endif // VPX_VP9_ENCODER_VP9_AQ_COMPLEXITY_H_
diff --git a/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c b/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c
index 2f2f0055a..a2a742493 100644
--- a/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c
+++ b/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.c
@@ -21,6 +21,14 @@
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_segmentation.h"
+static const uint8_t VP9_VAR_OFFS[64] = {
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128
+};
+
CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols) {
size_t last_coded_q_map_size;
CYCLIC_REFRESH *const cr = vpx_calloc(1, sizeof(*cr));
@@ -39,13 +47,16 @@ CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols) {
}
assert(MAXQ <= 255);
memset(cr->last_coded_q_map, MAXQ, last_coded_q_map_size);
+ cr->counter_encode_maxq_scene_change = 0;
return cr;
}
void vp9_cyclic_refresh_free(CYCLIC_REFRESH *cr) {
- vpx_free(cr->map);
- vpx_free(cr->last_coded_q_map);
- vpx_free(cr);
+ if (cr != NULL) {
+ vpx_free(cr->map);
+ vpx_free(cr->last_coded_q_map);
+ vpx_free(cr);
+ }
}
// Check if this coding block, of size bsize, should be considered for refresh
@@ -318,6 +329,28 @@ void vp9_cyclic_refresh_set_golden_update(VP9_COMP *const cpi) {
rc->baseline_gf_interval = 10;
}
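+// A superblock is treated as flat and static when its source variance
+// against the all-128 reference block is zero and its SAD against the
+// co-located block in the last source is also zero.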
+static int is_superblock_flat_static(VP9_COMP *const cpi, int sb_row_index,
+ int sb_col_index) {
+ unsigned int source_variance;
+ const uint8_t *src_y = cpi->Source->y_buffer;
+ const int ystride = cpi->Source->y_stride;
+ unsigned int sse;
+ const BLOCK_SIZE bsize = BLOCK_64X64;
+ src_y += (sb_row_index << 6) * ystride + (sb_col_index << 6);
+ source_variance =
+ cpi->fn_ptr[bsize].vf(src_y, ystride, VP9_VAR_OFFS, 0, &sse);
+ if (source_variance == 0) {
+ uint64_t block_sad;
+ const uint8_t *last_src_y = cpi->Last_Source->y_buffer;
+ const int last_ystride = cpi->Last_Source->y_stride;
+ last_src_y += (sb_row_index << 6) * last_ystride + (sb_col_index << 6);
+ block_sad =
+ cpi->fn_ptr[bsize].sdf(src_y, ystride, last_src_y, last_ystride);
+ if (block_sad == 0) return 1;
+ }
+ return 0;
+}
+
// Update the segmentation map, and related quantities: cyclic refresh map,
// refresh sb_index, and target number of blocks to be refreshed.
// The map is set to either 0/CR_SEGMENT_ID_BASE (no refresh) or to
@@ -368,8 +401,17 @@ static void cyclic_refresh_update_map(VP9_COMP *const cpi) {
int sb_col_index = i - sb_row_index * sb_cols;
int mi_row = sb_row_index * MI_BLOCK_SIZE;
int mi_col = sb_col_index * MI_BLOCK_SIZE;
+ int flat_static_blocks = 0;
+ int compute_content = 1;
assert(mi_row >= 0 && mi_row < cm->mi_rows);
assert(mi_col >= 0 && mi_col < cm->mi_cols);
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cpi->common.use_highbitdepth) compute_content = 0;
+#endif
+ if (cpi->Last_Source == NULL ||
+ cpi->Last_Source->y_width != cpi->Source->y_width ||
+ cpi->Last_Source->y_height != cpi->Source->y_height)
+ compute_content = 0;
bl_index = mi_row * cm->mi_cols + mi_col;
// Loop through all 8x8 blocks in superblock and update map.
xmis =
@@ -400,11 +442,21 @@ static void cyclic_refresh_update_map(VP9_COMP *const cpi) {
// Enforce constant segment over superblock.
// If segment is at least half of superblock, set to 1.
if (sum_map >= xmis * ymis / 2) {
- for (y = 0; y < ymis; y++)
- for (x = 0; x < xmis; x++) {
- seg_map[bl_index + y * cm->mi_cols + x] = CR_SEGMENT_ID_BOOST1;
- }
- cr->target_num_seg_blocks += xmis * ymis;
+ // This superblock is a candidate for refresh:
+ // compute the spatial variance and exclude blocks that are spatially flat
+ // and stationary. Note: this is currently only done for screen content
+ // mode.
+ if (compute_content && cr->skip_flat_static_blocks)
+ flat_static_blocks =
+ is_superblock_flat_static(cpi, sb_row_index, sb_col_index);
+ if (!flat_static_blocks) {
+ // Label this superblock as segment 1.
+ for (y = 0; y < ymis; y++)
+ for (x = 0; x < xmis; x++) {
+ seg_map[bl_index + y * cm->mi_cols + x] = CR_SEGMENT_ID_BOOST1;
+ }
+ cr->target_num_seg_blocks += xmis * ymis;
+ }
}
i++;
if (i == sbs_in_frame) {
@@ -413,7 +465,8 @@ static void cyclic_refresh_update_map(VP9_COMP *const cpi) {
} while (cr->target_num_seg_blocks < block_count && i != cr->sb_index);
cr->sb_index = i;
cr->reduce_refresh = 0;
- if (count_sel<(3 * count_tot)>> 2) cr->reduce_refresh = 1;
+ if (cpi->oxcf.content != VP9E_CONTENT_SCREEN)
+ if (count_sel < (3 * count_tot) >> 2) cr->reduce_refresh = 1;
}
// Set cyclic refresh parameters.
@@ -426,8 +479,13 @@ void vp9_cyclic_refresh_update_parameters(VP9_COMP *const cpi) {
double weight_segment_target = 0;
double weight_segment = 0;
int thresh_low_motion = (cm->width < 720) ? 55 : 20;
+ int qp_thresh = VPXMIN(20, rc->best_quality << 1);
cr->apply_cyclic_refresh = 1;
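+ // Cyclic refresh is skipped where it has little to gain: intra-only frames,
+ // lossless coding, an average inter-frame QP already below qp_thresh, SVC
+ // key frames/enhancement temporal layers, or (outside SVC) long stretches
+ // of low motion.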
- if (cm->frame_type == KEY_FRAME || cpi->svc.temporal_layer_id > 0 ||
+ if (frame_is_intra_only(cm) || cpi->svc.temporal_layer_id > 0 ||
+ is_lossless_requested(&cpi->oxcf) ||
+ rc->avg_frame_qindex[INTER_FRAME] < qp_thresh ||
+ (cpi->use_svc &&
+ cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame) ||
(!cpi->use_svc && rc->avg_frame_low_motion < thresh_low_motion &&
rc->frames_since_key > 40)) {
cr->apply_cyclic_refresh = 0;
@@ -454,6 +512,22 @@ void vp9_cyclic_refresh_update_parameters(VP9_COMP *const cpi) {
cr->rate_boost_fac = 13;
}
}
+ // For screen-content: keep rate_ratio_qdelta at 2.0 (segment#1 boost) and
+ // percent_refresh (refresh rate) at 10. But reduce the rate boost for
+ // segment#2 (rate_boost_fac = 10 disables segment#2).
+ if (cpi->oxcf.content == VP9E_CONTENT_SCREEN) {
+ // Only enable the skipping of flat and static blocks for the top spatial
+ // layer in screen content mode.
+ if (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)
+ cr->skip_flat_static_blocks = 1;
+ cr->percent_refresh = (cr->skip_flat_static_blocks) ? 5 : 10;
+ // Increase the amount of refresh after a scene change that is encoded at
+ // max Q, and keep it raised for a few cycles of the refresh period
+ // (~100 / percent_refresh).
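+ // E.g., with percent_refresh = 15 one cycle is ~100 / 15, about 7 frames,
+ // so the 30-frame counter below covers roughly four refresh cycles.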
+ if (cr->counter_encode_maxq_scene_change < 30)
+ cr->percent_refresh = (cr->skip_flat_static_blocks) ? 10 : 15;
+ cr->rate_ratio_qdelta = 2.0;
+ cr->rate_boost_fac = 10;
+ }
// Adjust some parameters for low resolutions.
if (cm->width <= 352 && cm->height <= 288) {
if (rc->avg_frame_bandwidth < 3000) {
@@ -464,10 +538,6 @@ void vp9_cyclic_refresh_update_parameters(VP9_COMP *const cpi) {
cr->rate_ratio_qdelta = VPXMAX(cr->rate_ratio_qdelta, 2.5);
}
}
- if (cpi->svc.spatial_layer_id > 0) {
- cr->motion_thresh = 4;
- cr->rate_boost_fac = 12;
- }
if (cpi->oxcf.rc_mode == VPX_VBR) {
// To be adjusted for VBR mode, e.g., based on gf period and boost.
// For now use smaller qp-delta (than CBR), no second boosted seg, and
@@ -492,6 +562,13 @@ void vp9_cyclic_refresh_update_parameters(VP9_COMP *const cpi) {
num8x8bl;
if (weight_segment_target < 7 * weight_segment / 8)
weight_segment = weight_segment_target;
+ // For screen-content: don't include the target in the segment weight, since
+ // the segment is reset over all flat areas, so it's more accurate to just
+ // use the previous actual number of seg blocks for the weight.
+ if (cpi->oxcf.content == VP9E_CONTENT_SCREEN)
+ weight_segment =
+ (double)(cr->actual_num_seg1_blocks + cr->actual_num_seg2_blocks) /
+ num8x8bl;
cr->weight_segment = weight_segment;
}
@@ -501,23 +578,31 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) {
const RATE_CONTROL *const rc = &cpi->rc;
CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
struct segmentation *const seg = &cm->seg;
+ int scene_change_detected =
+ cpi->rc.high_source_sad ||
+ (cpi->use_svc && cpi->svc.high_source_sad_superframe);
if (cm->current_video_frame == 0) cr->low_content_avg = 0.0;
- if (!cr->apply_cyclic_refresh || (cpi->force_update_segmentation)) {
+ // Reset if a resolution change has occurred.
+ if (cpi->resize_pending != 0) vp9_cyclic_refresh_reset_resize(cpi);
+ if (!cr->apply_cyclic_refresh || (cpi->force_update_segmentation) ||
+ scene_change_detected) {
// Set segmentation map to 0 and disable.
unsigned char *const seg_map = cpi->segmentation_map;
memset(seg_map, 0, cm->mi_rows * cm->mi_cols);
vp9_disable_segmentation(&cm->seg);
- if (cm->frame_type == KEY_FRAME) {
+ if (cm->frame_type == KEY_FRAME || scene_change_detected) {
memset(cr->last_coded_q_map, MAXQ,
cm->mi_rows * cm->mi_cols * sizeof(*cr->last_coded_q_map));
cr->sb_index = 0;
cr->reduce_refresh = 0;
+ cr->counter_encode_maxq_scene_change = 0;
}
return;
} else {
int qindex_delta = 0;
int qindex2;
const double q = vp9_convert_qindex_to_q(cm->base_qindex, cm->bit_depth);
+ cr->counter_encode_maxq_scene_change++;
vpx_clear_system_state();
// Set rate threshold to some multiple (set to 2 for now) of the target
// rate (target is given by sb64_target_rate and scaled by 256).
@@ -567,9 +652,6 @@ void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) {
cr->qindex_delta[2] = qindex_delta;
vp9_set_segdata(seg, CR_SEGMENT_ID_BOOST2, SEG_LVL_ALT_Q, qindex_delta);
- // Reset if resoluton change has occurred.
- if (cpi->resize_pending != 0) vp9_cyclic_refresh_reset_resize(cpi);
-
// Update the segmentation and refresh map.
cyclic_refresh_update_map(cpi);
}
@@ -583,8 +665,19 @@ void vp9_cyclic_refresh_reset_resize(VP9_COMP *const cpi) {
const VP9_COMMON *const cm = &cpi->common;
CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
memset(cr->map, 0, cm->mi_rows * cm->mi_cols);
- memset(cr->last_coded_q_map, MAXQ, cm->mi_rows * cm->mi_cols);
+ memset(cr->last_coded_q_map, MAXQ,
+ cm->mi_rows * cm->mi_cols * sizeof(*cr->last_coded_q_map));
cr->sb_index = 0;
cpi->refresh_golden_frame = 1;
cpi->refresh_alt_ref_frame = 1;
+ cr->counter_encode_maxq_scene_change = 0;
+}
+
+void vp9_cyclic_refresh_limit_q(const VP9_COMP *cpi, int *q) {
+ CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
+ // For now, apply a hard limit to the frame-level decrease in q when cyclic
+ // refresh is active (percent_refresh > 0).
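+ // E.g., if the previous frame was coded at q_1_frame = 60, *q is clamped to
+ // no lower than 52.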
+ if (cr->percent_refresh > 0 && cpi->rc.q_1_frame - *q > 8) {
+ *q = cpi->rc.q_1_frame - 8;
+ }
}
diff --git a/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h b/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h
index 77fa67c9e..a4a9f1c98 100644
--- a/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h
+++ b/libvpx/vp9/encoder/vp9_aq_cyclicrefresh.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_ENCODER_VP9_AQ_CYCLICREFRESH_H_
-#define VP9_ENCODER_VP9_AQ_CYCLICREFRESH_H_
+#ifndef VPX_VP9_ENCODER_VP9_AQ_CYCLICREFRESH_H_
+#define VPX_VP9_ENCODER_VP9_AQ_CYCLICREFRESH_H_
#include "vpx/vpx_integer.h"
#include "vp9/common/vp9_blockd.h"
@@ -68,6 +68,8 @@ struct CYCLIC_REFRESH {
int reduce_refresh;
double weight_segment;
int apply_cyclic_refresh;
+ int counter_encode_maxq_scene_change;
+ int skip_flat_static_blocks;
};
struct VP9_COMP;
@@ -139,8 +141,10 @@ static INLINE int cyclic_refresh_segment_id(int segment_id) {
return CR_SEGMENT_ID_BASE;
}
+void vp9_cyclic_refresh_limit_q(const struct VP9_COMP *cpi, int *q);
+
#ifdef __cplusplus
} // extern "C"
#endif
-#endif // VP9_ENCODER_VP9_AQ_CYCLICREFRESH_H_
+#endif // VPX_VP9_ENCODER_VP9_AQ_CYCLICREFRESH_H_
diff --git a/libvpx/vp9/encoder/vp9_aq_variance.c b/libvpx/vp9/encoder/vp9_aq_variance.c
index 477f62ba5..9cd8819c3 100644
--- a/libvpx/vp9/encoder/vp9_aq_variance.c
+++ b/libvpx/vp9/encoder/vp9_aq_variance.c
@@ -19,6 +19,7 @@
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_rd.h"
+#include "vp9/encoder/vp9_encodeframe.h"
#include "vp9/encoder/vp9_segmentation.h"
#define ENERGY_MIN (-4)
@@ -192,6 +193,40 @@ double vp9_log_block_var(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs) {
return log(var + 1.0);
}
+// Get the range of sub-block energy values.
+void vp9_get_sub_block_energy(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row,
+ int mi_col, BLOCK_SIZE bsize, int *min_e,
+ int *max_e) {
+ VP9_COMMON *const cm = &cpi->common;
+ const int bw = num_8x8_blocks_wide_lookup[bsize];
+ const int bh = num_8x8_blocks_high_lookup[bsize];
+ const int xmis = VPXMIN(cm->mi_cols - mi_col, bw);
+ const int ymis = VPXMIN(cm->mi_rows - mi_row, bh);
+ int x, y;
+
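+ // Blocks clipped by the frame border are scored as a single unit; otherwise
+ // each 8x8 sub-block is visited and the min/max energy recorded.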
+ if (xmis < bw || ymis < bh) {
+ vp9_setup_src_planes(mb, cpi->Source, mi_row, mi_col);
+ *min_e = vp9_block_energy(cpi, mb, bsize);
+ *max_e = *min_e;
+ } else {
+ int energy;
+ *min_e = ENERGY_MAX;
+ *max_e = ENERGY_MIN;
+
+ for (y = 0; y < ymis; ++y) {
+ for (x = 0; x < xmis; ++x) {
+ vp9_setup_src_planes(mb, cpi->Source, mi_row + y, mi_col + x);
+ energy = vp9_block_energy(cpi, mb, BLOCK_8X8);
+ *min_e = VPXMIN(*min_e, energy);
+ *max_e = VPXMAX(*max_e, energy);
+ }
+ }
+ }
+
+ // Restore the source pointers to what they were on entry.
+ vp9_setup_src_planes(mb, cpi->Source, mi_row, mi_col);
+}
+
#define DEFAULT_E_MIDPOINT 10.0
int vp9_block_energy(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs) {
double energy;
diff --git a/libvpx/vp9/encoder/vp9_aq_variance.h b/libvpx/vp9/encoder/vp9_aq_variance.h
index 211a69f39..a4f872879 100644
--- a/libvpx/vp9/encoder/vp9_aq_variance.h
+++ b/libvpx/vp9/encoder/vp9_aq_variance.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_ENCODER_VP9_AQ_VARIANCE_H_
-#define VP9_ENCODER_VP9_AQ_VARIANCE_H_
+#ifndef VPX_VP9_ENCODER_VP9_AQ_VARIANCE_H_
+#define VPX_VP9_ENCODER_VP9_AQ_VARIANCE_H_
#include "vp9/encoder/vp9_encoder.h"
@@ -20,11 +20,15 @@ extern "C" {
unsigned int vp9_vaq_segment_id(int energy);
void vp9_vaq_frame_setup(VP9_COMP *cpi);
+void vp9_get_sub_block_energy(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row,
+ int mi_col, BLOCK_SIZE bsize, int *min_e,
+ int *max_e);
int vp9_block_energy(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs);
+
double vp9_log_block_var(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs);
#ifdef __cplusplus
} // extern "C"
#endif
-#endif // VP9_ENCODER_VP9_AQ_VARIANCE_H_
+#endif // VPX_VP9_ENCODER_VP9_AQ_VARIANCE_H_
diff --git a/libvpx/vp9/encoder/vp9_bitstream.c b/libvpx/vp9/encoder/vp9_bitstream.c
index d346cd57a..76b7b123d 100644
--- a/libvpx/vp9/encoder/vp9_bitstream.c
+++ b/libvpx/vp9/encoder/vp9_bitstream.c
@@ -86,7 +86,7 @@ static void write_selected_tx_size(const VP9_COMMON *cm,
BLOCK_SIZE bsize = xd->mi[0]->sb_type;
const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
const vpx_prob *const tx_probs =
- get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs);
+ get_tx_probs(max_tx_size, get_tx_size_context(xd), &cm->fc->tx_probs);
vpx_write(w, tx_size != TX_4X4, tx_probs[0]);
if (tx_size != TX_4X4 && max_tx_size >= TX_16X16) {
vpx_write(w, tx_size != TX_8X8, tx_probs[1]);
@@ -217,7 +217,8 @@ static void write_ref_frames(const VP9_COMMON *cm, const MACROBLOCKD *const xd,
}
if (is_compound) {
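+ // The sign bias of the fixed compound reference selects which ref_frame
+ // slot holds the variable reference; code a 1 iff it equals comp_var_ref[1],
+ // matching the decoder side.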
- vpx_write(w, mi->ref_frame[0] == GOLDEN_FRAME,
+ const int idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref];
+ vpx_write(w, mi->ref_frame[!idx] == cm->comp_var_ref[1],
vp9_get_pred_prob_comp_ref_p(cm, xd));
} else {
const int bit0 = mi->ref_frame[0] != LAST_FRAME;
@@ -459,7 +460,8 @@ static void write_modes_sb(
write_modes_b(cpi, xd, tile, w, tok, tok_end, mi_row, mi_col + bs,
max_mv_magnitude, interp_filter_selected);
break;
- case PARTITION_SPLIT:
+ default:
+ assert(partition == PARTITION_SPLIT);
write_modes_sb(cpi, xd, tile, w, tok, tok_end, mi_row, mi_col, subsize,
max_mv_magnitude, interp_filter_selected);
write_modes_sb(cpi, xd, tile, w, tok, tok_end, mi_row, mi_col + bs,
@@ -469,7 +471,6 @@ static void write_modes_sb(
write_modes_sb(cpi, xd, tile, w, tok, tok_end, mi_row + bs, mi_col + bs,
subsize, max_mv_magnitude, interp_filter_selected);
break;
- default: assert(0);
}
}
@@ -618,9 +619,10 @@ static void update_coef_probs_common(vpx_writer *const bc, VP9_COMP *cpi,
return;
}
- case ONE_LOOP_REDUCED: {
+ default: {
int updates = 0;
int noupdates_before_first = 0;
+ assert(cpi->sf.use_fast_coef_updates == ONE_LOOP_REDUCED);
for (i = 0; i < PLANE_TYPES; ++i) {
for (j = 0; j < REF_TYPES; ++j) {
for (k = 0; k < COEF_BANDS; ++k) {
@@ -670,7 +672,6 @@ static void update_coef_probs_common(vpx_writer *const bc, VP9_COMP *cpi,
}
return;
}
- default: assert(0);
}
}
@@ -909,10 +910,24 @@ int vp9_get_refresh_mask(VP9_COMP *cpi) {
(cpi->refresh_golden_frame << cpi->alt_fb_idx);
} else {
int arf_idx = cpi->alt_fb_idx;
- if ((cpi->oxcf.pass == 2) && cpi->multi_arf_allowed) {
- const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
- arf_idx = gf_group->arf_update_idx[gf_group->index];
+ GF_GROUP *const gf_group = &cpi->twopass.gf_group;
+
+ if (cpi->multi_layer_arf) {
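+ // Scan for the first free buffer slot: skip lst/gld/alt and any index that
+ // is still on the GF group's ARF index stack.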
+ for (arf_idx = 0; arf_idx < REF_FRAMES; ++arf_idx) {
+ if (arf_idx != cpi->alt_fb_idx && arf_idx != cpi->lst_fb_idx &&
+ arf_idx != cpi->gld_fb_idx) {
+ int idx;
+ for (idx = 0; idx < gf_group->stack_size; ++idx)
+ if (arf_idx == gf_group->arf_index_stack[idx]) break;
+ if (idx == gf_group->stack_size) break;
+ }
+ }
}
+ cpi->twopass.gf_group.top_arf_idx = arf_idx;
+
+ if (cpi->use_svc && cpi->svc.use_set_ref_frame_config &&
+ cpi->svc.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS)
+ return cpi->svc.update_buffer_slot[cpi->svc.spatial_layer_id];
return (cpi->refresh_last_frame << cpi->lst_fb_idx) |
(cpi->refresh_golden_frame << cpi->gld_fb_idx) |
(cpi->refresh_alt_ref_frame << arf_idx);
@@ -1117,11 +1132,7 @@ static void write_frame_size_with_refs(VP9_COMP *cpi,
((cpi->svc.number_temporal_layers > 1 &&
cpi->oxcf.rc_mode == VPX_CBR) ||
(cpi->svc.number_spatial_layers > 1 &&
- cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame) ||
- (is_two_pass_svc(cpi) &&
- cpi->svc.encode_empty_frame_state == ENCODING &&
- cpi->svc.layer_context[0].frames_from_key_frame <
- cpi->svc.number_temporal_layers + 1))) {
+ cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame))) {
found = 0;
} else if (cfg != NULL) {
found =
@@ -1153,8 +1164,10 @@ static void write_profile(BITSTREAM_PROFILE profile,
case PROFILE_0: vpx_wb_write_literal(wb, 0, 2); break;
case PROFILE_1: vpx_wb_write_literal(wb, 2, 2); break;
case PROFILE_2: vpx_wb_write_literal(wb, 1, 2); break;
- case PROFILE_3: vpx_wb_write_literal(wb, 6, 3); break;
- default: assert(0);
+ default:
+ assert(profile == PROFILE_3);
+ vpx_wb_write_literal(wb, 6, 3);
+ break;
}
}
@@ -1191,7 +1204,13 @@ static void write_uncompressed_header(VP9_COMP *cpi,
write_profile(cm->profile, wb);
- vpx_wb_write_bit(wb, 0); // show_existing_frame
+ // Signal whether to show an existing frame.
+ vpx_wb_write_bit(wb, cm->show_existing_frame);
+ if (cm->show_existing_frame) {
+ vpx_wb_write_literal(wb, cpi->alt_fb_idx, 3);
+ return;
+ }
+
vpx_wb_write_bit(wb, cm->frame_type);
vpx_wb_write_bit(wb, cm->show_frame);
vpx_wb_write_bit(wb, cm->error_resilient_mode);
@@ -1201,14 +1220,6 @@ static void write_uncompressed_header(VP9_COMP *cpi,
write_bitdepth_colorspace_sampling(cm, wb);
write_frame_size(cm, wb);
} else {
- // In spatial svc if it's not error_resilient_mode then we need to code all
- // visible frames as invisible. But we need to keep the show_frame flag so
- // that the publisher could know whether it is supposed to be visible.
- // So we will code the show_frame flag as it is. Then code the intra_only
- // bit here. This will make the bitstream incompatible. In the player we
- // will change to show_frame flag to 0, then add an one byte frame with
- // show_existing_frame flag which tells the decoder which frame we want to
- // show.
if (!cm->show_frame) vpx_wb_write_bit(wb, cm->intra_only);
if (!cm->error_resilient_mode)
@@ -1341,6 +1352,10 @@ void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, size_t *size) {
struct vpx_write_bit_buffer saved_wb;
write_uncompressed_header(cpi, &wb);
+
+ // Skip the rest of the coding process if showing an existing frame.
+ if (cpi->common.show_existing_frame) return;
+
saved_wb = wb;
vpx_wb_write_literal(&wb, 0, 16); // don't know in advance first part. size
diff --git a/libvpx/vp9/encoder/vp9_bitstream.h b/libvpx/vp9/encoder/vp9_bitstream.h
index 339c3fecb..208651dc2 100644
--- a/libvpx/vp9/encoder/vp9_bitstream.h
+++ b/libvpx/vp9/encoder/vp9_bitstream.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_ENCODER_VP9_BITSTREAM_H_
-#define VP9_ENCODER_VP9_BITSTREAM_H_
+#ifndef VPX_VP9_ENCODER_VP9_BITSTREAM_H_
+#define VPX_VP9_ENCODER_VP9_BITSTREAM_H_
#ifdef __cplusplus
extern "C" {
@@ -38,16 +38,12 @@ void vp9_bitstream_encode_tiles_buffer_dealloc(VP9_COMP *const cpi);
void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, size_t *size);
static INLINE int vp9_preserve_existing_gf(VP9_COMP *cpi) {
- return !cpi->multi_arf_allowed && cpi->refresh_golden_frame &&
- cpi->rc.is_src_frame_alt_ref &&
- (!cpi->use_svc || // Add spatial svc base layer case here
- (is_two_pass_svc(cpi) && cpi->svc.spatial_layer_id == 0 &&
- cpi->svc.layer_context[0].gold_ref_idx >= 0 &&
- cpi->oxcf.ss_enable_auto_arf[0]));
+ return cpi->refresh_golden_frame && cpi->rc.is_src_frame_alt_ref &&
+ !cpi->use_svc;
}
#ifdef __cplusplus
} // extern "C"
#endif
-#endif // VP9_ENCODER_VP9_BITSTREAM_H_
+#endif // VPX_VP9_ENCODER_VP9_BITSTREAM_H_
diff --git a/libvpx/vp9/encoder/vp9_block.h b/libvpx/vp9/encoder/vp9_block.h
index 724205dd5..563fdbbde 100644
--- a/libvpx/vp9/encoder/vp9_block.h
+++ b/libvpx/vp9/encoder/vp9_block.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_ENCODER_VP9_BLOCK_H_
-#define VP9_ENCODER_VP9_BLOCK_H_
+#ifndef VPX_VP9_ENCODER_VP9_BLOCK_H_
+#define VPX_VP9_ENCODER_VP9_BLOCK_H_
#include "vpx_util/vpx_thread.h"
@@ -92,6 +92,7 @@ struct macroblock {
int sadperbit4;
int rddiv;
int rdmult;
+ int cb_rdmult;
int mb_energy;
// These are set to their default values at the beginning, and then adjusted
@@ -115,6 +116,12 @@ struct macroblock {
int *nmvsadcost_hp[2];
int **mvsadcost;
+ // sharpness is used to disable skip mode and to adjust rd_mult
+ int sharpness;
+
+ // aq mode is used to adjust the rd cost based on the segment.
+ int adjust_rdmult_by_segment;
+
// These define limits to motion vector components to prevent them
// from extending outside the UMV borders
MvLimits mv_limits;
@@ -180,6 +187,8 @@ struct macroblock {
int sb_pickmode_part;
+ int zero_temp_sad_source;
+
// For each superblock: saves the content value (e.g., low/high sad/sumdiff)
// based on source sad, prior to encoding the frame.
uint8_t content_state_sb;
@@ -199,10 +208,15 @@ struct macroblock {
void (*highbd_inv_txfm_add)(const tran_low_t *input, uint16_t *dest,
int stride, int eob, int bd);
#endif
+#if CONFIG_ML_VAR_PARTITION
+ DECLARE_ALIGNED(16, uint8_t, est_pred[64 * 64]);
+#endif // CONFIG_ML_VAR_PARTITION
+
+ struct scale_factors *me_sf;
};
#ifdef __cplusplus
} // extern "C"
#endif
-#endif // VP9_ENCODER_VP9_BLOCK_H_
+#endif // VPX_VP9_ENCODER_VP9_BLOCK_H_
diff --git a/libvpx/vp9/encoder/vp9_blockiness.c b/libvpx/vp9/encoder/vp9_blockiness.c
index 9ab57b57c..da68a3c3c 100644
--- a/libvpx/vp9/encoder/vp9_blockiness.c
+++ b/libvpx/vp9/encoder/vp9_blockiness.c
@@ -11,6 +11,7 @@
#include "vpx/vpx_integer.h"
#include "vpx_ports/system_state.h"
+#include "vp9/encoder/vp9_blockiness.h"
static int horizontal_filter(const uint8_t *s) {
return (s[1] - s[-2]) * 2 + (s[-1] - s[0]) * 6;
diff --git a/libvpx/vp9/encoder/vp9_blockiness.h b/libvpx/vp9/encoder/vp9_blockiness.h
new file mode 100644
index 000000000..e840cb251
--- /dev/null
+++ b/libvpx/vp9/encoder/vp9_blockiness.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2019 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_VP9_ENCODER_VP9_BLOCKINESS_H_
+#define VPX_VP9_ENCODER_VP9_BLOCKINESS_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+double vp9_get_blockiness(const uint8_t *img1, int img1_pitch,
+ const uint8_t *img2, int img2_pitch, int width,
+ int height);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VPX_VP9_ENCODER_VP9_BLOCKINESS_H_
diff --git a/libvpx/vp9/encoder/vp9_context_tree.c b/libvpx/vp9/encoder/vp9_context_tree.c
index 2f7e54433..b74b9027c 100644
--- a/libvpx/vp9/encoder/vp9_context_tree.c
+++ b/libvpx/vp9/encoder/vp9_context_tree.c
@@ -12,7 +12,10 @@
#include "vp9/encoder/vp9_encoder.h"
static const BLOCK_SIZE square[] = {
- BLOCK_8X8, BLOCK_16X16, BLOCK_32X32, BLOCK_64X64,
+ BLOCK_8X8,
+ BLOCK_16X16,
+ BLOCK_32X32,
+ BLOCK_64X64,
};
static void alloc_mode_context(VP9_COMMON *cm, int num_4x4_blk,
@@ -136,17 +139,22 @@ void vp9_setup_pc_tree(VP9_COMMON *cm, ThreadData *td) {
}
void vp9_free_pc_tree(ThreadData *td) {
- const int tree_nodes = 64 + 16 + 4 + 1;
int i;
- // Set up all 4x4 mode contexts
- for (i = 0; i < 64; ++i) free_mode_context(&td->leaf_tree[i]);
+ if (td == NULL) return;
- // Sets up all the leaf nodes in the tree.
- for (i = 0; i < tree_nodes; ++i) free_tree_contexts(&td->pc_tree[i]);
+ if (td->leaf_tree != NULL) {
+ // Set up all 4x4 mode contexts
+ for (i = 0; i < 64; ++i) free_mode_context(&td->leaf_tree[i]);
+ vpx_free(td->leaf_tree);
+ td->leaf_tree = NULL;
+ }
- vpx_free(td->pc_tree);
- td->pc_tree = NULL;
- vpx_free(td->leaf_tree);
- td->leaf_tree = NULL;
+ if (td->pc_tree != NULL) {
+ const int tree_nodes = 64 + 16 + 4 + 1;
+ // Sets up all the leaf nodes in the tree.
+ for (i = 0; i < tree_nodes; ++i) free_tree_contexts(&td->pc_tree[i]);
+ vpx_free(td->pc_tree);
+ td->pc_tree = NULL;
+ }
}
diff --git a/libvpx/vp9/encoder/vp9_context_tree.h b/libvpx/vp9/encoder/vp9_context_tree.h
index 73423c075..d2cdb1010 100644
--- a/libvpx/vp9/encoder/vp9_context_tree.h
+++ b/libvpx/vp9/encoder/vp9_context_tree.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_ENCODER_VP9_CONTEXT_TREE_H_
-#define VP9_ENCODER_VP9_CONTEXT_TREE_H_
+#ifndef VPX_VP9_ENCODER_VP9_CONTEXT_TREE_H_
+#define VPX_VP9_ENCODER_VP9_CONTEXT_TREE_H_
#include "vp9/common/vp9_blockd.h"
#include "vp9/encoder/vp9_block.h"
@@ -56,6 +56,7 @@ typedef struct {
// scope of refactoring.
int rate;
int64_t dist;
+ int64_t rdcost;
#if CONFIG_VP9_TEMPORAL_DENOISING
unsigned int newmv_sse;
@@ -75,6 +76,8 @@ typedef struct {
// Used for the machine learning-based early termination
int32_t sum_y_eobs;
+ // Skip certain ref frames during RD search of rectangular partitions.
+ uint8_t skip_ref_frame_mask;
} PICK_MODE_CONTEXT;
typedef struct PC_TREE {
@@ -97,4 +100,4 @@ void vp9_free_pc_tree(struct ThreadData *td);
} // extern "C"
#endif
-#endif /* VP9_ENCODER_VP9_CONTEXT_TREE_H_ */
+#endif // VPX_VP9_ENCODER_VP9_CONTEXT_TREE_H_
diff --git a/libvpx/vp9/encoder/vp9_cost.h b/libvpx/vp9/encoder/vp9_cost.h
index 70a1a2e0e..638d72a91 100644
--- a/libvpx/vp9/encoder/vp9_cost.h
+++ b/libvpx/vp9/encoder/vp9_cost.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_ENCODER_VP9_COST_H_
-#define VP9_ENCODER_VP9_COST_H_
+#ifndef VPX_VP9_ENCODER_VP9_COST_H_
+#define VPX_VP9_ENCODER_VP9_COST_H_
#include "vpx_dsp/prob.h"
#include "vpx/vpx_integer.h"
@@ -55,4 +55,4 @@ void vp9_cost_tokens_skip(int *costs, const vpx_prob *probs, vpx_tree tree);
} // extern "C"
#endif
-#endif // VP9_ENCODER_VP9_COST_H_
+#endif // VPX_VP9_ENCODER_VP9_COST_H_
diff --git a/libvpx/vp9/encoder/vp9_denoiser.c b/libvpx/vp9/encoder/vp9_denoiser.c
index b08ccaa66..2820b71b4 100644
--- a/libvpx/vp9/encoder/vp9_denoiser.c
+++ b/libvpx/vp9/encoder/vp9_denoiser.c
@@ -189,7 +189,7 @@ static VP9_DENOISER_DECISION perform_motion_compensation(
int increase_denoising, int mi_row, int mi_col, PICK_MODE_CONTEXT *ctx,
int motion_magnitude, int is_skin, int *zeromv_filter, int consec_zeromv,
int num_spatial_layers, int width, int lst_fb_idx, int gld_fb_idx,
- int use_svc, int spatial_layer) {
+ int use_svc, int spatial_layer, int use_gf_temporal_ref) {
const int sse_diff = (ctx->newmv_sse == UINT_MAX)
? 0
: ((int)ctx->zeromv_sse - (int)ctx->newmv_sse);
@@ -220,7 +220,8 @@ static VP9_DENOISER_DECISION perform_motion_compensation(
// If the best reference frame uses inter-prediction and there is enough of a
// difference in sum-squared-error, use it.
if (frame != INTRA_FRAME && frame != ALTREF_FRAME &&
- (frame != GOLDEN_FRAME || num_spatial_layers == 1) &&
+ (frame != GOLDEN_FRAME || num_spatial_layers == 1 ||
+ use_gf_temporal_ref) &&
sse_diff > sse_diff_thresh(bs, increase_denoising, motion_magnitude)) {
mi->ref_frame[0] = ctx->best_reference_frame;
mi->mode = ctx->best_sse_inter_mode;
@@ -230,7 +231,8 @@ static VP9_DENOISER_DECISION perform_motion_compensation(
frame = ctx->best_zeromv_reference_frame;
ctx->newmv_sse = ctx->zeromv_sse;
// Bias to last reference.
- if (num_spatial_layers > 1 || frame == ALTREF_FRAME ||
+ if ((num_spatial_layers > 1 && !use_gf_temporal_ref) ||
+ frame == ALTREF_FRAME ||
(frame != LAST_FRAME &&
((ctx->zeromv_lastref_sse<(5 * ctx->zeromv_sse)>> 2) ||
denoiser->denoising_level >= kDenHigh))) {
@@ -261,6 +263,14 @@ static VP9_DENOISER_DECISION perform_motion_compensation(
denoise_layer_idx = num_spatial_layers - spatial_layer - 1;
}
+ // Force a copy (no denoising; copy the source into the denoised buffer) if
+ // running_avg_y[frame] is NULL.
+ if (denoiser->running_avg_y[frame].buffer_alloc == NULL) {
+ // Restore everything to its original state
+ *mi = saved_mi;
+ return COPY_BLOCK;
+ }
+
if (ctx->newmv_sse > sse_thresh(bs, increase_denoising)) {
// Restore everything to its original state
*mi = saved_mi;
@@ -326,7 +336,8 @@ static VP9_DENOISER_DECISION perform_motion_compensation(
void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col,
BLOCK_SIZE bs, PICK_MODE_CONTEXT *ctx,
- VP9_DENOISER_DECISION *denoiser_decision) {
+ VP9_DENOISER_DECISION *denoiser_decision,
+ int use_gf_temporal_ref) {
int mv_col, mv_row;
int motion_magnitude = 0;
int zeromv_filter = 0;
@@ -349,6 +360,7 @@ void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col,
int is_skin = 0;
int increase_denoising = 0;
int consec_zeromv = 0;
+ int last_is_reference = cpi->ref_frame_flags & VP9_LAST_FLAG;
mv_col = ctx->best_sse_mv.as_mv.col;
mv_row = ctx->best_sse_mv.as_mv.row;
motion_magnitude = mv_row * mv_row + mv_col * mv_col;
@@ -379,7 +391,7 @@ void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col,
// zero/small motion in skin detection is high, i.e, > 4).
if (consec_zeromv < 4) {
i = ymis;
- j = xmis;
+ break;
}
}
}
@@ -392,12 +404,18 @@ void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col,
}
if (!is_skin && denoiser->denoising_level == kDenHigh) increase_denoising = 1;
- if (denoiser->denoising_level >= kDenLow && !ctx->sb_skip_denoising)
+ // Copy the block if LAST_FRAME is not a reference.
+ // Last doesn't always exist when SVC layers are changed dynamically, e.g.,
+ // the top spatial layer has no last reference when it is brought up for the
+ // first time on the fly.
+ if (last_is_reference && denoiser->denoising_level >= kDenLow &&
+ !ctx->sb_skip_denoising)
decision = perform_motion_compensation(
&cpi->common, denoiser, mb, bs, increase_denoising, mi_row, mi_col, ctx,
motion_magnitude, is_skin, &zeromv_filter, consec_zeromv,
cpi->svc.number_spatial_layers, cpi->Source->y_width, cpi->lst_fb_idx,
- cpi->gld_fb_idx, cpi->use_svc, cpi->svc.spatial_layer_id);
+ cpi->gld_fb_idx, cpi->use_svc, cpi->svc.spatial_layer_id,
+ use_gf_temporal_ref);
if (decision == FILTER_BLOCK) {
decision = vp9_denoiser_filter(src.buf, src.stride, mc_avg_start,
@@ -445,16 +463,16 @@ static void swap_frame_buffer(YV12_BUFFER_CONFIG *const dest,
}
void vp9_denoiser_update_frame_info(
- VP9_DENOISER *denoiser, YV12_BUFFER_CONFIG src, FRAME_TYPE frame_type,
- int refresh_alt_ref_frame, int refresh_golden_frame, int refresh_last_frame,
- int alt_fb_idx, int gld_fb_idx, int lst_fb_idx, int resized,
- int svc_base_is_key, int second_spatial_layer) {
+ VP9_DENOISER *denoiser, YV12_BUFFER_CONFIG src, struct SVC *svc,
+ FRAME_TYPE frame_type, int refresh_alt_ref_frame, int refresh_golden_frame,
+ int refresh_last_frame, int alt_fb_idx, int gld_fb_idx, int lst_fb_idx,
+ int resized, int svc_refresh_denoiser_buffers, int second_spatial_layer) {
const int shift = second_spatial_layer ? denoiser->num_ref_frames : 0;
// Copy source into denoised reference buffers on KEY_FRAME or
// if the just encoded frame was resized. For SVC, copy source if the base
// spatial layer was key frame.
if (frame_type == KEY_FRAME || resized != 0 || denoiser->reset ||
- svc_base_is_key) {
+ svc_refresh_denoiser_buffers) {
int i;
// Start at 1 so as not to overwrite the INTRA_FRAME
for (i = 1; i < denoiser->num_ref_frames; ++i) {
@@ -465,32 +483,43 @@ void vp9_denoiser_update_frame_info(
return;
}
- // If more than one refresh occurs, must copy frame buffer.
- if ((refresh_alt_ref_frame + refresh_golden_frame + refresh_last_frame) > 1) {
- if (refresh_alt_ref_frame) {
- copy_frame(&denoiser->running_avg_y[alt_fb_idx + 1 + shift],
- &denoiser->running_avg_y[INTRA_FRAME + shift]);
- }
- if (refresh_golden_frame) {
- copy_frame(&denoiser->running_avg_y[gld_fb_idx + 1 + shift],
- &denoiser->running_avg_y[INTRA_FRAME + shift]);
- }
- if (refresh_last_frame) {
- copy_frame(&denoiser->running_avg_y[lst_fb_idx + 1 + shift],
- &denoiser->running_avg_y[INTRA_FRAME + shift]);
+ if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS &&
+ svc->use_set_ref_frame_config) {
+ int i;
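+ // Each set bit of update_buffer_slot flags a reference slot to refresh;
+ // copy the denoised INTRA buffer into every flagged slot.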
+ for (i = 0; i < REF_FRAMES; i++) {
+ if (svc->update_buffer_slot[svc->spatial_layer_id] & (1 << i))
+ copy_frame(&denoiser->running_avg_y[i + 1 + shift],
+ &denoiser->running_avg_y[INTRA_FRAME + shift]);
}
} else {
- if (refresh_alt_ref_frame) {
- swap_frame_buffer(&denoiser->running_avg_y[alt_fb_idx + 1 + shift],
- &denoiser->running_avg_y[INTRA_FRAME + shift]);
- }
- if (refresh_golden_frame) {
- swap_frame_buffer(&denoiser->running_avg_y[gld_fb_idx + 1 + shift],
- &denoiser->running_avg_y[INTRA_FRAME + shift]);
- }
- if (refresh_last_frame) {
- swap_frame_buffer(&denoiser->running_avg_y[lst_fb_idx + 1 + shift],
- &denoiser->running_avg_y[INTRA_FRAME + shift]);
+ // If more than one refresh occurs, must copy frame buffer.
+ if ((refresh_alt_ref_frame + refresh_golden_frame + refresh_last_frame) >
+ 1) {
+ if (refresh_alt_ref_frame) {
+ copy_frame(&denoiser->running_avg_y[alt_fb_idx + 1 + shift],
+ &denoiser->running_avg_y[INTRA_FRAME + shift]);
+ }
+ if (refresh_golden_frame) {
+ copy_frame(&denoiser->running_avg_y[gld_fb_idx + 1 + shift],
+ &denoiser->running_avg_y[INTRA_FRAME + shift]);
+ }
+ if (refresh_last_frame) {
+ copy_frame(&denoiser->running_avg_y[lst_fb_idx + 1 + shift],
+ &denoiser->running_avg_y[INTRA_FRAME + shift]);
+ }
+ } else {
+ if (refresh_alt_ref_frame) {
+ swap_frame_buffer(&denoiser->running_avg_y[alt_fb_idx + 1 + shift],
+ &denoiser->running_avg_y[INTRA_FRAME + shift]);
+ }
+ if (refresh_golden_frame) {
+ swap_frame_buffer(&denoiser->running_avg_y[gld_fb_idx + 1 + shift],
+ &denoiser->running_avg_y[INTRA_FRAME + shift]);
+ }
+ if (refresh_last_frame) {
+ swap_frame_buffer(&denoiser->running_avg_y[lst_fb_idx + 1 + shift],
+ &denoiser->running_avg_y[INTRA_FRAME + shift]);
+ }
}
}
}
@@ -539,26 +568,38 @@ static int vp9_denoiser_realloc_svc_helper(VP9_COMMON *cm,
}
int vp9_denoiser_realloc_svc(VP9_COMMON *cm, VP9_DENOISER *denoiser,
- int svc_buf_shift, int refresh_alt,
- int refresh_gld, int refresh_lst, int alt_fb_idx,
- int gld_fb_idx, int lst_fb_idx) {
+ struct SVC *svc, int svc_buf_shift,
+ int refresh_alt, int refresh_gld, int refresh_lst,
+ int alt_fb_idx, int gld_fb_idx, int lst_fb_idx) {
int fail = 0;
- if (refresh_alt) {
- // Increase the frame buffer index by 1 to map it to the buffer index in the
- // denoiser.
- fail = vp9_denoiser_realloc_svc_helper(cm, denoiser,
- alt_fb_idx + 1 + svc_buf_shift);
- if (fail) return 1;
- }
- if (refresh_gld) {
- fail = vp9_denoiser_realloc_svc_helper(cm, denoiser,
- gld_fb_idx + 1 + svc_buf_shift);
- if (fail) return 1;
- }
- if (refresh_lst) {
- fail = vp9_denoiser_realloc_svc_helper(cm, denoiser,
- lst_fb_idx + 1 + svc_buf_shift);
- if (fail) return 1;
+ if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS &&
+ svc->use_set_ref_frame_config) {
+ int i;
+ for (i = 0; i < REF_FRAMES; i++) {
+ if (cm->frame_type == KEY_FRAME ||
+ svc->update_buffer_slot[svc->spatial_layer_id] & (1 << i)) {
+ fail = vp9_denoiser_realloc_svc_helper(cm, denoiser,
+ i + 1 + svc_buf_shift);
+ }
+ }
+ } else {
+ if (refresh_alt) {
+ // Increase the frame buffer index by 1 to map it to the buffer index in
+ // the denoiser.
+ fail = vp9_denoiser_realloc_svc_helper(cm, denoiser,
+ alt_fb_idx + 1 + svc_buf_shift);
+ if (fail) return 1;
+ }
+ if (refresh_gld) {
+ fail = vp9_denoiser_realloc_svc_helper(cm, denoiser,
+ gld_fb_idx + 1 + svc_buf_shift);
+ if (fail) return 1;
+ }
+ if (refresh_lst) {
+ fail = vp9_denoiser_realloc_svc_helper(cm, denoiser,
+ lst_fb_idx + 1 + svc_buf_shift);
+ if (fail) return 1;
+ }
}
return 0;
}
@@ -651,6 +692,7 @@ int vp9_denoiser_alloc(VP9_COMMON *cm, struct SVC *svc, VP9_DENOISER *denoiser,
denoiser->denoising_level = kDenLow;
denoiser->prev_denoising_level = kDenLow;
denoiser->reset = 0;
+ denoiser->current_denoiser_frame = 0;
return 0;
}
@@ -675,13 +717,29 @@ void vp9_denoiser_free(VP9_DENOISER *denoiser) {
vpx_free_frame_buffer(&denoiser->last_source);
}
-void vp9_denoiser_set_noise_level(VP9_DENOISER *denoiser, int noise_level) {
+static void force_refresh_longterm_ref(VP9_COMP *const cpi) {
+ SVC *const svc = &cpi->svc;
+ // If a long term reference is used, force a refresh of that slot, so the
+ // denoiser buffer for the long term reference stays in sync.
+ if (svc->use_gf_temporal_ref_current_layer) {
+ int index = svc->spatial_layer_id;
+ if (svc->number_spatial_layers == 3) index = svc->spatial_layer_id - 1;
+ assert(index >= 0);
+ cpi->alt_fb_idx = svc->buffer_gf_temporal_ref[index].idx;
+ cpi->refresh_alt_ref_frame = 1;
+ }
+}
+
+void vp9_denoiser_set_noise_level(VP9_COMP *const cpi, int noise_level) {
+ VP9_DENOISER *const denoiser = &cpi->denoiser;
denoiser->denoising_level = noise_level;
if (denoiser->denoising_level > kDenLowLow &&
- denoiser->prev_denoising_level == kDenLowLow)
+ denoiser->prev_denoising_level == kDenLowLow) {
denoiser->reset = 1;
- else
+ force_refresh_longterm_ref(cpi);
+ } else {
denoiser->reset = 0;
+ }
denoiser->prev_denoising_level = denoiser->denoising_level;
}
@@ -713,6 +771,56 @@ int64_t vp9_scale_acskip_thresh(int64_t threshold,
return threshold;
}
+void vp9_denoiser_reset_on_first_frame(VP9_COMP *const cpi) {
+ if (vp9_denoise_svc_non_key(cpi) &&
+ cpi->denoiser.current_denoiser_frame == 0) {
+ cpi->denoiser.reset = 1;
+ force_refresh_longterm_ref(cpi);
+ }
+}
+
+void vp9_denoiser_update_ref_frame(VP9_COMP *const cpi) {
+ VP9_COMMON *const cm = &cpi->common;
+ SVC *const svc = &cpi->svc;
+
+ if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) &&
+ cpi->denoiser.denoising_level > kDenLowLow) {
+ int svc_refresh_denoiser_buffers = 0;
+ int denoise_svc_second_layer = 0;
+ FRAME_TYPE frame_type = cm->intra_only ? KEY_FRAME : cm->frame_type;
+ cpi->denoiser.current_denoiser_frame++;
+ if (cpi->use_svc) {
+ const int svc_buf_shift =
+ svc->number_spatial_layers - svc->spatial_layer_id == 2
+ ? cpi->denoiser.num_ref_frames
+ : 0;
+ int layer =
+ LAYER_IDS_TO_IDX(svc->spatial_layer_id, svc->temporal_layer_id,
+ svc->number_temporal_layers);
+ LAYER_CONTEXT *const lc = &svc->layer_context[layer];
+ svc_refresh_denoiser_buffers =
+ lc->is_key_frame || svc->spatial_layer_sync[svc->spatial_layer_id];
+ denoise_svc_second_layer =
+ svc->number_spatial_layers - svc->spatial_layer_id == 2 ? 1 : 0;
+ // Check if we need to allocate extra buffers in the denoiser
+ // for refreshed frames.
+ if (vp9_denoiser_realloc_svc(cm, &cpi->denoiser, svc, svc_buf_shift,
+ cpi->refresh_alt_ref_frame,
+ cpi->refresh_golden_frame,
+ cpi->refresh_last_frame, cpi->alt_fb_idx,
+ cpi->gld_fb_idx, cpi->lst_fb_idx))
+ vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+ "Failed to re-allocate denoiser for SVC");
+ }
+ vp9_denoiser_update_frame_info(
+ &cpi->denoiser, *cpi->Source, svc, frame_type,
+ cpi->refresh_alt_ref_frame, cpi->refresh_golden_frame,
+ cpi->refresh_last_frame, cpi->alt_fb_idx, cpi->gld_fb_idx,
+ cpi->lst_fb_idx, cpi->resize_pending, svc_refresh_denoiser_buffers,
+ denoise_svc_second_layer);
+ }
+}
+
#ifdef OUTPUT_YUV_DENOISED
static void make_grayscale(YV12_BUFFER_CONFIG *yuv) {
int r, c;
diff --git a/libvpx/vp9/encoder/vp9_denoiser.h b/libvpx/vp9/encoder/vp9_denoiser.h
index f4da24cbf..1973e9898 100644
--- a/libvpx/vp9/encoder/vp9_denoiser.h
+++ b/libvpx/vp9/encoder/vp9_denoiser.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_ENCODER_DENOISER_H_
-#define VP9_ENCODER_DENOISER_H_
+#ifndef VPX_VP9_ENCODER_VP9_DENOISER_H_
+#define VPX_VP9_ENCODER_VP9_DENOISER_H_
#include "vp9/encoder/vp9_block.h"
#include "vp9/encoder/vp9_skin_detection.h"
@@ -50,6 +50,7 @@ typedef struct vp9_denoiser {
int reset;
int num_ref_frames;
int num_layers;
+ unsigned int current_denoiser_frame;
VP9_DENOISER_LEVEL denoising_level;
VP9_DENOISER_LEVEL prev_denoising_level;
} VP9_DENOISER;
@@ -70,14 +71,15 @@ struct VP9_COMP;
struct SVC;
void vp9_denoiser_update_frame_info(
- VP9_DENOISER *denoiser, YV12_BUFFER_CONFIG src, FRAME_TYPE frame_type,
- int refresh_alt_ref_frame, int refresh_golden_frame, int refresh_last_frame,
- int alt_fb_idx, int gld_fb_idx, int lst_fb_idx, int resized,
- int svc_base_is_key, int second_spatial_layer);
+ VP9_DENOISER *denoiser, YV12_BUFFER_CONFIG src, struct SVC *svc,
+ FRAME_TYPE frame_type, int refresh_alt_ref_frame, int refresh_golden_frame,
+ int refresh_last_frame, int alt_fb_idx, int gld_fb_idx, int lst_fb_idx,
+ int resized, int svc_refresh_denoiser_buffers, int second_spatial_layer);
void vp9_denoiser_denoise(struct VP9_COMP *cpi, MACROBLOCK *mb, int mi_row,
int mi_col, BLOCK_SIZE bs, PICK_MODE_CONTEXT *ctx,
- VP9_DENOISER_DECISION *denoiser_decision);
+ VP9_DENOISER_DECISION *denoiser_decision,
+ int use_gf_temporal_ref);
void vp9_denoiser_reset_frame_stats(PICK_MODE_CONTEXT *ctx);
@@ -86,9 +88,9 @@ void vp9_denoiser_update_frame_stats(MODE_INFO *mi, unsigned int sse,
PICK_MODE_CONTEXT *ctx);
int vp9_denoiser_realloc_svc(VP9_COMMON *cm, VP9_DENOISER *denoiser,
- int svc_buf_shift, int refresh_alt,
- int refresh_gld, int refresh_lst, int alt_fb_idx,
- int gld_fb_idx, int lst_fb_idx);
+ struct SVC *svc, int svc_buf_shift,
+ int refresh_alt, int refresh_gld, int refresh_lst,
+ int alt_fb_idx, int gld_fb_idx, int lst_fb_idx);
int vp9_denoiser_alloc(VP9_COMMON *cm, struct SVC *svc, VP9_DENOISER *denoiser,
int use_svc, int noise_sen, int width, int height,
@@ -110,7 +112,9 @@ static INLINE int total_adj_strong_thresh(BLOCK_SIZE bs,
void vp9_denoiser_free(VP9_DENOISER *denoiser);
-void vp9_denoiser_set_noise_level(VP9_DENOISER *denoiser, int noise_level);
+void vp9_denoiser_set_noise_level(struct VP9_COMP *const cpi, int noise_level);
+
+void vp9_denoiser_reset_on_first_frame(struct VP9_COMP *const cpi);
int64_t vp9_scale_part_thresh(int64_t threshold, VP9_DENOISER_LEVEL noise_level,
int content_state, int temporal_layer_id);
@@ -119,8 +123,10 @@ int64_t vp9_scale_acskip_thresh(int64_t threshold,
VP9_DENOISER_LEVEL noise_level, int abs_sumdiff,
int temporal_layer_id);
+void vp9_denoiser_update_ref_frame(struct VP9_COMP *const cpi);
+
#ifdef __cplusplus
} // extern "C"
#endif
-#endif // VP9_ENCODER_DENOISER_H_
+#endif // VPX_VP9_ENCODER_VP9_DENOISER_H_
diff --git a/libvpx/vp9/encoder/vp9_encodeframe.c b/libvpx/vp9/encoder/vp9_encodeframe.c
index 682477df1..5adefac1a 100644
--- a/libvpx/vp9/encoder/vp9_encodeframe.c
+++ b/libvpx/vp9/encoder/vp9_encodeframe.c
@@ -42,6 +42,8 @@
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/encoder/vp9_ethread.h"
#include "vp9/encoder/vp9_extend.h"
+#include "vp9/encoder/vp9_multi_thread.h"
+#include "vp9/encoder/vp9_partition_models.h"
#include "vp9/encoder/vp9_pickmode.h"
#include "vp9/encoder/vp9_rd.h"
#include "vp9/encoder/vp9_rdopt.h"
@@ -52,33 +54,6 @@ static void encode_superblock(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
int output_enabled, int mi_row, int mi_col,
BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx);
-// Machine learning-based early termination parameters.
-static const double train_mean[24] = {
- 303501.697372, 3042630.372158, 24.694696, 1.392182,
- 689.413511, 162.027012, 1.478213, 0.0,
- 135382.260230, 912738.513263, 28.845217, 1.515230,
- 544.158492, 131.807995, 1.436863, 0.0,
- 43682.377587, 208131.711766, 28.084737, 1.356677,
- 138.254122, 119.522553, 1.252322, 0.0
-};
-
-static const double train_stdm[24] = {
- 673689.212982, 5996652.516628, 0.024449, 1.989792,
- 985.880847, 0.014638, 2.001898, 0.0,
- 208798.775332, 1812548.443284, 0.018693, 1.838009,
- 396.986910, 0.015657, 1.332541, 0.0,
- 55888.847031, 448587.962714, 0.017900, 1.904776,
- 98.652832, 0.016598, 1.320992, 0.0
-};
-
-// Error tolerance: 0.01%-0.0.05%-0.1%
-static const double classifiers[24] = {
- 0.111736, 0.289977, 0.042219, 0.204765, 0.120410, -0.143863,
- 0.282376, 0.847811, 0.637161, 0.131570, 0.018636, 0.202134,
- 0.112797, 0.028162, 0.182450, 1.124367, 0.386133, 0.083700,
- 0.050028, 0.150873, 0.061119, 0.109318, 0.127255, 0.625211
-};
-
// This is used as a reference when computing the source variance for the
// purpose of activity masking.
// Eventually this should be replaced by custom no-reference routines,
@@ -205,6 +180,64 @@ static BLOCK_SIZE get_rd_var_based_fixed_partition(VP9_COMP *cpi, MACROBLOCK *x,
return BLOCK_8X8;
}
+static void set_segment_index(VP9_COMP *cpi, MACROBLOCK *const x, int mi_row,
+ int mi_col, BLOCK_SIZE bsize, int segment_index) {
+ VP9_COMMON *const cm = &cpi->common;
+ const struct segmentation *const seg = &cm->seg;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MODE_INFO *mi = xd->mi[0];
+
+ const AQ_MODE aq_mode = cpi->oxcf.aq_mode;
+ const uint8_t *const map =
+ seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
+
+ // Initialize the segmentation index as 0.
+ mi->segment_id = 0;
+
+ // Skip the rest if segmentation (and with it, AQ) is disabled.
+ if (!seg->enabled) return;
+
+ switch (aq_mode) {
+ case CYCLIC_REFRESH_AQ:
+ mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
+ break;
+ case VARIANCE_AQ:
+ if (cm->frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame ||
+ cpi->force_update_segmentation ||
+ (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) {
+ int min_energy;
+ int max_energy;
+ // Get the sub-block energy range.
+ if (bsize >= BLOCK_32X32) {
+ vp9_get_sub_block_energy(cpi, x, mi_row, mi_col, bsize, &min_energy,
+ &max_energy);
+ } else {
+ min_energy = bsize <= BLOCK_16X16 ? x->mb_energy
+ : vp9_block_energy(cpi, x, bsize);
+ }
+ mi->segment_id = vp9_vaq_segment_id(min_energy);
+ } else {
+ mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
+ }
+ break;
+ case LOOKAHEAD_AQ:
+ mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
+ break;
+ case EQUATOR360_AQ:
+ if (cm->frame_type == KEY_FRAME || cpi->force_update_segmentation)
+ mi->segment_id = vp9_360aq_segment_id(mi_row, cm->mi_rows);
+ else
+ mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
+ break;
+ case PSNR_AQ: mi->segment_id = segment_index; break;
+ default:
+ // NO_AQ or COMPLEXITY_AQ: keep the default segment index of 0.
+ break;
+ }
+
+ vp9_init_plane_quantizers(cpi, x);
+}
+
// Lighter version of set_offsets that only sets the mode info
// pointers.
static INLINE void set_mode_info_offsets(VP9_COMMON *const cm,
@@ -222,18 +255,14 @@ static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile,
BLOCK_SIZE bsize) {
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
- MODE_INFO *mi;
const int mi_width = num_8x8_blocks_wide_lookup[bsize];
const int mi_height = num_8x8_blocks_high_lookup[bsize];
- const struct segmentation *const seg = &cm->seg;
MvLimits *const mv_limits = &x->mv_limits;
set_skip_context(xd, mi_row, mi_col);
set_mode_info_offsets(cm, x, xd, mi_row, mi_col);
- mi = xd->mi[0];
-
// Set up destination pointers.
vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col);
@@ -256,22 +285,6 @@ static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile,
x->rddiv = cpi->rd.RDDIV;
x->rdmult = cpi->rd.RDMULT;
- // Setup segment ID.
- if (seg->enabled) {
- if (cpi->oxcf.aq_mode != VARIANCE_AQ && cpi->oxcf.aq_mode != LOOKAHEAD_AQ &&
- cpi->oxcf.aq_mode != EQUATOR360_AQ) {
- const uint8_t *const map =
- seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
- mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
- }
- vp9_init_plane_quantizers(cpi, x);
-
- x->encode_breakout = cpi->segment_encode_breakout[mi->segment_id];
- } else {
- mi->segment_id = 0;
- x->encode_breakout = cpi->encode_breakout;
- }
-
// required by vp9_append_sub8x8_mvs_for_idx() and vp9_find_best_ref_mvs()
xd->tile = *tile;
}
@@ -385,16 +398,13 @@ static void tree_to_node(void *data, BLOCK_SIZE bsize, variance_node *node) {
node->split[i] = &vt->split[i].part_variances.none;
break;
}
- case BLOCK_4X4: {
+ default: {
v4x4 *vt = (v4x4 *)data;
+ assert(bsize == BLOCK_4X4);
node->part_variances = &vt->part_variances;
for (i = 0; i < 4; i++) node->split[i] = &vt->split[i];
break;
}
- default: {
- assert(0);
- break;
- }
}
}
@@ -408,7 +418,8 @@ static void fill_variance(uint32_t s2, int32_t s, int c, var *v) {
static void get_variance(var *v) {
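+ // The squared-sum term is widened to 64 bits before the shift so that
+ // sum_error * sum_error cannot overflow.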
v->variance =
(int)(256 * (v->sum_square_error -
- ((v->sum_error * v->sum_error) >> v->log2_count)) >>
+ (uint32_t)(((int64_t)v->sum_error * v->sum_error) >>
+ v->log2_count)) >>
v->log2_count);
}
@@ -450,7 +461,7 @@ static int set_vt_partitioning(VP9_COMP *cpi, MACROBLOCK *const x,
// No check for vert/horiz split as too few samples for variance.
if (bsize == bsize_min) {
// Variance already computed to set the force_split.
- if (cm->frame_type == KEY_FRAME) get_variance(&vt.part_variances->none);
+ if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none);
if (mi_col + block_width / 2 < cm->mi_cols &&
mi_row + block_height / 2 < cm->mi_rows &&
vt.part_variances->none.variance < threshold) {
@@ -460,9 +471,9 @@ static int set_vt_partitioning(VP9_COMP *cpi, MACROBLOCK *const x,
return 0;
} else if (bsize > bsize_min) {
// Variance already computed to set the force_split.
- if (cm->frame_type == KEY_FRAME) get_variance(&vt.part_variances->none);
+ if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none);
// For key frame: take split for bsize above 32X32 or very high variance.
- if (cm->frame_type == KEY_FRAME &&
+ if (frame_is_intra_only(cm) &&
(bsize > BLOCK_32X32 ||
vt.part_variances->none.variance > (threshold << 4))) {
return 0;
@@ -534,7 +545,7 @@ static int64_t scale_part_thresh_sumdiff(int64_t threshold_base, int speed,
static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q,
int content_state) {
VP9_COMMON *const cm = &cpi->common;
- const int is_key_frame = (cm->frame_type == KEY_FRAME);
+ const int is_key_frame = frame_is_intra_only(cm);
const int threshold_multiplier = is_key_frame ? 20 : 1;
int64_t threshold_base =
(int64_t)(threshold_multiplier * cpi->y_dequant[q][1]);
@@ -586,6 +597,7 @@ static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q,
} else {
thresholds[1] = (5 * threshold_base) >> 1;
}
+ if (cpi->sf.disable_16x16part_nonkey) thresholds[2] = INT64_MAX;
}
}
@@ -593,7 +605,7 @@ void vp9_set_variance_partition_thresholds(VP9_COMP *cpi, int q,
int content_state) {
VP9_COMMON *const cm = &cpi->common;
SPEED_FEATURES *const sf = &cpi->sf;
- const int is_key_frame = (cm->frame_type == KEY_FRAME);
+ const int is_key_frame = frame_is_intra_only(cm);
if (sf->partition_search_type != VAR_BASED_PARTITION &&
sf->partition_search_type != REFERENCE_PARTITION) {
return;
@@ -620,6 +632,11 @@ void vp9_set_variance_partition_thresholds(VP9_COMP *cpi, int q,
cpi->vbp_threshold_copy = (cpi->y_dequant[q][1] << 3) > 8000
? (cpi->y_dequant[q][1] << 3)
: 8000;
+ if (cpi->rc.high_source_sad ||
+ (cpi->use_svc && cpi->svc.high_source_sad_superframe)) {
+ cpi->vbp_threshold_sad = 0;
+ cpi->vbp_threshold_copy = 0;
+ }
}
cpi->vbp_threshold_minmax = 15 + (q >> 3);
}
@@ -885,13 +902,13 @@ static void copy_partitioning_helper(VP9_COMP *cpi, MACROBLOCK *x,
set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
set_block_size(cpi, x, xd, mi_row, mi_col + bs, subsize);
break;
- case PARTITION_SPLIT:
+ default:
+ assert(partition == PARTITION_SPLIT);
copy_partitioning_helper(cpi, x, xd, subsize, mi_row, mi_col);
copy_partitioning_helper(cpi, x, xd, subsize, mi_row + bs, mi_col);
copy_partitioning_helper(cpi, x, xd, subsize, mi_row, mi_col + bs);
copy_partitioning_helper(cpi, x, xd, subsize, mi_row + bs, mi_col + bs);
break;
- default: assert(0);
}
}
}
@@ -951,7 +968,9 @@ static int scale_partitioning_svc(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
PARTITION_TYPE partition_high;
if (mi_row_high >= cm->mi_rows || mi_col_high >= cm->mi_cols) return 0;
- if (mi_row >= (cm->mi_rows >> 1) || mi_col >= (cm->mi_cols >> 1)) return 0;
+ if (mi_row >= svc->mi_rows[svc->spatial_layer_id - 1] ||
+ mi_col >= svc->mi_cols[svc->spatial_layer_id - 1])
+ return 0;
// Find corresponding (mi_col/mi_row) block down-scaled by 2x2.
start_pos = mi_row * (svc->mi_stride[svc->spatial_layer_id - 1]) + mi_col;
@@ -1004,7 +1023,8 @@ static int scale_partitioning_svc(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
set_block_size(cpi, x, xd, mi_row_high, mi_col_high + bs_high,
subsize_high);
break;
- case PARTITION_SPLIT:
+ default:
+ assert(partition_high == PARTITION_SPLIT);
if (scale_partitioning_svc(cpi, x, xd, subsize_high, mi_row, mi_col,
mi_row_high, mi_col_high))
return 1;
@@ -1020,7 +1040,6 @@ static int scale_partitioning_svc(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
mi_col_high + bs_high))
return 1;
break;
- default: assert(0);
}
}
@@ -1067,13 +1086,13 @@ static void update_partition_svc(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row,
prev_part[start_pos] = subsize;
if (mi_col + bs < cm->mi_cols) prev_part[start_pos + bs] = subsize;
break;
- case PARTITION_SPLIT:
+ default:
+ assert(partition == PARTITION_SPLIT);
update_partition_svc(cpi, subsize, mi_row, mi_col);
update_partition_svc(cpi, subsize, mi_row + bs, mi_col);
update_partition_svc(cpi, subsize, mi_row, mi_col + bs);
update_partition_svc(cpi, subsize, mi_row + bs, mi_col + bs);
break;
- default: assert(0);
}
}
}
@@ -1108,13 +1127,13 @@ static void update_prev_partition_helper(VP9_COMP *cpi, BLOCK_SIZE bsize,
prev_part[start_pos] = subsize;
if (mi_col + bs < cm->mi_cols) prev_part[start_pos + bs] = subsize;
break;
- case PARTITION_SPLIT:
+ default:
+ assert(partition == PARTITION_SPLIT);
update_prev_partition_helper(cpi, subsize, mi_row, mi_col);
update_prev_partition_helper(cpi, subsize, mi_row + bs, mi_col);
update_prev_partition_helper(cpi, subsize, mi_row, mi_col + bs);
update_prev_partition_helper(cpi, subsize, mi_row + bs, mi_col + bs);
break;
- default: assert(0);
}
}
}
@@ -1206,6 +1225,7 @@ static uint64_t avg_source_sad(VP9_COMP *cpi, MACROBLOCK *x, int shift,
cpi->content_state_sb_fd[sb_offset] = 0;
}
}
+ if (tmp_sad == 0) x->zero_temp_sad_source = 1;
return tmp_sad;
}
@@ -1241,21 +1261,38 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
int pixels_wide = 64, pixels_high = 64;
int64_t thresholds[4] = { cpi->vbp_thresholds[0], cpi->vbp_thresholds[1],
cpi->vbp_thresholds[2], cpi->vbp_thresholds[3] };
+ int scene_change_detected =
+ cpi->rc.high_source_sad ||
+ (cpi->use_svc && cpi->svc.high_source_sad_superframe);
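+ // A detected scene change forces the 64x64 split below and bypasses the
+ // copy-partition shortcut.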
// For the variance computation under SVC mode, we treat the frame as key if
// the reference (base layer frame) is key frame (i.e., is_key_frame == 1).
- const int is_key_frame =
- (cm->frame_type == KEY_FRAME ||
+ int is_key_frame =
+ (frame_is_intra_only(cm) ||
(is_one_pass_cbr_svc(cpi) &&
cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame));
// Always use 4x4 partition for key frame.
- const int use_4x4_partition = cm->frame_type == KEY_FRAME;
+ const int use_4x4_partition = frame_is_intra_only(cm);
const int low_res = (cm->width <= 352 && cm->height <= 288);
int variance4x4downsample[16];
int segment_id;
int sb_offset = (cm->mi_stride >> 3) * (mi_row >> 3) + (mi_col >> 3);
+ // For SVC: check if the LAST frame is NULL or if the resolution of LAST
+ // differs from the current frame resolution, and if so, treat this frame
+ // as a key frame for the purpose of superblock partitioning.
+ // LAST == NULL can happen in some cases where enhancement spatial layers
+ // are enabled dynamically in the stream and the only reference is the
+ // spatial reference (GOLDEN).
+ if (cpi->use_svc) {
+ const YV12_BUFFER_CONFIG *const ref = get_ref_frame_buffer(cpi, LAST_FRAME);
+ if (ref == NULL || ref->y_crop_height != cm->height ||
+ ref->y_crop_width != cm->width)
+ is_key_frame = 1;
+ }
+
set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64);
+ set_segment_index(cpi, x, mi_row, mi_col, BLOCK_64X64, 0);
segment_id = xd->mi[0]->segment_id;
if (cpi->oxcf.speed >= 8 || (cpi->use_svc && cpi->svc.non_reference_frame))
@@ -1289,6 +1326,7 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
}
// If source_sad is low, copy the partition without computing the y_sad.
if (x->skip_low_source_sad && cpi->sf.copy_partition_flag &&
+ !scene_change_detected &&
copy_partitioning(cpi, x, xd, mi_row, mi_col, segment_id, sb_offset)) {
x->sb_use_mv_part = 1;
if (cpi->sf.svc_use_lowres_part &&
@@ -1317,7 +1355,7 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
// Index for force_split: 0 for 64x64, 1-4 for 32x32 blocks,
// 5-20 for the 16x16 blocks.
- force_split[0] = 0;
+ force_split[0] = scene_change_detected;
if (!is_key_frame) {
// In the case of spatial/temporal scalable coding, the assumption here is
@@ -1333,7 +1371,8 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
assert(yv12 != NULL);
- if (!(is_one_pass_cbr_svc(cpi) && cpi->svc.spatial_layer_id)) {
+ if (!(is_one_pass_cbr_svc(cpi) && cpi->svc.spatial_layer_id) ||
+ cpi->svc.use_gf_temporal_ref_current_layer) {
// For now, GOLDEN will not be used for non-zero spatial layers, since
// it may not be a temporal reference.
yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
@@ -1374,10 +1413,26 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf,
xd->plane[0].pre[0].stride);
} else {
- y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col);
+ const MV dummy_mv = { 0, 0 };
+ y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col,
+ &dummy_mv);
x->sb_use_mv_part = 1;
x->sb_mvcol_part = mi->mv[0].as_mv.col;
x->sb_mvrow_part = mi->mv[0].as_mv.row;
+ if (cpi->oxcf.content == VP9E_CONTENT_SCREEN &&
+ cpi->svc.spatial_layer_id == 0 &&
+ cpi->svc.high_num_blocks_with_motion && !x->zero_temp_sad_source &&
+ cm->width > 640 && cm->height > 480) {
+ // Disable split below 16x16 block size when scroll motion is detected.
+ // TODO(marpan/jianj): Improve this condition: the issue is that the
+ // search range is hard-coded/limited in vp9_int_pro_motion_estimation(),
+ // so scroll motion may not be detected here.
+ if ((abs(x->sb_mvrow_part) >= 48 && abs(x->sb_mvcol_part) <= 8) ||
+ y_sad < 100000) {
+ compute_minmax_variance = 0;
+ thresholds[2] = INT64_MAX;
+ }
+ }
}
y_sad_last = y_sad;
@@ -1513,9 +1568,9 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
}
}
}
- if (is_key_frame || (low_res &&
- vt.split[i].split[j].part_variances.none.variance >
- threshold_4x4avg)) {
+ if (is_key_frame ||
+ (low_res && vt.split[i].split[j].part_variances.none.variance >
+ threshold_4x4avg)) {
force_split[split_index] = 0;
// Go down to 4x4 down-sampling for variance.
variance4x4downsample[i2 + j] = 1;
@@ -1648,11 +1703,11 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
}
}
- if (cm->frame_type != KEY_FRAME && cpi->sf.copy_partition_flag) {
+ if (!frame_is_intra_only(cm) && cpi->sf.copy_partition_flag) {
update_prev_partition(cpi, x, segment_id, mi_row, mi_col, sb_offset);
}
- if (cm->frame_type != KEY_FRAME && cpi->sf.svc_use_lowres_part &&
+ if (!frame_is_intra_only(cm) && cpi->sf.svc_use_lowres_part &&
cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 2)
update_partition_svc(cpi, BLOCK_64X64, mi_row, mi_col);
@@ -1836,14 +1891,33 @@ static void set_mode_info_seg_skip(MACROBLOCK *x, TX_MODE tx_mode,
vp9_rd_cost_init(rd_cost);
}
-static int set_segment_rdmult(VP9_COMP *const cpi, MACROBLOCK *const x,
- int8_t segment_id) {
+static void set_segment_rdmult(VP9_COMP *const cpi, MACROBLOCK *const x,
+ int mi_row, int mi_col, BLOCK_SIZE bsize,
+ AQ_MODE aq_mode) {
int segment_qindex;
VP9_COMMON *const cm = &cpi->common;
+ const uint8_t *const map =
+ cm->seg.update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
+
vp9_init_plane_quantizers(cpi, x);
vpx_clear_system_state();
- segment_qindex = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex);
- return vp9_compute_rd_mult(cpi, segment_qindex + cm->y_dc_delta_q);
+ segment_qindex =
+ vp9_get_qindex(&cm->seg, x->e_mbd.mi[0]->segment_id, cm->base_qindex);
+
+ if (aq_mode == NO_AQ || aq_mode == PSNR_AQ) {
+ if (cpi->sf.enable_tpl_model) x->rdmult = x->cb_rdmult;
+ return;
+ }
+
+ if (aq_mode == CYCLIC_REFRESH_AQ) {
+ // If segment is boosted, use rdmult for that segment.
+ if (cyclic_refresh_segment_id_boosted(
+ get_segment_id(cm, map, bsize, mi_row, mi_col)))
+ x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);
+ return;
+ }
+
+ x->rdmult = vp9_compute_rd_mult(cpi, segment_qindex + cm->y_dc_delta_q);
}
static void rd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data,
@@ -1914,44 +1988,8 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data,
x->block_qcoeff_opt = cpi->sf.allow_quant_coeff_opt;
}
- if (aq_mode == VARIANCE_AQ) {
- const int energy =
- bsize <= BLOCK_16X16 ? x->mb_energy : vp9_block_energy(cpi, x, bsize);
-
- if (cm->frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame ||
- cpi->force_update_segmentation ||
- (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) {
- mi->segment_id = vp9_vaq_segment_id(energy);
- } else {
- const uint8_t *const map =
- cm->seg.update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
- mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
- }
- x->rdmult = set_segment_rdmult(cpi, x, mi->segment_id);
- } else if (aq_mode == LOOKAHEAD_AQ) {
- const uint8_t *const map = cpi->segmentation_map;
-
- // I do not change rdmult here consciously.
- mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
- } else if (aq_mode == EQUATOR360_AQ) {
- if (cm->frame_type == KEY_FRAME || cpi->force_update_segmentation) {
- mi->segment_id = vp9_360aq_segment_id(mi_row, cm->mi_rows);
- } else {
- const uint8_t *const map =
- cm->seg.update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
- mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
- }
- x->rdmult = set_segment_rdmult(cpi, x, mi->segment_id);
- } else if (aq_mode == COMPLEXITY_AQ) {
- x->rdmult = set_segment_rdmult(cpi, x, mi->segment_id);
- } else if (aq_mode == CYCLIC_REFRESH_AQ) {
- const uint8_t *const map =
- cm->seg.update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
- // If segment is boosted, use rdmult for that segment.
- if (cyclic_refresh_segment_id_boosted(
- get_segment_id(cm, map, bsize, mi_row, mi_col)))
- x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);
- }
+ set_segment_index(cpi, x, mi_row, mi_col, bsize, 0);
+ set_segment_rdmult(cpi, x, mi_row, mi_col, bsize, aq_mode);
// Find best coding mode & reconstruct the MB so it is available
// as a predictor for MBs that follow in the SB
@@ -1979,11 +2017,14 @@ static void rd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data,
vp9_caq_select_segment(cpi, x, bsize, mi_row, mi_col, rd_cost->rate);
}
- x->rdmult = orig_rdmult;
-
// TODO(jingning) The rate-distortion optimization flow needs to be
// refactored to provide proper exit/return handling.
- if (rd_cost->rate == INT_MAX) rd_cost->rdcost = INT64_MAX;
+ if (rd_cost->rate == INT_MAX)
+ rd_cost->rdcost = INT64_MAX;
+ else
+ rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
+
+ x->rdmult = orig_rdmult;
ctx->rate = rd_cost->rate;
ctx->dist = rd_cost->dist;
@@ -2013,8 +2054,10 @@ static void update_stats(VP9_COMMON *cm, ThreadData *td) {
[has_second_ref(mi)]++;
if (has_second_ref(mi)) {
- counts->comp_ref[vp9_get_pred_context_comp_ref_p(cm, xd)]
- [ref0 == GOLDEN_FRAME]++;
+ const int idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref];
+ const int ctx = vp9_get_pred_context_comp_ref_p(cm, xd);
+ const int bit = mi->ref_frame[!idx] == cm->comp_var_ref[1];
+ counts->comp_ref[ctx][bit]++;
} else {
counts->single_ref[vp9_get_pred_context_single_ref_p1(xd)][0]
[ref0 != LAST_FRAME]++;
@@ -2110,6 +2153,10 @@ static void encode_b(VP9_COMP *cpi, const TileInfo *const tile, ThreadData *td,
PICK_MODE_CONTEXT *ctx) {
MACROBLOCK *const x = &td->mb;
set_offsets(cpi, tile, x, mi_row, mi_col, bsize);
+
+ if (cpi->sf.enable_tpl_model && cpi->oxcf.aq_mode == NO_AQ)
+ x->rdmult = x->cb_rdmult;
+
update_state(cpi, td, ctx, mi_row, mi_col, bsize, output_enabled);
encode_superblock(cpi, td, tp, output_enabled, mi_row, mi_col, bsize, ctx);
@@ -2168,7 +2215,8 @@ static void encode_sb(VP9_COMP *cpi, ThreadData *td, const TileInfo *const tile,
subsize, &pc_tree->horizontal[1]);
}
break;
- case PARTITION_SPLIT:
+ default:
+ assert(partition == PARTITION_SPLIT);
if (bsize == BLOCK_8X8) {
encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize,
pc_tree->leaf_split[0]);
@@ -2183,7 +2231,6 @@ static void encode_sb(VP9_COMP *cpi, ThreadData *td, const TileInfo *const tile,
subsize, pc_tree->split[3]);
}
break;
- default: assert(0 && "Invalid partition type."); break;
}
if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)
@@ -2441,7 +2488,7 @@ static void update_state_rt(VP9_COMP *cpi, ThreadData *td,
}
x->skip = ctx->skip;
- x->skip_txfm[0] = mi->segment_id ? 0 : ctx->skip_txfm[0];
+ x->skip_txfm[0] = (mi->segment_id || xd->lossless) ? 0 : ctx->skip_txfm[0];
}
static void encode_b_rt(VP9_COMP *cpi, ThreadData *td,
@@ -2509,7 +2556,8 @@ static void encode_sb_rt(VP9_COMP *cpi, ThreadData *td,
subsize, &pc_tree->horizontal[1]);
}
break;
- case PARTITION_SPLIT:
+ default:
+ assert(partition == PARTITION_SPLIT);
subsize = get_subsize(bsize, PARTITION_SPLIT);
encode_sb_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize,
pc_tree->split[0]);
@@ -2520,7 +2568,6 @@ static void encode_sb_rt(VP9_COMP *cpi, ThreadData *td,
encode_sb_rt(cpi, td, tile, tp, mi_row + hbs, mi_col + hbs,
output_enabled, subsize, pc_tree->split[3]);
break;
- default: assert(0 && "Invalid partition type."); break;
}
if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)
@@ -2617,6 +2664,7 @@ static void rd_use_partition(VP9_COMP *cpi, ThreadData *td,
ctx, INT64_MAX);
break;
case PARTITION_HORZ:
+ pc_tree->horizontal[0].skip_ref_frame_mask = 0;
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
subsize, &pc_tree->horizontal[0], INT64_MAX);
if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
@@ -2626,6 +2674,7 @@ static void rd_use_partition(VP9_COMP *cpi, ThreadData *td,
vp9_rd_cost_init(&tmp_rdc);
update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0);
encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx);
+ pc_tree->horizontal[1].skip_ref_frame_mask = 0;
rd_pick_sb_modes(cpi, tile_data, x, mi_row + (mi_step >> 1), mi_col,
&tmp_rdc, subsize, &pc_tree->horizontal[1], INT64_MAX);
if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
@@ -2638,6 +2687,7 @@ static void rd_use_partition(VP9_COMP *cpi, ThreadData *td,
}
break;
case PARTITION_VERT:
+ pc_tree->vertical[0].skip_ref_frame_mask = 0;
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
subsize, &pc_tree->vertical[0], INT64_MAX);
if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
@@ -2647,6 +2697,7 @@ static void rd_use_partition(VP9_COMP *cpi, ThreadData *td,
vp9_rd_cost_init(&tmp_rdc);
update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0);
encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx);
+ pc_tree->vertical[bsize > BLOCK_8X8].skip_ref_frame_mask = 0;
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + (mi_step >> 1),
&tmp_rdc, subsize,
&pc_tree->vertical[bsize > BLOCK_8X8], INT64_MAX);
@@ -2659,7 +2710,8 @@ static void rd_use_partition(VP9_COMP *cpi, ThreadData *td,
last_part_rdc.rdcost += tmp_rdc.rdcost;
}
break;
- case PARTITION_SPLIT:
+ default:
+ assert(partition == PARTITION_SPLIT);
if (bsize == BLOCK_8X8) {
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
subsize, pc_tree->leaf_split[0], INT64_MAX);
@@ -2689,7 +2741,6 @@ static void rd_use_partition(VP9_COMP *cpi, ThreadData *td,
last_part_rdc.dist += tmp_rdc.dist;
}
break;
- default: assert(0); break;
}
pl = partition_plane_context(xd, mi_row, mi_col, bsize);
@@ -3018,14 +3069,59 @@ static INLINE int get_motion_inconsistency(MOTION_DIRECTION this_mv,
}
#endif
-// Calculate the score used in machine-learning based partition search early
-// termination.
-static double compute_score(VP9_COMMON *const cm, MACROBLOCKD *const xd,
- PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col,
- BLOCK_SIZE bsize) {
- const double *clf;
- const double *mean;
- const double *sd;
+// Calculate the prediction based on the given input features and the neural
+// net config. Assume there are no more than NN_MAX_NODES_PER_LAYER nodes in
+// each hidden layer.
+static void nn_predict(const float *features, const NN_CONFIG *nn_config,
+ float *output) {
+ int num_input_nodes = nn_config->num_inputs;
+ int buf_index = 0;
+ float buf[2][NN_MAX_NODES_PER_LAYER];
+ const float *input_nodes = features;
+
+ // Propagate hidden layers.
+ const int num_layers = nn_config->num_hidden_layers;
+ int layer, node, i;
+ assert(num_layers <= NN_MAX_HIDDEN_LAYERS);
+ for (layer = 0; layer < num_layers; ++layer) {
+ const float *weights = nn_config->weights[layer];
+ const float *bias = nn_config->bias[layer];
+ float *output_nodes = buf[buf_index];
+ const int num_output_nodes = nn_config->num_hidden_nodes[layer];
+ assert(num_output_nodes < NN_MAX_NODES_PER_LAYER);
+ for (node = 0; node < num_output_nodes; ++node) {
+ float val = 0.0f;
+ for (i = 0; i < num_input_nodes; ++i) val += weights[i] * input_nodes[i];
+ val += bias[node];
+ // ReLU as activation function.
+ val = VPXMAX(val, 0.0f);
+ output_nodes[node] = val;
+ weights += num_input_nodes;
+ }
+ num_input_nodes = num_output_nodes;
+ input_nodes = output_nodes;
+ buf_index = 1 - buf_index;
+ }
+
+ // Final output layer.
+ {
+ const float *weights = nn_config->weights[num_layers];
+ for (node = 0; node < nn_config->num_outputs; ++node) {
+ const float *bias = nn_config->bias[num_layers];
+ float val = 0.0f;
+ for (i = 0; i < num_input_nodes; ++i) val += weights[i] * input_nodes[i];
+ output[node] = val + bias[node];
+ weights += num_input_nodes;
+ }
+ }
+}
+
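+// Illustrative sketch, not part of the original change: the same forward
+// pass as nn_predict() above, unrolled for a toy 2-input / 2-hidden /
+// 1-output ReLU network with made-up weights, to show how the weight
+// pointer advances by num_input_nodes for each output node.
+static float toy_nn_forward(const float in[2]) {
+  static const float w_hidden[4] = { 0.5f, -0.25f, 1.0f, 0.75f };  // 2x2
+  static const float b_hidden[2] = { 0.1f, -0.2f };
+  static const float w_out[2] = { 1.5f, -0.5f };
+  float hidden[2];
+  float out = 0.05f;  // Output-layer bias.
+  int node, i;
+  for (node = 0; node < 2; ++node) {
+    float val = b_hidden[node];
+    // Weights for node n start at offset n * num_input_nodes.
+    for (i = 0; i < 2; ++i) val += w_hidden[node * 2 + i] * in[i];
+    hidden[node] = VPXMAX(val, 0.0f);  // ReLU, as in nn_predict().
+  }
+  for (i = 0; i < 2; ++i) out += w_out[i] * hidden[i];
+  return out;
+}
+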
+#define FEATURES 7
+// Machine-learning based partition search early termination.
+// Return 1 to skip split and rect partitions.
+static int ml_pruning_partition(VP9_COMMON *const cm, MACROBLOCKD *const xd,
+ PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col,
+ BLOCK_SIZE bsize) {
const int mag_mv =
abs(ctx->mic.mv[0].as_mv.col) + abs(ctx->mic.mv[0].as_mv.row);
const int left_in_image = !!xd->left_mi;
@@ -3035,11 +3131,32 @@ static double compute_score(VP9_COMMON *const cm, MACROBLOCKD *const xd,
int above_par = 0; // above_partitioning
int left_par = 0; // left_partitioning
int last_par = 0; // last_partitioning
- BLOCK_SIZE context_size;
- double score;
int offset = 0;
+ int i;
+ BLOCK_SIZE context_size;
+ const NN_CONFIG *nn_config = NULL;
+ const float *mean, *sd, *linear_weights;
+ float nn_score, linear_score;
+ float features[FEATURES];
assert(b_width_log2_lookup[bsize] == b_height_log2_lookup[bsize]);
+ vpx_clear_system_state();
+
+ switch (bsize) {
+ case BLOCK_64X64:
+ offset = 0;
+ nn_config = &vp9_partition_nnconfig_64x64;
+ break;
+ case BLOCK_32X32:
+ offset = 8;
+ nn_config = &vp9_partition_nnconfig_32x32;
+ break;
+ case BLOCK_16X16:
+ offset = 16;
+ nn_config = &vp9_partition_nnconfig_16x16;
+ break;
+ default: assert(0 && "Unexpected block size."); return 0;
+ }
if (above_in_image) {
context_size = xd->above_mi->sb_type;
@@ -3065,25 +3182,458 @@ static double compute_score(VP9_COMMON *const cm, MACROBLOCKD *const xd,
last_par = 1;
}
- if (bsize == BLOCK_64X64)
- offset = 0;
- else if (bsize == BLOCK_32X32)
- offset = 8;
- else if (bsize == BLOCK_16X16)
- offset = 16;
-
- // early termination score calculation
- clf = &classifiers[offset];
- mean = &train_mean[offset];
- sd = &train_stdm[offset];
- score = clf[0] * (((double)ctx->rate - mean[0]) / sd[0]) +
- clf[1] * (((double)ctx->dist - mean[1]) / sd[1]) +
- clf[2] * (((double)mag_mv / 2 - mean[2]) * sd[2]) +
- clf[3] * (((double)(left_par + above_par) / 2 - mean[3]) * sd[3]) +
- clf[4] * (((double)ctx->sum_y_eobs - mean[4]) / sd[4]) +
- clf[5] * (((double)cm->base_qindex - mean[5]) * sd[5]) +
- clf[6] * (((double)last_par - mean[6]) * sd[6]) + clf[7];
- return score;
+ mean = &vp9_partition_feature_mean[offset];
+ sd = &vp9_partition_feature_std[offset];
+ features[0] = ((float)ctx->rate - mean[0]) / sd[0];
+ features[1] = ((float)ctx->dist - mean[1]) / sd[1];
+ features[2] = ((float)mag_mv / 2 - mean[2]) * sd[2];
+ features[3] = ((float)(left_par + above_par) / 2 - mean[3]) * sd[3];
+ features[4] = ((float)ctx->sum_y_eobs - mean[4]) / sd[4];
+ features[5] = ((float)cm->base_qindex - mean[5]) * sd[5];
+ features[6] = ((float)last_par - mean[6]) * sd[6];
+
+ // Predict using linear model.
+ linear_weights = &vp9_partition_linear_weights[offset];
+ linear_score = linear_weights[FEATURES];
+ for (i = 0; i < FEATURES; ++i)
+ linear_score += linear_weights[i] * features[i];
+ if (linear_score > 0.1f) return 0;
+
+ // Predict using neural net model.
+ nn_predict(features, nn_config, &nn_score);
+
+ if (linear_score < -0.0f && nn_score < 0.1f) return 1;
+ if (nn_score < -0.0f && linear_score < 0.1f) return 1;
+ return 0;
+}
+#undef FEATURES
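+
+// Illustrative sketch, not part of the original change: the decision rule
+// from ml_pruning_partition() isolated as a predicate. A clearly positive
+// linear score keeps the full search, and pruning requires both models to
+// score low. (The -0.0f comparisons mirror the code above and behave the
+// same as 0.0f.)
+static int toy_should_prune(float linear_score, float nn_score) {
+  if (linear_score > 0.1f) return 0;  // Linear model vetoes pruning.
+  if (linear_score < -0.0f && nn_score < 0.1f) return 1;
+  if (nn_score < -0.0f && linear_score < 0.1f) return 1;
+  return 0;
+}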
+
+#define FEATURES 4
+// ML-based partition search breakout.
+static int ml_predict_breakout(VP9_COMP *const cpi, BLOCK_SIZE bsize,
+ const MACROBLOCK *const x,
+ const RD_COST *const rd_cost) {
+ DECLARE_ALIGNED(16, static const uint8_t, vp9_64_zeros[64]) = { 0 };
+ const VP9_COMMON *const cm = &cpi->common;
+ float features[FEATURES];
+ const float *linear_weights = NULL; // Linear model weights.
+ float linear_score = 0.0f;
+ const int qindex = cm->base_qindex;
+ const int q_ctx = qindex >= 200 ? 0 : (qindex >= 150 ? 1 : 2);
+ const int is_720p_or_larger = VPXMIN(cm->width, cm->height) >= 720;
+ const int resolution_ctx = is_720p_or_larger ? 1 : 0;
+
+ switch (bsize) {
+ case BLOCK_64X64:
+ linear_weights = vp9_partition_breakout_weights_64[resolution_ctx][q_ctx];
+ break;
+ case BLOCK_32X32:
+ linear_weights = vp9_partition_breakout_weights_32[resolution_ctx][q_ctx];
+ break;
+ case BLOCK_16X16:
+ linear_weights = vp9_partition_breakout_weights_16[resolution_ctx][q_ctx];
+ break;
+ case BLOCK_8X8:
+ linear_weights = vp9_partition_breakout_weights_8[resolution_ctx][q_ctx];
+ break;
+ default: assert(0 && "Unexpected block size."); return 0;
+ }
+ if (!linear_weights) return 0;
+
+ { // Generate feature values.
+#if CONFIG_VP9_HIGHBITDEPTH
+ const int ac_q =
+ vp9_ac_quant(cm->base_qindex, 0, cm->bit_depth) >> (x->e_mbd.bd - 8);
+#else
+ const int ac_q = vp9_ac_quant(qindex, 0, cm->bit_depth);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ const int num_pels_log2 = num_pels_log2_lookup[bsize];
+ int feature_index = 0;
+ unsigned int var, sse;
+ float rate_f, dist_f;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ var =
+ vp9_high_get_sby_variance(cpi, &x->plane[0].src, bsize, x->e_mbd.bd);
+ } else {
+ var = cpi->fn_ptr[bsize].vf(x->plane[0].src.buf, x->plane[0].src.stride,
+ vp9_64_zeros, 0, &sse);
+ }
+#else
+ var = cpi->fn_ptr[bsize].vf(x->plane[0].src.buf, x->plane[0].src.stride,
+ vp9_64_zeros, 0, &sse);
+#endif
+ var = var >> num_pels_log2;
+
+ vpx_clear_system_state();
+
+ rate_f = (float)VPXMIN(rd_cost->rate, INT_MAX);
+ dist_f = (float)(VPXMIN(rd_cost->dist, INT_MAX) >> num_pels_log2);
+ rate_f =
+ ((float)x->rdmult / 128.0f / 512.0f / (float)(1 << num_pels_log2)) *
+ rate_f;
+
+ features[feature_index++] = rate_f;
+ features[feature_index++] = dist_f;
+ features[feature_index++] = (float)var;
+ features[feature_index++] = (float)ac_q;
+ assert(feature_index == FEATURES);
+ }
+
+ { // Calculate the output score.
+ int i;
+ linear_score = linear_weights[FEATURES];
+ for (i = 0; i < FEATURES; ++i)
+ linear_score += linear_weights[i] * features[i];
+ }
+
+ return linear_score >= cpi->sf.ml_partition_search_breakout_thresh[q_ctx];
+}
+#undef FEATURES
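+
+// Illustrative sketch, not part of the original change: how the two context
+// indices above select a weight set. FEATURES is 4, so each set holds five
+// floats (four weights plus a bias); e.g. qindex 180 on a 1280x720 frame
+// maps to q_ctx 1 and resolution_ctx 1.
+static const float *toy_select_breakout_weights(
+    const float weights[2][3][5], int qindex, int width, int height) {
+  const int q_ctx = qindex >= 200 ? 0 : (qindex >= 150 ? 1 : 2);
+  const int resolution_ctx = VPXMIN(width, height) >= 720 ? 1 : 0;
+  return weights[resolution_ctx][q_ctx];
+}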
+
+#define FEATURES 17
+#define LABELS 4
+static void ml_prune_rect_partition(VP9_COMP *const cpi, MACROBLOCK *const x,
+ BLOCK_SIZE bsize,
+ const PC_TREE *const pc_tree,
+ int *allow_horz, int *allow_vert,
+ int64_t ref_rd, int mi_row, int mi_col) {
+ const NN_CONFIG *nn_config = NULL;
+ float score[LABELS] = {
+ 0.0f,
+ };
+ int thresh = -1;
+ int i;
+
+ if (ref_rd <= 0 || ref_rd > 1000000000) return;
+
+ switch (bsize) {
+ case BLOCK_8X8: break;
+ case BLOCK_16X16:
+ nn_config = &vp9_rect_part_nnconfig_16;
+ thresh = cpi->sf.ml_prune_rect_partition_threhold[1];
+ break;
+ case BLOCK_32X32:
+ nn_config = &vp9_rect_part_nnconfig_32;
+ thresh = cpi->sf.ml_prune_rect_partition_threhold[2];
+ break;
+ case BLOCK_64X64:
+ nn_config = &vp9_rect_part_nnconfig_64;
+ thresh = cpi->sf.ml_prune_rect_partition_threhold[3];
+ break;
+ default: assert(0 && "Unexpected block size."); return;
+ }
+ if (!nn_config || thresh < 0) return;
+
+ // Feature extraction and model score calculation.
+ {
+ const int64_t none_rdcost = pc_tree->none.rdcost;
+ const VP9_COMMON *const cm = &cpi->common;
+#if CONFIG_VP9_HIGHBITDEPTH
+ const int dc_q =
+ vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth) >> (x->e_mbd.bd - 8);
+#else
+ const int dc_q = vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ int feature_index = 0;
+ unsigned int block_var = 0;
+ unsigned int sub_block_var[4] = { 0 };
+ float features[FEATURES];
+
+ features[feature_index++] =
+ (float)(pc_tree->partitioning == PARTITION_NONE);
+ features[feature_index++] = logf((float)(dc_q * dc_q) / 256.0f + 1.0f);
+
+ // Calculate source pixel variance.
+ {
+ struct buf_2d buf;
+ const BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT);
+ const int bs = 4 * num_4x4_blocks_wide_lookup[bsize];
+ const MACROBLOCKD *const xd = &x->e_mbd;
+ vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);
+
+ (void)xd;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ block_var = vp9_high_get_sby_perpixel_variance(cpi, &x->plane[0].src,
+ bsize, xd->bd);
+ } else {
+ block_var = vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
+ }
+#else
+ block_var = vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ buf.stride = x->plane[0].src.stride;
+ for (i = 0; i < 4; ++i) {
+ const int x_idx = (i & 1) * bs / 2;
+ const int y_idx = (i >> 1) * bs / 2;
+ buf.buf = x->plane[0].src.buf + x_idx + y_idx * buf.stride;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ sub_block_var[i] =
+ vp9_high_get_sby_perpixel_variance(cpi, &buf, subsize, xd->bd);
+ } else {
+ sub_block_var[i] = vp9_get_sby_perpixel_variance(cpi, &buf, subsize);
+ }
+#else
+ sub_block_var[i] = vp9_get_sby_perpixel_variance(cpi, &buf, subsize);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ }
+ }
+
+ features[feature_index++] = logf((float)block_var + 1.0f);
+ features[feature_index++] = logf((float)ref_rd + 1.0f);
+ features[feature_index++] = (none_rdcost > 0 && none_rdcost < 1000000000)
+ ? (float)pc_tree->none.skippable
+ : 0.0f;
+
+ for (i = 0; i < 4; ++i) {
+ const int64_t this_rd = pc_tree->split[i]->none.rdcost;
+ const int rd_valid = this_rd > 0 && this_rd < 1000000000;
+ // Ratio between sub-block RD and whole block RD.
+ features[feature_index++] =
+ rd_valid ? ((float)this_rd / (float)ref_rd) : 1.0f;
+ // Sub-block skippable.
+ features[feature_index++] =
+ rd_valid ? ((float)pc_tree->split[i]->none.skippable) : 0.0f;
+ }
+
+ {
+ const float denom = (float)(block_var + 1);
+ const float low_b = 0.1f;
+ const float high_b = 10.0f;
+ for (i = 0; i < 4; ++i) {
+ // Ratio between the quarter sub-block variance and the
+ // whole-block variance.
+ float var_ratio = (float)(sub_block_var[i] + 1) / denom;
+ if (var_ratio < low_b) var_ratio = low_b;
+ if (var_ratio > high_b) var_ratio = high_b;
+ features[feature_index++] = var_ratio;
+ }
+ }
+ assert(feature_index == FEATURES);
+ nn_predict(features, nn_config, score);
+ }
+
+ // Make decisions based on the model score.
+ {
+ int max_score = -1000;
+ int horz = 0, vert = 0;
+ int int_score[LABELS];
+ for (i = 0; i < LABELS; ++i) {
+ int_score[i] = (int)(100 * score[i]);
+ max_score = VPXMAX(int_score[i], max_score);
+ }
+ thresh = max_score - thresh;
+ for (i = 0; i < LABELS; ++i) {
+ if (int_score[i] >= thresh) {
+ if ((i >> 0) & 1) horz = 1;
+ if ((i >> 1) & 1) vert = 1;
+ }
+ }
+ *allow_horz = *allow_horz && horz;
+ *allow_vert = *allow_vert && vert;
+ }
+}
+#undef FEATURES
+#undef LABELS
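+
+// Illustrative sketch, not part of the original change: the label decoding
+// used in ml_prune_rect_partition(). Each of the four labels encodes a
+// (horz, vert) pair in its index bits -- bit 0 allows PARTITION_HORZ and
+// bit 1 allows PARTITION_VERT -- and every label scoring within thresh_gap
+// of the best label keeps its partitions enabled.
+static void toy_decode_rect_labels(const int int_score[4], int thresh_gap,
+                                   int *horz, int *vert) {
+  int i, max_score = -1000;
+  *horz = *vert = 0;
+  for (i = 0; i < 4; ++i) max_score = VPXMAX(int_score[i], max_score);
+  for (i = 0; i < 4; ++i) {
+    if (int_score[i] >= max_score - thresh_gap) {
+      if (i & 1) *horz = 1;
+      if ((i >> 1) & 1) *vert = 1;
+    }
+  }
+}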
+
+// Use a neural net model to prune partition-none and partition-split search.
+// The model uses prediction residue variance and quantization step size as
+// input features.
+#define FEATURES 6
+static void ml_predict_var_rd_paritioning(VP9_COMP *cpi, MACROBLOCK *x,
+ BLOCK_SIZE bsize, int mi_row,
+ int mi_col, int *none, int *split) {
+ VP9_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *xd = &x->e_mbd;
+ MODE_INFO *mi = xd->mi[0];
+ const NN_CONFIG *nn_config = NULL;
+#if CONFIG_VP9_HIGHBITDEPTH
+ DECLARE_ALIGNED(16, uint8_t, pred_buffer[64 * 64 * 2]);
+ uint8_t *const pred_buf = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ ? (CONVERT_TO_BYTEPTR(pred_buffer))
+ : pred_buffer;
+#else
+ DECLARE_ALIGNED(16, uint8_t, pred_buffer[64 * 64]);
+ uint8_t *const pred_buf = pred_buffer;
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ const int speed = cpi->oxcf.speed;
+ int i;
+ float thresh = 0.0f;
+
+ switch (bsize) {
+ case BLOCK_64X64:
+ nn_config = &vp9_var_rd_part_nnconfig_64;
+ thresh = speed > 0 ? 3.5f : 3.0f;
+ break;
+ case BLOCK_32X32:
+ nn_config = &vp9_var_rd_part_nnconfig_32;
+ thresh = speed > 0 ? 3.5f : 3.0f;
+ break;
+ case BLOCK_16X16:
+ nn_config = &vp9_var_rd_part_nnconfig_16;
+ thresh = speed > 0 ? 3.5f : 4.0f;
+ break;
+ case BLOCK_8X8:
+ nn_config = &vp9_var_rd_part_nnconfig_8;
+ if (cm->width >= 720 && cm->height >= 720)
+ thresh = speed > 0 ? 2.5f : 2.0f;
+ else
+ thresh = speed > 0 ? 3.5f : 2.0f;
+ break;
+ default: assert(0 && "Unexpected block size."); return;
+ }
+
+ if (!nn_config) return;
+
+ mi->ref_frame[1] = NONE;
+ mi->sb_type = bsize;
+ // Do a simple single motion search to find a prediction for the current
+ // block. The variances of the residue will be used as input features.
+ {
+ const MV_REFERENCE_FRAME ref =
+ cpi->rc.is_src_frame_alt_ref ? ALTREF_FRAME : LAST_FRAME;
+ YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref);
+ MV ref_mv = { 0, 0 };
+ MV ref_mv_full = { 0, 0 };
+ const int step_param = 1;
+ const MvLimits tmp_mv_limits = x->mv_limits;
+ const SEARCH_METHODS search_method = NSTEP;
+ const int sadpb = x->sadperbit16;
+ MV best_mv = { 0, 0 };
+ int cost_list[5];
+
+ assert(yv12 != NULL);
+ if (!yv12) return;
+ vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
+ &cm->frame_refs[ref - 1].sf);
+ mi->ref_frame[0] = ref;
+ vp9_set_mv_search_range(&x->mv_limits, &ref_mv);
+ vp9_full_pixel_search(cpi, x, bsize, &ref_mv_full, step_param,
+ search_method, sadpb, cond_cost_list(cpi, cost_list),
+ &ref_mv, &best_mv, 0, 0);
+ best_mv.row *= 8;
+ best_mv.col *= 8;
+ x->mv_limits = tmp_mv_limits;
+ mi->mv[0].as_mv = best_mv;
+
+ set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
+ xd->plane[0].dst.buf = pred_buf;
+ xd->plane[0].dst.stride = 64;
+ vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
+ }
+
+ vpx_clear_system_state();
+
+ {
+ float features[FEATURES] = { 0.0f };
+#if CONFIG_VP9_HIGHBITDEPTH
+ const int dc_q =
+ vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth) >> (xd->bd - 8);
+#else
+ const int dc_q = vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ int feature_idx = 0;
+ float score;
+
+ // Generate model input features.
+ features[feature_idx++] = logf((float)(dc_q * dc_q) / 256.0f + 1.0f);
+ vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);
+ // Get the variances of the residue as input features.
+ {
+ const int bs = 4 * num_4x4_blocks_wide_lookup[bsize];
+ const BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT);
+ const uint8_t *pred = pred_buf;
+ const uint8_t *src = x->plane[0].src.buf;
+ const int src_stride = x->plane[0].src.stride;
+ const int pred_stride = 64;
+ unsigned int sse;
+ // Variance of whole block.
+ const unsigned int var =
+ cpi->fn_ptr[bsize].vf(src, src_stride, pred, pred_stride, &sse);
+ const float factor = (var == 0) ? 1.0f : (1.0f / (float)var);
+
+ features[feature_idx++] = logf((float)var + 1.0f);
+ for (i = 0; i < 4; ++i) {
+ const int x_idx = (i & 1) * bs / 2;
+ const int y_idx = (i >> 1) * bs / 2;
+ const int src_offset = y_idx * src_stride + x_idx;
+ const int pred_offset = y_idx * pred_stride + x_idx;
+ // Variance of quarter block.
+ const unsigned int sub_var =
+ cpi->fn_ptr[subsize].vf(src + src_offset, src_stride,
+ pred + pred_offset, pred_stride, &sse);
+ const float var_ratio = (var == 0) ? 1.0f : factor * (float)sub_var;
+ features[feature_idx++] = var_ratio;
+ }
+ }
+ assert(feature_idx == FEATURES);
+
+ // Feed the features into the model to get the confidence score.
+ nn_predict(features, nn_config, &score);
+
+ // Higher score means that the model has higher confidence that the split
+ // partition is better than the non-split partition. So if the score is
+ // high enough, we skip the non-split partition search; if the score is
+ // low enough, we skip the split partition search.
+ if (score > thresh) *none = 0;
+ if (score < -thresh) *split = 0;
+ }
+}
+#undef FEATURES
+#undef LABELS
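+
+// Illustrative sketch, not part of the original change: the residue-variance
+// features computed above, isolated. With a whole-block variance of 400 and
+// quarter variances of { 100, 100, 500, 900 }, the ratio features come out
+// as 0.25, 0.25, 1.25 and 2.25, and the first feature is logf(401.0f).
+static void toy_var_ratio_features(unsigned int var,
+                                   const unsigned int sub_var[4],
+                                   float out[5]) {
+  const float factor = (var == 0) ? 1.0f : (1.0f / (float)var);
+  int i;
+  out[0] = logf((float)var + 1.0f);
+  for (i = 0; i < 4; ++i)
+    out[1 + i] = (var == 0) ? 1.0f : factor * (float)sub_var[i];
+}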
+
+static int get_rdmult_delta(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row,
+ int mi_col, int orig_rdmult) {
+ const int gf_group_index = cpi->twopass.gf_group.index;
+ TplDepFrame *tpl_frame = &cpi->tpl_stats[gf_group_index];
+ TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
+ int tpl_stride = tpl_frame->stride;
+ int64_t intra_cost = 0;
+ int64_t mc_dep_cost = 0;
+ int mi_wide = num_8x8_blocks_wide_lookup[bsize];
+ int mi_high = num_8x8_blocks_high_lookup[bsize];
+ int row, col;
+
+ int dr = 0;
+ int count = 0;
+ double r0, rk, beta;
+
+ if (tpl_frame->is_valid == 0) return orig_rdmult;
+
+ if (cpi->twopass.gf_group.layer_depth[gf_group_index] > 1) return orig_rdmult;
+
+ if (gf_group_index >= MAX_ARF_GOP_SIZE) return orig_rdmult;
+
+ for (row = mi_row; row < mi_row + mi_high; ++row) {
+ for (col = mi_col; col < mi_col + mi_wide; ++col) {
+ TplDepStats *this_stats;
+
+ // Check bounds before forming the pointer so that out-of-frame positions
+ // never index into tpl_stats.
+ if (row >= cpi->common.mi_rows || col >= cpi->common.mi_cols) continue;
+ this_stats = &tpl_stats[row * tpl_stride + col];
+
+ intra_cost += this_stats->intra_cost;
+ mc_dep_cost += this_stats->mc_dep_cost;
+
+ ++count;
+ }
+ }
+
+ vpx_clear_system_state();
+
+ r0 = cpi->rd.r0;
+ rk = (double)intra_cost / mc_dep_cost;
+ beta = r0 / rk;
+ dr = vp9_get_adaptive_rdmult(cpi, beta);
+
+ dr = VPXMIN(dr, orig_rdmult * 3 / 2);
+ dr = VPXMAX(dr, orig_rdmult * 1 / 2);
+
+ dr = VPXMAX(1, dr);
+
+ return dr;
}
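+
+// Illustrative sketch, not part of the original change: the clamping applied
+// in get_rdmult_delta(). beta = r0 / rk compares the frame-level cost ratio
+// against the block-level one; the adaptive rdmult derived from it is kept
+// within [orig_rdmult / 2, 3 * orig_rdmult / 2] and forced to be at least 1.
+// Here adaptive_rdmult stands in for the vp9_get_adaptive_rdmult() result.
+static int toy_clamp_rdmult(int adaptive_rdmult, int orig_rdmult) {
+  int dr = VPXMIN(adaptive_rdmult, orig_rdmult * 3 / 2);
+  dr = VPXMAX(dr, orig_rdmult * 1 / 2);
+  return VPXMAX(1, dr);
+}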
// TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
@@ -3102,7 +3652,7 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
PARTITION_CONTEXT sl[8], sa[8];
TOKENEXTRA *tp_orig = *tp;
- PICK_MODE_CONTEXT *ctx = &pc_tree->none;
+ PICK_MODE_CONTEXT *const ctx = &pc_tree->none;
int i;
const int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
BLOCK_SIZE subsize;
@@ -3133,15 +3683,22 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
int64_t dist_breakout_thr = cpi->sf.partition_search_breakout_thr.dist;
int rate_breakout_thr = cpi->sf.partition_search_breakout_thr.rate;
+ int must_split = 0;
+ int partition_mul = cpi->sf.enable_tpl_model && cpi->oxcf.aq_mode == NO_AQ
+ ? x->cb_rdmult
+ : cpi->rd.RDMULT;
+ // Ref frames picked in the i-th quarter sub-block during the square
+ // partition RD search; may be used to prune the ref frame selection of
+ // rectangular partitions.
+ uint8_t ref_frames_used[4] = { 0, 0, 0, 0 };
(void)*tp_orig;
assert(num_8x8_blocks_wide_lookup[bsize] ==
num_8x8_blocks_high_lookup[bsize]);
- // Adjust dist breakout threshold according to the partition size.
dist_breakout_thr >>=
8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
+
rate_breakout_thr *= num_pels_log2_lookup[bsize];
vp9_rd_cost_init(&this_rdc);
@@ -3165,10 +3722,18 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
set_partition_range(cm, xd, mi_row, mi_col, bsize, &min_size, &max_size);
}
+ // Get the sub-block energy range.
+ if (bsize >= BLOCK_16X16) {
+ int min_energy, max_energy;
+ vp9_get_sub_block_energy(cpi, x, mi_row, mi_col, bsize, &min_energy,
+ &max_energy);
+ must_split = (min_energy < -3) && (max_energy - min_energy > 2);
+ }
+
// Determine partition types in search according to the speed features.
// The thresholds set here have to be for square block sizes.
if (cpi->sf.auto_min_max_partition_size) {
- partition_none_allowed &= (bsize <= max_size && bsize >= min_size);
+ partition_none_allowed &= (bsize <= max_size);
partition_horz_allowed &=
((bsize <= max_size && bsize > min_size) || force_horz_split);
partition_vert_allowed &=
@@ -3177,7 +3742,8 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
}
if (cpi->sf.use_square_partition_only &&
- bsize > cpi->sf.use_square_only_threshold) {
+ (bsize > cpi->sf.use_square_only_thresh_high ||
+ bsize < cpi->sf.use_square_only_thresh_low)) {
if (cpi->use_svc) {
if (!vp9_active_h_edge(cpi, mi_row, mi_step) || x->e_mbd.lossless)
partition_horz_allowed &= force_horz_split;
@@ -3250,15 +3816,37 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
}
#endif
+ pc_tree->partitioning = PARTITION_NONE;
+
+ if (cpi->sf.ml_var_partition_pruning) {
+ const int do_ml_var_partition_pruning =
+ !frame_is_intra_only(cm) && partition_none_allowed && do_split &&
+ mi_row + num_8x8_blocks_high_lookup[bsize] <= cm->mi_rows &&
+ mi_col + num_8x8_blocks_wide_lookup[bsize] <= cm->mi_cols;
+ if (do_ml_var_partition_pruning) {
+ ml_predict_var_rd_paritioning(cpi, x, bsize, mi_row, mi_col,
+ &partition_none_allowed, &do_split);
+ }
+ }
+
// PARTITION_NONE
if (partition_none_allowed) {
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, bsize, ctx,
best_rdc.rdcost);
+ ctx->rdcost = this_rdc.rdcost;
if (this_rdc.rate != INT_MAX) {
+ if (cpi->sf.prune_ref_frame_for_rect_partitions) {
+ const int ref1 = ctx->mic.ref_frame[0];
+ const int ref2 = ctx->mic.ref_frame[1];
+ for (i = 0; i < 4; ++i) {
+ ref_frames_used[i] |= (1 << ref1);
+ if (ref2 > 0) ref_frames_used[i] |= (1 << ref2);
+ }
+ }
if (bsize >= BLOCK_8X8) {
+ this_rdc.rdcost += RDCOST(partition_mul, x->rddiv,
+ cpi->partition_cost[pl][PARTITION_NONE], 0);
this_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
- this_rdc.rdcost =
- RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist);
}
if (this_rdc.rdcost < best_rdc.rdcost) {
@@ -3267,31 +3855,41 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
best_rdc = this_rdc;
if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE;
- if (!cpi->sf.ml_partition_search_early_termination) {
- // If all y, u, v transform blocks in this partition are skippable,
- // and the dist & rate are within the thresholds, the partition search
- // is terminated for current branch of the partition search tree.
- if (!x->e_mbd.lossless && ctx->skippable &&
- ((best_rdc.dist < (dist_breakout_thr >> 2)) ||
- (best_rdc.dist < dist_breakout_thr &&
- best_rdc.rate < rate_breakout_thr))) {
- do_split = 0;
- do_rect = 0;
- }
- } else {
+ if (cpi->sf.ml_partition_search_early_termination) {
// Currently, the machine-learning based partition search early
// termination is only used when bsize is 16x16, 32x32 or 64x64,
// VPXMIN(cm->width, cm->height) >= 480, and speed = 0.
if (!x->e_mbd.lossless &&
!segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP) &&
ctx->mic.mode >= INTRA_MODES && bsize >= BLOCK_16X16) {
- if (compute_score(cm, xd, ctx, mi_row, mi_col, bsize) < 0.0) {
+ if (ml_pruning_partition(cm, xd, ctx, mi_row, mi_col, bsize)) {
do_split = 0;
do_rect = 0;
}
}
}
+ if ((do_split || do_rect) && !x->e_mbd.lossless && ctx->skippable) {
+ const int use_ml_based_breakout =
+ cpi->sf.use_ml_partition_search_breakout &&
+ cm->base_qindex >= 100;
+ if (use_ml_based_breakout) {
+ if (ml_predict_breakout(cpi, bsize, x, &this_rdc)) {
+ do_split = 0;
+ do_rect = 0;
+ }
+ } else {
+ if (!cpi->sf.ml_partition_search_early_termination) {
+ if ((best_rdc.dist < (dist_breakout_thr >> 2)) ||
+ (best_rdc.dist < dist_breakout_thr &&
+ best_rdc.rate < rate_breakout_thr)) {
+ do_split = 0;
+ do_rect = 0;
+ }
+ }
+ }
+ }
+
#if CONFIG_FP_MB_STATS
// Check if every 16x16 first-pass block has zero-motion statistics and
// the corresponding first-pass residue is small enough.
@@ -3341,10 +3939,13 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
}
}
restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
+ } else {
+ vp9_zero(ctx->pred_mv);
+ ctx->mic.interp_filter = EIGHTTAP;
}
// store estimated motion vector
- if (cpi->sf.adaptive_motion_search) store_pred_mv(x, ctx);
+ store_pred_mv(x, ctx);
// If the interp_filter is marked as SWITCHABLE_FILTERS, it was for an
// intra block and used for context purposes.
@@ -3357,35 +3958,65 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
// PARTITION_SPLIT
// TODO(jingning): use the motion vectors given by the above search as
// the starting point of motion search in the following partition type check.
- if (do_split) {
+ pc_tree->split[0]->none.rdcost = 0;
+ pc_tree->split[1]->none.rdcost = 0;
+ pc_tree->split[2]->none.rdcost = 0;
+ pc_tree->split[3]->none.rdcost = 0;
+ if (do_split || must_split) {
subsize = get_subsize(bsize, PARTITION_SPLIT);
+ load_pred_mv(x, ctx);
if (bsize == BLOCK_8X8) {
i = 4;
if (cpi->sf.adaptive_pred_interp_filter && partition_none_allowed)
pc_tree->leaf_split[0]->pred_interp_filter = pred_interp_filter;
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
pc_tree->leaf_split[0], best_rdc.rdcost);
-
- if (sum_rdc.rate == INT_MAX) sum_rdc.rdcost = INT64_MAX;
+ if (sum_rdc.rate == INT_MAX) {
+ sum_rdc.rdcost = INT64_MAX;
+ } else {
+ if (cpi->sf.prune_ref_frame_for_rect_partitions) {
+ const int ref1 = pc_tree->leaf_split[0]->mic.ref_frame[0];
+ const int ref2 = pc_tree->leaf_split[0]->mic.ref_frame[1];
+ for (i = 0; i < 4; ++i) {
+ ref_frames_used[i] |= (1 << ref1);
+ if (ref2 > 0) ref_frames_used[i] |= (1 << ref2);
+ }
+ }
+ }
} else {
- for (i = 0; i < 4 && sum_rdc.rdcost < best_rdc.rdcost; ++i) {
+ for (i = 0; (i < 4) && ((sum_rdc.rdcost < best_rdc.rdcost) || must_split);
+ ++i) {
const int x_idx = (i & 1) * mi_step;
const int y_idx = (i >> 1) * mi_step;
if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
continue;
- if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);
-
pc_tree->split[i]->index = i;
+ if (cpi->sf.prune_ref_frame_for_rect_partitions)
+ pc_tree->split[i]->none.rate = INT_MAX;
rd_pick_partition(cpi, td, tile_data, tp, mi_row + y_idx,
mi_col + x_idx, subsize, &this_rdc,
+ // A must_split test here increases the number of sub-partitions but
+ // hurts the metric results quite a bit, so this extra test is
+ // commented out pending further tests on whether it adds enough in
+ // terms of visual quality.
+ // (must_split) ? best_rdc.rdcost
+ // : best_rdc.rdcost - sum_rdc.rdcost,
best_rdc.rdcost - sum_rdc.rdcost, pc_tree->split[i]);
if (this_rdc.rate == INT_MAX) {
sum_rdc.rdcost = INT64_MAX;
break;
} else {
+ if (cpi->sf.prune_ref_frame_for_rect_partitions &&
+ pc_tree->split[i]->none.rate != INT_MAX) {
+ const int ref1 = pc_tree->split[i]->none.mic.ref_frame[0];
+ const int ref2 = pc_tree->split[i]->none.mic.ref_frame[1];
+ ref_frames_used[i] |= (1 << ref1);
+ if (ref2 > 0) ref_frames_used[i] |= (1 << ref2);
+ }
sum_rdc.rate += this_rdc.rate;
sum_rdc.dist += this_rdc.dist;
sum_rdc.rdcost += this_rdc.rdcost;
@@ -3393,51 +4024,87 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
}
}
- if (sum_rdc.rdcost < best_rdc.rdcost && i == 4) {
+ if (((sum_rdc.rdcost < best_rdc.rdcost) || must_split) && i == 4) {
+ sum_rdc.rdcost += RDCOST(partition_mul, x->rddiv,
+ cpi->partition_cost[pl][PARTITION_SPLIT], 0);
sum_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT];
- sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
- if (sum_rdc.rdcost < best_rdc.rdcost) {
+ if ((sum_rdc.rdcost < best_rdc.rdcost) ||
+ (must_split && (sum_rdc.dist < best_rdc.dist))) {
best_rdc = sum_rdc;
pc_tree->partitioning = PARTITION_SPLIT;
// Rate and distortion based partition search termination clause.
if (!cpi->sf.ml_partition_search_early_termination &&
- !x->e_mbd.lossless && ((best_rdc.dist < (dist_breakout_thr >> 2)) ||
- (best_rdc.dist < dist_breakout_thr &&
- best_rdc.rate < rate_breakout_thr))) {
+ !x->e_mbd.lossless &&
+ ((best_rdc.dist < (dist_breakout_thr >> 2)) ||
+ (best_rdc.dist < dist_breakout_thr &&
+ best_rdc.rate < rate_breakout_thr))) {
do_rect = 0;
}
}
} else {
// Skip the rectangular partition test when the larger block size
// gives a better rd cost.
- if ((cpi->sf.less_rectangular_check) &&
- ((bsize > cpi->sf.use_square_only_threshold) ||
- (best_rdc.dist < dist_breakout_thr)))
+ if (cpi->sf.less_rectangular_check &&
+ (bsize > cpi->sf.use_square_only_thresh_high ||
+ best_rdc.dist < dist_breakout_thr))
do_rect &= !partition_none_allowed;
}
restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
}
+ pc_tree->horizontal[0].skip_ref_frame_mask = 0;
+ pc_tree->horizontal[1].skip_ref_frame_mask = 0;
+ pc_tree->vertical[0].skip_ref_frame_mask = 0;
+ pc_tree->vertical[1].skip_ref_frame_mask = 0;
+ if (cpi->sf.prune_ref_frame_for_rect_partitions) {
+ uint8_t used_frames;
+ used_frames = ref_frames_used[0] | ref_frames_used[1];
+ if (used_frames) pc_tree->horizontal[0].skip_ref_frame_mask = ~used_frames;
+ used_frames = ref_frames_used[2] | ref_frames_used[3];
+ if (used_frames) pc_tree->horizontal[1].skip_ref_frame_mask = ~used_frames;
+ used_frames = ref_frames_used[0] | ref_frames_used[2];
+ if (used_frames) pc_tree->vertical[0].skip_ref_frame_mask = ~used_frames;
+ used_frames = ref_frames_used[1] | ref_frames_used[3];
+ if (used_frames) pc_tree->vertical[1].skip_ref_frame_mask = ~used_frames;
+ }
+
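+  // Illustrative note, not part of the original change: ref_frames_used[i]
+  // is a bitmask of the references picked in quarter sub-block i (raster
+  // order: 0 1 / 2 3), so horizontal[0] -- the top half -- combines
+  // quarters 0 and 1. Worked example: if those quarters only used
+  // LAST_FRAME (bit 1 << LAST_FRAME == 0x02), skip_ref_frame_mask becomes
+  // ~0x02 == 0xfd, masking every other reference out of that search.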
+ {
+ const int do_ml_rect_partition_pruning =
+ !frame_is_intra_only(cm) && !force_horz_split && !force_vert_split &&
+ (partition_horz_allowed || partition_vert_allowed) && bsize > BLOCK_8X8;
+ if (do_ml_rect_partition_pruning) {
+ ml_prune_rect_partition(cpi, x, bsize, pc_tree, &partition_horz_allowed,
+ &partition_vert_allowed, best_rdc.rdcost, mi_row,
+ mi_col);
+ }
+ }
+
// PARTITION_HORZ
if (partition_horz_allowed &&
(do_rect || vp9_active_h_edge(cpi, mi_row, mi_step))) {
+ const int part_mode_rate = cpi->partition_cost[pl][PARTITION_HORZ];
+ const int64_t part_mode_rdcost =
+ RDCOST(partition_mul, x->rddiv, part_mode_rate, 0);
subsize = get_subsize(bsize, PARTITION_HORZ);
- if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);
+ load_pred_mv(x, ctx);
if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
partition_none_allowed)
pc_tree->horizontal[0].pred_interp_filter = pred_interp_filter;
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
- &pc_tree->horizontal[0], best_rdc.rdcost);
+ &pc_tree->horizontal[0],
+ best_rdc.rdcost - part_mode_rdcost);
+ if (sum_rdc.rdcost < INT64_MAX) {
+ sum_rdc.rdcost += part_mode_rdcost;
+ sum_rdc.rate += part_mode_rate;
+ }
if (sum_rdc.rdcost < best_rdc.rdcost && mi_row + mi_step < cm->mi_rows &&
bsize > BLOCK_8X8) {
PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0];
update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0);
encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx);
-
- if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);
if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
partition_none_allowed)
pc_tree->horizontal[1].pred_interp_filter = pred_interp_filter;
@@ -3454,16 +4121,12 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
}
if (sum_rdc.rdcost < best_rdc.rdcost) {
- sum_rdc.rate += cpi->partition_cost[pl][PARTITION_HORZ];
- sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
- if (sum_rdc.rdcost < best_rdc.rdcost) {
- best_rdc = sum_rdc;
- pc_tree->partitioning = PARTITION_HORZ;
+ best_rdc = sum_rdc;
+ pc_tree->partitioning = PARTITION_HORZ;
- if ((cpi->sf.less_rectangular_check) &&
- (bsize > cpi->sf.use_square_only_threshold))
- do_rect = 0;
- }
+ if (cpi->sf.less_rectangular_check &&
+ bsize > cpi->sf.use_square_only_thresh_high)
+ do_rect = 0;
}
restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
}
@@ -3471,21 +4134,26 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
// PARTITION_VERT
if (partition_vert_allowed &&
(do_rect || vp9_active_v_edge(cpi, mi_col, mi_step))) {
+ const int part_mode_rate = cpi->partition_cost[pl][PARTITION_VERT];
+ const int64_t part_mode_rdcost =
+ RDCOST(partition_mul, x->rddiv, part_mode_rate, 0);
subsize = get_subsize(bsize, PARTITION_VERT);
-
- if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);
+ load_pred_mv(x, ctx);
if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
partition_none_allowed)
pc_tree->vertical[0].pred_interp_filter = pred_interp_filter;
rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
- &pc_tree->vertical[0], best_rdc.rdcost);
+ &pc_tree->vertical[0], best_rdc.rdcost - part_mode_rdcost);
+ if (sum_rdc.rdcost < INT64_MAX) {
+ sum_rdc.rdcost += part_mode_rdcost;
+ sum_rdc.rate += part_mode_rate;
+ }
+
if (sum_rdc.rdcost < best_rdc.rdcost && mi_col + mi_step < cm->mi_cols &&
bsize > BLOCK_8X8) {
update_state(cpi, td, &pc_tree->vertical[0], mi_row, mi_col, subsize, 0);
encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize,
&pc_tree->vertical[0]);
-
- if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx);
if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
partition_none_allowed)
pc_tree->vertical[1].pred_interp_filter = pred_interp_filter;
@@ -3502,12 +4170,8 @@ static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
}
if (sum_rdc.rdcost < best_rdc.rdcost) {
- sum_rdc.rate += cpi->partition_cost[pl][PARTITION_VERT];
- sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
- if (sum_rdc.rdcost < best_rdc.rdcost) {
- best_rdc = sum_rdc;
- pc_tree->partitioning = PARTITION_VERT;
- }
+ best_rdc = sum_rdc;
+ pc_tree->partitioning = PARTITION_VERT;
}
restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
}
@@ -3582,7 +4246,10 @@ static void encode_rd_sb_row(VP9_COMP *cpi, ThreadData *td,
}
}
- vp9_zero(x->pred_mv);
+ for (i = 0; i < MAX_REF_FRAMES; ++i) {
+ x->pred_mv[i].row = INT16_MAX;
+ x->pred_mv[i].col = INT16_MAX;
+ }
td->pc_root->index = 0;
if (seg->enabled) {
@@ -3613,12 +4280,21 @@ static void encode_rd_sb_row(VP9_COMP *cpi, ThreadData *td,
rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64,
&dummy_rate, &dummy_dist, 1, td->pc_root);
} else {
+ int orig_rdmult = cpi->rd.RDMULT;
+ x->cb_rdmult = orig_rdmult;
+ if (cpi->twopass.gf_group.index > 0 && cpi->sf.enable_tpl_model) {
+ int dr =
+ get_rdmult_delta(cpi, BLOCK_64X64, mi_row, mi_col, orig_rdmult);
+ x->cb_rdmult = dr;
+ }
+
// If required set upper and lower partition size limits
if (sf->auto_min_max_partition_size) {
set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
rd_auto_partition_range(cpi, tile_info, xd, mi_row, mi_col,
&x->min_partition_size, &x->max_partition_size);
}
+ td->pc_root->none.rdcost = 0;
rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, BLOCK_64X64,
&dummy_rdc, INT64_MAX, td->pc_root);
}
@@ -3703,6 +4379,36 @@ static void hybrid_intra_mode_search(VP9_COMP *cpi, MACROBLOCK *const x,
vp9_pick_intra_mode(cpi, x, rd_cost, bsize, ctx);
}
+static void hybrid_search_svc_baseiskey(VP9_COMP *cpi, MACROBLOCK *const x,
+ RD_COST *rd_cost, BLOCK_SIZE bsize,
+ PICK_MODE_CONTEXT *ctx,
+ TileDataEnc *tile_data, int mi_row,
+ int mi_col) {
+ if (!cpi->sf.nonrd_keyframe && bsize <= BLOCK_8X8) {
+ vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, INT64_MAX);
+ } else {
+ if (cpi->svc.disable_inter_layer_pred == INTER_LAYER_PRED_OFF)
+ vp9_pick_intra_mode(cpi, x, rd_cost, bsize, ctx);
+ else if (bsize >= BLOCK_8X8)
+ vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col, rd_cost, bsize,
+ ctx);
+ else
+ vp9_pick_inter_mode_sub8x8(cpi, x, mi_row, mi_col, rd_cost, bsize, ctx);
+ }
+}
+
+static void hybrid_search_scene_change(VP9_COMP *cpi, MACROBLOCK *const x,
+ RD_COST *rd_cost, BLOCK_SIZE bsize,
+ PICK_MODE_CONTEXT *ctx,
+ TileDataEnc *tile_data, int mi_row,
+ int mi_col) {
+ if (!cpi->sf.nonrd_keyframe && bsize <= BLOCK_8X8) {
+ vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, INT64_MAX);
+ } else {
+ vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col, rd_cost, bsize, ctx);
+ }
+}
+
static void nonrd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data,
MACROBLOCK *const x, int mi_row, int mi_col,
RD_COST *rd_cost, BLOCK_SIZE bsize,
@@ -3718,6 +4424,9 @@ static void nonrd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data,
int plane;
set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
+
+ set_segment_index(cpi, x, mi_row, mi_col, bsize, 0);
+
mi = xd->mi[0];
mi->sb_type = bsize;
@@ -3733,14 +4442,23 @@ static void nonrd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data,
if (cyclic_refresh_segment_id_boosted(mi->segment_id))
x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);
- if (cm->frame_type == KEY_FRAME)
+ if (frame_is_intra_only(cm))
hybrid_intra_mode_search(cpi, x, rd_cost, bsize, ctx);
+ else if (cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)
+ hybrid_search_svc_baseiskey(cpi, x, rd_cost, bsize, ctx, tile_data, mi_row,
+ mi_col);
else if (segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP))
set_mode_info_seg_skip(x, cm->tx_mode, rd_cost, bsize);
- else if (bsize >= BLOCK_8X8)
- vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col, rd_cost, bsize, ctx);
- else
+ else if (bsize >= BLOCK_8X8) {
+ if (cpi->rc.hybrid_intra_scene_change)
+ hybrid_search_scene_change(cpi, x, rd_cost, bsize, ctx, tile_data, mi_row,
+ mi_col);
+ else
+ vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col, rd_cost, bsize,
+ ctx);
+ } else {
vp9_pick_inter_mode_sub8x8(cpi, x, mi_row, mi_col, rd_cost, bsize, ctx);
+ }
duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize);
@@ -3830,6 +4548,78 @@ static void pred_pixel_ready_reset(PC_TREE *pc_tree, BLOCK_SIZE bsize) {
}
}
+#if CONFIG_ML_VAR_PARTITION
+#define FEATURES 6
+#define LABELS 2
+static int ml_predict_var_paritioning(VP9_COMP *cpi, MACROBLOCK *x,
+ BLOCK_SIZE bsize, int mi_row,
+ int mi_col) {
+ VP9_COMMON *const cm = &cpi->common;
+ const NN_CONFIG *nn_config = NULL;
+
+ switch (bsize) {
+ case BLOCK_64X64: nn_config = &vp9_var_part_nnconfig_64; break;
+ case BLOCK_32X32: nn_config = &vp9_var_part_nnconfig_32; break;
+ case BLOCK_16X16: nn_config = &vp9_var_part_nnconfig_16; break;
+ case BLOCK_8X8: break;
+ default: assert(0 && "Unexpected block size."); return -1;
+ }
+
+ if (!nn_config) return -1;
+
+ vpx_clear_system_state();
+
+ {
+ const float thresh = cpi->oxcf.speed <= 5 ? 1.25f : 0.0f;
+ float features[FEATURES] = { 0.0f };
+ const int dc_q = vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth);
+ int feature_idx = 0;
+ float score[LABELS];
+
+ features[feature_idx++] = logf((float)(dc_q * dc_q) / 256.0f + 1.0f);
+ vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);
+ {
+ const int bs = 4 * num_4x4_blocks_wide_lookup[bsize];
+ const BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT);
+ const int sb_offset_row = 8 * (mi_row & 7);
+ const int sb_offset_col = 8 * (mi_col & 7);
+ const uint8_t *pred = x->est_pred + sb_offset_row * 64 + sb_offset_col;
+ const uint8_t *src = x->plane[0].src.buf;
+ const int src_stride = x->plane[0].src.stride;
+ const int pred_stride = 64;
+ unsigned int sse;
+ int i;
+ // Variance of whole block.
+ const unsigned int var =
+ cpi->fn_ptr[bsize].vf(src, src_stride, pred, pred_stride, &sse);
+ const float factor = (var == 0) ? 1.0f : (1.0f / (float)var);
+
+ features[feature_idx++] = logf((float)var + 1.0f);
+ for (i = 0; i < 4; ++i) {
+ const int x_idx = (i & 1) * bs / 2;
+ const int y_idx = (i >> 1) * bs / 2;
+ const int src_offset = y_idx * src_stride + x_idx;
+ const int pred_offset = y_idx * pred_stride + x_idx;
+ // Variance of quarter block.
+ const unsigned int sub_var =
+ cpi->fn_ptr[subsize].vf(src + src_offset, src_stride,
+ pred + pred_offset, pred_stride, &sse);
+ const float var_ratio = (var == 0) ? 1.0f : factor * (float)sub_var;
+ features[feature_idx++] = var_ratio;
+ }
+ }
+
+ assert(feature_idx == FEATURES);
+ nn_predict(features, nn_config, score);
+ if (score[0] > thresh) return PARTITION_SPLIT;
+ if (score[0] < -thresh) return PARTITION_NONE;
+ return -1;
+ }
+}
+#undef FEATURES
+#undef LABELS
+#endif // CONFIG_ML_VAR_PARTITION
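+
+#if CONFIG_ML_VAR_PARTITION
+// Illustrative sketch, not part of the original change: the symmetric
+// threshold decision used in ml_predict_var_paritioning(). At speed <= 5
+// the threshold is 1.25f, so a score of 1.5f forces PARTITION_SPLIT, -1.5f
+// forces PARTITION_NONE, and anything in between returns -1 to defer to the
+// normal partition search.
+static int toy_var_part_decision(float score, float thresh) {
+  if (score > thresh) return PARTITION_SPLIT;
+  if (score < -thresh) return PARTITION_NONE;
+  return -1;
+}
+#endif  // CONFIG_ML_VAR_PARTITION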
+
static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td,
TileDataEnc *tile_data, TOKENEXTRA **tp,
int mi_row, int mi_col, BLOCK_SIZE bsize,
@@ -3859,6 +4649,11 @@ static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td,
!force_vert_split && yss <= xss && bsize >= BLOCK_8X8;
int partition_vert_allowed =
!force_horz_split && xss <= yss && bsize >= BLOCK_8X8;
+#if CONFIG_ML_VAR_PARTITION
+ const int use_ml_based_partitioning =
+ sf->partition_search_type == ML_BASED_PARTITION;
+#endif // CONFIG_ML_VAR_PARTITION
+
(void)*tp_orig;
// Avoid checking for rectangular partitions for speed >= 6.
@@ -3889,6 +4684,20 @@ static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td,
partition_vert_allowed &= force_vert_split;
}
+#if CONFIG_ML_VAR_PARTITION
+ if (use_ml_based_partitioning) {
+ if (partition_none_allowed || do_split) do_rect = 0;
+ if (partition_none_allowed && do_split) {
+ const int ml_predicted_partition =
+ ml_predict_var_paritioning(cpi, x, bsize, mi_row, mi_col);
+ if (ml_predicted_partition == PARTITION_NONE) do_split = 0;
+ if (ml_predicted_partition == PARTITION_SPLIT) partition_none_allowed = 0;
+ }
+ }
+#endif // CONFIG_ML_VAR_PARTITION
+
+ if (!partition_none_allowed && !do_split) do_rect = 1;
+
ctx->pred_pixel_ready =
!(partition_vert_allowed || partition_horz_allowed || do_split);
@@ -3902,26 +4711,28 @@ static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td,
ctx->skip = x->skip;
if (this_rdc.rate != INT_MAX) {
- int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
+ const int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
this_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
this_rdc.rdcost =
RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist);
if (this_rdc.rdcost < best_rdc.rdcost) {
- int64_t dist_breakout_thr = sf->partition_search_breakout_thr.dist;
- int64_t rate_breakout_thr = sf->partition_search_breakout_thr.rate;
-
- dist_breakout_thr >>=
- 8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
-
- rate_breakout_thr *= num_pels_log2_lookup[bsize];
-
best_rdc = this_rdc;
if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE;
- if (!x->e_mbd.lossless && this_rdc.rate < rate_breakout_thr &&
- this_rdc.dist < dist_breakout_thr) {
- do_split = 0;
- do_rect = 0;
+#if CONFIG_ML_VAR_PARTITION
+ if (!use_ml_based_partitioning)
+#endif // CONFIG_ML_VAR_PARTITION
+ {
+ int64_t dist_breakout_thr = sf->partition_search_breakout_thr.dist;
+ int64_t rate_breakout_thr = sf->partition_search_breakout_thr.rate;
+ dist_breakout_thr >>=
+ 8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
+ rate_breakout_thr *= num_pels_log2_lookup[bsize];
+ if (!x->e_mbd.lossless && this_rdc.rate < rate_breakout_thr &&
+ this_rdc.dist < dist_breakout_thr) {
+ do_split = 0;
+ do_rect = 0;
+ }
}
}
}
@@ -3969,7 +4780,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td,
// PARTITION_HORZ
if (partition_horz_allowed && do_rect) {
subsize = get_subsize(bsize, PARTITION_HORZ);
- if (sf->adaptive_motion_search) load_pred_mv(x, ctx);
+ load_pred_mv(x, ctx);
pc_tree->horizontal[0].pred_pixel_ready = 1;
nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
&pc_tree->horizontal[0]);
@@ -4013,7 +4824,7 @@ static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td,
// PARTITION_VERT
if (partition_vert_allowed && do_rect) {
subsize = get_subsize(bsize, PARTITION_VERT);
- if (sf->adaptive_motion_search) load_pred_mv(x, ctx);
+ load_pred_mv(x, ctx);
pc_tree->vertical[0].pred_pixel_ready = 1;
nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
&pc_tree->vertical[0]);
@@ -4173,7 +4984,8 @@ static void nonrd_select_partition(VP9_COMP *cpi, ThreadData *td,
}
}
break;
- case PARTITION_SPLIT:
+ default:
+ assert(partition == PARTITION_SPLIT);
subsize = get_subsize(bsize, PARTITION_SPLIT);
nonrd_select_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
subsize, output_enabled, rd_cost,
@@ -4203,7 +5015,6 @@ static void nonrd_select_partition(VP9_COMP *cpi, ThreadData *td,
rd_cost->dist += this_rdc.dist;
}
break;
- default: assert(0 && "Invalid partition type."); break;
}
}
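This hunk is the first instance of a pattern repeated throughout the change: the final switch case becomes default with an assert naming the only value expected to reach it. Debug builds still trap unexpected inputs, while release builds get a total switch with no dead default arm. A minimal standalone illustration (types redeclared here for self-containment):

  #include <assert.h>

  typedef enum { PARTITION_NONE, PARTITION_HORZ, PARTITION_VERT,
                 PARTITION_SPLIT } PARTITION_TYPE;

  static int partition_arity(PARTITION_TYPE p) {
    switch (p) {
      case PARTITION_NONE: return 1;
      case PARTITION_HORZ:
      case PARTITION_VERT: return 2;
      default:
        assert(p == PARTITION_SPLIT);  // was "case PARTITION_SPLIT:"
        return 4;
    }
  }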
@@ -4292,7 +5103,8 @@ static void nonrd_use_partition(VP9_COMP *cpi, ThreadData *td,
output_enabled, subsize, &pc_tree->horizontal[1]);
}
break;
- case PARTITION_SPLIT:
+ default:
+ assert(partition == PARTITION_SPLIT);
subsize = get_subsize(bsize, PARTITION_SPLIT);
if (bsize == BLOCK_8X8) {
nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost,
@@ -4313,13 +5125,117 @@ static void nonrd_use_partition(VP9_COMP *cpi, ThreadData *td,
dummy_cost, pc_tree->split[3]);
}
break;
- default: assert(0 && "Invalid partition type."); break;
}
if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)
update_partition_context(xd, mi_row, mi_col, subsize, bsize);
}
+#if CONFIG_ML_VAR_PARTITION
+// Get a prediction (stored in x->est_pred) for the whole 64x64 superblock.
+static void get_estimated_pred(VP9_COMP *cpi, const TileInfo *const tile,
+ MACROBLOCK *x, int mi_row, int mi_col) {
+ VP9_COMMON *const cm = &cpi->common;
+ const int is_key_frame = frame_is_intra_only(cm);
+
+ set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64);
+
+ if (!is_key_frame) {
+ MACROBLOCKD *xd = &x->e_mbd;
+ MODE_INFO *mi = xd->mi[0];
+ YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
+ const YV12_BUFFER_CONFIG *yv12_g = NULL;
+ const BLOCK_SIZE bsize = BLOCK_32X32 + (mi_col + 4 < cm->mi_cols) * 2 +
+ (mi_row + 4 < cm->mi_rows);
+ int pixels_wide = 64, pixels_high = 64;
+ unsigned int y_sad_g, y_sad_thr;
+ unsigned int y_sad = UINT_MAX;
+
+ assert(yv12 != NULL);
+
+ if (xd->mb_to_right_edge < 0) pixels_wide += (xd->mb_to_right_edge >> 3);
+ if (xd->mb_to_bottom_edge < 0) pixels_high += (xd->mb_to_bottom_edge >> 3);
+
+ if (!(is_one_pass_cbr_svc(cpi) && cpi->svc.spatial_layer_id) ||
+ cpi->svc.use_gf_temporal_ref_current_layer) {
+ // For now, GOLDEN will not be used for non-zero spatial layers, since
+ // it may not be a temporal reference.
+ yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
+ }
+
+ // Only compute y_sad_g (sad for golden reference) for speed < 8.
+ if (cpi->oxcf.speed < 8 && yv12_g && yv12_g != yv12 &&
+ (cpi->ref_frame_flags & VP9_GOLD_FLAG)) {
+ vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
+ &cm->frame_refs[GOLDEN_FRAME - 1].sf);
+ y_sad_g = cpi->fn_ptr[bsize].sdf(
+ x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf,
+ xd->plane[0].pre[0].stride);
+ } else {
+ y_sad_g = UINT_MAX;
+ }
+
+ if (cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR &&
+ cpi->rc.is_src_frame_alt_ref) {
+ yv12 = get_ref_frame_buffer(cpi, ALTREF_FRAME);
+ vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
+ &cm->frame_refs[ALTREF_FRAME - 1].sf);
+ mi->ref_frame[0] = ALTREF_FRAME;
+ y_sad_g = UINT_MAX;
+ } else {
+ vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
+ &cm->frame_refs[LAST_FRAME - 1].sf);
+ mi->ref_frame[0] = LAST_FRAME;
+ }
+ mi->ref_frame[1] = NONE;
+ mi->sb_type = BLOCK_64X64;
+ mi->mv[0].as_int = 0;
+ mi->interp_filter = BILINEAR;
+
+ {
+ const MV dummy_mv = { 0, 0 };
+ y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col,
+ &dummy_mv);
+ x->sb_use_mv_part = 1;
+ x->sb_mvcol_part = mi->mv[0].as_mv.col;
+ x->sb_mvrow_part = mi->mv[0].as_mv.row;
+ }
+
+ // Pick the reference frame for partitioning; bias toward LAST when
+ // y_sad_g and y_sad are close and short_circuit_low_temp_var is on.
+ y_sad_thr = cpi->sf.short_circuit_low_temp_var ? (y_sad * 7) >> 3 : y_sad;
+ if (y_sad_g < y_sad_thr) {
+ vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
+ &cm->frame_refs[GOLDEN_FRAME - 1].sf);
+ mi->ref_frame[0] = GOLDEN_FRAME;
+ mi->mv[0].as_int = 0;
+ y_sad = y_sad_g;
+ } else {
+ x->pred_mv[LAST_FRAME] = mi->mv[0].as_mv;
+ }
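  /* Worked example of the 7/8 bias above: with y_sad = 800 against LAST,
   * y_sad_thr = (800 * 7) >> 3 = 700, so GOLDEN is picked only when
   * y_sad_g < 700, i.e. at least 12.5% better than LAST. With
   * short_circuit_low_temp_var off, the comparison is simply
   * y_sad_g < y_sad. */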
+
+ set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
+ xd->plane[0].dst.buf = x->est_pred;
+ xd->plane[0].dst.stride = 64;
+ vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_64X64);
+ } else {
+#if CONFIG_VP9_HIGHBITDEPTH
+ switch (xd->bd) {
+ case 8: memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0])); break;
+ case 10:
+ memset(x->est_pred, 128 * 4, 64 * 64 * sizeof(x->est_pred[0]));
+ break;
+ case 12:
+ memset(x->est_pred, 128 * 16, 64 * 64 * sizeof(x->est_pred[0]));
+ break;
+ }
+#else
+ memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0]));
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ }
+}
+#endif // CONFIG_ML_VAR_PARTITION
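One subtlety in the key-frame fallback above: memset converts its fill value to unsigned char, so 128 * 4 and 128 * 16 truncate to 0, and the 10/12-bit branches fill zeros rather than mid-gray. If est_pred holds 16-bit samples in high-bit-depth builds (its declaration is not shown in this diff), a true mid-gray fill would need a per-sample loop, sketched here as a hypothetical helper:

  static void fill_midgray16(uint16_t *buf, int n, int bit_depth) {
    const uint16_t mid = (uint16_t)(128 << (bit_depth - 8));  // 512 or 2048
    int i;
    for (i = 0; i < n; ++i) buf[i] = mid;  // per-sample, unlike memset
  }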
+
static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td,
TileDataEnc *tile_data, int mi_row,
TOKENEXTRA **tp) {
@@ -4350,6 +5266,7 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td,
PARTITION_SEARCH_TYPE partition_search_type = sf->partition_search_type;
BLOCK_SIZE bsize = BLOCK_64X64;
int seg_skip = 0;
+ int i;
(*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, sb_row,
sb_col_in_tile);
@@ -4359,7 +5276,10 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td,
}
x->source_variance = UINT_MAX;
- vp9_zero(x->pred_mv);
+ for (i = 0; i < MAX_REF_FRAMES; ++i) {
+ x->pred_mv[i].row = INT16_MAX;
+ x->pred_mv[i].col = INT16_MAX;
+ }
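  /* INT16_MAX is outside vp9's legal motion-vector range, so this marks
   * each pred_mv entry as "not set yet" -- something the old vp9_zero()
   * could not express, since a zero MV is itself a valid prediction. A
   * reader might test validity with a helper along these lines
   * (hypothetical):
   *   static INLINE int pred_mv_valid(const MV *mv) {
   *     return mv->row != INT16_MAX && mv->col != INT16_MAX;
   *   }
   */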
vp9_rd_cost_init(&dummy_rdc);
x->color_sensitivity[0] = 0;
x->color_sensitivity[1] = 0;
@@ -4367,6 +5287,7 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td,
x->skip_low_source_sad = 0;
x->lowvar_highsumdiff = 0;
x->content_state_sb = 0;
+ x->zero_temp_sad_source = 0;
x->sb_use_mv_part = 0;
x->sb_mvcol_part = 0;
x->sb_mvrow_part = 0;
@@ -4406,6 +5327,17 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td,
nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
break;
+#if CONFIG_ML_VAR_PARTITION
+ case ML_BASED_PARTITION:
+ get_estimated_pred(cpi, tile_info, x, mi_row, mi_col);
+ x->max_partition_size = BLOCK_64X64;
+ x->min_partition_size = BLOCK_8X8;
+ x->sb_pickmode_part = 1;
+ nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col,
+ BLOCK_64X64, &dummy_rdc, 1, INT64_MAX,
+ td->pc_root);
+ break;
+#endif // CONFIG_ML_VAR_PARTITION
case SOURCE_VAR_BASED_PARTITION:
set_source_var_based_partition(cpi, tile_info, x, mi, mi_row, mi_col);
nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
@@ -4417,14 +5349,15 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td,
nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
break;
- case REFERENCE_PARTITION:
+ default:
+ assert(partition_search_type == REFERENCE_PARTITION);
x->sb_pickmode_part = 1;
set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
// Use nonrd_pick_partition on scene-cut for VBR mode.
// nonrd_pick_partition does not support 4x4 partition, so avoid it
// on key frame for now.
if ((cpi->oxcf.rc_mode == VPX_VBR && cpi->rc.high_source_sad &&
- cpi->oxcf.speed < 6 && cm->frame_type != KEY_FRAME &&
+ cpi->oxcf.speed < 6 && !frame_is_intra_only(cm) &&
(cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame))) {
// Use lower max_partition_size for low resolutions.
if (cm->width <= 352 && cm->height <= 288)
@@ -4440,7 +5373,7 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td,
// TODO(marpan): Seems like nonrd_select_partition does not support
// 4x4 partition. Since 4x4 is used on key frame, use this switch
// for now.
- if (cm->frame_type == KEY_FRAME)
+ if (frame_is_intra_only(cm))
nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
else
@@ -4449,7 +5382,6 @@ static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td,
}
break;
- default: assert(0); break;
}
// Update ref_frame usage for inter frame if this group is ARF group.
@@ -4516,16 +5448,12 @@ static int set_var_thresh_from_histogram(VP9_COMP *cpi) {
&var16->sse, &var16->sum);
var16->var = variance_highbd(var16);
break;
- case VPX_BITS_12:
+ default:
+ assert(cm->bit_depth == VPX_BITS_12);
vpx_highbd_12_get16x16var(src, src_stride, last_src, last_stride,
&var16->sse, &var16->sum);
var16->var = variance_highbd(var16);
break;
- default:
- assert(0 &&
- "cm->bit_depth should be VPX_BITS_8, VPX_BITS_10"
- " or VPX_BITS_12");
- return -1;
}
} else {
vpx_get16x16var(src, src_stride, last_src, last_stride, &var16->sse,
@@ -4620,8 +5548,9 @@ void vp9_init_tile_data(VP9_COMP *cpi) {
if (cpi->tile_data == NULL || cpi->allocated_tiles < tile_cols * tile_rows) {
if (cpi->tile_data != NULL) vpx_free(cpi->tile_data);
- CHECK_MEM_ERROR(cm, cpi->tile_data, vpx_malloc(tile_cols * tile_rows *
- sizeof(*cpi->tile_data)));
+ CHECK_MEM_ERROR(
+ cm, cpi->tile_data,
+ vpx_malloc(tile_cols * tile_rows * sizeof(*cpi->tile_data)));
cpi->allocated_tiles = tile_cols * tile_rows;
for (tile_row = 0; tile_row < tile_rows; ++tile_row)
@@ -4632,6 +5561,9 @@ void vp9_init_tile_data(VP9_COMP *cpi) {
for (i = 0; i < BLOCK_SIZES; ++i) {
for (j = 0; j < MAX_MODES; ++j) {
tile_data->thresh_freq_fact[i][j] = RD_THRESH_INIT_FACT;
+#if CONFIG_CONSISTENT_RECODE
+ tile_data->thresh_freq_fact_prev[i][j] = RD_THRESH_INIT_FACT;
+#endif
tile_data->mode_map[i][j] = j;
}
}
@@ -4645,6 +5577,9 @@ void vp9_init_tile_data(VP9_COMP *cpi) {
for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
TileInfo *tile_info = &this_tile->tile_info;
+ if (cpi->sf.adaptive_rd_thresh_row_mt &&
+ this_tile->row_base_thresh_freq_fact == NULL)
+ vp9_row_mt_alloc_rd_thresh(cpi, this_tile);
vp9_tile_init(tile_info, cm, tile_row, tile_col);
cpi->tile_tok[tile_row][tile_col] = pre_tok + tile_tok;
@@ -4735,10 +5670,10 @@ static void encode_frame_internal(VP9_COMP *cpi) {
MACROBLOCK *const x = &td->mb;
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
+ const int gf_group_index = cpi->twopass.gf_group.index;
xd->mi = cm->mi_grid_visible;
xd->mi[0] = cm->mi;
-
vp9_zero(*td->counts);
vp9_zero(cpi->td.rd_counts);
@@ -4756,8 +5691,12 @@ static void encode_frame_internal(VP9_COMP *cpi) {
x->fwd_txfm4x4 = xd->lossless ? vp9_fwht4x4 : vpx_fdct4x4;
#endif // CONFIG_VP9_HIGHBITDEPTH
x->inv_txfm_add = xd->lossless ? vp9_iwht4x4_add : vp9_idct4x4_add;
-
+#if CONFIG_CONSISTENT_RECODE
+ x->optimize = sf->optimize_coefficients == 1 && cpi->oxcf.pass != 1;
+#endif
if (xd->lossless) x->optimize = 0;
+ x->sharpness = cpi->oxcf.sharpness;
+ x->adjust_rdmult_by_segment = (cpi->oxcf.aq_mode == VARIANCE_AQ);
cm->tx_mode = select_tx_mode(cpi, xd);
@@ -4799,6 +5738,28 @@ static void encode_frame_internal(VP9_COMP *cpi) {
if (sf->partition_search_type == SOURCE_VAR_BASED_PARTITION)
source_var_based_partition_search_method(cpi);
+ } else if (gf_group_index && gf_group_index < MAX_ARF_GOP_SIZE &&
+ cpi->sf.enable_tpl_model) {
+ TplDepFrame *tpl_frame = &cpi->tpl_stats[cpi->twopass.gf_group.index];
+ TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
+
+ int tpl_stride = tpl_frame->stride;
+ int64_t intra_cost_base = 0;
+ int64_t mc_dep_cost_base = 0;
+ int row, col;
+
+ for (row = 0; row < cm->mi_rows && tpl_frame->is_valid; ++row) {
+ for (col = 0; col < cm->mi_cols; ++col) {
+ TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col];
+ intra_cost_base += this_stats->intra_cost;
+ mc_dep_cost_base += this_stats->mc_dep_cost;
+ }
+ }
+
+ vpx_clear_system_state();
+
+ if (tpl_frame->is_valid)
+ cpi->rd.r0 = (double)intra_cost_base / mc_dep_cost_base;
}
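  /* r0 relates the frame's own intra cost to the cost it propagates through
   * motion compensation. Illustrative numbers only:
   *   intra_cost_base = 2.0e6, mc_dep_cost_base = 8.0e6  ->  r0 = 0.25
   * The smaller r0 is, the more later frames lean on this one, which rate
   * control can use to spend bits where they propagate furthest. */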
{
@@ -4881,9 +5842,48 @@ static int compute_frame_aq_offset(struct VP9_COMP *cpi) {
return sum_delta / (cm->mi_rows * cm->mi_cols);
}
+#if CONFIG_CONSISTENT_RECODE
+static void restore_encode_params(VP9_COMP *cpi) {
+ VP9_COMMON *const cm = &cpi->common;
+ const int tile_cols = 1 << cm->log2_tile_cols;
+ const int tile_rows = 1 << cm->log2_tile_rows;
+ int tile_col, tile_row;
+ int i, j;
+ RD_OPT *rd_opt = &cpi->rd;
+ for (i = 0; i < MAX_REF_FRAMES; i++) {
+ for (j = 0; j < REFERENCE_MODES; j++)
+ rd_opt->prediction_type_threshes[i][j] =
+ rd_opt->prediction_type_threshes_prev[i][j];
+
+ for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; j++)
+ rd_opt->filter_threshes[i][j] = rd_opt->filter_threshes_prev[i][j];
+ }
+
+ if (cpi->tile_data != NULL) {
+ for (tile_row = 0; tile_row < tile_rows; ++tile_row)
+ for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
+ TileDataEnc *tile_data =
+ &cpi->tile_data[tile_row * tile_cols + tile_col];
+ for (i = 0; i < BLOCK_SIZES; ++i) {
+ for (j = 0; j < MAX_MODES; ++j) {
+ tile_data->thresh_freq_fact[i][j] =
+ tile_data->thresh_freq_fact_prev[i][j];
+ }
+ }
+ }
+ }
+
+ cm->interp_filter = cpi->sf.default_interp_filter;
+}
+#endif
+
void vp9_encode_frame(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
+#if CONFIG_CONSISTENT_RECODE
+ restore_encode_params(cpi);
+#endif
+
// In the longer term the encoder should be generalized to match the
// decoder such that we allow compound where one of the 3 buffers has a
// different sign bias and that buffer is then the fixed ref. However, this
@@ -4891,16 +5891,11 @@ void vp9_encode_frame(VP9_COMP *cpi) {
// side behavior is where the ALT ref buffer has opposite sign bias to
// the other two.
if (!frame_is_intra_only(cm)) {
- if ((cm->ref_frame_sign_bias[ALTREF_FRAME] ==
- cm->ref_frame_sign_bias[GOLDEN_FRAME]) ||
- (cm->ref_frame_sign_bias[ALTREF_FRAME] ==
- cm->ref_frame_sign_bias[LAST_FRAME])) {
- cpi->allow_comp_inter_inter = 0;
- } else {
+ if (vp9_compound_reference_allowed(cm)) {
cpi->allow_comp_inter_inter = 1;
- cm->comp_fixed_ref = ALTREF_FRAME;
- cm->comp_var_ref[0] = LAST_FRAME;
- cm->comp_var_ref[1] = GOLDEN_FRAME;
+ vp9_setup_compound_reference_mode(cm);
+ } else {
+ cpi->allow_comp_inter_inter = 0;
}
}
@@ -5064,7 +6059,8 @@ static void update_zeromv_cnt(VP9_COMP *const cpi, const MODE_INFO *const mi,
for (y = 0; y < ymis; y++)
for (x = 0; x < xmis; x++) {
int map_offset = block_index + y * cm->mi_cols + x;
- if (is_inter_block(mi) && mi->segment_id <= CR_SEGMENT_ID_BOOST2) {
+ if (mi->ref_frame[0] == LAST_FRAME && is_inter_block(mi) &&
+ mi->segment_id <= CR_SEGMENT_ID_BOOST2) {
if (abs(mv.row) < 8 && abs(mv.col) < 8) {
if (cpi->consec_zero_mv[map_offset] < 255)
cpi->consec_zero_mv[map_offset]++;
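  /* Motion vectors are stored in 1/8-pel units, so |row| < 8 && |col| < 8
   * means "moved by less than one full pixel". Example: mv = {-5, 6} is a
   * (-0.625, 0.75)-pixel displacement and still counts toward
   * consec_zero_mv, which saturates at 255. The added ref_frame check
   * limits the statistic to blocks predicted from LAST_FRAME. */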
@@ -5159,7 +6155,11 @@ static void encode_superblock(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
++td->counts->tx.tx_totals[get_uv_tx_size(mi, &xd->plane[1])];
if (cm->seg.enabled && cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
vp9_cyclic_refresh_update_sb_postencode(cpi, mi, mi_row, mi_col, bsize);
- if (cpi->oxcf.pass == 0 && cpi->svc.temporal_layer_id == 0)
+ if (cpi->oxcf.pass == 0 && cpi->svc.temporal_layer_id == 0 &&
+ (!cpi->use_svc ||
+ (cpi->use_svc &&
+ !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame &&
+ cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)))
update_zeromv_cnt(cpi, mi, mi_row, mi_col, bsize);
}
}
diff --git a/libvpx/vp9/encoder/vp9_encodeframe.h b/libvpx/vp9/encoder/vp9_encodeframe.h
index cf5ae3d8a..1798c0048 100644
--- a/libvpx/vp9/encoder/vp9_encodeframe.h
+++ b/libvpx/vp9/encoder/vp9_encodeframe.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_ENCODER_VP9_ENCODEFRAME_H_
-#define VP9_ENCODER_VP9_ENCODEFRAME_H_
+#ifndef VPX_VP9_ENCODER_VP9_ENCODEFRAME_H_
+#define VPX_VP9_ENCODER_VP9_ENCODEFRAME_H_
#include "vpx/vpx_integer.h"
@@ -49,4 +49,4 @@ void vp9_set_variance_partition_thresholds(struct VP9_COMP *cpi, int q,
} // extern "C"
#endif
-#endif // VP9_ENCODER_VP9_ENCODEFRAME_H_
+#endif // VPX_VP9_ENCODER_VP9_ENCODEFRAME_H_
diff --git a/libvpx/vp9/encoder/vp9_encodemb.c b/libvpx/vp9/encoder/vp9_encodemb.c
index f3c17f255..a68a0926a 100644
--- a/libvpx/vp9/encoder/vp9_encodemb.c
+++ b/libvpx/vp9/encoder/vp9_encodemb.c
@@ -50,7 +50,8 @@ void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) {
}
static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = {
- { 10, 6 }, { 8, 5 },
+ { 10, 6 },
+ { 8, 5 },
};
// 'num' can be negative, but 'shift' must be non-negative.
@@ -76,13 +77,19 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
const scan_order *const so = get_scan(xd, tx_size, plane_type, block);
const int16_t *const scan = so->scan;
const int16_t *const nb = so->neighbors;
+ const MODE_INFO *mbmi = xd->mi[0];
+ const int sharpness = mb->sharpness;
+ const int64_t rdadj = (int64_t)mb->rdmult * plane_rd_mult[ref][plane_type];
const int64_t rdmult =
- ((int64_t)mb->rdmult * plane_rd_mult[ref][plane_type]) >> 1;
+ (sharpness == 0 ? rdadj >> 1
+ : (rdadj * (8 - sharpness + mbmi->segment_id)) >> 4);
+
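  /* Worked values for the scaled rdmult, assuming segment_id == 0:
   *   sharpness 0:  rdadj >> 1        (factor 8/16)
   *   sharpness 4:  (rdadj * 4) >> 4  (factor 4/16)
   *   sharpness 7:  (rdadj * 1) >> 4  (factor 1/16)
   * A smaller rdmult makes rate cheaper relative to distortion, so the
   * trellis zeroes fewer high-frequency coefficients -- a sharper result. */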
const int64_t rddiv = mb->rddiv;
int64_t rd_cost0, rd_cost1;
int64_t rate0, rate1;
int16_t t0, t1;
int i, final_eob;
+ int count_high_values_after_eob = 0;
#if CONFIG_VP9_HIGHBITDEPTH
const uint16_t *cat6_high_cost = vp9_get_high_cost_table(xd->bd);
#else
@@ -200,9 +207,9 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
const int band_next = band_translate[i + 1];
const int token_next =
(i + 1 != eob) ? vp9_get_token(qcoeff[scan[i + 1]]) : EOB_TOKEN;
- unsigned int(
- *const token_costs_next)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] =
- token_costs + band_next;
+ unsigned int(*const token_costs_next)[2][COEFF_CONTEXTS]
+ [ENTROPY_TOKENS] =
+ token_costs + band_next;
token_cache[rc] = vp9_pt_energy_class[t0];
ctx_next = get_coef_context(nb, token_cache, i + 1);
token_tree_sel_next = (x == 0);
@@ -262,6 +269,7 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
assert(distortion0 <= distortion_for_zero);
token_cache[rc] = vp9_pt_energy_class[t0];
}
+ if (sharpness > 0 && abs(qcoeff[rc]) > 1) count_high_values_after_eob++;
assert(accu_error >= 0);
x_prev = qcoeff[rc]; // Update based on selected quantized value.
@@ -272,6 +280,7 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
if (best_eob_cost_cur < best_block_rd_cost) {
best_block_rd_cost = best_eob_cost_cur;
final_eob = i + 1;
+ count_high_values_after_eob = 0;
if (use_x1) {
before_best_eob_qc = x1;
before_best_eob_dqc = dqc1;
@@ -283,19 +292,31 @@ int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
}
}
}
- assert(final_eob <= eob);
- if (final_eob > 0) {
- int rc;
- assert(before_best_eob_qc != 0);
- i = final_eob - 1;
- rc = scan[i];
- qcoeff[rc] = before_best_eob_qc;
- dqcoeff[rc] = before_best_eob_dqc;
- }
- for (i = final_eob; i < eob; i++) {
- int rc = scan[i];
- qcoeff[rc] = 0;
- dqcoeff[rc] = 0;
+ if (count_high_values_after_eob > 0) {
+ final_eob = eob - 1;
+ for (; final_eob >= 0; final_eob--) {
+ const int rc = scan[final_eob];
+ const int x = qcoeff[rc];
+ if (x) {
+ break;
+ }
+ }
+ final_eob++;
+ } else {
+ assert(final_eob <= eob);
+ if (final_eob > 0) {
+ int rc;
+ assert(before_best_eob_qc != 0);
+ i = final_eob - 1;
+ rc = scan[i];
+ qcoeff[rc] = before_best_eob_qc;
+ dqcoeff[rc] = before_best_eob_dqc;
+ }
+ for (i = final_eob; i < eob; i++) {
+ int rc = scan[i];
+ qcoeff[rc] = 0;
+ dqcoeff[rc] = 0;
+ }
}
mb->plane[plane].eobs[block] = final_eob;
return final_eob;
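A standalone sketch of the back-scan used above: when sharpness is active and coefficients with magnitude above 1 sit past the trellis-chosen EOB, the EOB is recomputed from the last nonzero quantized coefficient instead of truncating that energy away. (tran_low_t is vp9's coefficient type.)

  static int eob_from_last_nonzero(const tran_low_t *qcoeff,
                                   const int16_t *scan, int eob) {
    int i = eob - 1;
    while (i >= 0 && qcoeff[scan[i]] == 0) --i;
    return i + 1;  // 0 if every coefficient quantized to zero.
  }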
@@ -357,13 +378,13 @@ void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, int row, int col,
p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob,
scan_order->scan, scan_order->iscan);
break;
- case TX_4X4:
+ default:
+ assert(tx_size == TX_4X4);
x->fwd_txfm4x4(src_diff, coeff, diff_stride);
vp9_highbd_quantize_fp(coeff, 16, x->skip_block, p->round_fp,
p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob,
scan_order->scan, scan_order->iscan);
break;
- default: assert(0);
}
return;
}
@@ -387,13 +408,13 @@ void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, int row, int col,
p->round_fp, p->quant_fp, qcoeff, dqcoeff, pd->dequant,
eob, scan_order->scan, scan_order->iscan);
break;
- case TX_4X4:
+ default:
+ assert(tx_size == TX_4X4);
x->fwd_txfm4x4(src_diff, coeff, diff_stride);
vp9_quantize_fp(coeff, 16, x->skip_block, p->round_fp, p->quant_fp,
qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan,
scan_order->iscan);
break;
- default: assert(0); break;
}
}
@@ -433,13 +454,13 @@ void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, int row, int col,
p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0],
eob);
break;
- case TX_4X4:
+ default:
+ assert(tx_size == TX_4X4);
x->fwd_txfm4x4(src_diff, coeff, diff_stride);
vpx_highbd_quantize_dc(coeff, 16, x->skip_block, p->round,
p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0],
eob);
break;
- default: assert(0);
}
return;
}
@@ -461,12 +482,12 @@ void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, int row, int col,
vpx_quantize_dc(coeff, 64, x->skip_block, p->round, p->quant_fp[0],
qcoeff, dqcoeff, pd->dequant[0], eob);
break;
- case TX_4X4:
+ default:
+ assert(tx_size == TX_4X4);
x->fwd_txfm4x4(src_diff, coeff, diff_stride);
vpx_quantize_dc(coeff, 16, x->skip_block, p->round, p->quant_fp[0],
qcoeff, dqcoeff, pd->dequant[0], eob);
break;
- default: assert(0); break;
}
}
@@ -510,14 +531,14 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block, int row, int col,
pd->dequant, eob, scan_order->scan,
scan_order->iscan);
break;
- case TX_4X4:
+ default:
+ assert(tx_size == TX_4X4);
x->fwd_txfm4x4(src_diff, coeff, diff_stride);
vpx_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round,
p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, eob, scan_order->scan,
scan_order->iscan);
break;
- default: assert(0);
}
return;
}
@@ -543,13 +564,13 @@ void vp9_xform_quant(MACROBLOCK *x, int plane, int block, int row, int col,
p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
scan_order->scan, scan_order->iscan);
break;
- case TX_4X4:
+ default:
+ assert(tx_size == TX_4X4);
x->fwd_txfm4x4(src_diff, coeff, diff_stride);
vpx_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant,
p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob,
scan_order->scan, scan_order->iscan);
break;
- default: assert(0); break;
}
}
@@ -633,14 +654,14 @@ static void encode_block(int plane, int block, int row, int col,
vp9_highbd_idct8x8_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
xd->bd);
break;
- case TX_4X4:
+ default:
+ assert(tx_size == TX_4X4);
// this is like vp9_short_idct4x4 but has a special case around eob<=1
// which is significant (not just an optimization) for the lossless
// case.
x->highbd_inv_txfm_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block],
xd->bd);
break;
- default: assert(0 && "Invalid transform size");
}
return;
}
@@ -656,13 +677,13 @@ static void encode_block(int plane, int block, int row, int col,
case TX_8X8:
vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
break;
- case TX_4X4:
+ default:
+ assert(tx_size == TX_4X4);
// this is like vp9_short_idct4x4 but has a special case around eob<=1
// which is significant (not just an optimization) for the lossless
// case.
x->inv_txfm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]);
break;
- default: assert(0 && "Invalid transform size"); break;
}
}
@@ -847,7 +868,8 @@ void vp9_encode_block_intra(int plane, int block, int row, int col,
xd->bd);
}
break;
- case TX_4X4:
+ default:
+ assert(tx_size == TX_4X4);
if (!x->skip_recode) {
vpx_highbd_subtract_block(4, 4, src_diff, diff_stride, src,
src_stride, dst, dst_stride, xd->bd);
@@ -875,7 +897,6 @@ void vp9_encode_block_intra(int plane, int block, int row, int col,
}
}
break;
- default: assert(0); return;
}
if (*eob) *(args->skip) = 0;
return;
@@ -929,7 +950,8 @@ void vp9_encode_block_intra(int plane, int block, int row, int col,
if (!x->skip_encode && *eob)
vp9_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob);
break;
- case TX_4X4:
+ default:
+ assert(tx_size == TX_4X4);
if (!x->skip_recode) {
vpx_subtract_block(4, 4, src_diff, diff_stride, src, src_stride, dst,
dst_stride);
@@ -954,7 +976,6 @@ void vp9_encode_block_intra(int plane, int block, int row, int col,
vp9_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type);
}
break;
- default: assert(0); break;
}
if (*eob) *(args->skip) = 0;
}
diff --git a/libvpx/vp9/encoder/vp9_encodemb.h b/libvpx/vp9/encoder/vp9_encodemb.h
index cf943bedf..fa41f70ef 100644
--- a/libvpx/vp9/encoder/vp9_encodemb.h
+++ b/libvpx/vp9/encoder/vp9_encodemb.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_ENCODER_VP9_ENCODEMB_H_
-#define VP9_ENCODER_VP9_ENCODEMB_H_
+#ifndef VPX_VP9_ENCODER_VP9_ENCODEMB_H_
+#define VPX_VP9_ENCODER_VP9_ENCODEMB_H_
#include "./vpx_config.h"
#include "vp9/encoder/vp9_block.h"
@@ -48,4 +48,4 @@ void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane,
} // extern "C"
#endif
-#endif // VP9_ENCODER_VP9_ENCODEMB_H_
+#endif // VPX_VP9_ENCODER_VP9_ENCODEMB_H_
diff --git a/libvpx/vp9/encoder/vp9_encodemv.h b/libvpx/vp9/encoder/vp9_encodemv.h
index 9fc7ab8dc..2f1be4b23 100644
--- a/libvpx/vp9/encoder/vp9_encodemv.h
+++ b/libvpx/vp9/encoder/vp9_encodemv.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_ENCODER_VP9_ENCODEMV_H_
-#define VP9_ENCODER_VP9_ENCODEMV_H_
+#ifndef VPX_VP9_ENCODER_VP9_ENCODEMV_H_
+#define VPX_VP9_ENCODER_VP9_ENCODEMV_H_
#include "vp9/encoder/vp9_encoder.h"
@@ -27,7 +27,7 @@ void vp9_encode_mv(VP9_COMP *cpi, vpx_writer *w, const MV *mv, const MV *ref,
unsigned int *const max_mv_magnitude);
void vp9_build_nmv_cost_table(int *mvjoint, int *mvcost[2],
- const nmv_context *mvctx, int usehp);
+ const nmv_context *ctx, int usehp);
void vp9_update_mv_count(ThreadData *td);
@@ -35,4 +35,4 @@ void vp9_update_mv_count(ThreadData *td);
} // extern "C"
#endif
-#endif // VP9_ENCODER_VP9_ENCODEMV_H_
+#endif // VPX_VP9_ENCODER_VP9_ENCODEMV_H_
diff --git a/libvpx/vp9/encoder/vp9_encoder.c b/libvpx/vp9/encoder/vp9_encoder.c
index 2ae59dd98..bf35b3570 100644
--- a/libvpx/vp9/encoder/vp9_encoder.c
+++ b/libvpx/vp9/encoder/vp9_encoder.c
@@ -35,6 +35,7 @@
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_tile_common.h"
+#include "vp9/common/vp9_scan.h"
#include "vp9/encoder/vp9_alt_ref_aq.h"
#include "vp9/encoder/vp9_aq_360.h"
@@ -42,14 +43,21 @@
#include "vp9/encoder/vp9_aq_cyclicrefresh.h"
#include "vp9/encoder/vp9_aq_variance.h"
#include "vp9/encoder/vp9_bitstream.h"
+#if CONFIG_INTERNAL_STATS
+#include "vp9/encoder/vp9_blockiness.h"
+#endif
#include "vp9/encoder/vp9_context_tree.h"
#include "vp9/encoder/vp9_encodeframe.h"
+#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/encoder/vp9_encoder.h"
-#include "vp9/encoder/vp9_extend.h"
#include "vp9/encoder/vp9_ethread.h"
+#include "vp9/encoder/vp9_extend.h"
#include "vp9/encoder/vp9_firstpass.h"
#include "vp9/encoder/vp9_mbgraph.h"
+#if CONFIG_NON_GREEDY_MV
+#include "vp9/encoder/vp9_mcomp.h"
+#endif
#include "vp9/encoder/vp9_multi_thread.h"
#include "vp9/encoder/vp9_noise_estimate.h"
#include "vp9/encoder/vp9_picklpf.h"
@@ -65,12 +73,12 @@
#define AM_SEGMENT_ID_INACTIVE 7
#define AM_SEGMENT_ID_ACTIVE 0
-#define ALTREF_HIGH_PRECISION_MV 1 // Whether to use high precision mv
- // for altref computation.
-#define HIGH_PRECISION_MV_QTHRESH 200 // Q threshold for high precision
- // mv. Choose a very high value for
- // now so that HIGH_PRECISION is always
- // chosen.
+// Whether to use high precision mv for altref computation.
+#define ALTREF_HIGH_PRECISION_MV 1
+
+// Q threshold for high precision mv. Choose a very high value for now so that
+// HIGH_PRECISION is always chosen.
+#define HIGH_PRECISION_MV_QTHRESH 200
#define FRAME_SIZE_FACTOR 128 // empirical params for context model threshold
#define FRAME_RATE_FACTOR 8
@@ -84,6 +92,9 @@ static FILE *yuv_skinmap_file = NULL;
#ifdef OUTPUT_YUV_REC
FILE *yuv_rec_file;
#endif
+#ifdef OUTPUT_YUV_SVC_SRC
+FILE *yuv_svc_src[3] = { NULL, NULL, NULL };
+#endif
#if 0
FILE *framepsnr;
@@ -483,14 +494,10 @@ static INLINE void Scale2Ratio(VPX_SCALING mode, int *hr, int *hs) {
*hr = 3;
*hs = 5;
break;
- case ONETWO:
- *hr = 1;
- *hs = 2;
- break;
default:
+ assert(mode == ONETWO);
*hr = 1;
- *hs = 1;
- assert(0);
+ *hs = 2;
break;
}
}
@@ -547,6 +554,74 @@ static void apply_active_map(VP9_COMP *cpi) {
}
}
+static void apply_roi_map(VP9_COMP *cpi) {
+ VP9_COMMON *cm = &cpi->common;
+ struct segmentation *const seg = &cm->seg;
+ vpx_roi_map_t *roi = &cpi->roi;
+ const int *delta_q = roi->delta_q;
+ const int *delta_lf = roi->delta_lf;
+ const int *skip = roi->skip;
+ int ref_frame[8];
+ int internal_delta_q[MAX_SEGMENTS];
+ int i;
+ static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
+ VP9_ALT_FLAG };
+
+ // TODO(jianj): Investigate why ROI is not working at speed < 5 or in
+ // non-realtime mode.
+ if (cpi->oxcf.mode != REALTIME || cpi->oxcf.speed < 5) return;
+ if (!roi->enabled) return;
+
+ memcpy(&ref_frame, roi->ref_frame, sizeof(ref_frame));
+
+ vp9_enable_segmentation(seg);
+ vp9_clearall_segfeatures(seg);
+ // Select the delta coding method.
+ seg->abs_delta = SEGMENT_DELTADATA;
+
+ memcpy(cpi->segmentation_map, roi->roi_map, (cm->mi_rows * cm->mi_cols));
+
+ for (i = 0; i < MAX_SEGMENTS; ++i) {
+ // Translate the external delta q values to internal values.
+ internal_delta_q[i] = vp9_quantizer_to_qindex(abs(delta_q[i]));
+ if (delta_q[i] < 0) internal_delta_q[i] = -internal_delta_q[i];
+ vp9_disable_segfeature(seg, i, SEG_LVL_ALT_Q);
+ vp9_disable_segfeature(seg, i, SEG_LVL_ALT_LF);
+ if (internal_delta_q[i] != 0) {
+ vp9_enable_segfeature(seg, i, SEG_LVL_ALT_Q);
+ vp9_set_segdata(seg, i, SEG_LVL_ALT_Q, internal_delta_q[i]);
+ }
+ if (delta_lf[i] != 0) {
+ vp9_enable_segfeature(seg, i, SEG_LVL_ALT_LF);
+ vp9_set_segdata(seg, i, SEG_LVL_ALT_LF, delta_lf[i]);
+ }
+ if (skip[i] != 0) {
+ vp9_enable_segfeature(seg, i, SEG_LVL_SKIP);
+ vp9_set_segdata(seg, i, SEG_LVL_SKIP, skip[i]);
+ }
+ if (ref_frame[i] >= 0) {
+ int valid_ref = 1;
+ // ALTREF is not used as a reference for nonrd_pickmode with 0 lag.
+ if (ref_frame[i] == ALTREF_FRAME && cpi->sf.use_nonrd_pick_mode)
+ valid_ref = 0;
+ // If GOLDEN is selected, make sure it is set as a reference.
+ if (ref_frame[i] == GOLDEN_FRAME &&
+ !(cpi->ref_frame_flags & flag_list[ref_frame[i]])) {
+ valid_ref = 0;
+ }
+ // GOLDEN was updated in the previously encoded frame, so GOLDEN and LAST
+ // are the same reference.
+ if (ref_frame[i] == GOLDEN_FRAME && cpi->rc.frames_since_golden == 0)
+ ref_frame[i] = LAST_FRAME;
+ if (valid_ref) {
+ vp9_enable_segfeature(seg, i, SEG_LVL_REF_FRAME);
+ vp9_set_segdata(seg, i, SEG_LVL_REF_FRAME, ref_frame[i]);
+ }
+ }
+ }
+ roi->enabled = 1;
+}
+
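The delta-q translation above is the one subtle step: external deltas arrive on the 0..63 quantizer scale, segment features operate on the 0..255 qindex scale, and the lookup is only defined for non-negative input, hence the abs()/re-sign dance. Schematically (the exact mapping is table-driven inside vp9_quantizer_to_qindex()):

  //   delta_q[i] == -10  ->  internal = -vp9_quantizer_to_qindex(10)
  //   delta_q[i] ==   0  ->  SEG_LVL_ALT_Q stays disabled for segment i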
static void init_level_info(Vp9LevelInfo *level_info) {
Vp9LevelStats *const level_stats = &level_info->level_stats;
Vp9LevelSpec *const level_spec = &level_info->level_spec;
@@ -557,6 +632,13 @@ static void init_level_info(Vp9LevelInfo *level_info) {
level_spec->min_altref_distance = INT_MAX;
}
+static int check_seg_range(int seg_data[8], int range) {
+ return !(abs(seg_data[0]) > range || abs(seg_data[1]) > range ||
+ abs(seg_data[2]) > range || abs(seg_data[3]) > range ||
+ abs(seg_data[4]) > range || abs(seg_data[5]) > range ||
+ abs(seg_data[6]) > range || abs(seg_data[7]) > range);
+}
+
VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec) {
int i;
const Vp9LevelSpec *this_level;
@@ -583,6 +665,61 @@ VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec) {
return (i == VP9_LEVELS) ? LEVEL_UNKNOWN : vp9_level_defs[i].level;
}
+int vp9_set_roi_map(VP9_COMP *cpi, unsigned char *map, unsigned int rows,
+ unsigned int cols, int delta_q[8], int delta_lf[8],
+ int skip[8], int ref_frame[8]) {
+ VP9_COMMON *cm = &cpi->common;
+ vpx_roi_map_t *roi = &cpi->roi;
+ const int range = 63;
+ const int ref_frame_range = 3; // Alt-ref
+ const int skip_range = 1;
+ const int frame_rows = cpi->common.mi_rows;
+ const int frame_cols = cpi->common.mi_cols;
+
+ // Check that the number of rows and columns match.
+ if (frame_rows != (int)rows || frame_cols != (int)cols) {
+ return -1;
+ }
+
+ if (!check_seg_range(delta_q, range) || !check_seg_range(delta_lf, range) ||
+ !check_seg_range(ref_frame, ref_frame_range) ||
+ !check_seg_range(skip, skip_range))
+ return -1;
+
+ // Also disable segmentation if no map is given or no deltas are specified.
+ if (!map ||
+ (!(delta_q[0] | delta_q[1] | delta_q[2] | delta_q[3] | delta_q[4] |
+ delta_q[5] | delta_q[6] | delta_q[7] | delta_lf[0] | delta_lf[1] |
+ delta_lf[2] | delta_lf[3] | delta_lf[4] | delta_lf[5] | delta_lf[6] |
+ delta_lf[7] | skip[0] | skip[1] | skip[2] | skip[3] | skip[4] |
+ skip[5] | skip[6] | skip[7]) &&
+ (ref_frame[0] == -1 && ref_frame[1] == -1 && ref_frame[2] == -1 &&
+ ref_frame[3] == -1 && ref_frame[4] == -1 && ref_frame[5] == -1 &&
+ ref_frame[6] == -1 && ref_frame[7] == -1))) {
+ vp9_disable_segmentation(&cm->seg);
+ cpi->roi.enabled = 0;
+ return 0;
+ }
+
+ if (roi->roi_map) {
+ vpx_free(roi->roi_map);
+ roi->roi_map = NULL;
+ }
+ CHECK_MEM_ERROR(cm, roi->roi_map, vpx_malloc(rows * cols));
+
+ // Copy to the ROI structure in the compressor.
+ memcpy(roi->roi_map, map, rows * cols);
+ memcpy(&roi->delta_q, delta_q, MAX_SEGMENTS * sizeof(delta_q[0]));
+ memcpy(&roi->delta_lf, delta_lf, MAX_SEGMENTS * sizeof(delta_lf[0]));
+ memcpy(&roi->skip, skip, MAX_SEGMENTS * sizeof(skip[0]));
+ memcpy(&roi->ref_frame, ref_frame, MAX_SEGMENTS * sizeof(ref_frame[0]));
+ roi->enabled = 1;
+ roi->rows = rows;
+ roi->cols = cols;
+
+ return 0;
+}
+
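A usage sketch for the new entry point. The caller-side names (map allocation, mark_roi) are hypothetical; rows and cols must equal the encoder's mi_rows/mi_cols (8x8 mode-info units), with one segment-id byte per unit and eight entries in each parameter array:

  // Hypothetical caller; assumes an initialized VP9_COMP *cpi and that
  // mi_rows/mi_cols match cpi->common. Error handling elided.
  int delta_q[8] = { 0, -20, 0, 0, 0, 0, 0, 0 };  // boost segment 1
  int delta_lf[8] = { 0 };                        // no loop-filter deltas
  int skip[8] = { 0 };                            // no forced-skip segments
  int ref_frame[8] = { -1, -1, -1, -1, -1, -1, -1, -1 };  // no overrides
  unsigned char *map = vpx_calloc(mi_rows * mi_cols, 1);  // segment 0 default
  mark_roi(map, mi_rows, mi_cols);  // hypothetical: write 1s over the ROI
  if (vp9_set_roi_map(cpi, map, mi_rows, mi_cols, delta_q, delta_lf, skip,
                      ref_frame) != 0) {
    // Dimension mismatch or a delta outside the checked ranges.
  }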
int vp9_set_active_map(VP9_COMP *cpi, unsigned char *new_map_16x16, int rows,
int cols) {
if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols) {
@@ -660,8 +797,17 @@ static void setup_frame(VP9_COMP *cpi) {
if (!cpi->use_svc) cm->frame_context_idx = cpi->refresh_alt_ref_frame;
}
+ // TODO(jingning): Overwrite frame_context_idx in the multi-layer ARF case.
+ // Further investigation is needed on whether this can also be applied to
+ // the single-layer ARF case.
+ if (cpi->multi_layer_arf && !cpi->use_svc) {
+ GF_GROUP *const gf_group = &cpi->twopass.gf_group;
+ cm->frame_context_idx = clamp(gf_group->layer_depth[gf_group->index] - 1, 0,
+ FRAME_CONTEXTS - 1);
+ }
+
if (cm->frame_type == KEY_FRAME) {
- if (!is_two_pass_svc(cpi)) cpi->refresh_golden_frame = 1;
+ cpi->refresh_golden_frame = 1;
cpi->refresh_alt_ref_frame = 1;
vp9_zero(cpi->interp_filter_selected);
} else {
@@ -713,12 +859,17 @@ static void vp9_enc_free_mi(VP9_COMMON *cm) {
cm->mi_grid_base = NULL;
vpx_free(cm->prev_mi_grid_base);
cm->prev_mi_grid_base = NULL;
+ cm->mi_alloc_size = 0;
}
static void vp9_swap_mi_and_prev_mi(VP9_COMMON *cm) {
// Current mip will be the prev_mip for the next frame.
MODE_INFO **temp_base = cm->prev_mi_grid_base;
MODE_INFO *temp = cm->prev_mip;
+
+ // Skip updating the prev_mi frame in show_existing_frame mode.
+ if (cm->show_existing_frame) return;
+
cm->prev_mip = cm->mip;
cm->mip = temp;
@@ -817,6 +968,9 @@ static void dealloc_compressor_data(VP9_COMP *cpi) {
vpx_free(cpi->active_map.map);
cpi->active_map.map = NULL;
+ vpx_free(cpi->roi.roi_map);
+ cpi->roi.roi_map = NULL;
+
vpx_free(cpi->consec_zero_mv);
cpi->consec_zero_mv = NULL;
@@ -1121,8 +1275,9 @@ static void alloc_util_frame_buffers(VP9_COMP *cpi) {
// For 1 pass cbr: allocate scaled_frame that may be used as an intermediate
// buffer for a 2 stage down-sampling: two stages of 1:2 down-sampling for a
- // target of 1/4x1/4.
- if (is_one_pass_cbr_svc(cpi) && !cpi->svc.scaled_temp_is_alloc) {
+ // target of 1/4x1/4. number_spatial_layers must be greater than 2.
+ if (is_one_pass_cbr_svc(cpi) && !cpi->svc.scaled_temp_is_alloc &&
+ cpi->svc.number_spatial_layers > 2) {
cpi->svc.scaled_temp_is_alloc = 1;
if (vpx_realloc_frame_buffer(
&cpi->svc.scaled_temp, cm->width >> 1, cm->height >> 1,
@@ -1213,15 +1368,9 @@ static void set_tile_limits(VP9_COMP *cpi) {
int min_log2_tile_cols, max_log2_tile_cols;
vp9_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
- if (is_two_pass_svc(cpi) && (cpi->svc.encode_empty_frame_state == ENCODING ||
- cpi->svc.number_spatial_layers > 1)) {
- cm->log2_tile_cols = 0;
- cm->log2_tile_rows = 0;
- } else {
- cm->log2_tile_cols =
- clamp(cpi->oxcf.tile_columns, min_log2_tile_cols, max_log2_tile_cols);
- cm->log2_tile_rows = cpi->oxcf.tile_rows;
- }
+ cm->log2_tile_cols =
+ clamp(cpi->oxcf.tile_columns, min_log2_tile_cols, max_log2_tile_cols);
+ cm->log2_tile_rows = cpi->oxcf.tile_rows;
if (cpi->oxcf.target_level == LEVEL_AUTO) {
const int level_tile_cols =
@@ -1244,24 +1393,17 @@ static void update_frame_size(VP9_COMP *cpi) {
cm->mi_rows * cm->mi_cols * sizeof(*cpi->mbmi_ext_base));
set_tile_limits(cpi);
-
- if (is_two_pass_svc(cpi)) {
- if (vpx_realloc_frame_buffer(&cpi->alt_ref_buffer, cm->width, cm->height,
- cm->subsampling_x, cm->subsampling_y,
-#if CONFIG_VP9_HIGHBITDEPTH
- cm->use_highbitdepth,
-#endif
- VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
- NULL, NULL, NULL))
- vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
- "Failed to reallocate alt_ref_buffer");
- }
}
static void init_buffer_indices(VP9_COMP *cpi) {
- cpi->lst_fb_idx = 0;
- cpi->gld_fb_idx = 1;
- cpi->alt_fb_idx = 2;
+ int ref_frame;
+
+ for (ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame)
+ cpi->ref_fb_idx[ref_frame] = ref_frame;
+
+ cpi->lst_fb_idx = cpi->ref_fb_idx[LAST_FRAME - 1];
+ cpi->gld_fb_idx = cpi->ref_fb_idx[GOLDEN_FRAME - 1];
+ cpi->alt_fb_idx = cpi->ref_fb_idx[ALTREF_FRAME - 1];
}
static void init_level_constraint(LevelConstraint *lc) {
@@ -1610,7 +1752,8 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) {
vpx_highbd_sad4x4x4d_bits10)
break;
- case VPX_BITS_12:
+ default:
+ assert(cm->bit_depth == VPX_BITS_12);
HIGHBD_BFP(BLOCK_32X16, vpx_highbd_sad32x16_bits12,
vpx_highbd_sad32x16_avg_bits12, vpx_highbd_12_variance32x16,
vpx_highbd_12_sub_pixel_variance32x16,
@@ -1689,11 +1832,6 @@ static void highbd_set_var_fns(VP9_COMP *const cpi) {
vpx_highbd_12_sub_pixel_avg_variance4x4,
vpx_highbd_sad4x4x4d_bits12)
break;
-
- default:
- assert(0 &&
- "cm->bit_depth should be VPX_BITS_8, "
- "VPX_BITS_10 or VPX_BITS_12");
}
}
}
@@ -1757,6 +1895,7 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) {
int last_w = cpi->oxcf.width;
int last_h = cpi->oxcf.height;
+ vp9_init_quantizer(cpi);
if (cm->profile != oxcf->profile) cm->profile = oxcf->profile;
cm->bit_depth = oxcf->bit_depth;
cm->color_space = oxcf->color_space;
@@ -2017,8 +2156,9 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
realloc_segmentation_maps(cpi);
- CHECK_MEM_ERROR(cm, cpi->skin_map, vpx_calloc(cm->mi_rows * cm->mi_cols,
- sizeof(cpi->skin_map[0])));
+ CHECK_MEM_ERROR(
+ cm, cpi->skin_map,
+ vpx_calloc(cm->mi_rows * cm->mi_cols, sizeof(cpi->skin_map[0])));
CHECK_MEM_ERROR(cm, cpi->alt_ref_aq, vp9_alt_ref_aq_create());
@@ -2062,8 +2202,6 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
#endif
cpi->refresh_alt_ref_frame = 0;
- cpi->multi_arf_last_grp_enabled = 0;
-
cpi->b_calculate_psnr = CONFIG_INTERNAL_STATS;
init_level_info(&cpi->level_info);
@@ -2104,9 +2242,11 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
if (cpi->b_calculate_consistency) {
CHECK_MEM_ERROR(cm, cpi->ssim_vars,
- vpx_malloc(sizeof(*cpi->ssim_vars) * 4 *
- cpi->common.mi_rows * cpi->common.mi_cols));
+ vpx_calloc(cpi->common.mi_rows * cpi->common.mi_cols,
+ sizeof(*cpi->ssim_vars) * 4));
cpi->worst_consistency = 100.0;
+ } else {
+ cpi->ssim_vars = NULL;
}
#endif
@@ -2141,6 +2281,11 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
#ifdef OUTPUT_YUV_REC
yuv_rec_file = fopen("rec.yuv", "wb");
#endif
+#ifdef OUTPUT_YUV_SVC_SRC
+ yuv_svc_src[0] = fopen("svc_src_0.yuv", "wb");
+ yuv_svc_src[1] = fopen("svc_src_1.yuv", "wb");
+ yuv_svc_src[2] = fopen("svc_src_2.yuv", "wb");
+#endif
#if 0
framepsnr = fopen("framepsnr.stt", "a");
@@ -2219,6 +2364,11 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
vp9_set_speed_features_framesize_independent(cpi);
vp9_set_speed_features_framesize_dependent(cpi);
+#if CONFIG_NON_GREEDY_MV
+ cpi->feature_score_loc_alloc = 0;
+#endif // CONFIG_NON_GREEDY_MV
+ for (i = 0; i < MAX_ARF_GOP_SIZE; ++i) cpi->tpl_stats[i].tpl_stats_ptr = NULL;
+
// Allocate memory to store variances for a frame.
CHECK_MEM_ERROR(cm, cpi->source_diff_var, vpx_calloc(cm->MBs, sizeof(diff)));
cpi->source_var_thresh = 0;
@@ -2293,6 +2443,17 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
vp9_loop_filter_init(cm);
+ // Set up the unit scaling factor used during motion search.
+#if CONFIG_VP9_HIGHBITDEPTH
+ vp9_setup_scale_factors_for_frame(&cpi->me_sf, cm->width, cm->height,
+ cm->width, cm->height,
+ cm->use_highbitdepth);
+#else
+ vp9_setup_scale_factors_for_frame(&cpi->me_sf, cm->width, cm->height,
+ cm->width, cm->height);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ cpi->td.mb.me_sf = &cpi->me_sf;
+
cm->error.setjmp = 0;
return cpi;
@@ -2307,11 +2468,15 @@ VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
void vp9_remove_compressor(VP9_COMP *cpi) {
VP9_COMMON *cm;
- unsigned int i;
+ unsigned int i, frame;
int t;
if (!cpi) return;
+#if CONFIG_INTERNAL_STATS
+ vpx_free(cpi->ssim_vars);
+#endif
+
cm = &cpi->common;
if (cm->current_video_frame > 0) {
#if CONFIG_INTERNAL_STATS
@@ -2383,7 +2548,6 @@ void vp9_remove_compressor(VP9_COMP *cpi) {
fclose(f);
}
-
#endif
#if 0
@@ -2402,6 +2566,16 @@ void vp9_remove_compressor(VP9_COMP *cpi) {
vp9_denoiser_free(&(cpi->denoiser));
#endif
+#if CONFIG_NON_GREEDY_MV
+ vpx_free(cpi->feature_score_loc_arr);
+ vpx_free(cpi->feature_score_loc_sort);
+ vpx_free(cpi->feature_score_loc_heap);
+#endif
+ for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) {
+ vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr);
+ cpi->tpl_stats[frame].is_valid = 0;
+ }
+
for (t = 0; t < cpi->num_workers; ++t) {
VPxWorker *const worker = &cpi->workers[t];
EncWorkerData *const thread_data = &cpi->tile_thr_data[t];
@@ -2459,6 +2633,11 @@ void vp9_remove_compressor(VP9_COMP *cpi) {
#ifdef OUTPUT_YUV_REC
fclose(yuv_rec_file);
#endif
+#ifdef OUTPUT_YUV_SVC_SRC
+ fclose(yuv_svc_src[0]);
+ fclose(yuv_svc_src[1]);
+ fclose(yuv_svc_src[2]);
+#endif
#if 0
@@ -2754,11 +2933,14 @@ static int big_rate_miss(VP9_COMP *cpi) {
// test in two pass for the first
static int two_pass_first_group_inter(VP9_COMP *cpi) {
- TWO_PASS *const twopass = &cpi->twopass;
- GF_GROUP *const gf_group = &twopass->gf_group;
- if ((cpi->oxcf.pass == 2) &&
- (gf_group->index == gf_group->first_inter_index)) {
- return 1;
+ if (cpi->oxcf.pass == 2) {
+ TWO_PASS *const twopass = &cpi->twopass;
+ GF_GROUP *const gf_group = &twopass->gf_group;
+ const int gfg_index = gf_group->index;
+
+ if (gfg_index == 0) return gf_group->update_type[gfg_index] == LF_UPDATE;
+ return gf_group->update_type[gfg_index - 1] != LF_UPDATE &&
+ gf_group->update_type[gfg_index] == LF_UPDATE;
} else {
return 0;
}
@@ -2808,9 +2990,18 @@ static int recode_loop_test(VP9_COMP *cpi, int high_limit, int low_limit, int q,
return force_recode;
}
-void vp9_update_reference_frames(VP9_COMP *cpi) {
+static void update_ref_frames(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
BufferPool *const pool = cm->buffer_pool;
+ GF_GROUP *const gf_group = &cpi->twopass.gf_group;
+
+ // Pop ARF.
+ if (cm->show_existing_frame) {
+ cpi->lst_fb_idx = cpi->alt_fb_idx;
+ cpi->alt_fb_idx =
+ stack_pop(gf_group->arf_index_stack, gf_group->stack_size);
+ --gf_group->stack_size;
+ }
// At this point the new frame has been encoded.
// If any buffer copy / swapping is signaled it should be done here.
@@ -2836,23 +3027,23 @@ void vp9_update_reference_frames(VP9_COMP *cpi) {
tmp = cpi->alt_fb_idx;
cpi->alt_fb_idx = cpi->gld_fb_idx;
cpi->gld_fb_idx = tmp;
-
- if (is_two_pass_svc(cpi)) {
- cpi->svc.layer_context[0].gold_ref_idx = cpi->gld_fb_idx;
- cpi->svc.layer_context[0].alt_ref_idx = cpi->alt_fb_idx;
- }
} else { /* For non key/golden frames */
if (cpi->refresh_alt_ref_frame) {
- int arf_idx = cpi->alt_fb_idx;
- if ((cpi->oxcf.pass == 2) && cpi->multi_arf_allowed) {
- const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
- arf_idx = gf_group->arf_update_idx[gf_group->index];
- }
+ int arf_idx = gf_group->top_arf_idx;
+
+ // Push new ARF into stack.
+ stack_push(gf_group->arf_index_stack, cpi->alt_fb_idx,
+ gf_group->stack_size);
+ ++gf_group->stack_size;
+
+ assert(arf_idx < REF_FRAMES);
ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[arf_idx], cm->new_fb_idx);
memcpy(cpi->interp_filter_selected[ALTREF_FRAME],
cpi->interp_filter_selected[0],
sizeof(cpi->interp_filter_selected[0]));
+
+ cpi->alt_fb_idx = arf_idx;
}
if (cpi->refresh_golden_frame) {
@@ -2877,69 +3068,39 @@ void vp9_update_reference_frames(VP9_COMP *cpi) {
cpi->interp_filter_selected[0],
sizeof(cpi->interp_filter_selected[0]));
}
-#if CONFIG_VP9_TEMPORAL_DENOISING
- if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) &&
- cpi->denoiser.denoising_level > kDenLowLow) {
- int svc_base_is_key = 0;
- int denoise_svc_second_layer = 0;
- if (cpi->use_svc) {
- int realloc_fail = 0;
- const int svc_buf_shift =
- cpi->svc.number_spatial_layers - cpi->svc.spatial_layer_id == 2
- ? cpi->denoiser.num_ref_frames
- : 0;
- int layer = LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id,
- cpi->svc.temporal_layer_id,
- cpi->svc.number_temporal_layers);
- LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer];
- svc_base_is_key = lc->is_key_frame;
- denoise_svc_second_layer =
- cpi->svc.number_spatial_layers - cpi->svc.spatial_layer_id == 2 ? 1
- : 0;
- // Check if we need to allocate extra buffers in the denoiser
- // for
- // refreshed frames.
- realloc_fail = vp9_denoiser_realloc_svc(
- cm, &cpi->denoiser, svc_buf_shift, cpi->refresh_alt_ref_frame,
- cpi->refresh_golden_frame, cpi->refresh_last_frame, cpi->alt_fb_idx,
- cpi->gld_fb_idx, cpi->lst_fb_idx);
- if (realloc_fail)
- vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
- "Failed to re-allocate denoiser for SVC");
- }
- vp9_denoiser_update_frame_info(
- &cpi->denoiser, *cpi->Source, cpi->common.frame_type,
- cpi->refresh_alt_ref_frame, cpi->refresh_golden_frame,
- cpi->refresh_last_frame, cpi->alt_fb_idx, cpi->gld_fb_idx,
- cpi->lst_fb_idx, cpi->resize_pending, svc_base_is_key,
- denoise_svc_second_layer);
+
+ if (gf_group->update_type[gf_group->index] == MID_OVERLAY_UPDATE) {
+ cpi->alt_fb_idx =
+ stack_pop(gf_group->arf_index_stack, gf_group->stack_size);
+ --gf_group->stack_size;
}
+}
+
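The reference-frame update is now driven by an explicit ARF index stack, so nested (multi-layer) ARFs unwind in the right order. The lifecycle, in sketch form (stack_push/stack_pop are plain array-stack helpers defined elsewhere in the encoder):

  /* encode an ARF (refresh_alt_ref_frame):
   *   stack_push(arf_index_stack, alt_fb_idx); alt_fb_idx = top_arf_idx;
   * show that ARF later (show_existing_frame):
   *   lst_fb_idx = alt_fb_idx; alt_fb_idx = stack_pop(...);
   * mid-group overlay (MID_OVERLAY_UPDATE):
   *   alt_fb_idx = stack_pop(...);
   * Each pop restores the alt_fb_idx saved when the matching ARF was
   * pushed. */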
+void vp9_update_reference_frames(VP9_COMP *cpi) {
+ update_ref_frames(cpi);
+
+#if CONFIG_VP9_TEMPORAL_DENOISING
+ vp9_denoiser_update_ref_frame(cpi);
#endif
- if (is_one_pass_cbr_svc(cpi)) {
- // Keep track of frame index for each reference frame.
- SVC *const svc = &cpi->svc;
- if (cm->frame_type == KEY_FRAME) {
- svc->ref_frame_index[cpi->lst_fb_idx] = svc->current_superframe;
- svc->ref_frame_index[cpi->gld_fb_idx] = svc->current_superframe;
- svc->ref_frame_index[cpi->alt_fb_idx] = svc->current_superframe;
- } else {
- if (cpi->refresh_last_frame)
- svc->ref_frame_index[cpi->lst_fb_idx] = svc->current_superframe;
- if (cpi->refresh_golden_frame)
- svc->ref_frame_index[cpi->gld_fb_idx] = svc->current_superframe;
- if (cpi->refresh_alt_ref_frame)
- svc->ref_frame_index[cpi->alt_fb_idx] = svc->current_superframe;
- }
- }
+
+ if (is_one_pass_cbr_svc(cpi)) vp9_svc_update_ref_frame(cpi);
}
static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) {
MACROBLOCKD *xd = &cpi->td.mb.e_mbd;
struct loopfilter *lf = &cm->lf;
-
- const int is_reference_frame =
+ int is_reference_frame =
(cm->frame_type == KEY_FRAME || cpi->refresh_last_frame ||
cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame);
+ if (cpi->use_svc &&
+ cpi->svc.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS)
+ is_reference_frame = !cpi->svc.non_reference_frame;
+
+ // Skip loop filter in show_existing_frame mode.
+ if (cm->show_existing_frame) {
+ lf->filter_level = 0;
+ return;
+ }
if (xd->lossless) {
lf->filter_level = 0;
@@ -3066,8 +3227,8 @@ void vp9_scale_references(VP9_COMP *cpi) {
if (cpi->oxcf.pass == 0 && !cpi->use_svc) {
// Check for release of scaled reference.
buf_idx = cpi->scaled_ref_idx[ref_frame - 1];
- buf = (buf_idx != INVALID_IDX) ? &pool->frame_bufs[buf_idx] : NULL;
- if (buf != NULL) {
+ if (buf_idx != INVALID_IDX) {
+ buf = &pool->frame_bufs[buf_idx];
--buf->ref_count;
cpi->scaled_ref_idx[ref_frame - 1] = INVALID_IDX;
}
@@ -3098,22 +3259,21 @@ static void release_scaled_references(VP9_COMP *cpi) {
refresh[2] = (cpi->refresh_alt_ref_frame) ? 1 : 0;
for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
const int idx = cpi->scaled_ref_idx[i - 1];
- RefCntBuffer *const buf =
- idx != INVALID_IDX ? &cm->buffer_pool->frame_bufs[idx] : NULL;
- const YV12_BUFFER_CONFIG *const ref = get_ref_frame_buffer(cpi, i);
- if (buf != NULL &&
- (refresh[i - 1] || (buf->buf.y_crop_width == ref->y_crop_width &&
- buf->buf.y_crop_height == ref->y_crop_height))) {
- --buf->ref_count;
- cpi->scaled_ref_idx[i - 1] = INVALID_IDX;
+ if (idx != INVALID_IDX) {
+ RefCntBuffer *const buf = &cm->buffer_pool->frame_bufs[idx];
+ const YV12_BUFFER_CONFIG *const ref = get_ref_frame_buffer(cpi, i);
+ if (refresh[i - 1] || (buf->buf.y_crop_width == ref->y_crop_width &&
+ buf->buf.y_crop_height == ref->y_crop_height)) {
+ --buf->ref_count;
+ cpi->scaled_ref_idx[i - 1] = INVALID_IDX;
+ }
}
}
} else {
- for (i = 0; i < MAX_REF_FRAMES; ++i) {
+ for (i = 0; i < REFS_PER_FRAME; ++i) {
const int idx = cpi->scaled_ref_idx[i];
- RefCntBuffer *const buf =
- idx != INVALID_IDX ? &cm->buffer_pool->frame_bufs[idx] : NULL;
- if (buf != NULL) {
+ if (idx != INVALID_IDX) {
+ RefCntBuffer *const buf = &cm->buffer_pool->frame_bufs[idx];
--buf->ref_count;
cpi->scaled_ref_idx[i] = INVALID_IDX;
}
@@ -3172,11 +3332,9 @@ static void output_frame_level_debug_stats(VP9_COMP *cpi) {
case VPX_BITS_10:
dc_quant_devisor = 16.0;
break;
- case VPX_BITS_12:
- dc_quant_devisor = 64.0;
- break;
default:
- assert(0 && "bit_depth must be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
+ assert(cm->bit_depth == VPX_BITS_12);
+ dc_quant_devisor = 64.0;
break;
}
#else
@@ -3308,6 +3466,11 @@ static void set_size_dependent_vars(VP9_COMP *cpi, int *q, int *bottom_index,
// Decide q and q bounds.
*q = vp9_rc_pick_q_and_bounds(cpi, bottom_index, top_index);
+ if (cpi->oxcf.rc_mode == VPX_CBR && cpi->rc.force_max_q) {
+ *q = cpi->rc.worst_quality;
+ cpi->rc.force_max_q = 0;
+ }
+
if (!frame_is_intra_only(cm)) {
vp9_set_high_precision_mv(cpi, (*q) < HIGH_PRECISION_MV_QTHRESH);
}
@@ -3415,9 +3578,7 @@ static void set_frame_size(VP9_COMP *cpi) {
#endif
}
- if ((oxcf->pass == 2) &&
- (!cpi->use_svc || (is_two_pass_svc(cpi) &&
- cpi->svc.encode_empty_frame_state != ENCODING))) {
+ if ((oxcf->pass == 2) && !cpi->use_svc) {
vp9_set_target_rate(cpi);
}
@@ -3464,19 +3625,75 @@ static void set_frame_size(VP9_COMP *cpi) {
set_ref_ptrs(cm, xd, LAST_FRAME, LAST_FRAME);
}
-static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
- uint8_t *dest) {
+#if CONFIG_CONSISTENT_RECODE
+static void save_encode_params(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
- int q = 0, bottom_index = 0, top_index = 0; // Dummy variables.
+ const int tile_cols = 1 << cm->log2_tile_cols;
+ const int tile_rows = 1 << cm->log2_tile_rows;
+ int tile_col, tile_row;
+ int i, j;
+ RD_OPT *rd_opt = &cpi->rd;
+ for (i = 0; i < MAX_REF_FRAMES; i++) {
+ for (j = 0; j < REFERENCE_MODES; j++)
+ rd_opt->prediction_type_threshes_prev[i][j] =
+ rd_opt->prediction_type_threshes[i][j];
+
+ for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; j++)
+ rd_opt->filter_threshes_prev[i][j] = rd_opt->filter_threshes[i][j];
+ }
+
+ if (cpi->tile_data != NULL) {
+ for (tile_row = 0; tile_row < tile_rows; ++tile_row)
+ for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
+ TileDataEnc *tile_data =
+ &cpi->tile_data[tile_row * tile_cols + tile_col];
+ for (i = 0; i < BLOCK_SIZES; ++i) {
+ for (j = 0; j < MAX_MODES; ++j) {
+ tile_data->thresh_freq_fact_prev[i][j] =
+ tile_data->thresh_freq_fact[i][j];
+ }
+ }
+ }
+ }
+}
+#endif
+
+static INLINE void set_raw_source_frame(VP9_COMP *cpi) {
+#ifdef ENABLE_KF_DENOISE
+ if (is_spatial_denoise_enabled(cpi)) {
+ cpi->raw_source_frame = vp9_scale_if_required(
+ cm, &cpi->raw_unscaled_source, &cpi->raw_scaled_source,
+ (oxcf->pass == 0), EIGHTTAP, 0);
+ } else {
+ cpi->raw_source_frame = cpi->Source;
+ }
+#else
+ cpi->raw_source_frame = cpi->Source;
+#endif
+}
+
+static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
+ uint8_t *dest) {
+ VP9_COMMON *const cm = &cpi->common;
+ SVC *const svc = &cpi->svc;
+ int q = 0, bottom_index = 0, top_index = 0;
+ int no_drop_scene_change = 0;
const INTERP_FILTER filter_scaler =
(is_one_pass_cbr_svc(cpi))
- ? cpi->svc.downsample_filter_type[cpi->svc.spatial_layer_id]
+ ? svc->downsample_filter_type[svc->spatial_layer_id]
: EIGHTTAP;
const int phase_scaler =
(is_one_pass_cbr_svc(cpi))
- ? cpi->svc.downsample_filter_phase[cpi->svc.spatial_layer_id]
+ ? svc->downsample_filter_phase[svc->spatial_layer_id]
: 0;
+ if (cm->show_existing_frame) {
+ if (is_psnr_calc_enabled(cpi)) set_raw_source_frame(cpi);
+ return 1;
+ }
+
+ svc->time_stamp_prev[svc->spatial_layer_id] = svc->time_stamp_superframe;
+
// Flag to check if it's valid to compute the source sad (used for
// scene detection and for superblock content state in CBR mode).
// The flag may get reset below based on SVC or resizing state.
@@ -3489,30 +3706,36 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
if (is_one_pass_cbr_svc(cpi) &&
cpi->un_scaled_source->y_width == cm->width << 2 &&
cpi->un_scaled_source->y_height == cm->height << 2 &&
- cpi->svc.scaled_temp.y_width == cm->width << 1 &&
- cpi->svc.scaled_temp.y_height == cm->height << 1) {
+ svc->scaled_temp.y_width == cm->width << 1 &&
+ svc->scaled_temp.y_height == cm->height << 1) {
// For svc, if it is a 1/4x1/4 downscaling, do a two-stage scaling to take
// advantage of the 1:2 optimized scaler. In the process, the 1/2x1/2
// result will be saved in scaled_temp and might be used later.
- const INTERP_FILTER filter_scaler2 = cpi->svc.downsample_filter_type[1];
- const int phase_scaler2 = cpi->svc.downsample_filter_phase[1];
+ const INTERP_FILTER filter_scaler2 = svc->downsample_filter_type[1];
+ const int phase_scaler2 = svc->downsample_filter_phase[1];
cpi->Source = vp9_svc_twostage_scale(
- cm, cpi->un_scaled_source, &cpi->scaled_source, &cpi->svc.scaled_temp,
+ cm, cpi->un_scaled_source, &cpi->scaled_source, &svc->scaled_temp,
filter_scaler, phase_scaler, filter_scaler2, phase_scaler2);
- cpi->svc.scaled_one_half = 1;
+ svc->scaled_one_half = 1;
} else if (is_one_pass_cbr_svc(cpi) &&
cpi->un_scaled_source->y_width == cm->width << 1 &&
cpi->un_scaled_source->y_height == cm->height << 1 &&
- cpi->svc.scaled_one_half) {
+ svc->scaled_one_half) {
// If the spatial layer is 1/2x1/2 and the scaling is already done in the
// two-stage scaling, use the result directly.
- cpi->Source = &cpi->svc.scaled_temp;
- cpi->svc.scaled_one_half = 0;
+ cpi->Source = &svc->scaled_temp;
+ svc->scaled_one_half = 0;
} else {
cpi->Source = vp9_scale_if_required(
cm, cpi->un_scaled_source, &cpi->scaled_source, (cpi->oxcf.pass == 0),
filter_scaler, phase_scaler);
}
+#ifdef OUTPUT_YUV_SVC_SRC
+ // Write out at most 3 spatial layers.
+ if (is_one_pass_cbr_svc(cpi) && svc->spatial_layer_id < 3) {
+ vpx_write_yuv_frame(yuv_svc_src[svc->spatial_layer_id], cpi->Source);
+ }
+#endif
// Unfiltered raw source used in metrics calculation if the source
// has been filtered.
if (is_psnr_calc_enabled(cpi)) {
@@ -3530,9 +3753,9 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
}
if ((cpi->use_svc &&
- (cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1 ||
- cpi->svc.temporal_layer_id < cpi->svc.number_temporal_layers - 1 ||
- cpi->svc.current_superframe < 1)) ||
+ (svc->spatial_layer_id < svc->number_spatial_layers - 1 ||
+ svc->temporal_layer_id < svc->number_temporal_layers - 1 ||
+ svc->current_superframe < 1)) ||
cpi->resize_pending || cpi->resize_state || cpi->external_resize ||
cpi->resize_state != ORIG) {
cpi->compute_source_sad_onepass = 0;
@@ -3562,53 +3785,101 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
cpi->Last_Source->y_height != cpi->Source->y_height)
cpi->compute_source_sad_onepass = 0;
- if (cm->frame_type == KEY_FRAME || cpi->resize_pending != 0) {
+ if (frame_is_intra_only(cm) || cpi->resize_pending != 0) {
memset(cpi->consec_zero_mv, 0,
cm->mi_rows * cm->mi_cols * sizeof(*cpi->consec_zero_mv));
}
+#if CONFIG_VP9_TEMPORAL_DENOISING
+ if (cpi->oxcf.noise_sensitivity > 0 && cpi->use_svc)
+ vp9_denoiser_reset_on_first_frame(cpi);
+#endif
vp9_update_noise_estimate(cpi);
// Scene detection is always used for VBR mode or screen-content case.
// For other cases (e.g., CBR mode) use it for 5 <= speed < 8 for now
// (need to check encoding time cost for doing this for speed 8).
cpi->rc.high_source_sad = 0;
- if (cpi->compute_source_sad_onepass && cm->show_frame &&
+ cpi->rc.hybrid_intra_scene_change = 0;
+ cpi->rc.re_encode_maxq_scene_change = 0;
+ if (cm->show_frame && cpi->oxcf.mode == REALTIME &&
(cpi->oxcf.rc_mode == VPX_VBR ||
cpi->oxcf.content == VP9E_CONTENT_SCREEN ||
- (cpi->oxcf.speed >= 5 && cpi->oxcf.speed < 8 && !cpi->use_svc)))
+ (cpi->oxcf.speed >= 5 && cpi->oxcf.speed < 8)))
vp9_scene_detection_onepass(cpi);
+ if (svc->spatial_layer_id == svc->first_spatial_layer_to_encode) {
+ svc->high_source_sad_superframe = cpi->rc.high_source_sad;
+ svc->high_num_blocks_with_motion = cpi->rc.high_num_blocks_with_motion;
+    // On scene change, reset the temporal layer pattern to TL0.
+    // Note that if the base/lower spatial layers are skipped, we do not
+    // insert a base layer here; instead we force max-q for the next
+    // superframe with lower spatial layers. This is done in
+    // vp9_encodedframe_overshoot() when max-q is decided for the current
+    // layer. Only do this reset for bypass/flexible mode.
+ if (svc->high_source_sad_superframe && svc->temporal_layer_id > 0 &&
+ svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
+      // rc->high_source_sad gets reset inside the call below, so save it
+      // here and restore it afterwards.
+ int tmp_high_source_sad = cpi->rc.high_source_sad;
+ vp9_svc_reset_temporal_layers(cpi, cm->frame_type == KEY_FRAME);
+ cpi->rc.high_source_sad = tmp_high_source_sad;
+ }
+ }
+
+  // For 1 pass CBR, check if we are dropping this frame.
+  // Never drop on a key frame, when the base layer is key for SVC,
+  // on a scene change, or when the superframe has layer sync.
+ if ((cpi->rc.high_source_sad || svc->high_source_sad_superframe) &&
+ !(cpi->rc.use_post_encode_drop && svc->last_layer_dropped[0]))
+ no_drop_scene_change = 1;
+ if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR &&
+ !frame_is_intra_only(cm) && !no_drop_scene_change &&
+ !svc->superframe_has_layer_sync &&
+ (!cpi->use_svc ||
+ !svc->layer_context[svc->temporal_layer_id].is_key_frame)) {
+ if (vp9_rc_drop_frame(cpi)) return 0;
+ }
+
// For 1 pass CBR SVC, only ZEROMV is allowed for spatial reference frame
// when svc->force_zero_mode_spatial_ref = 1. Under those conditions we can
// avoid this frame-level upsampling (for non intra_only frames).
if (frame_is_intra_only(cm) == 0 &&
- !(is_one_pass_cbr_svc(cpi) && cpi->svc.force_zero_mode_spatial_ref)) {
+ !(is_one_pass_cbr_svc(cpi) && svc->force_zero_mode_spatial_ref)) {
vp9_scale_references(cpi);
}
set_size_independent_vars(cpi);
set_size_dependent_vars(cpi, &q, &bottom_index, &top_index);
+  // The search method and step parameter might be changed in the speed
+  // settings.
+ init_motion_estimation(cpi);
+
if (cpi->sf.copy_partition_flag) alloc_copy_partition_data(cpi);
if (cpi->sf.svc_use_lowres_part &&
- cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 2) {
- if (cpi->svc.prev_partition_svc == NULL) {
+ svc->spatial_layer_id == svc->number_spatial_layers - 2) {
+ if (svc->prev_partition_svc == NULL) {
CHECK_MEM_ERROR(
- cm, cpi->svc.prev_partition_svc,
+ cm, svc->prev_partition_svc,
(BLOCK_SIZE *)vpx_calloc(cm->mi_stride * cm->mi_rows,
- sizeof(*cpi->svc.prev_partition_svc)));
+ sizeof(*svc->prev_partition_svc)));
}
}
- if (cpi->oxcf.speed >= 5 && cpi->oxcf.pass == 0 &&
+ // TODO(jianj): Look into issue of skin detection with high bitdepth.
+ if (cm->bit_depth == 8 && cpi->oxcf.speed >= 5 && cpi->oxcf.pass == 0 &&
cpi->oxcf.rc_mode == VPX_CBR &&
cpi->oxcf.content != VP9E_CONTENT_SCREEN &&
cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
cpi->use_skin_detection = 1;
}
+  // Enable post-encode frame dropping for CBR on non-key frames, when
+  // ext_use_post_encode_drop is specified by the user.
+ cpi->rc.use_post_encode_drop = cpi->rc.ext_use_post_encode_drop &&
+ cpi->oxcf.rc_mode == VPX_CBR &&
+ cm->frame_type != KEY_FRAME;
+
vp9_set_quantizer(cm, q);
vp9_set_variance_partition_thresholds(cpi, q, 0);
@@ -3616,6 +3887,33 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
suppress_active_map(cpi);
+ if (cpi->use_svc) {
+ // On non-zero spatial layer, check for disabling inter-layer
+ // prediction.
+ if (svc->spatial_layer_id > 0) vp9_svc_constrain_inter_layer_pred(cpi);
+ vp9_svc_assert_constraints_pattern(cpi);
+ }
+
+ if (cpi->rc.last_post_encode_dropped_scene_change) {
+ cpi->rc.high_source_sad = 1;
+ svc->high_source_sad_superframe = 1;
+    // For now disable use_source_sad since Last_Source will not be the
+    // previously encoded frame but the dropped one.
+ cpi->sf.use_source_sad = 0;
+ cpi->rc.last_post_encode_dropped_scene_change = 0;
+ }
+ // Check if this high_source_sad (scene/slide change) frame should be
+ // encoded at high/max QP, and if so, set the q and adjust some rate
+ // control parameters.
+ if (cpi->sf.overshoot_detection_cbr_rt == FAST_DETECTION_MAXQ &&
+ (cpi->rc.high_source_sad ||
+ (cpi->use_svc && svc->high_source_sad_superframe))) {
+ if (vp9_encodedframe_overshoot(cpi, -1, &q)) {
+ vp9_set_quantizer(cm, q);
+ vp9_set_variance_partition_thresholds(cpi, q, 0);
+ }
+ }
+
// Variance adaptive and in frame q adjustment experiments are mutually
// exclusive.
if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
@@ -3630,18 +3928,21 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
// it may be pretty bad for rate-control,
// and I should handle it somehow
vp9_alt_ref_aq_setup_map(cpi->alt_ref_aq, cpi);
+ } else if (cpi->roi.enabled && !frame_is_intra_only(cm)) {
+ apply_roi_map(cpi);
}
apply_active_map(cpi);
vp9_encode_frame(cpi);
- // Check if we should drop this frame because of high overshoot.
- // Only for frames where high temporal-source SAD is detected.
- if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR &&
- cpi->resize_state == ORIG && cm->frame_type != KEY_FRAME &&
- cpi->oxcf.content == VP9E_CONTENT_SCREEN &&
- cpi->rc.high_source_sad == 1) {
+ // Check if we should re-encode this frame at high Q because of high
+ // overshoot based on the encoded frame size. Only for frames where
+ // high temporal-source SAD is detected.
+ // For SVC: all spatial layers are checked for re-encoding.
+ if (cpi->sf.overshoot_detection_cbr_rt == RE_ENCODE_MAXQ &&
+ (cpi->rc.high_source_sad ||
+ (cpi->use_svc && svc->high_source_sad_superframe))) {
int frame_size = 0;
// Get an estimate of the encoded frame size.
save_coding_context(cpi);
@@ -3657,8 +3958,12 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
suppress_active_map(cpi);
// Turn-off cyclic refresh for re-encoded frame.
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
+ CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
unsigned char *const seg_map = cpi->segmentation_map;
memset(seg_map, 0, cm->mi_rows * cm->mi_cols);
+ memset(cr->last_coded_q_map, MAXQ,
+ cm->mi_rows * cm->mi_cols * sizeof(*cr->last_coded_q_map));
+ cr->sb_index = 0;
vp9_disable_segmentation(&cm->seg);
}
apply_active_map(cpi);
@@ -3668,13 +3973,14 @@ static void encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
// Update some stats from cyclic refresh, and check for golden frame update.
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled &&
- cm->frame_type != KEY_FRAME)
+ !frame_is_intra_only(cm))
vp9_cyclic_refresh_postencode(cpi);
// Update the skip mb flag probabilities based on the distribution
// seen in the last encoder iteration.
// update_base_skip_probs(cpi);
vpx_clear_system_state();
+ return 1;
}
#define MAX_QSTEP_ADJ 4
@@ -3703,11 +4009,16 @@ static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size,
int qrange_adj = 1;
#endif
+ if (cm->show_existing_frame) {
+ if (is_psnr_calc_enabled(cpi)) set_raw_source_frame(cpi);
+ return;
+ }
+
set_size_independent_vars(cpi);
- enable_acl = cpi->sf.allow_acl
- ? (cm->frame_type == KEY_FRAME) || (cm->show_frame == 0)
- : 0;
+ enable_acl = cpi->sf.allow_acl ? (cm->frame_type == KEY_FRAME) ||
+ (cpi->twopass.gf_group.index == 1)
+ : 0;
do {
vpx_clear_system_state();
@@ -3796,6 +4107,8 @@ static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size,
vp9_setup_in_frame_q_adj(cpi);
} else if (oxcf->aq_mode == LOOKAHEAD_AQ) {
vp9_alt_ref_aq_setup_map(cpi->alt_ref_aq, cpi);
+ } else if (oxcf->aq_mode == PSNR_AQ) {
+ vp9_psnr_aq_mode_setup(&cm->seg);
}
vp9_encode_frame(cpi);
@@ -3900,8 +4213,9 @@ static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size,
// Special case if the projected size is > the max allowed.
if ((q == q_high) &&
((rc->projected_frame_size >= rc->max_frame_bandwidth) ||
- (rc->projected_frame_size >=
- big_rate_miss_high_threshold(cpi)))) {
+ (!rc->is_src_frame_alt_ref &&
+ (rc->projected_frame_size >=
+ big_rate_miss_high_threshold(cpi))))) {
int max_rate = VPXMAX(1, VPXMIN(rc->max_frame_bandwidth,
big_rate_miss_high_threshold(cpi)));
double q_val_high;
@@ -4006,7 +4320,7 @@ static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size,
#endif
// Have we been forced to adapt Q outside the expected range by an extreme
  // rate miss? If so, adjust the active maxQ for subsequent frames.
- if (q > cpi->twopass.active_worst_quality) {
+ if (!rc->is_src_frame_alt_ref && (q > cpi->twopass.active_worst_quality)) {
cpi->twopass.active_worst_quality = q;
} else if (oxcf->vbr_corpus_complexity && q == q_low &&
rc->projected_frame_size < rc->this_frame_target) {
@@ -4028,12 +4342,6 @@ static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size,
vp9_encode_frame(cpi);
vpx_clear_system_state();
restore_coding_context(cpi);
- vp9_pack_bitstream(cpi, dest, size);
-
- vp9_encode_frame(cpi);
- vpx_clear_system_state();
-
- restore_coding_context(cpi);
}
}
@@ -4131,20 +4439,21 @@ YV12_BUFFER_CONFIG *vp9_scale_if_required(
}
}
-static void set_arf_sign_bias(VP9_COMP *cpi) {
+static void set_ref_sign_bias(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
- int arf_sign_bias;
+ RefCntBuffer *const ref_buffer = get_ref_cnt_buffer(cm, cm->new_fb_idx);
+ const int cur_frame_index = ref_buffer->frame_index;
+ MV_REFERENCE_FRAME ref_frame;
- if ((cpi->oxcf.pass == 2) && cpi->multi_arf_allowed) {
- const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
- arf_sign_bias = cpi->rc.source_alt_ref_active &&
- (!cpi->refresh_alt_ref_frame ||
- (gf_group->rf_level[gf_group->index] == GF_ARF_LOW));
- } else {
- arf_sign_bias =
- (cpi->rc.source_alt_ref_active && !cpi->refresh_alt_ref_frame);
+ for (ref_frame = LAST_FRAME; ref_frame < MAX_REF_FRAMES; ++ref_frame) {
+ const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
+ const RefCntBuffer *const ref_cnt_buf =
+ get_ref_cnt_buffer(&cpi->common, buf_idx);
+ if (ref_cnt_buf) {
+ cm->ref_frame_sign_bias[ref_frame] =
+ cur_frame_index < ref_cnt_buf->frame_index;
+ }
}
- cm->ref_frame_sign_bias[ALTREF_FRAME] = arf_sign_bias;
}
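// A minimal sketch of what set_ref_sign_bias() produces, with hypothetical
// frame indices: if cur_frame_index == 6 and LAST/GOLDEN were sourced from
// indices 5 and 0 (the past) while ALTREF was sourced from index 16 (the
// future), then sign_bias[LAST] = 0, sign_bias[GOLDEN] = 0 and
// sign_bias[ALTREF] = 1. Candidate motion vectors are later sign-inverted
// when the candidate reference's bias differs from the target reference's.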
static int setup_interp_filter_search_mask(VP9_COMP *cpi) {
@@ -4352,6 +4661,16 @@ static void vp9_try_disable_lookahead_aq(VP9_COMP *cpi, size_t *size,
}
}
+static void set_frame_index(VP9_COMP *cpi, VP9_COMMON *cm) {
+ RefCntBuffer *const ref_buffer = get_ref_cnt_buffer(cm, cm->new_fb_idx);
+
+ if (ref_buffer) {
+ const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
+ ref_buffer->frame_index =
+ cm->current_video_frame + gf_group->arf_src_offset[gf_group->index];
+ }
+}
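+// A worked example of the index stored above, under hypothetical numbers:
+// with current_video_frame == 10 and a GF_GROUP entry whose ARF is sourced
+// 15 frames ahead (arf_src_offset == 15), the new buffer gets frame_index
+// 25, its display position. Regular inter frames have arf_src_offset == 0
+// and keep their coding-order index, which is what set_ref_sign_bias()
+// compares against.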
+
static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
uint8_t *dest,
unsigned int *frame_flags) {
@@ -4360,6 +4679,34 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
struct segmentation *const seg = &cm->seg;
TX_SIZE t;
+  // SVC: skip encoding of the enhancement layer if its target bandwidth is 0.
+  // If in constrained layer-drop mode (svc.framedrop_mode != LAYER_DROP) and
+  // the base spatial layer was dropped, there is no need to set
+  // svc.skip_enhancement_layer, as the whole superframe will be dropped.
+ if (cpi->use_svc && cpi->svc.spatial_layer_id > 0 &&
+ cpi->oxcf.target_bandwidth == 0 &&
+ !(cpi->svc.framedrop_mode != LAYER_DROP &&
+ cpi->svc.drop_spatial_layer[0])) {
+ cpi->svc.skip_enhancement_layer = 1;
+ vp9_rc_postencode_update_drop_frame(cpi);
+ cpi->ext_refresh_frame_flags_pending = 0;
+ cpi->last_frame_dropped = 1;
+ cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 1;
+ cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id] = 1;
+ if (cpi->svc.framedrop_mode == LAYER_DROP ||
+ cpi->svc.drop_spatial_layer[0] == 0) {
+      // In the constrained drop mode, when the base is dropped
+      // (drop_spatial_layer[0] == 1), meaning the full superframe is
+      // dropped, we don't increment the svc frame counters. In particular
+      // the temporal layer counter (which is incremented in
+      // vp9_inc_frame_in_layer()) won't advance, so on a dropped frame we
+      // retry the same temporal_layer_id on the next incoming frame. This
+      // avoids a temporal alignment issue with full superframe dropping.
+ vp9_inc_frame_in_layer(cpi);
+ }
+ return;
+ }
+
set_ext_overrides(cpi);
vpx_clear_system_state();
@@ -4368,8 +4715,13 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
if (is_spatial_denoise_enabled(cpi)) spatial_denoise_frame(cpi);
#endif
- // Set the arf sign bias for this frame.
- set_arf_sign_bias(cpi);
+ if (cm->show_existing_frame == 0) {
+ // Update frame index
+ set_frame_index(cpi, cm);
+
+ // Set the arf sign bias for this frame.
+ set_ref_sign_bias(cpi);
+ }
// Set default state for segment based loop filter update flags.
cm->lf.mode_ref_delta_update = 0;
@@ -4404,67 +4756,6 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
cm->reset_frame_context = 2;
}
}
- if (is_two_pass_svc(cpi) && cm->error_resilient_mode == 0) {
- // Use context 0 for intra only empty frame, but the last frame context
- // for other empty frames.
- if (cpi->svc.encode_empty_frame_state == ENCODING) {
- if (cpi->svc.encode_intra_empty_frame != 0)
- cm->frame_context_idx = 0;
- else
- cm->frame_context_idx = FRAME_CONTEXTS - 1;
- } else {
- cm->frame_context_idx =
- cpi->svc.spatial_layer_id * cpi->svc.number_temporal_layers +
- cpi->svc.temporal_layer_id;
- }
-
- cm->frame_parallel_decoding_mode = oxcf->frame_parallel_decoding_mode;
-
- // The probs will be updated based on the frame type of its previous
- // frame if frame_parallel_decoding_mode is 0. The type may vary for
- // the frame after a key frame in base layer since we may drop enhancement
- // layers. So set frame_parallel_decoding_mode to 1 in this case.
- if (cm->frame_parallel_decoding_mode == 0) {
- if (cpi->svc.number_temporal_layers == 1) {
- if (cpi->svc.spatial_layer_id == 0 &&
- cpi->svc.layer_context[0].last_frame_type == KEY_FRAME)
- cm->frame_parallel_decoding_mode = 1;
- } else if (cpi->svc.spatial_layer_id == 0) {
- // Find the 2nd frame in temporal base layer and 1st frame in temporal
- // enhancement layers from the key frame.
- int i;
- for (i = 0; i < cpi->svc.number_temporal_layers; ++i) {
- if (cpi->svc.layer_context[0].frames_from_key_frame == 1 << i) {
- cm->frame_parallel_decoding_mode = 1;
- break;
- }
- }
- }
- }
- }
-
- // For 1 pass CBR, check if we are dropping this frame.
- // For spatial layers, for now only check for frame-dropping on first spatial
- // layer, and if decision is to drop, we drop whole super-frame.
- if (oxcf->pass == 0 && oxcf->rc_mode == VPX_CBR &&
- cm->frame_type != KEY_FRAME) {
- if (vp9_rc_drop_frame(cpi) ||
- (is_one_pass_cbr_svc(cpi) && cpi->svc.rc_drop_superframe == 1)) {
- vp9_rc_postencode_update_drop_frame(cpi);
- ++cm->current_video_frame;
- cpi->ext_refresh_frame_flags_pending = 0;
- cpi->svc.rc_drop_superframe = 1;
- cpi->last_frame_dropped = 1;
- // TODO(marpan): Advancing the svc counters on dropped frames can break
- // the referencing scheme for the fixed svc patterns defined in
- // vp9_one_pass_cbr_svc_start_layer(). Look into fixing this issue, but
- // for now, don't advance the svc frame counters on dropped frame.
- // if (cpi->use_svc)
- // vp9_inc_frame_in_layer(cpi);
-
- return;
- }
- }
vpx_clear_system_state();
@@ -4472,14 +4763,25 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
memset(cpi->mode_chosen_counts, 0,
MAX_MODES * sizeof(*cpi->mode_chosen_counts));
#endif
+#if CONFIG_CONSISTENT_RECODE
+ // Backup to ensure consistency between recodes
+ save_encode_params(cpi);
+#endif
if (cpi->sf.recode_loop == DISALLOW_RECODE) {
- encode_without_recode_loop(cpi, size, dest);
+ if (!encode_without_recode_loop(cpi, size, dest)) return;
} else {
encode_with_recode_loop(cpi, size, dest);
}
- cpi->last_frame_dropped = 0;
+  // TODO(jingning): When using show existing frame mode, we assume that the
+  // current ARF will be directly used as the final reconstructed frame. This is
+  // an encoder control scheme. One could in principle explore other
+  // possibilities to arrange the reference frame buffers and their coding
+  // order.
+ if (cm->show_existing_frame) {
+ ref_cnt_fb(cm->buffer_pool->frame_bufs, &cm->new_fb_idx,
+ cm->ref_frame_map[cpi->alt_fb_idx]);
+ }
  // Disable segmentation if it decreases the rate/distortion ratio.
if (cpi->oxcf.aq_mode == LOOKAHEAD_AQ)
@@ -4527,9 +4829,33 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
// Pick the loop filter level for the frame.
loopfilter_frame(cpi, cm);
+ if (cpi->rc.use_post_encode_drop) save_coding_context(cpi);
+
// build the bitstream
vp9_pack_bitstream(cpi, dest, size);
+ if (cpi->rc.use_post_encode_drop && cm->base_qindex < cpi->rc.worst_quality &&
+ cpi->svc.spatial_layer_id == 0 && post_encode_drop_cbr(cpi, size)) {
+ restore_coding_context(cpi);
+ return;
+ }
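+  // The post-encode drop above works on the actual packed size rather than
+  // a projection: the context saved before vp9_pack_bitstream() lets the
+  // encoder unwind probability adaptation when the frame is discarded. A
+  // minimal sketch of the protocol (frame_too_big() is hypothetical):
+  //   save_coding_context(cpi);             // snapshot entropy state
+  //   vp9_pack_bitstream(cpi, dest, size);
+  //   if (frame_too_big(*size)) {
+  //     restore_coding_context(cpi);        // as if nothing was coded
+  //     return;                             // emit no data for this frame
+  //   }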
+
+ cpi->last_frame_dropped = 0;
+ cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 0;
+ if (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)
+ cpi->svc.num_encoded_top_layer++;
+
+ // Keep track of the frame buffer index updated/refreshed for the
+ // current encoded TL0 superframe.
+ if (cpi->svc.temporal_layer_id == 0) {
+ if (cpi->refresh_last_frame)
+ cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->lst_fb_idx;
+ else if (cpi->refresh_golden_frame)
+ cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->gld_fb_idx;
+ else if (cpi->refresh_alt_ref_frame)
+ cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->alt_fb_idx;
+ }
+
if (cm->seg.update_map) update_reference_segmentation_map(cpi);
if (frame_is_intra_only(cm) == 0) {
@@ -4537,17 +4863,18 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
}
vp9_update_reference_frames(cpi);
- for (t = TX_4X4; t <= TX_32X32; t++)
- full_to_model_counts(cpi->td.counts->coef[t],
- cpi->td.rd_counts.coef_counts[t]);
-
- if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode)
- vp9_adapt_coef_probs(cm);
+ if (!cm->show_existing_frame) {
+ for (t = TX_4X4; t <= TX_32X32; ++t) {
+ full_to_model_counts(cpi->td.counts->coef[t],
+ cpi->td.rd_counts.coef_counts[t]);
+ }
- if (!frame_is_intra_only(cm)) {
if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode) {
- vp9_adapt_mode_probs(cm);
- vp9_adapt_mv_probs(cm, cm->allow_high_precision_mv);
+ if (!frame_is_intra_only(cm)) {
+ vp9_adapt_mode_probs(cm);
+ vp9_adapt_mv_probs(cm, cm->allow_high_precision_mv);
+ }
+ vp9_adapt_coef_probs(cm);
}
}
@@ -4567,8 +4894,9 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
cm->last_frame_type = cm->frame_type;
- if (!(is_two_pass_svc(cpi) && cpi->svc.encode_empty_frame_state == ENCODING))
- vp9_rc_postencode_update(cpi, *size);
+ vp9_rc_postencode_update(cpi, *size);
+
+ *size = VPXMAX(1, *size);
#if 0
output_frame_level_debug_stats(cpi);
@@ -4592,7 +4920,10 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
cm->last_height = cm->height;
// reset to normal state now that we are done.
- if (!cm->show_existing_frame) cm->last_show_frame = cm->show_frame;
+ if (!cm->show_existing_frame) {
+ cm->last_show_frame = cm->show_frame;
+ cm->prev_frame = cm->cur_frame;
+ }
if (cm->show_frame) {
vp9_swap_mi_and_prev_mi(cm);
@@ -4601,19 +4932,24 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, size_t *size,
++cm->current_video_frame;
if (cpi->use_svc) vp9_inc_frame_in_layer(cpi);
}
- cm->prev_frame = cm->cur_frame;
- if (cpi->use_svc)
+ if (cpi->use_svc) {
cpi->svc
.layer_context[cpi->svc.spatial_layer_id *
cpi->svc.number_temporal_layers +
cpi->svc.temporal_layer_id]
.last_frame_type = cm->frame_type;
+ // Reset layer_sync back to 0 for next frame.
+ cpi->svc.spatial_layer_sync[cpi->svc.spatial_layer_id] = 0;
+ }
cpi->force_update_segmentation = 0;
if (cpi->oxcf.aq_mode == LOOKAHEAD_AQ)
vp9_alt_ref_aq_unset_all(cpi->alt_ref_aq, cpi);
+
+ cpi->svc.previous_frame_is_intra_only = cm->intra_only;
+ cpi->svc.set_intra_only_frame = 0;
}
static void SvcEncode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
@@ -4638,8 +4974,7 @@ static void Pass2Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest,
cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED;
encode_frame_to_data_rate(cpi, size, dest, frame_flags);
- if (!(is_two_pass_svc(cpi) && cpi->svc.encode_empty_frame_state == ENCODING))
- vp9_twopass_postencode_update(cpi);
+ vp9_twopass_postencode_update(cpi);
}
#endif // !CONFIG_REALTIME_ONLY
@@ -4649,6 +4984,8 @@ static void init_ref_frame_bufs(VP9_COMMON *cm) {
cm->new_fb_idx = INVALID_IDX;
for (i = 0; i < REF_FRAMES; ++i) {
cm->ref_frame_map[i] = INVALID_IDX;
+ }
+ for (i = 0; i < FRAME_BUFFERS; ++i) {
pool->frame_bufs[i].ref_count = 0;
}
}
@@ -4702,6 +5039,12 @@ int vp9_receive_raw_frame(VP9_COMP *cpi, vpx_enc_frame_flags_t frame_flags,
check_initial_width(cpi, subsampling_x, subsampling_y);
#endif // CONFIG_VP9_HIGHBITDEPTH
+#if CONFIG_VP9_HIGHBITDEPTH
+  // Disable the denoiser for high bitdepth since vp9_denoiser_filter only
+  // works for 8-bit input.
+ if (cm->bit_depth > 8) cpi->oxcf.noise_sensitivity = 0;
+#endif
+
#if CONFIG_VP9_TEMPORAL_DENOISING
setup_denoiser_buffer(cpi);
#endif
@@ -4822,10 +5165,6 @@ static void check_src_altref(VP9_COMP *cpi,
}
#if CONFIG_INTERNAL_STATS
-extern double vp9_get_blockiness(const uint8_t *img1, int img1_pitch,
- const uint8_t *img2, int img2_pitch, int width,
- int height);
-
static void adjust_image_stat(double y, double u, double v, double all,
ImageStat *s) {
s->stat[Y] += y;
@@ -5065,6 +5404,1114 @@ static void update_level_info(VP9_COMP *cpi, size_t *size, int arf_src_index) {
}
}
+typedef struct GF_PICTURE {
+ YV12_BUFFER_CONFIG *frame;
+ int ref_frame[3];
+ FRAME_UPDATE_TYPE update_type;
+} GF_PICTURE;
+
+static void init_gop_frames(VP9_COMP *cpi, GF_PICTURE *gf_picture,
+ const GF_GROUP *gf_group, int *tpl_group_frames) {
+ VP9_COMMON *cm = &cpi->common;
+ int frame_idx = 0;
+ int i;
+ int gld_index = -1;
+ int alt_index = -1;
+ int lst_index = -1;
+ int arf_index_stack[MAX_ARF_LAYERS];
+ int arf_stack_size = 0;
+ int extend_frame_count = 0;
+ int pframe_qindex = cpi->tpl_stats[2].base_qindex;
+ int frame_gop_offset = 0;
+
+ RefCntBuffer *frame_bufs = cm->buffer_pool->frame_bufs;
+ int8_t recon_frame_index[REFS_PER_FRAME + MAX_ARF_LAYERS];
+
+ memset(recon_frame_index, -1, sizeof(recon_frame_index));
+ stack_init(arf_index_stack, MAX_ARF_LAYERS);
+
+ // TODO(jingning): To be used later for gf frame type parsing.
+ (void)gf_group;
+
+ for (i = 0; i < FRAME_BUFFERS; ++i) {
+ if (frame_bufs[i].ref_count == 0) {
+ alloc_frame_mvs(cm, i);
+ if (vpx_realloc_frame_buffer(&frame_bufs[i].buf, cm->width, cm->height,
+ cm->subsampling_x, cm->subsampling_y,
+#if CONFIG_VP9_HIGHBITDEPTH
+ cm->use_highbitdepth,
+#endif
+ VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment,
+ NULL, NULL, NULL))
+ vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+ "Failed to allocate frame buffer");
+
+ recon_frame_index[frame_idx] = i;
+ ++frame_idx;
+
+ if (frame_idx >= REFS_PER_FRAME + cpi->oxcf.enable_auto_arf) break;
+ }
+ }
+
+ for (i = 0; i < REFS_PER_FRAME + 1; ++i) {
+ assert(recon_frame_index[i] >= 0);
+ cpi->tpl_recon_frames[i] = &frame_bufs[recon_frame_index[i]].buf;
+ }
+
+ *tpl_group_frames = 0;
+
+ // Initialize Golden reference frame.
+ gf_picture[0].frame = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
+ for (i = 0; i < 3; ++i) gf_picture[0].ref_frame[i] = -1;
+ gf_picture[0].update_type = gf_group->update_type[0];
+ gld_index = 0;
+ ++*tpl_group_frames;
+
+ // Initialize base layer ARF frame
+ gf_picture[1].frame = cpi->Source;
+ gf_picture[1].ref_frame[0] = gld_index;
+ gf_picture[1].ref_frame[1] = lst_index;
+ gf_picture[1].ref_frame[2] = alt_index;
+ gf_picture[1].update_type = gf_group->update_type[1];
+ alt_index = 1;
+ ++*tpl_group_frames;
+
+ // Initialize P frames
+ for (frame_idx = 2; frame_idx < MAX_ARF_GOP_SIZE; ++frame_idx) {
+ struct lookahead_entry *buf;
+ frame_gop_offset = gf_group->frame_gop_index[frame_idx];
+ buf = vp9_lookahead_peek(cpi->lookahead, frame_gop_offset - 1);
+
+ if (buf == NULL) break;
+
+ gf_picture[frame_idx].frame = &buf->img;
+ gf_picture[frame_idx].ref_frame[0] = gld_index;
+ gf_picture[frame_idx].ref_frame[1] = lst_index;
+ gf_picture[frame_idx].ref_frame[2] = alt_index;
+ gf_picture[frame_idx].update_type = gf_group->update_type[frame_idx];
+
+ switch (gf_group->update_type[frame_idx]) {
+ case ARF_UPDATE:
+ stack_push(arf_index_stack, alt_index, arf_stack_size);
+ ++arf_stack_size;
+ alt_index = frame_idx;
+ break;
+ case LF_UPDATE: lst_index = frame_idx; break;
+ case OVERLAY_UPDATE:
+ gld_index = frame_idx;
+ alt_index = stack_pop(arf_index_stack, arf_stack_size);
+ --arf_stack_size;
+ break;
+ case USE_BUF_FRAME:
+ lst_index = alt_index;
+ alt_index = stack_pop(arf_index_stack, arf_stack_size);
+ --arf_stack_size;
+ break;
+ default: break;
+ }
+
+ ++*tpl_group_frames;
+
+    // The length of the group of pictures is baseline_gf_interval, plus the
+    // golden frame carried over from the last GOP, plus the final overlay
+    // frame of this GOP.
+ if (frame_idx == gf_group->gf_group_size) break;
+ }
+
+ alt_index = -1;
+ ++frame_idx;
+ ++frame_gop_offset;
+
+ // Extend two frames outside the current gf group.
+ for (; frame_idx < MAX_LAG_BUFFERS && extend_frame_count < 2; ++frame_idx) {
+ struct lookahead_entry *buf =
+ vp9_lookahead_peek(cpi->lookahead, frame_gop_offset - 1);
+
+ if (buf == NULL) break;
+
+ cpi->tpl_stats[frame_idx].base_qindex = pframe_qindex;
+
+ gf_picture[frame_idx].frame = &buf->img;
+ gf_picture[frame_idx].ref_frame[0] = gld_index;
+ gf_picture[frame_idx].ref_frame[1] = lst_index;
+ gf_picture[frame_idx].ref_frame[2] = alt_index;
+ gf_picture[frame_idx].update_type = LF_UPDATE;
+ lst_index = frame_idx;
+ ++*tpl_group_frames;
+ ++extend_frame_count;
+ ++frame_gop_offset;
+ }
+}
+
+static void init_tpl_stats(VP9_COMP *cpi) {
+ int frame_idx;
+ for (frame_idx = 0; frame_idx < MAX_ARF_GOP_SIZE; ++frame_idx) {
+ TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
+#if CONFIG_NON_GREEDY_MV
+ int rf_idx;
+ for (rf_idx = 0; rf_idx < 3; ++rf_idx) {
+ tpl_frame->mv_dist_sum[rf_idx] = 0;
+ tpl_frame->mv_cost_sum[rf_idx] = 0;
+ }
+#endif
+ memset(tpl_frame->tpl_stats_ptr, 0,
+ tpl_frame->height * tpl_frame->width *
+ sizeof(*tpl_frame->tpl_stats_ptr));
+ tpl_frame->is_valid = 0;
+ }
+}
+
+#if CONFIG_NON_GREEDY_MV
+static uint32_t motion_compensated_prediction(
+ VP9_COMP *cpi, ThreadData *td, int frame_idx, uint8_t *cur_frame_buf,
+ uint8_t *ref_frame_buf, int stride, BLOCK_SIZE bsize, int mi_row,
+ int mi_col, MV *mv, int rf_idx, double *mv_dist, double *mv_cost) {
+#else // CONFIG_NON_GREEDY_MV
+static uint32_t motion_compensated_prediction(VP9_COMP *cpi, ThreadData *td,
+ int frame_idx,
+ uint8_t *cur_frame_buf,
+ uint8_t *ref_frame_buf,
+ int stride, BLOCK_SIZE bsize,
+ int mi_row, int mi_col, MV *mv) {
+#endif // CONFIG_NON_GREEDY_MV
+ MACROBLOCK *const x = &td->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
+ const SEARCH_METHODS search_method = NSTEP;
+ int step_param;
+ int sadpb = x->sadperbit16;
+ uint32_t bestsme = UINT_MAX;
+ uint32_t distortion;
+ uint32_t sse;
+ int cost_list[5];
+ const MvLimits tmp_mv_limits = x->mv_limits;
+#if CONFIG_NON_GREEDY_MV
+  // lambda is used to adjust the importance of motion vector consistency.
+ // TODO(angiebird): Figure out lambda's proper value.
+ double lambda = cpi->tpl_stats[frame_idx].lambda;
+ int_mv nb_full_mvs[NB_MVS_NUM];
+#endif
+
+ MV best_ref_mv1 = { 0, 0 };
+ MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */
+
+ best_ref_mv1_full.col = best_ref_mv1.col >> 3;
+ best_ref_mv1_full.row = best_ref_mv1.row >> 3;
+
+  // Set up frame pointers.
+ x->plane[0].src.buf = cur_frame_buf;
+ x->plane[0].src.stride = stride;
+ xd->plane[0].pre[0].buf = ref_frame_buf;
+ xd->plane[0].pre[0].stride = stride;
+
+ step_param = mv_sf->reduce_first_step_size;
+ step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2);
+
+ vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1);
+
+#if CONFIG_NON_GREEDY_MV
+ (void)search_method;
+ (void)sadpb;
+ vp9_prepare_nb_full_mvs(&cpi->tpl_stats[frame_idx], mi_row, mi_col, rf_idx,
+ bsize, nb_full_mvs);
+ vp9_full_pixel_diamond_new(cpi, x, &best_ref_mv1_full, step_param, lambda, 1,
+ &cpi->fn_ptr[bsize], nb_full_mvs, NB_MVS_NUM, mv,
+ mv_dist, mv_cost);
+#else
+ (void)frame_idx;
+ (void)mi_row;
+ (void)mi_col;
+ vp9_full_pixel_search(cpi, x, bsize, &best_ref_mv1_full, step_param,
+ search_method, sadpb, cond_cost_list(cpi, cost_list),
+ &best_ref_mv1, mv, 0, 0);
+#endif
+
+ /* restore UMV window */
+ x->mv_limits = tmp_mv_limits;
+
+  // TODO(yunqing): may use an interp filter with more than 2 taps.
+  // Ignore mv costing by sending NULL pointers instead of cost arrays.
+ bestsme = cpi->find_fractional_mv_step(
+ x, mv, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit,
+ &cpi->fn_ptr[bsize], 0, mv_sf->subpel_search_level,
+ cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, 0,
+ USE_2_TAPS);
+
+ return bestsme;
+}
+
+static int get_overlap_area(int grid_pos_row, int grid_pos_col, int ref_pos_row,
+ int ref_pos_col, int block, BLOCK_SIZE bsize) {
+ int width = 0, height = 0;
+ int bw = 4 << b_width_log2_lookup[bsize];
+ int bh = 4 << b_height_log2_lookup[bsize];
+
+ switch (block) {
+ case 0:
+ width = grid_pos_col + bw - ref_pos_col;
+ height = grid_pos_row + bh - ref_pos_row;
+ break;
+ case 1:
+ width = ref_pos_col + bw - grid_pos_col;
+ height = grid_pos_row + bh - ref_pos_row;
+ break;
+ case 2:
+ width = grid_pos_col + bw - ref_pos_col;
+ height = ref_pos_row + bh - grid_pos_row;
+ break;
+ case 3:
+ width = ref_pos_col + bw - grid_pos_col;
+ height = ref_pos_row + bh - grid_pos_row;
+ break;
+ default: assert(0);
+ }
+
+ return width * height;
+}
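+// Worked example for the cases above with bw == bh == 16: a reference block
+// whose top-left lands at ref_pos (row 5, col 7) straddles the four grid
+// blocks anchored at rows {0, 16} x cols {0, 16}. For block 0 (grid_pos
+// (0, 0)) the overlap is width = 0 + 16 - 7 = 9, height = 0 + 16 - 5 = 11,
+// i.e. 99 pixels; the four areas (99, 77, 45, 35) sum to bw * bh = 256.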
+
+static int round_floor(int ref_pos, int bsize_pix) {
+ int round;
+ if (ref_pos < 0)
+ round = -(1 + (-ref_pos - 1) / bsize_pix);
+ else
+ round = ref_pos / bsize_pix;
+
+ return round;
+}
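+// round_floor() is floor division, which differs from C's truncating '/'
+// for negative positions (possible when a motion vector points above or to
+// the left of the frame). For example, with bsize_pix == 16:
+//   round_floor(-1, 16) == -1, while -1 / 16 == 0;
+//   round_floor(17, 16) == 1, same as 17 / 16.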
+
+static void tpl_model_store(TplDepStats *tpl_stats, int mi_row, int mi_col,
+ BLOCK_SIZE bsize, int stride) {
+ const int mi_height = num_8x8_blocks_high_lookup[bsize];
+ const int mi_width = num_8x8_blocks_wide_lookup[bsize];
+ const TplDepStats *src_stats = &tpl_stats[mi_row * stride + mi_col];
+ int idx, idy;
+
+ for (idy = 0; idy < mi_height; ++idy) {
+ for (idx = 0; idx < mi_width; ++idx) {
+ TplDepStats *tpl_ptr = &tpl_stats[(mi_row + idy) * stride + mi_col + idx];
+ const int64_t mc_flow = tpl_ptr->mc_flow;
+ const int64_t mc_ref_cost = tpl_ptr->mc_ref_cost;
+ *tpl_ptr = *src_stats;
+ tpl_ptr->mc_flow = mc_flow;
+ tpl_ptr->mc_ref_cost = mc_ref_cost;
+ tpl_ptr->mc_dep_cost = tpl_ptr->intra_cost + tpl_ptr->mc_flow;
+ }
+ }
+}
+
+static void tpl_model_update_b(TplDepFrame *tpl_frame, TplDepStats *tpl_stats,
+ int mi_row, int mi_col, const BLOCK_SIZE bsize) {
+ TplDepFrame *ref_tpl_frame = &tpl_frame[tpl_stats->ref_frame_index];
+ TplDepStats *ref_stats = ref_tpl_frame->tpl_stats_ptr;
+ MV mv = tpl_stats->mv.as_mv;
+ int mv_row = mv.row >> 3;
+ int mv_col = mv.col >> 3;
+
+ int ref_pos_row = mi_row * MI_SIZE + mv_row;
+ int ref_pos_col = mi_col * MI_SIZE + mv_col;
+
+ const int bw = 4 << b_width_log2_lookup[bsize];
+ const int bh = 4 << b_height_log2_lookup[bsize];
+ const int mi_height = num_8x8_blocks_high_lookup[bsize];
+ const int mi_width = num_8x8_blocks_wide_lookup[bsize];
+ const int pix_num = bw * bh;
+
+  // Top-left grid-aligned block location, in pixels.
+ int grid_pos_row_base = round_floor(ref_pos_row, bh) * bh;
+ int grid_pos_col_base = round_floor(ref_pos_col, bw) * bw;
+ int block;
+
+ for (block = 0; block < 4; ++block) {
+ int grid_pos_row = grid_pos_row_base + bh * (block >> 1);
+ int grid_pos_col = grid_pos_col_base + bw * (block & 0x01);
+
+ if (grid_pos_row >= 0 && grid_pos_row < ref_tpl_frame->mi_rows * MI_SIZE &&
+ grid_pos_col >= 0 && grid_pos_col < ref_tpl_frame->mi_cols * MI_SIZE) {
+ int overlap_area = get_overlap_area(
+ grid_pos_row, grid_pos_col, ref_pos_row, ref_pos_col, block, bsize);
+ int ref_mi_row = round_floor(grid_pos_row, bh) * mi_height;
+ int ref_mi_col = round_floor(grid_pos_col, bw) * mi_width;
+
+ int64_t mc_flow = tpl_stats->mc_dep_cost -
+ (tpl_stats->mc_dep_cost * tpl_stats->inter_cost) /
+ tpl_stats->intra_cost;
+
+ int idx, idy;
+
+ for (idy = 0; idy < mi_height; ++idy) {
+ for (idx = 0; idx < mi_width; ++idx) {
+ TplDepStats *des_stats =
+ &ref_stats[(ref_mi_row + idy) * ref_tpl_frame->stride +
+ (ref_mi_col + idx)];
+
+ des_stats->mc_flow += (mc_flow * overlap_area) / pix_num;
+ des_stats->mc_ref_cost +=
+ ((tpl_stats->intra_cost - tpl_stats->inter_cost) * overlap_area) /
+ pix_num;
+ assert(overlap_area >= 0);
+ }
+ }
+ }
+ }
+}
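+// The propagated quantity above factors as
+//   mc_flow = mc_dep_cost * (1 - inter_cost / intra_cost),
+// i.e. the share of this block's accumulated dependency cost that is
+// explained by its reference rather than by its own residual. Each of the
+// (up to) four overlapped grid blocks then receives mc_flow weighted by its
+// overlap fraction, overlap_area / pix_num, so the amount is split across
+// the reference frame in proportion to coverage.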
+
+static void tpl_model_update(TplDepFrame *tpl_frame, TplDepStats *tpl_stats,
+ int mi_row, int mi_col, const BLOCK_SIZE bsize) {
+ int idx, idy;
+ const int mi_height = num_8x8_blocks_high_lookup[bsize];
+ const int mi_width = num_8x8_blocks_wide_lookup[bsize];
+
+ for (idy = 0; idy < mi_height; ++idy) {
+ for (idx = 0; idx < mi_width; ++idx) {
+ TplDepStats *tpl_ptr =
+ &tpl_stats[(mi_row + idy) * tpl_frame->stride + (mi_col + idx)];
+ tpl_model_update_b(tpl_frame, tpl_ptr, mi_row + idy, mi_col + idx,
+ BLOCK_8X8);
+ }
+ }
+}
+
+static void get_quantize_error(MACROBLOCK *x, int plane, tran_low_t *coeff,
+ tran_low_t *qcoeff, tran_low_t *dqcoeff,
+ TX_SIZE tx_size, int64_t *recon_error,
+ int64_t *sse) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ const struct macroblock_plane *const p = &x->plane[plane];
+ const struct macroblockd_plane *const pd = &xd->plane[plane];
+ const scan_order *const scan_order = &vp9_default_scan_orders[tx_size];
+ uint16_t eob;
+ int pix_num = 1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]];
+ const int shift = tx_size == TX_32X32 ? 0 : 2;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ vp9_highbd_quantize_fp_32x32(coeff, pix_num, x->skip_block, p->round_fp,
+ p->quant_fp, qcoeff, dqcoeff, pd->dequant,
+ &eob, scan_order->scan, scan_order->iscan);
+ } else {
+ vp9_quantize_fp_32x32(coeff, pix_num, x->skip_block, p->round_fp,
+ p->quant_fp, qcoeff, dqcoeff, pd->dequant, &eob,
+ scan_order->scan, scan_order->iscan);
+ }
+#else
+ vp9_quantize_fp_32x32(coeff, pix_num, x->skip_block, p->round_fp, p->quant_fp,
+ qcoeff, dqcoeff, pd->dequant, &eob, scan_order->scan,
+ scan_order->iscan);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ *recon_error = vp9_block_error(coeff, dqcoeff, pix_num, sse) >> shift;
+ *recon_error = VPXMAX(*recon_error, 1);
+
+ *sse = (*sse) >> shift;
+ *sse = VPXMAX(*sse, 1);
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static void highbd_wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
+ TX_SIZE tx_size) {
+ // TODO(sdeng): Implement SIMD based high bit-depth Hadamard transforms.
+ switch (tx_size) {
+ case TX_8X8: vpx_highbd_hadamard_8x8(src_diff, bw, coeff); break;
+ case TX_16X16: vpx_highbd_hadamard_16x16(src_diff, bw, coeff); break;
+ case TX_32X32: vpx_highbd_hadamard_32x32(src_diff, bw, coeff); break;
+ default: assert(0);
+ }
+}
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+static void wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff,
+ TX_SIZE tx_size) {
+ switch (tx_size) {
+ case TX_8X8: vpx_hadamard_8x8(src_diff, bw, coeff); break;
+ case TX_16X16: vpx_hadamard_16x16(src_diff, bw, coeff); break;
+ case TX_32X32: vpx_hadamard_32x32(src_diff, bw, coeff); break;
+ default: assert(0);
+ }
+}
+
+#if CONFIG_NON_GREEDY_MV
+double get_feature_score(uint8_t *buf, ptrdiff_t stride, int rows, int cols) {
+ double IxIx = 0;
+ double IxIy = 0;
+ double IyIy = 0;
+ double score;
+ int r, c;
+ vpx_clear_system_state();
+ for (r = 0; r + 1 < rows; ++r) {
+ for (c = 0; c + 1 < cols; ++c) {
+ int diff_x = buf[r * stride + c] - buf[r * stride + c + 1];
+ int diff_y = buf[r * stride + c] - buf[(r + 1) * stride + c];
+ IxIx += diff_x * diff_x;
+ IxIy += diff_x * diff_y;
+ IyIy += diff_y * diff_y;
+ }
+ }
+ IxIx /= (rows - 1) * (cols - 1);
+ IxIy /= (rows - 1) * (cols - 1);
+ IyIy /= (rows - 1) * (cols - 1);
+ score = (IxIx * IyIy - IxIy * IxIy + 0.0001) / (IxIx + IyIy + 0.0001);
+ return score;
+}
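+// The score above is a corner-strength measure: IxIx, IxIy and IyIy are the
+// averaged entries of the gradient structure tensor, and det(M) / trace(M)
+// (with 0.0001 guarding against division by zero) is large only when both
+// eigenvalues are large, i.e. when the block contains two-dimensional
+// texture that makes its motion estimate trustworthy. The motion field is
+// then built in decreasing score order, so reliable blocks are searched
+// first.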
+#endif
+
+static void set_mv_limits(const VP9_COMMON *cm, MACROBLOCK *x, int mi_row,
+ int mi_col) {
+ x->mv_limits.row_min = -((mi_row * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND));
+ x->mv_limits.row_max =
+ (cm->mi_rows - 1 - mi_row) * MI_SIZE + (17 - 2 * VP9_INTERP_EXTEND);
+ x->mv_limits.col_min = -((mi_col * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND));
+ x->mv_limits.col_max =
+ ((cm->mi_cols - 1 - mi_col) * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND);
+}
+
+static void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
+ struct scale_factors *sf, GF_PICTURE *gf_picture,
+ int frame_idx, TplDepFrame *tpl_frame,
+ int16_t *src_diff, tran_low_t *coeff,
+ tran_low_t *qcoeff, tran_low_t *dqcoeff, int mi_row,
+ int mi_col, BLOCK_SIZE bsize, TX_SIZE tx_size,
+ YV12_BUFFER_CONFIG *ref_frame[], uint8_t *predictor,
+ int64_t *recon_error, int64_t *sse) {
+ VP9_COMMON *cm = &cpi->common;
+ ThreadData *td = &cpi->td;
+
+ const int bw = 4 << b_width_log2_lookup[bsize];
+ const int bh = 4 << b_height_log2_lookup[bsize];
+ const int pix_num = bw * bh;
+ int best_rf_idx = -1;
+ int_mv best_mv;
+ int64_t best_inter_cost = INT64_MAX;
+ int64_t inter_cost;
+ int rf_idx;
+ const InterpKernel *const kernel = vp9_filter_kernels[EIGHTTAP];
+
+ int64_t best_intra_cost = INT64_MAX;
+ int64_t intra_cost;
+ PREDICTION_MODE mode;
+ int mb_y_offset = mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
+ MODE_INFO mi_above, mi_left;
+ const int mi_height = num_8x8_blocks_high_lookup[bsize];
+ const int mi_width = num_8x8_blocks_wide_lookup[bsize];
+ TplDepStats *tpl_stats =
+ &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];
+
+ xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8);
+ xd->mb_to_bottom_edge = ((cm->mi_rows - 1 - mi_row) * MI_SIZE) * 8;
+ xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8);
+ xd->mb_to_right_edge = ((cm->mi_cols - 1 - mi_col) * MI_SIZE) * 8;
+ xd->above_mi = (mi_row > 0) ? &mi_above : NULL;
+ xd->left_mi = (mi_col > 0) ? &mi_left : NULL;
+
+ // Intra prediction search
+ for (mode = DC_PRED; mode <= TM_PRED; ++mode) {
+ uint8_t *src, *dst;
+ int src_stride, dst_stride;
+
+ src = xd->cur_buf->y_buffer + mb_y_offset;
+ src_stride = xd->cur_buf->y_stride;
+
+ dst = &predictor[0];
+ dst_stride = bw;
+
+ xd->mi[0]->sb_type = bsize;
+ xd->mi[0]->ref_frame[0] = INTRA_FRAME;
+
+ vp9_predict_intra_block(xd, b_width_log2_lookup[bsize], tx_size, mode, src,
+ src_stride, dst, dst_stride, 0, 0, 0);
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ vpx_highbd_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst,
+ dst_stride, xd->bd);
+ highbd_wht_fwd_txfm(src_diff, bw, coeff, tx_size);
+ intra_cost = vpx_highbd_satd(coeff, pix_num);
+ } else {
+ vpx_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst,
+ dst_stride);
+ wht_fwd_txfm(src_diff, bw, coeff, tx_size);
+ intra_cost = vpx_satd(coeff, pix_num);
+ }
+#else
+ vpx_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst, dst_stride);
+ wht_fwd_txfm(src_diff, bw, coeff, tx_size);
+ intra_cost = vpx_satd(coeff, pix_num);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+ if (intra_cost < best_intra_cost) best_intra_cost = intra_cost;
+ }
+
+ // Motion compensated prediction
+ best_mv.as_int = 0;
+
+ set_mv_limits(cm, x, mi_row, mi_col);
+
+ for (rf_idx = 0; rf_idx < 3; ++rf_idx) {
+ int_mv mv;
+ if (ref_frame[rf_idx] == NULL) continue;
+
+#if CONFIG_NON_GREEDY_MV
+ (void)td;
+ mv.as_int =
+ get_pyramid_mv(tpl_frame, rf_idx, bsize, mi_row, mi_col)->as_int;
+#else
+ motion_compensated_prediction(
+ cpi, td, frame_idx, xd->cur_buf->y_buffer + mb_y_offset,
+ ref_frame[rf_idx]->y_buffer + mb_y_offset, xd->cur_buf->y_stride, bsize,
+ mi_row, mi_col, &mv.as_mv);
+#endif
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ vp9_highbd_build_inter_predictor(
+ CONVERT_TO_SHORTPTR(ref_frame[rf_idx]->y_buffer + mb_y_offset),
+ ref_frame[rf_idx]->y_stride, CONVERT_TO_SHORTPTR(&predictor[0]), bw,
+ &mv.as_mv, sf, bw, bh, 0, kernel, MV_PRECISION_Q3, mi_col * MI_SIZE,
+ mi_row * MI_SIZE, xd->bd);
+ vpx_highbd_subtract_block(
+ bh, bw, src_diff, bw, xd->cur_buf->y_buffer + mb_y_offset,
+ xd->cur_buf->y_stride, &predictor[0], bw, xd->bd);
+ highbd_wht_fwd_txfm(src_diff, bw, coeff, tx_size);
+ inter_cost = vpx_highbd_satd(coeff, pix_num);
+ } else {
+ vp9_build_inter_predictor(
+ ref_frame[rf_idx]->y_buffer + mb_y_offset,
+ ref_frame[rf_idx]->y_stride, &predictor[0], bw, &mv.as_mv, sf, bw, bh,
+ 0, kernel, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE);
+ vpx_subtract_block(bh, bw, src_diff, bw,
+ xd->cur_buf->y_buffer + mb_y_offset,
+ xd->cur_buf->y_stride, &predictor[0], bw);
+ wht_fwd_txfm(src_diff, bw, coeff, tx_size);
+ inter_cost = vpx_satd(coeff, pix_num);
+ }
+#else
+ vp9_build_inter_predictor(ref_frame[rf_idx]->y_buffer + mb_y_offset,
+ ref_frame[rf_idx]->y_stride, &predictor[0], bw,
+ &mv.as_mv, sf, bw, bh, 0, kernel, MV_PRECISION_Q3,
+ mi_col * MI_SIZE, mi_row * MI_SIZE);
+ vpx_subtract_block(bh, bw, src_diff, bw,
+ xd->cur_buf->y_buffer + mb_y_offset,
+ xd->cur_buf->y_stride, &predictor[0], bw);
+ wht_fwd_txfm(src_diff, bw, coeff, tx_size);
+ inter_cost = vpx_satd(coeff, pix_num);
+#endif
+
+#if CONFIG_NON_GREEDY_MV
+ tpl_stats->inter_cost_arr[rf_idx] = inter_cost;
+ get_quantize_error(x, 0, coeff, qcoeff, dqcoeff, tx_size,
+ &tpl_stats->recon_error_arr[rf_idx],
+ &tpl_stats->sse_arr[rf_idx]);
+#endif
+
+ if (inter_cost < best_inter_cost) {
+ best_rf_idx = rf_idx;
+ best_inter_cost = inter_cost;
+ best_mv.as_int = mv.as_int;
+#if CONFIG_NON_GREEDY_MV
+ *recon_error = tpl_stats->recon_error_arr[rf_idx];
+ *sse = tpl_stats->sse_arr[rf_idx];
+#else
+ get_quantize_error(x, 0, coeff, qcoeff, dqcoeff, tx_size, recon_error,
+ sse);
+#endif
+ }
+ }
+ best_intra_cost = VPXMAX(best_intra_cost, 1);
+ best_inter_cost = VPXMIN(best_intra_cost, best_inter_cost);
+ tpl_stats->inter_cost = VPXMAX(
+ 1, (best_inter_cost << TPL_DEP_COST_SCALE_LOG2) / (mi_height * mi_width));
+ tpl_stats->intra_cost = VPXMAX(
+ 1, (best_intra_cost << TPL_DEP_COST_SCALE_LOG2) / (mi_height * mi_width));
+ tpl_stats->ref_frame_index = gf_picture[frame_idx].ref_frame[best_rf_idx];
+ tpl_stats->mv.as_int = best_mv.as_int;
+}
+
+#if CONFIG_NON_GREEDY_MV
+static int compare_feature_score(const void *a, const void *b) {
+ const FEATURE_SCORE_LOC *aa = *(FEATURE_SCORE_LOC *const *)a;
+ const FEATURE_SCORE_LOC *bb = *(FEATURE_SCORE_LOC *const *)b;
+ if (aa->feature_score < bb->feature_score) {
+ return 1;
+ } else if (aa->feature_score > bb->feature_score) {
+ return -1;
+ } else {
+ return 0;
+ }
+}
+
+static void do_motion_search(VP9_COMP *cpi, ThreadData *td, int frame_idx,
+ YV12_BUFFER_CONFIG **ref_frame, BLOCK_SIZE bsize,
+ int mi_row, int mi_col) {
+ VP9_COMMON *cm = &cpi->common;
+ MACROBLOCK *x = &td->mb;
+ MACROBLOCKD *xd = &x->e_mbd;
+ TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
+ TplDepStats *tpl_stats =
+ &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];
+ const int mb_y_offset =
+ mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
+ int rf_idx;
+
+ set_mv_limits(cm, x, mi_row, mi_col);
+
+ for (rf_idx = 0; rf_idx < 3; ++rf_idx) {
+ int_mv *mv = get_pyramid_mv(tpl_frame, rf_idx, bsize, mi_row, mi_col);
+ if (ref_frame[rf_idx] == NULL) {
+ tpl_stats->ready[rf_idx] = 0;
+ continue;
+ } else {
+ tpl_stats->ready[rf_idx] = 1;
+ }
+ motion_compensated_prediction(
+ cpi, td, frame_idx, xd->cur_buf->y_buffer + mb_y_offset,
+ ref_frame[rf_idx]->y_buffer + mb_y_offset, xd->cur_buf->y_stride, bsize,
+ mi_row, mi_col, &mv->as_mv, rf_idx, &tpl_stats->mv_dist[rf_idx],
+ &tpl_stats->mv_cost[rf_idx]);
+ }
+}
+
+#define CHANGE_MV_SEARCH_ORDER 1
+#define USE_PQSORT 1
+#define RE_COMPUTE_MV_INCONSISTENCY 1
+
+#if CHANGE_MV_SEARCH_ORDER
+#if USE_PQSORT
+static void max_heap_pop(FEATURE_SCORE_LOC **heap, int *size,
+ FEATURE_SCORE_LOC **output) {
+ if (*size > 0) {
+ *output = heap[0];
+ --*size;
+ if (*size > 0) {
+ int p, l, r;
+ heap[0] = heap[*size];
+ p = 0;
+ l = 2 * p + 1;
+ r = 2 * p + 2;
+ while (l < *size) {
+ FEATURE_SCORE_LOC *tmp;
+ int c = l;
+ if (r < *size && heap[r]->feature_score > heap[l]->feature_score) {
+ c = r;
+ }
+ if (heap[p]->feature_score >= heap[c]->feature_score) {
+ break;
+ }
+ tmp = heap[p];
+ heap[p] = heap[c];
+ heap[c] = tmp;
+ p = c;
+ l = 2 * p + 1;
+ r = 2 * p + 2;
+ }
+ }
+ } else {
+ assert(0);
+ }
+}
+
+static void max_heap_push(FEATURE_SCORE_LOC **heap, int *size,
+ FEATURE_SCORE_LOC *input) {
+ int c, p;
+ FEATURE_SCORE_LOC *tmp;
+ heap[*size] = input;
+ ++*size;
+ c = *size - 1;
+ p = c >> 1;
+ while (c > 0 && heap[c]->feature_score > heap[p]->feature_score) {
+ tmp = heap[p];
+ heap[p] = heap[c];
+ heap[c] = tmp;
+ c = p;
+ p >>= 1;
+ }
+}
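+// A minimal usage sketch of the two heap helpers above, assuming loc_a and
+// loc_b are FEATURE_SCORE_LOC entries with feature_score 3.0 and 7.0:
+//   int size = 0;
+//   FEATURE_SCORE_LOC *top;
+//   max_heap_push(heap, &size, &loc_a);
+//   max_heap_push(heap, &size, &loc_b);
+//   max_heap_pop(heap, &size, &top);  // top == &loc_b, size == 1
+// The array is kept as a binary max-heap on feature_score, so each pop
+// returns the highest-scoring pending block in O(log n).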
+
+static void add_nb_blocks_to_heap(VP9_COMP *cpi, const TplDepFrame *tpl_frame,
+ BLOCK_SIZE bsize, int mi_row, int mi_col,
+ int *heap_size) {
+ const int mi_unit = num_8x8_blocks_wide_lookup[bsize];
+ const int dirs[NB_MVS_NUM][2] = { { -1, 0 }, { 0, -1 }, { 1, 0 }, { 0, 1 } };
+ int i;
+ for (i = 0; i < NB_MVS_NUM; ++i) {
+ int r = dirs[i][0] * mi_unit;
+ int c = dirs[i][1] * mi_unit;
+ if (mi_row + r >= 0 && mi_row + r < tpl_frame->mi_rows && mi_col + c >= 0 &&
+ mi_col + c < tpl_frame->mi_cols) {
+ FEATURE_SCORE_LOC *fs_loc =
+ &cpi->feature_score_loc_arr[(mi_row + r) * tpl_frame->stride +
+ (mi_col + c)];
+ if (fs_loc->visited == 0) {
+ max_heap_push(cpi->feature_score_loc_heap, heap_size, fs_loc);
+ }
+ }
+ }
+}
+#endif // USE_PQSORT
+#endif // CHANGE_MV_SEARCH_ORDER
+
+static void build_motion_field(VP9_COMP *cpi, MACROBLOCKD *xd, int frame_idx,
+ YV12_BUFFER_CONFIG *ref_frame[3],
+ BLOCK_SIZE bsize) {
+ VP9_COMMON *cm = &cpi->common;
+ ThreadData *td = &cpi->td;
+ TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
+ const int mi_height = num_8x8_blocks_high_lookup[bsize];
+ const int mi_width = num_8x8_blocks_wide_lookup[bsize];
+ int fs_loc_sort_size;
+ int fs_loc_heap_size;
+ int mi_row, mi_col;
+
+ tpl_frame->lambda = 250;
+
+ fs_loc_sort_size = 0;
+ for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
+ for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
+ const int mb_y_offset =
+ mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
+ const int bw = 4 << b_width_log2_lookup[bsize];
+ const int bh = 4 << b_height_log2_lookup[bsize];
+ TplDepStats *tpl_stats =
+ &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];
+ FEATURE_SCORE_LOC *fs_loc =
+ &cpi->feature_score_loc_arr[mi_row * tpl_frame->stride + mi_col];
+ tpl_stats->feature_score = get_feature_score(
+ xd->cur_buf->y_buffer + mb_y_offset, xd->cur_buf->y_stride, bw, bh);
+ fs_loc->visited = 0;
+ fs_loc->feature_score = tpl_stats->feature_score;
+ fs_loc->mi_row = mi_row;
+ fs_loc->mi_col = mi_col;
+ cpi->feature_score_loc_sort[fs_loc_sort_size] = fs_loc;
+ ++fs_loc_sort_size;
+ }
+ }
+
+ qsort(cpi->feature_score_loc_sort, fs_loc_sort_size,
+ sizeof(*cpi->feature_score_loc_sort), compare_feature_score);
+
+ for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
+ for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
+ int rf_idx;
+ for (rf_idx = 0; rf_idx < 3; ++rf_idx) {
+ TplDepStats *tpl_stats =
+ &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];
+ tpl_stats->ready[rf_idx] = 0;
+ }
+ }
+ }
+
+#if CHANGE_MV_SEARCH_ORDER
+#if !USE_PQSORT
+ for (i = 0; i < fs_loc_sort_size; ++i) {
+ FEATURE_SCORE_LOC *fs_loc = cpi->feature_score_loc_sort[i];
+ do_motion_search(cpi, td, frame_idx, ref_frame, bsize, fs_loc->mi_row,
+ fs_loc->mi_col);
+ }
+#else // !USE_PQSORT
+ fs_loc_heap_size = 0;
+ max_heap_push(cpi->feature_score_loc_heap, &fs_loc_heap_size,
+ cpi->feature_score_loc_sort[0]);
+
+ while (fs_loc_heap_size > 0) {
+ FEATURE_SCORE_LOC *fs_loc;
+ max_heap_pop(cpi->feature_score_loc_heap, &fs_loc_heap_size, &fs_loc);
+
+ fs_loc->visited = 1;
+
+ do_motion_search(cpi, td, frame_idx, ref_frame, bsize, fs_loc->mi_row,
+ fs_loc->mi_col);
+
+ add_nb_blocks_to_heap(cpi, tpl_frame, bsize, fs_loc->mi_row, fs_loc->mi_col,
+ &fs_loc_heap_size);
+ }
+#endif // !USE_PQSORT
+#else // CHANGE_MV_SEARCH_ORDER
+ for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
+ for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
+ do_motion_search(cpi, td, frame_idx, ref_frame, bsize, mi_row, mi_col);
+ }
+ }
+#endif // CHANGE_MV_SEARCH_ORDER
+}
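+// The USE_PQSORT path above amounts to a best-first flood fill: the single
+// highest-scoring block seeds the heap, each popped block is searched and
+// then pushes its unvisited neighbors, so the search front grows outward
+// from feature-rich areas. Blocks searched earlier supply the nb_full_mvs
+// neighborhood used by motion_compensated_prediction(), letting flat blocks
+// inherit consistent motion instead of searching from scratch.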
+#endif // CONFIG_NON_GREEDY_MV
+
+static void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture,
+ int frame_idx, BLOCK_SIZE bsize) {
+ TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
+ YV12_BUFFER_CONFIG *this_frame = gf_picture[frame_idx].frame;
+ YV12_BUFFER_CONFIG *ref_frame[3] = { NULL, NULL, NULL };
+
+ VP9_COMMON *cm = &cpi->common;
+ struct scale_factors sf;
+ int rdmult, idx;
+ ThreadData *td = &cpi->td;
+ MACROBLOCK *x = &td->mb;
+ MACROBLOCKD *xd = &x->e_mbd;
+ int mi_row, mi_col;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ DECLARE_ALIGNED(16, uint16_t, predictor16[32 * 32 * 3]);
+ DECLARE_ALIGNED(16, uint8_t, predictor8[32 * 32 * 3]);
+ uint8_t *predictor;
+#else
+ DECLARE_ALIGNED(16, uint8_t, predictor[32 * 32 * 3]);
+#endif
+ DECLARE_ALIGNED(16, int16_t, src_diff[32 * 32]);
+ DECLARE_ALIGNED(16, tran_low_t, coeff[32 * 32]);
+ DECLARE_ALIGNED(16, tran_low_t, qcoeff[32 * 32]);
+ DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]);
+
+ const TX_SIZE tx_size = max_txsize_lookup[bsize];
+ const int mi_height = num_8x8_blocks_high_lookup[bsize];
+ const int mi_width = num_8x8_blocks_wide_lookup[bsize];
+ int64_t recon_error, sse;
+#if CONFIG_NON_GREEDY_MV
+ int square_block_idx;
+#endif
+
+  // Set up the scaling factor.
+#if CONFIG_VP9_HIGHBITDEPTH
+ vp9_setup_scale_factors_for_frame(
+ &sf, this_frame->y_crop_width, this_frame->y_crop_height,
+ this_frame->y_crop_width, this_frame->y_crop_height,
+ cpi->common.use_highbitdepth);
+
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
+ predictor = CONVERT_TO_BYTEPTR(predictor16);
+ else
+ predictor = predictor8;
+#else
+ vp9_setup_scale_factors_for_frame(
+ &sf, this_frame->y_crop_width, this_frame->y_crop_height,
+ this_frame->y_crop_width, this_frame->y_crop_height);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+
+  // Prepare reference frame pointers. If any reference frame slot is
+  // unavailable, the pointer will be set to NULL.
+ for (idx = 0; idx < 3; ++idx) {
+ int rf_idx = gf_picture[frame_idx].ref_frame[idx];
+ if (rf_idx != -1) ref_frame[idx] = gf_picture[rf_idx].frame;
+ }
+
+ xd->mi = cm->mi_grid_visible;
+ xd->mi[0] = cm->mi;
+ xd->cur_buf = this_frame;
+
+ // Get rd multiplier set up.
+ rdmult = vp9_compute_rd_mult_based_on_qindex(cpi, tpl_frame->base_qindex);
+ set_error_per_bit(&cpi->td.mb, rdmult);
+ vp9_initialize_me_consts(cpi, &cpi->td.mb, tpl_frame->base_qindex);
+
+ tpl_frame->is_valid = 1;
+
+ cm->base_qindex = tpl_frame->base_qindex;
+ vp9_frame_init_quantizer(cpi);
+
+#if CONFIG_NON_GREEDY_MV
+ for (square_block_idx = 0; square_block_idx < SQUARE_BLOCK_SIZES;
+ ++square_block_idx) {
+ BLOCK_SIZE square_bsize = square_block_idx_to_bsize(square_block_idx);
+ build_motion_field(cpi, xd, frame_idx, ref_frame, square_bsize);
+ }
+#endif
+
+ for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
+ for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
+ mode_estimation(cpi, x, xd, &sf, gf_picture, frame_idx, tpl_frame,
+ src_diff, coeff, qcoeff, dqcoeff, mi_row, mi_col, bsize,
+ tx_size, ref_frame, predictor, &recon_error, &sse);
+ // Motion flow dependency dispenser.
+ tpl_model_store(tpl_frame->tpl_stats_ptr, mi_row, mi_col, bsize,
+ tpl_frame->stride);
+
+ tpl_model_update(cpi->tpl_stats, tpl_frame->tpl_stats_ptr, mi_row, mi_col,
+ bsize);
+#if CONFIG_NON_GREEDY_MV
+ {
+ int rf_idx;
+ TplDepStats *this_tpl_stats =
+ &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];
+ for (rf_idx = 0; rf_idx < 3; ++rf_idx) {
+#if RE_COMPUTE_MV_INCONSISTENCY
+ MV this_mv =
+ get_pyramid_mv(tpl_frame, rf_idx, bsize, mi_row, mi_col)->as_mv;
+ MV full_mv;
+ int_mv nb_full_mvs[NB_MVS_NUM];
+ vp9_prepare_nb_full_mvs(tpl_frame, mi_row, mi_col, rf_idx, bsize,
+ nb_full_mvs);
+ full_mv.row = this_mv.row >> 3;
+ full_mv.col = this_mv.col >> 3;
+ this_tpl_stats->mv_cost[rf_idx] =
+ vp9_nb_mvs_inconsistency(&full_mv, nb_full_mvs, NB_MVS_NUM);
+#endif // RE_COMPUTE_MV_INCONSISTENCY
+ tpl_frame->mv_dist_sum[rf_idx] += this_tpl_stats->mv_dist[rf_idx];
+ tpl_frame->mv_cost_sum[rf_idx] += this_tpl_stats->mv_cost[rf_idx];
+ }
+ }
+#endif // CONFIG_NON_GREEDY_MV
+ }
+ }
+}
+
+#if CONFIG_NON_GREEDY_MV
+#define DUMP_TPL_STATS 0
+#if DUMP_TPL_STATS
+static void dump_buf(uint8_t *buf, int stride, int row, int col, int h, int w) {
+ printf("%d %d\n", h, w);
+ for (int i = 0; i < h; ++i) {
+ for (int j = 0; j < w; ++j) {
+ printf("%d ", buf[(row + i) * stride + col + j]);
+ }
+ }
+ printf("\n");
+}
+
+static void dump_frame_buf(const YV12_BUFFER_CONFIG *frame_buf) {
+ dump_buf(frame_buf->y_buffer, frame_buf->y_stride, 0, 0, frame_buf->y_height,
+ frame_buf->y_width);
+ dump_buf(frame_buf->u_buffer, frame_buf->uv_stride, 0, 0,
+ frame_buf->uv_height, frame_buf->uv_width);
+ dump_buf(frame_buf->v_buffer, frame_buf->uv_stride, 0, 0,
+ frame_buf->uv_height, frame_buf->uv_width);
+}
+
+static void dump_tpl_stats(const VP9_COMP *cpi, int tpl_group_frames,
+ const GF_PICTURE *gf_picture, BLOCK_SIZE bsize) {
+ int frame_idx;
+ const VP9_COMMON *cm = &cpi->common;
+ for (frame_idx = 1; frame_idx < tpl_group_frames; ++frame_idx) {
+ const TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
+ int idx = 0;
+ int mi_row, mi_col;
+ int rf_idx;
+ const int mi_height = num_8x8_blocks_high_lookup[bsize];
+ const int mi_width = num_8x8_blocks_wide_lookup[bsize];
+ printf("=\n");
+ printf("frame_idx %d mi_rows %d mi_cols %d bsize %d\n", frame_idx,
+ cm->mi_rows, cm->mi_cols, mi_width * MI_SIZE);
+ for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row) {
+ for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) {
+ if ((mi_row % mi_height) == 0 && (mi_col % mi_width) == 0) {
+ const TplDepStats *tpl_ptr =
+ &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];
+ int_mv mv = *get_pyramid_mv(tpl_frame, idx, bsize, mi_row, mi_col);
+ printf("%d %d %d %d\n", mi_row, mi_col, mv.as_mv.row, mv.as_mv.col);
+ }
+ }
+ }
+
+ dump_frame_buf(gf_picture[frame_idx].frame);
+
+ for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row) {
+ for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) {
+ if ((mi_row % mi_height) == 0 && (mi_col % mi_width) == 0) {
+ const TplDepStats *tpl_ptr =
+ &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];
+ printf("%f ", tpl_ptr->feature_score);
+ }
+ }
+ }
+ printf("\n");
+
+ rf_idx = gf_picture[frame_idx].ref_frame[idx];
+ printf("has_ref %d\n", rf_idx != -1);
+ if (rf_idx != -1) {
+ YV12_BUFFER_CONFIG *ref_frame_buf = gf_picture[rf_idx].frame;
+ dump_frame_buf(ref_frame_buf);
+ }
+ }
+}
+#endif // DUMP_TPL_STATS
+#endif // CONFIG_NON_GREEDY_MV
+
+static void init_tpl_buffer(VP9_COMP *cpi) {
+ VP9_COMMON *cm = &cpi->common;
+ int frame;
+
+ const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
+ const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows);
+#if CONFIG_NON_GREEDY_MV
+ int sqr_bsize;
+ int rf_idx;
+
+ // TODO(angiebird): This probably needs further modifications to support
+ // frame scaling later on.
+ if (cpi->feature_score_loc_alloc == 0) {
+    // The smallest block size of the motion field is 4x4, but the mi unit is
+    // 8x8, so the number of units is mi_rows * mi_cols * 4 here.
+ CHECK_MEM_ERROR(
+ cm, cpi->feature_score_loc_arr,
+ vpx_calloc(mi_rows * mi_cols * 4, sizeof(*cpi->feature_score_loc_arr)));
+ CHECK_MEM_ERROR(cm, cpi->feature_score_loc_sort,
+ vpx_calloc(mi_rows * mi_cols * 4,
+ sizeof(*cpi->feature_score_loc_sort)));
+ CHECK_MEM_ERROR(cm, cpi->feature_score_loc_heap,
+ vpx_calloc(mi_rows * mi_cols * 4,
+ sizeof(*cpi->feature_score_loc_heap)));
+
+ cpi->feature_score_loc_alloc = 1;
+ }
+#endif
+
+ // TODO(jingning): Reduce the actual memory use for tpl model build up.
+ for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) {
+ if (cpi->tpl_stats[frame].width >= mi_cols &&
+ cpi->tpl_stats[frame].height >= mi_rows &&
+ cpi->tpl_stats[frame].tpl_stats_ptr)
+ continue;
+
+#if CONFIG_NON_GREEDY_MV
+ vpx_free(cpi->tpl_stats[frame].pyramid_mv_arr);
+ for (rf_idx = 0; rf_idx < 3; ++rf_idx) {
+ for (sqr_bsize = 0; sqr_bsize < SQUARE_BLOCK_SIZES; ++sqr_bsize) {
+ CHECK_MEM_ERROR(
+ cm, cpi->tpl_stats[frame].pyramid_mv_arr[rf_idx][sqr_bsize],
+ vpx_calloc(
+ mi_rows * mi_cols,
+ sizeof(
+ *cpi->tpl_stats[frame].pyramid_mv_arr[rf_idx][sqr_bsize])));
+ }
+ }
+#endif
+ vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr);
+ CHECK_MEM_ERROR(cm, cpi->tpl_stats[frame].tpl_stats_ptr,
+ vpx_calloc(mi_rows * mi_cols,
+ sizeof(*cpi->tpl_stats[frame].tpl_stats_ptr)));
+ cpi->tpl_stats[frame].is_valid = 0;
+ cpi->tpl_stats[frame].width = mi_cols;
+ cpi->tpl_stats[frame].height = mi_rows;
+ cpi->tpl_stats[frame].stride = mi_cols;
+ cpi->tpl_stats[frame].mi_rows = cm->mi_rows;
+ cpi->tpl_stats[frame].mi_cols = cm->mi_cols;
+ }
+
+ for (frame = 0; frame < REF_FRAMES; ++frame) {
+ cpi->enc_frame_buf[frame].mem_valid = 0;
+ cpi->enc_frame_buf[frame].released = 1;
+ }
+}
+
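init_tpl_buffer() sizes each per-frame grid from superblock-aligned MI counts, so a buffer allocated once covers every frame in the GOP. A sketch of the alignment arithmetic, assuming VP9's 64x64 superblock is 8 MI units per side (which is what mi_cols_aligned_to_sb() rounds to; the helper below is illustrative, not from the patch):

    /* Round an MI count up to a whole number of 64x64 superblocks. */
    static int align_to_sb(int n_mis) { return (n_mis + 7) & ~7; }

    /* A 1920-wide frame has 240 MI columns: already aligned (240).
     * A 1924-wide frame has 241 MI columns: rounded up to 248. */
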
+static void setup_tpl_stats(VP9_COMP *cpi) {
+ GF_PICTURE gf_picture[MAX_ARF_GOP_SIZE];
+ const GF_GROUP *gf_group = &cpi->twopass.gf_group;
+ int tpl_group_frames = 0;
+ int frame_idx;
+ const BLOCK_SIZE bsize = BLOCK_32X32;
+
+ init_gop_frames(cpi, gf_picture, gf_group, &tpl_group_frames);
+
+ init_tpl_stats(cpi);
+
+ // Backward propagation from tpl_group_frames to 1.
+ for (frame_idx = tpl_group_frames - 1; frame_idx > 0; --frame_idx) {
+ if (gf_picture[frame_idx].update_type == USE_BUF_FRAME) continue;
+ mc_flow_dispenser(cpi, gf_picture, frame_idx, bsize);
+ }
+#if CONFIG_NON_GREEDY_MV
+#if DUMP_TPL_STATS
+ dump_tpl_stats(cpi, tpl_group_frames, gf_picture, bsize);
+#endif // DUMP_TPL_STATS
+#endif // CONFIG_NON_GREEDY_MV
+}
+
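Iterating from tpl_group_frames - 1 down to 1 is what keeps the flow accumulation consistent: by the time mc_flow_dispenser() reaches a frame, every later frame that references it has already deposited its dependency-cost contributions, so the totals this frame redistributes to its own references are final. USE_BUF_FRAME entries re-display an already-coded picture and contribute no new statistics, hence the skip.
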
int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
size_t *size, uint8_t *dest, int64_t *time_stamp,
int64_t *time_end, int flush) {
@@ -5077,17 +6524,10 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
struct lookahead_entry *last_source = NULL;
struct lookahead_entry *source = NULL;
int arf_src_index;
+ const int gf_group_index = cpi->twopass.gf_group.index;
int i;
- if (is_two_pass_svc(cpi)) {
-#if CONFIG_SPATIAL_SVC
- vp9_svc_start_frame(cpi);
- // Use a small empty frame instead of a real frame
- if (cpi->svc.encode_empty_frame_state == ENCODING)
- source = &cpi->svc.empty_frame;
-#endif
- if (oxcf->pass == 2) vp9_restore_layer_context(cpi);
- } else if (is_one_pass_cbr_svc(cpi)) {
+ if (is_one_pass_cbr_svc(cpi)) {
vp9_one_pass_cbr_svc_start_layer(cpi);
}
@@ -5098,10 +6538,12 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
// Is multi-arf enabled.
// Note that at the moment multi_arf is only configured for 2 pass VBR and
// will not work properly with svc.
- if ((oxcf->pass == 2) && !cpi->use_svc && (cpi->oxcf.enable_auto_arf > 1))
- cpi->multi_arf_allowed = 1;
+ // Enable Jingning's new "multi_layer_arf" code path when "enable_auto_arf"
+ // is greater than or equal to 2.
+ if ((oxcf->pass == 2) && !cpi->use_svc && (cpi->oxcf.enable_auto_arf >= 2))
+ cpi->multi_layer_arf = 1;
else
- cpi->multi_arf_allowed = 0;
+ cpi->multi_layer_arf = 0;
// Normal defaults
cm->reset_frame_context = 0;
@@ -5115,9 +6557,6 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
// Should we encode an arf frame.
arf_src_index = get_arf_src_index(cpi);
- // Skip alt frame if we encode the empty frame
- if (is_two_pass_svc(cpi) && source != NULL) arf_src_index = 0;
-
if (arf_src_index) {
for (i = 0; i <= arf_src_index; ++i) {
struct lookahead_entry *e = vp9_lookahead_peek(cpi->lookahead, i);
@@ -5132,25 +6571,17 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
}
}
+ // Clear arf index stack before group of pictures processing starts.
+ if (gf_group_index == 1) {
+ stack_init(cpi->twopass.gf_group.arf_index_stack, MAX_LAG_BUFFERS * 2);
+ cpi->twopass.gf_group.stack_size = 0;
+ }
+
if (arf_src_index) {
assert(arf_src_index <= rc->frames_to_key);
-
if ((source = vp9_lookahead_peek(cpi->lookahead, arf_src_index)) != NULL) {
cpi->alt_ref_source = source;
-#if CONFIG_SPATIAL_SVC
- if (is_two_pass_svc(cpi) && cpi->svc.spatial_layer_id > 0) {
- int i;
- // Reference a hidden frame from a lower layer
- for (i = cpi->svc.spatial_layer_id - 1; i >= 0; --i) {
- if (oxcf->ss_enable_auto_arf[i]) {
- cpi->gld_fb_idx = cpi->svc.layer_context[i].alt_ref_idx;
- break;
- }
- }
- }
- cpi->svc.layer_context[cpi->svc.spatial_layer_id].has_alt_frame = 1;
-#endif
#if !CONFIG_REALTIME_ONLY
if ((oxcf->mode != REALTIME) && (oxcf->arnr_max_frames > 0) &&
(oxcf->arnr_strength > 0)) {
@@ -5192,7 +6623,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
}
// Read in the source frame.
- if (cpi->use_svc)
+ if (cpi->use_svc || cpi->svc.set_intra_only_frame)
source = vp9_svc_lookahead_pop(cpi, cpi->lookahead, flush);
else
source = vp9_lookahead_pop(cpi->lookahead, flush);
@@ -5202,8 +6633,8 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
cm->intra_only = 0;
// if the flags indicate intra frame, but if the current picture is for
// non-zero spatial layer, it should not be an intra picture.
- if ((source->flags & VPX_EFLAG_FORCE_KF) &&
- cpi->svc.spatial_layer_id > cpi->svc.first_spatial_layer_to_encode) {
+ if ((source->flags & VPX_EFLAG_FORCE_KF) && cpi->use_svc &&
+ cpi->svc.spatial_layer_id > 0) {
source->flags &= ~(unsigned int)(VPX_EFLAG_FORCE_KF);
}
@@ -5227,7 +6658,6 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
*time_stamp = source->ts_start;
*time_end = source->ts_end;
*frame_flags = (source->flags & VPX_EFLAG_FORCE_KF) ? FRAMEFLAGS_KEY : 0;
-
} else {
*size = 0;
#if !CONFIG_REALTIME_ONLY
@@ -5249,7 +6679,11 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
// adjust frame rates based on timestamps given
if (cm->show_frame) {
- adjust_frame_rate(cpi, source);
+ if (cpi->use_svc && cpi->svc.use_set_ref_frame_config &&
+ cpi->svc.duration[cpi->svc.spatial_layer_id] > 0)
+ vp9_svc_adjust_frame_rate(cpi);
+ else
+ adjust_frame_rate(cpi, source);
}
if (is_one_pass_cbr_svc(cpi)) {
@@ -5268,24 +6702,13 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
cm->cur_frame = &pool->frame_bufs[cm->new_fb_idx];
- if (!cpi->use_svc && cpi->multi_arf_allowed) {
- if (cm->frame_type == KEY_FRAME) {
- init_buffer_indices(cpi);
- } else if (oxcf->pass == 2) {
- const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
- cpi->alt_fb_idx = gf_group->arf_ref_idx[gf_group->index];
- }
- }
-
// Start with a 0 size frame.
*size = 0;
cpi->frame_flags = *frame_flags;
#if !CONFIG_REALTIME_ONLY
- if ((oxcf->pass == 2) &&
- (!cpi->use_svc || (is_two_pass_svc(cpi) &&
- cpi->svc.encode_empty_frame_state != ENCODING))) {
+ if ((oxcf->pass == 2) && !cpi->use_svc) {
vp9_rc_get_second_pass_params(cpi);
} else if (oxcf->pass == 1) {
set_frame_size(cpi);
@@ -5297,7 +6720,15 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
level_rc_framerate(cpi, arf_src_index);
if (cpi->oxcf.pass != 0 || cpi->use_svc || frame_is_intra_only(cm) == 1) {
- for (i = 0; i < MAX_REF_FRAMES; ++i) cpi->scaled_ref_idx[i] = INVALID_IDX;
+ for (i = 0; i < REFS_PER_FRAME; ++i) cpi->scaled_ref_idx[i] = INVALID_IDX;
+ }
+
+ if (gf_group_index == 1 &&
+ cpi->twopass.gf_group.update_type[gf_group_index] == ARF_UPDATE &&
+ cpi->sf.enable_tpl_model) {
+ init_tpl_buffer(cpi);
+ vp9_estimate_qp_gop(cpi);
+ setup_tpl_stats(cpi);
}
cpi->td.mb.fp_src_pred = 0;
@@ -5309,7 +6740,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
Pass0Encode(cpi, size, dest, frame_flags);
}
#else // !CONFIG_REALTIME_ONLY
- if (oxcf->pass == 1 && (!cpi->use_svc || is_two_pass_svc(cpi))) {
+ if (oxcf->pass == 1 && !cpi->use_svc) {
const int lossless = is_lossless_requested(oxcf);
#if CONFIG_VP9_HIGHBITDEPTH
if (cpi->oxcf.use_highbitdepth)
@@ -5324,7 +6755,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
#endif // CONFIG_VP9_HIGHBITDEPTH
cpi->td.mb.inv_txfm_add = lossless ? vp9_iwht4x4_add : vp9_idct4x4_add;
vp9_first_pass(cpi, source);
- } else if (oxcf->pass == 2 && (!cpi->use_svc || is_two_pass_svc(cpi))) {
+ } else if (oxcf->pass == 2 && !cpi->use_svc) {
Pass2Encode(cpi, size, dest, frame_flags);
} else if (cpi->use_svc) {
SvcEncode(cpi, size, dest, frame_flags);
@@ -5334,6 +6765,8 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
}
#endif // CONFIG_REALTIME_ONLY
+ if (cm->show_frame) cm->cur_show_frame_fb_idx = cm->new_fb_idx;
+
if (cm->refresh_frame_context)
cm->frame_contexts[cm->frame_context_idx] = *cm->fc;
@@ -5416,7 +6849,8 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
ppflags.post_proc_flag = VP9D_DEBLOCK;
ppflags.deblocking_level = 0; // not used in vp9_post_proc_frame()
ppflags.noise_level = 0; // not used in vp9_post_proc_frame()
- vp9_post_proc_frame(cm, pp, &ppflags);
+ vp9_post_proc_frame(cm, pp, &ppflags,
+ cpi->un_scaled_source->y_width);
}
#endif
vpx_clear_system_state();
@@ -5462,11 +6896,11 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
cpi->summedp_quality += frame_ssim2 * weight;
cpi->summedp_weights += weight;
#if 0
- {
+ if (cm->show_frame) {
FILE *f = fopen("q_used.stt", "a");
fprintf(f, "%5d : Y%f7.3:U%f7.3:V%f7.3:F%f7.3:S%7.3f\n",
- cpi->common.current_video_frame, y2, u2, v2,
- frame_psnr2, frame_ssim2);
+ cpi->common.current_video_frame, psnr2.psnr[1],
+ psnr2.psnr[2], psnr2.psnr[3], psnr2.psnr[0], frame_ssim2);
fclose(f);
}
#endif
@@ -5525,21 +6959,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
#endif
- if (is_two_pass_svc(cpi)) {
- if (cpi->svc.encode_empty_frame_state == ENCODING) {
- cpi->svc.encode_empty_frame_state = ENCODED;
- cpi->svc.encode_intra_empty_frame = 0;
- }
-
- if (cm->show_frame) {
- ++cpi->svc.spatial_layer_to_encode;
- if (cpi->svc.spatial_layer_to_encode >= cpi->svc.number_spatial_layers)
- cpi->svc.spatial_layer_to_encode = 0;
-
- // May need the empty frame after an visible frame.
- cpi->svc.encode_empty_frame_state = NEED_TO_ENCODE;
- }
- } else if (is_one_pass_cbr_svc(cpi)) {
+ if (is_one_pass_cbr_svc(cpi)) {
if (cm->show_frame) {
++cpi->svc.spatial_layer_to_encode;
if (cpi->svc.spatial_layer_to_encode >= cpi->svc.number_spatial_layers)
@@ -5563,7 +6983,7 @@ int vp9_get_preview_raw_frame(VP9_COMP *cpi, YV12_BUFFER_CONFIG *dest,
} else {
int ret;
#if CONFIG_VP9_POSTPROC
- ret = vp9_post_proc_frame(cm, dest, flags);
+ ret = vp9_post_proc_frame(cm, dest, flags, cpi->un_scaled_source->y_width);
#else
if (cm->frame_to_show) {
*dest = *cm->frame_to_show;
diff --git a/libvpx/vp9/encoder/vp9_encoder.h b/libvpx/vp9/encoder/vp9_encoder.h
index d723d93cb..18adfebfe 100644
--- a/libvpx/vp9/encoder/vp9_encoder.h
+++ b/libvpx/vp9/encoder/vp9_encoder.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_ENCODER_VP9_ENCODER_H_
-#define VP9_ENCODER_VP9_ENCODER_H_
+#ifndef VPX_VP9_ENCODER_VP9_ENCODER_H_
+#define VPX_VP9_ENCODER_VP9_ENCODER_H_
#include <stdio.h>
@@ -119,9 +119,10 @@ typedef enum {
COMPLEXITY_AQ = 2,
CYCLIC_REFRESH_AQ = 3,
EQUATOR360_AQ = 4,
+ PSNR_AQ = 5,
// AQ based on lookahead temporal
// variance (only valid for altref frames)
- LOOKAHEAD_AQ = 5,
+ LOOKAHEAD_AQ = 6,
AQ_MODE_COUNT // This should always be the last member of the enum
} AQ_MODE;
@@ -248,6 +249,8 @@ typedef struct VP9EncoderConfig {
int tile_columns;
int tile_rows;
+ int enable_tpl_model;
+
int max_threads;
unsigned int target_level;
@@ -278,11 +281,102 @@ static INLINE int is_lossless_requested(const VP9EncoderConfig *cfg) {
return cfg->best_allowed_q == 0 && cfg->worst_allowed_q == 0;
}
+typedef struct TplDepStats {
+ int64_t intra_cost;
+ int64_t inter_cost;
+ int64_t mc_flow;
+ int64_t mc_dep_cost;
+ int64_t mc_ref_cost;
+
+ int ref_frame_index;
+ int_mv mv;
+
+#if CONFIG_NON_GREEDY_MV
+ int ready[3];
+ double mv_dist[3];
+ double mv_cost[3];
+ int64_t inter_cost_arr[3];
+ int64_t recon_error_arr[3];
+ int64_t sse_arr[3];
+ double feature_score;
+#endif
+} TplDepStats;
+
+#if CONFIG_NON_GREEDY_MV
+#define SQUARE_BLOCK_SIZES 4
+#endif
+
+typedef struct TplDepFrame {
+ uint8_t is_valid;
+ TplDepStats *tpl_stats_ptr;
+ int stride;
+ int width;
+ int height;
+ int mi_rows;
+ int mi_cols;
+ int base_qindex;
+#if CONFIG_NON_GREEDY_MV
+ double lambda;
+ double mv_dist_sum[3];
+ double mv_cost_sum[3];
+ int_mv *pyramid_mv_arr[3][SQUARE_BLOCK_SIZES];
+#endif
+} TplDepFrame;
+
+#if CONFIG_NON_GREEDY_MV
+static INLINE int get_square_block_idx(BLOCK_SIZE bsize) {
+ if (bsize == BLOCK_4X4) {
+ return 0;
+ }
+ if (bsize == BLOCK_8X8) {
+ return 1;
+ }
+ if (bsize == BLOCK_16X16) {
+ return 2;
+ }
+ if (bsize == BLOCK_32X32) {
+ return 3;
+ }
+ printf("ERROR: non-square block size\n");
+ assert(0);
+ return -1;
+}
+
+static INLINE BLOCK_SIZE square_block_idx_to_bsize(int square_block_idx) {
+ if (square_block_idx == 0) {
+ return BLOCK_4X4;
+ }
+ if (square_block_idx == 1) {
+ return BLOCK_8X8;
+ }
+ if (square_block_idx == 2) {
+ return BLOCK_16X16;
+ }
+ if (square_block_idx == 3) {
+ return BLOCK_32X32;
+ }
+ printf("ERROR: invalid square_block_idx\n");
+ assert(0);
+ return BLOCK_INVALID;
+}
+
+static INLINE int_mv *get_pyramid_mv(const TplDepFrame *tpl_frame, int rf_idx,
+ BLOCK_SIZE bsize, int mi_row, int mi_col) {
+ return &tpl_frame->pyramid_mv_arr[rf_idx][get_square_block_idx(bsize)]
+ [mi_row * tpl_frame->stride + mi_col];
+}
+#endif
+
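get_pyramid_mv() picks one of the 3 x SQUARE_BLOCK_SIZES motion-vector planes and then applies the same row-major MI addressing as the stats grid. A hedged sketch of a caller, using only the declarations above (this helper is hypothetical, not part of the patch):

    /* Seed a block's search from the MV stored at the next coarser
     * (larger) square size, falling back to the same size at the top
     * of the pyramid. */
    static int_mv seed_from_coarser(const TplDepFrame *tpl_frame, int rf_idx,
                                    BLOCK_SIZE bsize, int mi_row, int mi_col) {
      const int sq = get_square_block_idx(bsize);
      const BLOCK_SIZE coarser = (sq + 1 < SQUARE_BLOCK_SIZES)
                                     ? square_block_idx_to_bsize(sq + 1)
                                     : bsize;
      return *get_pyramid_mv(tpl_frame, rf_idx, coarser, mi_row, mi_col);
    }
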
+#define TPL_DEP_COST_SCALE_LOG2 4
+
// TODO(jingning) All spatially adaptive variables should go to TileDataEnc.
typedef struct TileDataEnc {
TileInfo tile_info;
int thresh_freq_fact[BLOCK_SIZES][MAX_MODES];
- int mode_map[BLOCK_SIZES][MAX_MODES];
+#if CONFIG_CONSISTENT_RECODE
+ int thresh_freq_fact_prev[BLOCK_SIZES][MAX_MODES];
+#endif
+ int8_t mode_map[BLOCK_SIZES][MAX_MODES];
FIRSTPASS_DATA fp_data;
VP9RowMTSync row_mt_sync;
@@ -450,6 +544,23 @@ typedef struct ARNRFilterData {
struct scale_factors sf;
} ARNRFilterData;
+typedef struct EncFrameBuf {
+ int mem_valid;
+ int released;
+ YV12_BUFFER_CONFIG frame;
+} EncFrameBuf;
+
+// Maximum number of frame buffers needed for a GOP that uses ARF references.
+#define MAX_ARF_GOP_SIZE (2 * MAX_LAG_BUFFERS)
+#if CONFIG_NON_GREEDY_MV
+typedef struct FEATURE_SCORE_LOC {
+ int visited;
+ double feature_score;
+ int mi_row;
+ int mi_col;
+} FEATURE_SCORE_LOC;
+#endif
+
typedef struct VP9_COMP {
QUANTS quants;
ThreadData td;
@@ -473,17 +584,29 @@ typedef struct VP9_COMP {
#endif
YV12_BUFFER_CONFIG *raw_source_frame;
+ TplDepFrame tpl_stats[MAX_ARF_GOP_SIZE];
+ YV12_BUFFER_CONFIG *tpl_recon_frames[REF_FRAMES];
+ EncFrameBuf enc_frame_buf[REF_FRAMES];
+#if CONFIG_NON_GREEDY_MV
+ int feature_score_loc_alloc;
+ FEATURE_SCORE_LOC *feature_score_loc_arr;
+ FEATURE_SCORE_LOC **feature_score_loc_sort;
+ FEATURE_SCORE_LOC **feature_score_loc_heap;
+#endif
+
TileDataEnc *tile_data;
int allocated_tiles; // Keep track of memory allocated for tiles.
// For a still frame, this flag is set to 1 to skip partition search.
int partition_search_skippable_frame;
- int scaled_ref_idx[MAX_REF_FRAMES];
+ int scaled_ref_idx[REFS_PER_FRAME];
int lst_fb_idx;
int gld_fb_idx;
int alt_fb_idx;
+ int ref_fb_idx[REF_FRAMES];
+
int refresh_last_frame;
int refresh_golden_frame;
int refresh_alt_ref_frame;
@@ -499,7 +622,6 @@ typedef struct VP9_COMP {
YV12_BUFFER_CONFIG last_frame_uf;
TOKENEXTRA *tile_tok[4][1 << 6];
- uint32_t tok_count[4][1 << 6];
TOKENLIST *tplist[4][1 << 6];
// Ambient reconstruction err target for force key frames
@@ -521,7 +643,7 @@ typedef struct VP9_COMP {
RATE_CONTROL rc;
double framerate;
- int interp_filter_selected[MAX_REF_FRAMES][SWITCHABLE];
+ int interp_filter_selected[REF_FRAMES][SWITCHABLE];
struct vpx_codec_pkt_list *output_pkt_list;
@@ -555,6 +677,7 @@ typedef struct VP9_COMP {
ActiveMap active_map;
fractional_mv_step_fp *find_fractional_mv_step;
+ struct scale_factors me_sf;
vp9_diamond_search_fn_t diamond_search_sad;
vp9_variance_fn_ptr_t fn_ptr[BLOCK_SIZES];
uint64_t time_receive_data;
@@ -645,10 +768,8 @@ typedef struct VP9_COMP {
int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES];
int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS];
int partition_cost[PARTITION_CONTEXTS][PARTITION_TYPES];
-
- int multi_arf_allowed;
- int multi_arf_enabled;
- int multi_arf_last_grp_enabled;
+ // Indices are: max_tx_size-1, tx_size_ctx, tx_size
+ int tx_size_cost[TX_SIZES - 1][TX_SIZE_CONTEXTS][TX_SIZES];
#if CONFIG_VP9_TEMPORAL_DENOISING
VP9_DENOISER denoiser;
@@ -723,6 +844,9 @@ typedef struct VP9_COMP {
uint8_t *count_arf_frame_usage;
uint8_t *count_lastgolden_frame_usage;
+
+ int multi_layer_arf;
+ vpx_roi_map_t roi;
} VP9_COMP;
void vp9_initialize_enc(void);
@@ -737,7 +861,7 @@ void vp9_change_config(VP9_COMP *cpi, const VP9EncoderConfig *oxcf);
// frame is made and not just a copy of the pointer..
int vp9_receive_raw_frame(VP9_COMP *cpi, vpx_enc_frame_flags_t frame_flags,
YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
- int64_t end_time_stamp);
+ int64_t end_time);
int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
size_t *size, uint8_t *dest, int64_t *time_stamp,
@@ -758,9 +882,11 @@ int vp9_set_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag,
int vp9_update_entropy(VP9_COMP *cpi, int update);
-int vp9_set_active_map(VP9_COMP *cpi, unsigned char *map, int rows, int cols);
+int vp9_set_active_map(VP9_COMP *cpi, unsigned char *new_map_16x16, int rows,
+ int cols);
-int vp9_get_active_map(VP9_COMP *cpi, unsigned char *map, int rows, int cols);
+int vp9_get_active_map(VP9_COMP *cpi, unsigned char *new_map_16x16, int rows,
+ int cols);
int vp9_set_internal_size(VP9_COMP *cpi, VPX_SCALING horiz_mode,
VPX_SCALING vert_mode);
@@ -770,6 +896,27 @@ int vp9_set_size_literal(VP9_COMP *cpi, unsigned int width,
void vp9_set_svc(VP9_COMP *cpi, int use_svc);
+static INLINE int stack_pop(int *stack, int stack_size) {
+ int idx;
+ const int r = stack[0];
+ for (idx = 1; idx < stack_size; ++idx) stack[idx - 1] = stack[idx];
+
+ return r;
+}
+
+static INLINE int stack_top(const int *stack) { return stack[0]; }
+
+static INLINE void stack_push(int *stack, int new_item, int stack_size) {
+ int idx;
+ for (idx = stack_size; idx > 0; --idx) stack[idx] = stack[idx - 1];
+ stack[0] = new_item;
+}
+
+static INLINE void stack_init(int *stack, int length) {
+ int idx;
+ for (idx = 0; idx < length; ++idx) stack[idx] = -1;
+}
+
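These helpers implement a fixed-capacity stack of buffer indices with the top of stack at element 0. Note that stack_push() writes through stack[stack_size], so the caller must leave headroom and track the element count itself; that is why the ARF index stack above is initialized with length MAX_LAG_BUFFERS * 2 alongside a separate stack_size counter. A minimal usage sketch (the array and bookkeeping here are illustrative):

    #include <assert.h>

    static void demo_arf_stack(void) {
      int arr[8];
      int size = 0;
      stack_init(arr, 8);                   /* every slot becomes -1 */
      stack_push(arr, 5, size++);           /* arr: 5 -1 -1 ... */
      stack_push(arr, 7, size++);           /* arr: 7 5 -1 ... */
      assert(stack_top(arr) == 7);
      assert(stack_pop(arr, size--) == 7);  /* arr[0] is now 5 */
    }
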
int vp9_get_quantizer(struct VP9_COMP *cpi);
static INLINE int frame_is_kf_gf_arf(const VP9_COMP *cpi) {
@@ -795,6 +942,10 @@ static INLINE int get_ref_frame_buf_idx(const VP9_COMP *const cpi,
return (map_idx != INVALID_IDX) ? cm->ref_frame_map[map_idx] : INVALID_IDX;
}
+static INLINE RefCntBuffer *get_ref_cnt_buffer(VP9_COMMON *cm, int fb_idx) {
+ return fb_idx != INVALID_IDX ? &cm->buffer_pool->frame_bufs[fb_idx] : NULL;
+}
+
static INLINE YV12_BUFFER_CONFIG *get_ref_frame_buffer(
VP9_COMP *cpi, MV_REFERENCE_FRAME ref_frame) {
VP9_COMMON *const cm = &cpi->common;
@@ -858,19 +1009,14 @@ YV12_BUFFER_CONFIG *vp9_scale_if_required(
void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags);
-static INLINE int is_two_pass_svc(const struct VP9_COMP *const cpi) {
- return cpi->use_svc && cpi->oxcf.pass != 0;
-}
-
static INLINE int is_one_pass_cbr_svc(const struct VP9_COMP *const cpi) {
return (cpi->use_svc && cpi->oxcf.pass == 0);
}
#if CONFIG_VP9_TEMPORAL_DENOISING
static INLINE int denoise_svc(const struct VP9_COMP *const cpi) {
- return (!cpi->use_svc ||
- (cpi->use_svc &&
- cpi->svc.spatial_layer_id >= cpi->svc.first_layer_denoise));
+ return (!cpi->use_svc || (cpi->use_svc && cpi->svc.spatial_layer_id >=
+ cpi->svc.first_layer_denoise));
}
#endif
@@ -878,9 +1024,7 @@ static INLINE int denoise_svc(const struct VP9_COMP *const cpi) {
static INLINE int is_altref_enabled(const VP9_COMP *const cpi) {
return !(cpi->oxcf.mode == REALTIME && cpi->oxcf.rc_mode == VPX_CBR) &&
cpi->oxcf.lag_in_frames >= MIN_LOOKAHEAD_FOR_ARFS &&
- (cpi->oxcf.enable_auto_arf &&
- (!is_two_pass_svc(cpi) ||
- cpi->oxcf.ss_enable_auto_arf[cpi->svc.spatial_layer_id]));
+ cpi->oxcf.enable_auto_arf;
}
static INLINE void set_ref_ptrs(VP9_COMMON *cm, MACROBLOCKD *xd,
@@ -938,6 +1082,10 @@ static INLINE int log_tile_cols_from_picsize_level(uint32_t width,
VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec);
+int vp9_set_roi_map(VP9_COMP *cpi, unsigned char *map, unsigned int rows,
+ unsigned int cols, int delta_q[8], int delta_lf[8],
+ int skip[8], int ref_frame[8]);
+
void vp9_new_framerate(VP9_COMP *cpi, double framerate);
void vp9_set_row_mt(VP9_COMP *cpi);
@@ -948,4 +1096,4 @@ void vp9_set_row_mt(VP9_COMP *cpi);
} // extern "C"
#endif
-#endif // VP9_ENCODER_VP9_ENCODER_H_
+#endif // VPX_VP9_ENCODER_VP9_ENCODER_H_
diff --git a/libvpx/vp9/encoder/vp9_ethread.c b/libvpx/vp9/encoder/vp9_ethread.c
index 0bd2e2145..e7f8a537d 100644
--- a/libvpx/vp9/encoder/vp9_ethread.c
+++ b/libvpx/vp9/encoder/vp9_ethread.c
@@ -270,19 +270,19 @@ void vp9_row_mt_sync_mem_alloc(VP9RowMTSync *row_mt_sync, VP9_COMMON *cm,
{
int i;
- CHECK_MEM_ERROR(cm, row_mt_sync->mutex_,
- vpx_malloc(sizeof(*row_mt_sync->mutex_) * rows));
- if (row_mt_sync->mutex_) {
+ CHECK_MEM_ERROR(cm, row_mt_sync->mutex,
+ vpx_malloc(sizeof(*row_mt_sync->mutex) * rows));
+ if (row_mt_sync->mutex) {
for (i = 0; i < rows; ++i) {
- pthread_mutex_init(&row_mt_sync->mutex_[i], NULL);
+ pthread_mutex_init(&row_mt_sync->mutex[i], NULL);
}
}
- CHECK_MEM_ERROR(cm, row_mt_sync->cond_,
- vpx_malloc(sizeof(*row_mt_sync->cond_) * rows));
- if (row_mt_sync->cond_) {
+ CHECK_MEM_ERROR(cm, row_mt_sync->cond,
+ vpx_malloc(sizeof(*row_mt_sync->cond) * rows));
+ if (row_mt_sync->cond) {
for (i = 0; i < rows; ++i) {
- pthread_cond_init(&row_mt_sync->cond_[i], NULL);
+ pthread_cond_init(&row_mt_sync->cond[i], NULL);
}
}
}
@@ -301,17 +301,17 @@ void vp9_row_mt_sync_mem_dealloc(VP9RowMTSync *row_mt_sync) {
#if CONFIG_MULTITHREAD
int i;
- if (row_mt_sync->mutex_ != NULL) {
+ if (row_mt_sync->mutex != NULL) {
for (i = 0; i < row_mt_sync->rows; ++i) {
- pthread_mutex_destroy(&row_mt_sync->mutex_[i]);
+ pthread_mutex_destroy(&row_mt_sync->mutex[i]);
}
- vpx_free(row_mt_sync->mutex_);
+ vpx_free(row_mt_sync->mutex);
}
- if (row_mt_sync->cond_ != NULL) {
+ if (row_mt_sync->cond != NULL) {
for (i = 0; i < row_mt_sync->rows; ++i) {
- pthread_cond_destroy(&row_mt_sync->cond_[i]);
+ pthread_cond_destroy(&row_mt_sync->cond[i]);
}
- vpx_free(row_mt_sync->cond_);
+ vpx_free(row_mt_sync->cond);
}
#endif // CONFIG_MULTITHREAD
vpx_free(row_mt_sync->cur_col);
@@ -327,11 +327,11 @@ void vp9_row_mt_sync_read(VP9RowMTSync *const row_mt_sync, int r, int c) {
const int nsync = row_mt_sync->sync_range;
if (r && !(c & (nsync - 1))) {
- pthread_mutex_t *const mutex = &row_mt_sync->mutex_[r - 1];
+ pthread_mutex_t *const mutex = &row_mt_sync->mutex[r - 1];
pthread_mutex_lock(mutex);
while (c > row_mt_sync->cur_col[r - 1] - nsync + 1) {
- pthread_cond_wait(&row_mt_sync->cond_[r - 1], mutex);
+ pthread_cond_wait(&row_mt_sync->cond[r - 1], mutex);
}
pthread_mutex_unlock(mutex);
}
@@ -365,12 +365,12 @@ void vp9_row_mt_sync_write(VP9RowMTSync *const row_mt_sync, int r, int c,
}
if (sig) {
- pthread_mutex_lock(&row_mt_sync->mutex_[r]);
+ pthread_mutex_lock(&row_mt_sync->mutex[r]);
row_mt_sync->cur_col[r] = cur;
- pthread_cond_signal(&row_mt_sync->cond_[r]);
- pthread_mutex_unlock(&row_mt_sync->mutex_[r]);
+ pthread_cond_signal(&row_mt_sync->cond[r]);
+ pthread_mutex_unlock(&row_mt_sync->mutex[r]);
}
#else
(void)row_mt_sync;
@@ -390,8 +390,9 @@ void vp9_row_mt_sync_write_dummy(VP9RowMTSync *const row_mt_sync, int r, int c,
}
#if !CONFIG_REALTIME_ONLY
-static int first_pass_worker_hook(EncWorkerData *const thread_data,
- MultiThreadHandle *multi_thread_ctxt) {
+static int first_pass_worker_hook(void *arg1, void *arg2) {
+ EncWorkerData *const thread_data = (EncWorkerData *)arg1;
+ MultiThreadHandle *multi_thread_ctxt = (MultiThreadHandle *)arg2;
VP9_COMP *const cpi = thread_data->cpi;
const VP9_COMMON *const cm = &cpi->common;
const int tile_cols = 1 << cm->log2_tile_cols;
@@ -470,8 +471,8 @@ void vp9_encode_fp_row_mt(VP9_COMP *cpi) {
}
}
- launch_enc_workers(cpi, (VPxWorkerHook)first_pass_worker_hook,
- multi_thread_ctxt, num_workers);
+ launch_enc_workers(cpi, first_pass_worker_hook, multi_thread_ctxt,
+ num_workers);
first_tile_col = &cpi->tile_data[0];
for (i = 1; i < tile_cols; i++) {
@@ -480,8 +481,9 @@ void vp9_encode_fp_row_mt(VP9_COMP *cpi) {
}
}
-static int temporal_filter_worker_hook(EncWorkerData *const thread_data,
- MultiThreadHandle *multi_thread_ctxt) {
+static int temporal_filter_worker_hook(void *arg1, void *arg2) {
+ EncWorkerData *const thread_data = (EncWorkerData *)arg1;
+ MultiThreadHandle *multi_thread_ctxt = (MultiThreadHandle *)arg2;
VP9_COMP *const cpi = thread_data->cpi;
const VP9_COMMON *const cm = &cpi->common;
const int tile_cols = 1 << cm->log2_tile_cols;
@@ -508,8 +510,8 @@ static int temporal_filter_worker_hook(EncWorkerData *const thread_data,
tile_col = proc_job->tile_col_id;
tile_row = proc_job->tile_row_id;
this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
- mb_col_start = (this_tile->tile_info.mi_col_start) >> 1;
- mb_col_end = (this_tile->tile_info.mi_col_end + 1) >> 1;
+ mb_col_start = (this_tile->tile_info.mi_col_start) >> TF_SHIFT;
+ mb_col_end = (this_tile->tile_info.mi_col_end + TF_ROUND) >> TF_SHIFT;
mb_row = proc_job->vert_unit_row_num;
vp9_temporal_filter_iterate_row_c(cpi, thread_data->td, mb_row,
@@ -553,13 +555,14 @@ void vp9_temporal_filter_row_mt(VP9_COMP *cpi) {
}
}
- launch_enc_workers(cpi, (VPxWorkerHook)temporal_filter_worker_hook,
- multi_thread_ctxt, num_workers);
+ launch_enc_workers(cpi, temporal_filter_worker_hook, multi_thread_ctxt,
+ num_workers);
}
#endif // !CONFIG_REALTIME_ONLY
-static int enc_row_mt_worker_hook(EncWorkerData *const thread_data,
- MultiThreadHandle *multi_thread_ctxt) {
+static int enc_row_mt_worker_hook(void *arg1, void *arg2) {
+ EncWorkerData *const thread_data = (EncWorkerData *)arg1;
+ MultiThreadHandle *multi_thread_ctxt = (MultiThreadHandle *)arg2;
VP9_COMP *const cpi = thread_data->cpi;
const VP9_COMMON *const cm = &cpi->common;
const int tile_cols = 1 << cm->log2_tile_cols;
@@ -648,8 +651,8 @@ void vp9_encode_tiles_row_mt(VP9_COMP *cpi) {
}
}
- launch_enc_workers(cpi, (VPxWorkerHook)enc_row_mt_worker_hook,
- multi_thread_ctxt, num_workers);
+ launch_enc_workers(cpi, enc_row_mt_worker_hook, multi_thread_ctxt,
+ num_workers);
for (i = 0; i < num_workers; i++) {
VPxWorker *const worker = &cpi->workers[i];
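
A pattern worth noting in the hook rewrites above: each worker entry point now matches VPxWorkerHook's generic int (*)(void *, void *) signature and casts its arguments internally, instead of launch_enc_workers() casting the function pointer at the call sites. Calling a function through a pointer of an incompatible type is undefined behavior in C, so this is a correctness fix as well as a cleanup. The shape of a conforming hook, with placeholder argument types standing in for EncWorkerData and MultiThreadHandle:

    typedef struct { int tile; } DemoWorkerData;    /* placeholder type */
    typedef struct { int jobs; } DemoThreadHandle;  /* placeholder type */

    static int demo_worker_hook(void *arg1, void *arg2) {
      /* Cast inside the hook, not at the call site. */
      DemoWorkerData *const data = (DemoWorkerData *)arg1;
      DemoThreadHandle *const ctxt = (DemoThreadHandle *)arg2;
      (void)data;
      (void)ctxt;
      return 1; /* vpx worker hooks report success with a nonzero return */
    }
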
diff --git a/libvpx/vp9/encoder/vp9_ethread.h b/libvpx/vp9/encoder/vp9_ethread.h
index a396e621d..cda0293bc 100644
--- a/libvpx/vp9/encoder/vp9_ethread.h
+++ b/libvpx/vp9/encoder/vp9_ethread.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_ENCODER_VP9_ETHREAD_H_
-#define VP9_ENCODER_VP9_ETHREAD_H_
+#ifndef VPX_VP9_ENCODER_VP9_ETHREAD_H_
+#define VPX_VP9_ENCODER_VP9_ETHREAD_H_
#ifdef __cplusplus
extern "C" {
@@ -33,8 +33,8 @@ typedef struct EncWorkerData {
// Encoder row synchronization
typedef struct VP9RowMTSyncData {
#if CONFIG_MULTITHREAD
- pthread_mutex_t *mutex_;
- pthread_cond_t *cond_;
+ pthread_mutex_t *mutex;
+ pthread_cond_t *cond;
#endif
// Allocate memory to store the sb/mb block index in each row.
int *cur_col;
@@ -69,4 +69,4 @@ void vp9_temporal_filter_row_mt(struct VP9_COMP *cpi);
} // extern "C"
#endif
-#endif // VP9_ENCODER_VP9_ETHREAD_H_
+#endif // VPX_VP9_ENCODER_VP9_ETHREAD_H_
diff --git a/libvpx/vp9/encoder/vp9_extend.h b/libvpx/vp9/encoder/vp9_extend.h
index c0dd75715..4ba7fc95e 100644
--- a/libvpx/vp9/encoder/vp9_extend.h
+++ b/libvpx/vp9/encoder/vp9_extend.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_ENCODER_VP9_EXTEND_H_
-#define VP9_ENCODER_VP9_EXTEND_H_
+#ifndef VPX_VP9_ENCODER_VP9_EXTEND_H_
+#define VPX_VP9_ENCODER_VP9_EXTEND_H_
#include "vpx_scale/yv12config.h"
#include "vpx/vpx_integer.h"
@@ -28,4 +28,4 @@ void vp9_copy_and_extend_frame_with_rect(const YV12_BUFFER_CONFIG *src,
} // extern "C"
#endif
-#endif // VP9_ENCODER_VP9_EXTEND_H_
+#endif // VPX_VP9_ENCODER_VP9_EXTEND_H_
diff --git a/libvpx/vp9/encoder/vp9_firstpass.c b/libvpx/vp9/encoder/vp9_firstpass.c
index fb6b132a5..8f0da48a2 100644
--- a/libvpx/vp9/encoder/vp9_firstpass.c
+++ b/libvpx/vp9/encoder/vp9_firstpass.c
@@ -44,14 +44,11 @@
#define COMPLEXITY_STATS_OUTPUT 0
#define FIRST_PASS_Q 10.0
-#define INTRA_MODE_PENALTY 1024
+#define NORMAL_BOOST 100
#define MIN_ARF_GF_BOOST 240
#define MIN_DECAY_FACTOR 0.01
#define NEW_MV_MODE_PENALTY 32
#define DARK_THRESH 64
-#define DEFAULT_GRP_WEIGHT 1.0
-#define RC_FACTOR_MIN 0.75
-#define RC_FACTOR_MAX 1.75
#define SECTION_NOISE_DEF 250.0
#define LOW_I_THRESH 24000
@@ -105,7 +102,7 @@ static void output_stats(FIRSTPASS_STATS *stats,
fprintf(fpfile,
"%12.0lf %12.4lf %12.2lf %12.2lf %12.2lf %12.0lf %12.4lf %12.4lf"
"%12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf"
- "%12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.0lf %12.0lf %12.0lf"
+ "%12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.0lf %12.4lf %12.0lf"
"%12.4lf"
"\n",
stats->frame, stats->weight, stats->intra_error, stats->coded_error,
@@ -316,16 +313,7 @@ void vp9_init_first_pass(VP9_COMP *cpi) {
}
void vp9_end_first_pass(VP9_COMP *cpi) {
- if (is_two_pass_svc(cpi)) {
- int i;
- for (i = 0; i < cpi->svc.number_spatial_layers; ++i) {
- output_stats(&cpi->svc.layer_context[i].twopass.total_stats,
- cpi->output_pkt_list);
- }
- } else {
- output_stats(&cpi->twopass.total_stats, cpi->output_pkt_list);
- }
-
+ output_stats(&cpi->twopass.total_stats, cpi->output_pkt_list);
vpx_free(cpi->twopass.fp_mb_float_stats);
cpi->twopass.fp_mb_float_stats = NULL;
}
@@ -503,11 +491,10 @@ static int scale_sse_threshold(VP9_COMMON *cm, int thresh) {
switch (cm->bit_depth) {
case VPX_BITS_8: ret_val = thresh; break;
case VPX_BITS_10: ret_val = thresh << 4; break;
- case VPX_BITS_12: ret_val = thresh << 8; break;
default:
- assert(0 &&
- "cm->bit_depth should be VPX_BITS_8, "
- "VPX_BITS_10 or VPX_BITS_12");
+ assert(cm->bit_depth == VPX_BITS_12);
+ ret_val = thresh << 8;
+ break;
}
}
#else
@@ -529,11 +516,10 @@ static int get_ul_intra_threshold(VP9_COMMON *cm) {
switch (cm->bit_depth) {
case VPX_BITS_8: ret_val = UL_INTRA_THRESH; break;
case VPX_BITS_10: ret_val = UL_INTRA_THRESH << 2; break;
- case VPX_BITS_12: ret_val = UL_INTRA_THRESH << 4; break;
default:
- assert(0 &&
- "cm->bit_depth should be VPX_BITS_8, "
- "VPX_BITS_10 or VPX_BITS_12");
+ assert(cm->bit_depth == VPX_BITS_12);
+ ret_val = UL_INTRA_THRESH << 4;
+ break;
}
}
#else
@@ -550,11 +536,10 @@ static int get_smooth_intra_threshold(VP9_COMMON *cm) {
switch (cm->bit_depth) {
case VPX_BITS_8: ret_val = SMOOTH_INTRA_THRESH; break;
case VPX_BITS_10: ret_val = SMOOTH_INTRA_THRESH << 4; break;
- case VPX_BITS_12: ret_val = SMOOTH_INTRA_THRESH << 8; break;
default:
- assert(0 &&
- "cm->bit_depth should be VPX_BITS_8, "
- "VPX_BITS_10 or VPX_BITS_12");
+ assert(cm->bit_depth == VPX_BITS_12);
+ ret_val = SMOOTH_INTRA_THRESH << 8;
+ break;
}
}
#else
@@ -731,9 +716,8 @@ static void first_pass_stat_calc(VP9_COMP *cpi, FIRSTPASS_STATS *fps,
// Exclude any image dead zone
if (fp_acc_data->image_data_start_row > 0) {
fp_acc_data->intra_skip_count =
- VPXMAX(0,
- fp_acc_data->intra_skip_count -
- (fp_acc_data->image_data_start_row * cm->mb_cols * 2));
+ VPXMAX(0, fp_acc_data->intra_skip_count -
+ (fp_acc_data->image_data_start_row * cm->mb_cols * 2));
}
fp_acc_data->intra_factor = fp_acc_data->intra_factor / (double)num_mbs;
@@ -825,6 +809,8 @@ static void accumulate_fp_mb_row_stat(TileDataEnc *this_tile,
fp_acc_data->image_data_start_row);
}
+#define NZ_MOTION_PENALTY 128
+#define INTRA_MODE_PENALTY 1024
void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td,
FIRSTPASS_DATA *fp_acc_data,
TileDataEnc *tile_data, MV *best_ref_mv,
@@ -834,6 +820,8 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td,
VP9_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
TileInfo tile = tile_data->tile_info;
+ const int mb_col_start = ROUND_POWER_OF_TWO(tile.mi_col_start, 1);
+ const int mb_col_end = ROUND_POWER_OF_TWO(tile.mi_col_end, 1);
struct macroblock_plane *const p = x->plane;
struct macroblockd_plane *const pd = xd->plane;
const PICK_MODE_CONTEXT *ctx = &td->pc_root->none;
@@ -850,9 +838,6 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td,
YV12_BUFFER_CONFIG *const new_yv12 = get_frame_new_buffer(cm);
const YV12_BUFFER_CONFIG *first_ref_buf = lst_yv12;
- LAYER_CONTEXT *const lc =
- is_two_pass_svc(cpi) ? &cpi->svc.layer_context[cpi->svc.spatial_layer_id]
- : NULL;
MODE_INFO mi_above, mi_left;
double mb_intra_factor;
@@ -861,29 +846,10 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td,
// First pass code requires valid last and new frame buffers.
assert(new_yv12 != NULL);
- assert((lc != NULL) || frame_is_intra_only(cm) || (lst_yv12 != NULL));
-
- if (lc != NULL) {
- // Use either last frame or alt frame for motion search.
- if (cpi->ref_frame_flags & VP9_LAST_FLAG) {
- first_ref_buf = vp9_get_scaled_ref_frame(cpi, LAST_FRAME);
- if (first_ref_buf == NULL)
- first_ref_buf = get_ref_frame_buffer(cpi, LAST_FRAME);
- }
-
- if (cpi->ref_frame_flags & VP9_GOLD_FLAG) {
- gld_yv12 = vp9_get_scaled_ref_frame(cpi, GOLDEN_FRAME);
- if (gld_yv12 == NULL) {
- gld_yv12 = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
- }
- } else {
- gld_yv12 = NULL;
- }
- }
+ assert(frame_is_intra_only(cm) || (lst_yv12 != NULL));
- xd->mi = cm->mi_grid_visible + xd->mi_stride * (mb_row << 1) +
- (tile.mi_col_start >> 1);
- xd->mi[0] = cm->mi + xd->mi_stride * (mb_row << 1) + (tile.mi_col_start >> 1);
+ xd->mi = cm->mi_grid_visible + xd->mi_stride * (mb_row << 1) + mb_col_start;
+ xd->mi[0] = cm->mi + xd->mi_stride * (mb_row << 1) + mb_col_start;
for (i = 0; i < MAX_MB_PLANE; ++i) {
p[i].coeff = ctx->coeff_pbuf[i][1];
@@ -897,10 +863,9 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td,
uv_mb_height = 16 >> (new_yv12->y_height > new_yv12->uv_height);
// Reset above block coeffs.
- recon_yoffset =
- (mb_row * recon_y_stride * 16) + (tile.mi_col_start >> 1) * 16;
- recon_uvoffset = (mb_row * recon_uv_stride * uv_mb_height) +
- (tile.mi_col_start >> 1) * uv_mb_height;
+ recon_yoffset = (mb_row * recon_y_stride * 16) + mb_col_start * 16;
+ recon_uvoffset =
+ (mb_row * recon_uv_stride * uv_mb_height) + mb_col_start * uv_mb_height;
// Set up limit values for motion vectors to prevent them extending
// outside the UMV borders.
@@ -908,8 +873,7 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td,
x->mv_limits.row_max =
((cm->mb_rows - 1 - mb_row) * 16) + BORDER_MV_PIXELS_B16;
- for (mb_col = tile.mi_col_start >> 1, c = 0; mb_col < (tile.mi_col_end >> 1);
- ++mb_col, c++) {
+ for (mb_col = mb_col_start, c = 0; mb_col < mb_col_end; ++mb_col, c++) {
int this_error;
int this_intra_error;
const int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row);
@@ -955,7 +919,7 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td,
x->skip_encode = 0;
x->fp_src_pred = 0;
// Do intra prediction based on source pixels for tile boundaries
- if ((mb_col == (tile.mi_col_start >> 1)) && mb_col != 0) {
+ if (mb_col == mb_col_start && mb_col != 0) {
xd->left_mi = &mi_left;
x->fp_src_pred = 1;
}
@@ -1002,12 +966,10 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td,
switch (cm->bit_depth) {
case VPX_BITS_8: break;
case VPX_BITS_10: this_error >>= 4; break;
- case VPX_BITS_12: this_error >>= 8; break;
default:
- assert(0 &&
- "cm->bit_depth should be VPX_BITS_8, "
- "VPX_BITS_10 or VPX_BITS_12");
- return;
+ assert(cm->bit_depth == VPX_BITS_12);
+ this_error >>= 8;
+ break;
}
}
#endif // CONFIG_VP9_HIGHBITDEPTH
@@ -1073,30 +1035,34 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td,
((cm->mb_cols - 1 - mb_col) * 16) + BORDER_MV_PIXELS_B16;
// Other than for the first frame do a motion search.
- if ((lc == NULL && cm->current_video_frame > 0) ||
- (lc != NULL && lc->current_video_frame_in_layer > 0)) {
- int tmp_err, motion_error, raw_motion_error;
+ if (cm->current_video_frame > 0) {
+ int tmp_err, motion_error, this_motion_error, raw_motion_error;
// Assume 0,0 motion with no mv overhead.
MV mv = { 0, 0 }, tmp_mv = { 0, 0 };
struct buf_2d unscaled_last_source_buf_2d;
+ vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[bsize];
xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset;
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
motion_error = highbd_get_prediction_error(
bsize, &x->plane[0].src, &xd->plane[0].pre[0], xd->bd);
+ this_motion_error = highbd_get_prediction_error(
+ bsize, &x->plane[0].src, &xd->plane[0].pre[0], 8);
} else {
motion_error =
get_prediction_error(bsize, &x->plane[0].src, &xd->plane[0].pre[0]);
+ this_motion_error = motion_error;
}
#else
motion_error =
get_prediction_error(bsize, &x->plane[0].src, &xd->plane[0].pre[0]);
+ this_motion_error = motion_error;
#endif // CONFIG_VP9_HIGHBITDEPTH
// Compute the motion error of the 0,0 motion using the last source
// frame as the reference. Skip the further motion search on
- // reconstructed frame if this error is small.
+ // reconstructed frame if this error is very small.
unscaled_last_source_buf_2d.buf =
cpi->unscaled_last_source->y_buffer + recon_yoffset;
unscaled_last_source_buf_2d.stride = cpi->unscaled_last_source->y_stride;
@@ -1113,12 +1079,20 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td,
&unscaled_last_source_buf_2d);
#endif // CONFIG_VP9_HIGHBITDEPTH
- // TODO(pengchong): Replace the hard-coded threshold
- if (raw_motion_error > 25 || lc != NULL) {
+ if (raw_motion_error > NZ_MOTION_PENALTY) {
// Test last reference frame using the previous best mv as the
// starting point (best reference) for the search.
first_pass_motion_search(cpi, x, best_ref_mv, &mv, &motion_error);
+ v_fn_ptr.vf = get_block_variance_fn(bsize);
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ v_fn_ptr.vf = highbd_get_block_variance_fn(bsize, 8);
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ this_motion_error =
+ vp9_get_mvpred_var(x, &mv, best_ref_mv, &v_fn_ptr, 0);
+
// If the current best reference mv is not centered on 0,0 then do a
// 0,0 based search as well.
if (!is_zero_mv(best_ref_mv)) {
@@ -1128,13 +1102,13 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td,
if (tmp_err < motion_error) {
motion_error = tmp_err;
mv = tmp_mv;
+ this_motion_error =
+ vp9_get_mvpred_var(x, &tmp_mv, &zero_mv, &v_fn_ptr, 0);
}
}
// Search in an older reference frame.
- if (((lc == NULL && cm->current_video_frame > 1) ||
- (lc != NULL && lc->current_video_frame_in_layer > 1)) &&
- gld_yv12 != NULL) {
+ if ((cm->current_video_frame > 1) && gld_yv12 != NULL) {
// Assume 0,0 motion with no mv overhead.
int gf_motion_error;
@@ -1316,7 +1290,7 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td,
int scaled_low_intra_thresh = scale_sse_threshold(cm, LOW_I_THRESH);
if (this_intra_error < scaled_low_intra_thresh) {
fp_acc_data->frame_noise_energy += fp_estimate_block_noise(x, bsize);
- if (motion_error < scaled_low_intra_thresh) {
+ if (this_motion_error < scaled_low_intra_thresh) {
fp_acc_data->intra_count_low += 1.0;
} else {
fp_acc_data->intra_count_high += 1.0;
@@ -1335,7 +1309,7 @@ void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td,
recon_uvoffset += uv_mb_height;
// Accumulate row level stats to the corresponding tile stats
- if (cpi->row_mt && mb_col == (tile.mi_col_end >> 1) - 1)
+ if (cpi->row_mt && mb_col == mb_col_end - 1)
accumulate_fp_mb_row_stat(tile_data, fp_acc_data);
(*(cpi->row_mt_sync_write_ptr))(&tile_data->row_mt_sync, mb_row, c,
@@ -1372,9 +1346,6 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
YV12_BUFFER_CONFIG *const new_yv12 = get_frame_new_buffer(cm);
const YV12_BUFFER_CONFIG *first_ref_buf = lst_yv12;
- LAYER_CONTEXT *const lc =
- is_two_pass_svc(cpi) ? &cpi->svc.layer_context[cpi->svc.spatial_layer_id]
- : NULL;
BufferPool *const pool = cm->buffer_pool;
FIRSTPASS_DATA fp_temp_data;
@@ -1386,7 +1357,7 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
// First pass code requires valid last and new frame buffers.
assert(new_yv12 != NULL);
- assert((lc != NULL) || frame_is_intra_only(cm) || (lst_yv12 != NULL));
+ assert(frame_is_intra_only(cm) || (lst_yv12 != NULL));
#if CONFIG_FP_MB_STATS
if (cpi->use_fp_mb_stats) {
@@ -1397,50 +1368,6 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
set_first_pass_params(cpi);
vp9_set_quantizer(cm, find_fp_qindex(cm->bit_depth));
- if (lc != NULL) {
- twopass = &lc->twopass;
-
- cpi->lst_fb_idx = cpi->svc.spatial_layer_id;
- cpi->ref_frame_flags = VP9_LAST_FLAG;
-
- if (cpi->svc.number_spatial_layers + cpi->svc.spatial_layer_id <
- REF_FRAMES) {
- cpi->gld_fb_idx =
- cpi->svc.number_spatial_layers + cpi->svc.spatial_layer_id;
- cpi->ref_frame_flags |= VP9_GOLD_FLAG;
- cpi->refresh_golden_frame = (lc->current_video_frame_in_layer == 0);
- } else {
- cpi->refresh_golden_frame = 0;
- }
-
- if (lc->current_video_frame_in_layer == 0) cpi->ref_frame_flags = 0;
-
- vp9_scale_references(cpi);
-
- // Use either last frame or alt frame for motion search.
- if (cpi->ref_frame_flags & VP9_LAST_FLAG) {
- first_ref_buf = vp9_get_scaled_ref_frame(cpi, LAST_FRAME);
- if (first_ref_buf == NULL)
- first_ref_buf = get_ref_frame_buffer(cpi, LAST_FRAME);
- }
-
- if (cpi->ref_frame_flags & VP9_GOLD_FLAG) {
- gld_yv12 = vp9_get_scaled_ref_frame(cpi, GOLDEN_FRAME);
- if (gld_yv12 == NULL) {
- gld_yv12 = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
- }
- } else {
- gld_yv12 = NULL;
- }
-
- set_ref_ptrs(cm, xd,
- (cpi->ref_frame_flags & VP9_LAST_FLAG) ? LAST_FRAME : NONE,
- (cpi->ref_frame_flags & VP9_GOLD_FLAG) ? GOLDEN_FRAME : NONE);
-
- cpi->Source = vp9_scale_if_required(cm, cpi->un_scaled_source,
- &cpi->scaled_source, 0, EIGHTTAP, 0);
- }
-
vp9_setup_block_planes(&x->e_mbd, cm->subsampling_x, cm->subsampling_y);
vp9_setup_src_planes(x, cpi->Source, 0, 0);
@@ -1524,18 +1451,13 @@ void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) {
vpx_extend_frame_borders(new_yv12);
- if (lc != NULL) {
- vp9_update_reference_frames(cpi);
- } else {
- // The frame we just compressed now becomes the last frame.
- ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->lst_fb_idx],
- cm->new_fb_idx);
- }
+ // The frame we just compressed now becomes the last frame.
+ ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->lst_fb_idx],
+ cm->new_fb_idx);
// Special case for the first frame. Copy into the GF buffer as a second
// reference.
- if (cm->current_video_frame == 0 && cpi->gld_fb_idx != INVALID_IDX &&
- lc == NULL) {
+ if (cm->current_video_frame == 0 && cpi->gld_fb_idx != INVALID_IDX) {
ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx],
cm->ref_frame_map[cpi->lst_fb_idx]);
}
@@ -1583,7 +1505,26 @@ static double calc_correction_factor(double err_per_mb, double err_divisor,
return fclamp(pow(error_term, power_term), 0.05, 5.0);
}
-#define ERR_DIVISOR 115.0
+static double wq_err_divisor(VP9_COMP *cpi) {
+ const VP9_COMMON *const cm = &cpi->common;
+ unsigned int screen_area = (cm->width * cm->height);
+
+ // Use a different error-per-MB divisor for different formats when
+ // computing the rate-correction factor.
+ if (screen_area <= 640 * 360) {
+ return 115.0;
+ } else if (screen_area < 1280 * 720) {
+ return 125.0;
+ } else if (screen_area <= 1920 * 1080) {
+ return 130.0;
+ } else if (screen_area < 3840 * 2160) {
+ return 150.0;
+ }
+
+ // Fall through to here only for 4K and above.
+ return 200.0;
+}
+
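Worked through the boundaries: 640x360 (230,400 pixels) takes the 115.0 branch via <=; 1280x720 (921,600) fails the strict < test and lands in the 1920x1080 bucket at 130.0, as does 1920x1080 itself (2,073,600) via <=; 3840x2160 (8,294,400) fails the strict < and falls through to 200.0. Since the divisor sits under the per-MB error in calc_correction_factor(), a larger value yields a smaller correction factor and hence a lower estimated bits-per-MB demand for the larger formats.
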
#define NOISE_FACTOR_MIN 0.9
#define NOISE_FACTOR_MAX 1.1
static int get_twopass_worst_quality(VP9_COMP *cpi, const double section_err,
@@ -1643,7 +1584,7 @@ static int get_twopass_worst_quality(VP9_COMP *cpi, const double section_err,
// content at the given rate.
for (q = rc->best_quality; q < rc->worst_quality; ++q) {
const double factor =
- calc_correction_factor(av_err_per_mb, ERR_DIVISOR, q);
+ calc_correction_factor(av_err_per_mb, wq_err_divisor(cpi), q);
const int bits_per_mb = vp9_rc_bits_per_mb(
INTER_FRAME, q,
factor * speed_term * cpi->twopass.bpm_factor * noise_factor,
@@ -1690,14 +1631,9 @@ void calculate_coded_size(VP9_COMP *cpi, int *scaled_frame_width,
}
void vp9_init_second_pass(VP9_COMP *cpi) {
- SVC *const svc = &cpi->svc;
VP9EncoderConfig *const oxcf = &cpi->oxcf;
- const int is_two_pass_svc =
- (svc->number_spatial_layers > 1) || (svc->number_temporal_layers > 1);
RATE_CONTROL *const rc = &cpi->rc;
- TWO_PASS *const twopass =
- is_two_pass_svc ? &svc->layer_context[svc->spatial_layer_id].twopass
- : &cpi->twopass;
+ TWO_PASS *const twopass = &cpi->twopass;
double frame_rate;
FIRSTPASS_STATS *stats;
@@ -1774,18 +1710,9 @@ void vp9_init_second_pass(VP9_COMP *cpi) {
// encoded in the second pass is a guess. However, the sum duration is not.
// It is calculated based on the actual durations of all frames from the
// first pass.
-
- if (is_two_pass_svc) {
- vp9_update_spatial_layer_framerate(cpi, frame_rate);
- twopass->bits_left =
- (int64_t)(stats->duration *
- svc->layer_context[svc->spatial_layer_id].target_bandwidth /
- 10000000.0);
- } else {
- vp9_new_framerate(cpi, frame_rate);
- twopass->bits_left =
- (int64_t)(stats->duration * oxcf->target_bandwidth / 10000000.0);
- }
+ vp9_new_framerate(cpi, frame_rate);
+ twopass->bits_left =
+ (int64_t)(stats->duration * oxcf->target_bandwidth / 10000000.0);
// This variable monitors how far behind the second ref update is lagging.
twopass->sr_update_lag = 1;
@@ -1913,10 +1840,12 @@ static int detect_flash(const TWO_PASS *twopass, int offset) {
// brief break in prediction (such as a flash) but subsequent frames
// are reasonably well predicted by an earlier (pre flash) frame.
// The recovery after a flash is indicated by a high pcnt_second_ref
- // compared to pcnt_inter.
+ // usage, or a second ref coded error notably lower than the last
+ // frame coded error.
return next_frame != NULL &&
- next_frame->pcnt_second_ref > next_frame->pcnt_inter &&
- next_frame->pcnt_second_ref >= 0.5;
+ ((next_frame->sr_coded_error < next_frame->coded_error) ||
+ ((next_frame->pcnt_second_ref > next_frame->pcnt_inter) &&
+ (next_frame->pcnt_second_ref >= 0.5)));
}
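
The widened test matters for short flashes: the frame after a flash predicts poorly from the flash frame itself (high coded_error) but well from the pre-flash frame two positions back, so sr_coded_error dips below coded_error even when fewer than half the blocks end up selecting the second reference, which the old pcnt_second_ref >= 0.5 condition required.
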
// Update the motion related elements to the GF arf boost calculation.
@@ -1971,7 +1900,20 @@ static double calc_frame_boost(VP9_COMP *cpi, const FIRSTPASS_STATS *this_frame,
return VPXMIN(frame_boost, GF_MAX_BOOST * boost_q_correction);
}
-#define KF_BASELINE_ERR_PER_MB 12500.0
+static double kf_err_per_mb(VP9_COMP *cpi) {
+ const VP9_COMMON *const cm = &cpi->common;
+ unsigned int screen_area = (cm->width * cm->height);
+
+ // Use a different error per mb factor for calculating boost for
+ // different formats.
+ if (screen_area < 1280 * 720) {
+ return 2000.0;
+ } else if (screen_area < 1920 * 1080) {
+ return 500.0;
+ }
+ return 250.0;
+}
+
static double calc_kf_frame_boost(VP9_COMP *cpi,
const FIRSTPASS_STATS *this_frame,
double *sr_accumulator,
@@ -1984,7 +1926,7 @@ static double calc_kf_frame_boost(VP9_COMP *cpi,
const double active_area = calculate_active_area(cpi, this_frame);
// Underlying boost factor is based on inter error ratio.
- frame_boost = (KF_BASELINE_ERR_PER_MB * active_area) /
+ frame_boost = (kf_err_per_mb(cpi) * active_area) /
DOUBLE_DIVIDE_CHECK(this_frame->coded_error + *sr_accumulator);
// Update the accumulator for second ref error difference.
@@ -1997,8 +1939,11 @@ static double calc_kf_frame_boost(VP9_COMP *cpi,
if (this_frame_mv_in_out > 0.0)
frame_boost += frame_boost * (this_frame_mv_in_out * 2.0);
- // Q correction and scalling
- frame_boost = frame_boost * boost_q_correction;
+ // Q correction and scaling
+ // The 40.0 value here is an experimentally derived baseline minimum.
+ // This value is in line with the minimum per frame boost in the alt_ref
+ // boost calculation.
+ frame_boost = ((frame_boost + 40.0) * boost_q_correction);
return VPXMIN(frame_boost, max_boost * boost_q_correction);
}
@@ -2140,7 +2085,7 @@ static int calculate_boost_bits(int frame_count, int boost,
// return 0 for invalid inputs (could arise e.g. through rounding errors)
if (!boost || (total_group_bits <= 0) || (frame_count < 0)) return 0;
- allocation_chunks = (frame_count * 100) + boost;
+ allocation_chunks = (frame_count * NORMAL_BOOST) + boost;
// Prevent overflow.
if (boost > 1023) {
@@ -2154,18 +2099,6 @@ static int calculate_boost_bits(int frame_count, int boost,
0);
}
-// Current limit on maximum number of active arfs in a GF/ARF group.
-#define MAX_ACTIVE_ARFS 2
-#define ARF_SLOT1 2
-#define ARF_SLOT2 3
-// This function indirects the choice of buffers for arfs.
-// At the moment the values are fixed but this may change as part of
-// the integration process with other codec features that swap buffers around.
-static void get_arf_buffer_indices(unsigned char *arf_buffer_indices) {
- arf_buffer_indices[0] = ARF_SLOT1;
- arf_buffer_indices[1] = ARF_SLOT2;
-}
-
// Used in corpus vbr: Calculates the total normalized group complexity score
// for a given number of frames starting at the current position in the stats
// file.
@@ -2185,11 +2118,129 @@ static double calculate_group_score(VP9_COMP *cpi, double av_score,
++s;
++i;
}
- assert(i == frame_count);
return score_total;
}
+static void find_arf_order(VP9_COMP *cpi, GF_GROUP *gf_group,
+ int *index_counter, int depth, int start, int end) {
+ TWO_PASS *twopass = &cpi->twopass;
+ const FIRSTPASS_STATS *const start_pos = twopass->stats_in;
+ FIRSTPASS_STATS fpf_frame;
+ const int mid = (start + end + 1) >> 1;
+ const int min_frame_interval = 2;
+ int idx;
+
+ // Process regular P frames
+ if ((end - start < min_frame_interval) ||
+ (depth > gf_group->allowed_max_layer_depth)) {
+ for (idx = start; idx <= end; ++idx) {
+ gf_group->update_type[*index_counter] = LF_UPDATE;
+ gf_group->arf_src_offset[*index_counter] = 0;
+ gf_group->frame_gop_index[*index_counter] = idx;
+ gf_group->rf_level[*index_counter] = INTER_NORMAL;
+ gf_group->layer_depth[*index_counter] = depth;
+ gf_group->gfu_boost[*index_counter] = NORMAL_BOOST;
+ ++(*index_counter);
+ }
+ gf_group->max_layer_depth = VPXMAX(gf_group->max_layer_depth, depth);
+ return;
+ }
+
+ assert(abs(mid - start) >= 1 && abs(mid - end) >= 1);
+
+ // Process ARF frame
+ gf_group->layer_depth[*index_counter] = depth;
+ gf_group->update_type[*index_counter] = ARF_UPDATE;
+ gf_group->arf_src_offset[*index_counter] = mid - start;
+ gf_group->frame_gop_index[*index_counter] = mid;
+ gf_group->rf_level[*index_counter] = GF_ARF_LOW;
+
+ for (idx = 0; idx <= mid; ++idx)
+ if (EOF == input_stats(twopass, &fpf_frame)) break;
+
+ gf_group->gfu_boost[*index_counter] =
+ VPXMAX(MIN_ARF_GF_BOOST,
+ calc_arf_boost(cpi, end - mid + 1, mid - start) >> depth);
+
+ reset_fpf_position(twopass, start_pos);
+
+ ++(*index_counter);
+
+ find_arf_order(cpi, gf_group, index_counter, depth + 1, start, mid - 1);
+
+ gf_group->update_type[*index_counter] = USE_BUF_FRAME;
+ gf_group->arf_src_offset[*index_counter] = 0;
+ gf_group->frame_gop_index[*index_counter] = mid;
+ gf_group->rf_level[*index_counter] = INTER_NORMAL;
+ gf_group->layer_depth[*index_counter] = depth;
+ ++(*index_counter);
+
+ find_arf_order(cpi, gf_group, index_counter, depth + 1, mid + 1, end);
+}
+
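A worked trace, assuming start = 1 and end = 6 with a permissive depth limit: mid = (1 + 6 + 1) >> 1 = 4, so the recursion emits ARF at 4, then ARF at 2, LF at 1, USE_BUF at 2, LF at 3, USE_BUF at 4, LF at 5, LF at 6. Each subtree's ARF is thus coded before the frames it covers, and the USE_BUF_FRAME entries replay the already-coded ARF pictures at their display positions without new coding passes. The >> depth on the boost means deeper ARF layers receive geometrically smaller boosts.
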
+static INLINE void set_gf_overlay_frame_type(GF_GROUP *gf_group,
+ int frame_index,
+ int source_alt_ref_active) {
+ if (source_alt_ref_active) {
+ gf_group->update_type[frame_index] = OVERLAY_UPDATE;
+ gf_group->rf_level[frame_index] = INTER_NORMAL;
+ gf_group->layer_depth[frame_index] = MAX_ARF_LAYERS - 1;
+ gf_group->gfu_boost[frame_index] = NORMAL_BOOST;
+ } else {
+ gf_group->update_type[frame_index] = GF_UPDATE;
+ gf_group->rf_level[frame_index] = GF_ARF_STD;
+ gf_group->layer_depth[frame_index] = 0;
+ }
+}
+
+static void define_gf_group_structure(VP9_COMP *cpi) {
+ RATE_CONTROL *const rc = &cpi->rc;
+ TWO_PASS *const twopass = &cpi->twopass;
+ GF_GROUP *const gf_group = &twopass->gf_group;
+ int frame_index = 0;
+ int key_frame = cpi->common.frame_type == KEY_FRAME;
+ int layer_depth = 1;
+ int gop_frames =
+ rc->baseline_gf_interval - (key_frame || rc->source_alt_ref_pending);
+
+ gf_group->frame_start = cpi->common.current_video_frame;
+ gf_group->frame_end = gf_group->frame_start + rc->baseline_gf_interval;
+ gf_group->max_layer_depth = 0;
+ gf_group->allowed_max_layer_depth = 0;
+
+ // For key frames the frame target rate is already set and the key
+ // frame also serves as the golden frame.
+ // === [frame_index == 0] ===
+ if (!key_frame)
+ set_gf_overlay_frame_type(gf_group, frame_index, rc->source_alt_ref_active);
+
+ ++frame_index;
+
+ // === [frame_index == 1] ===
+ if (rc->source_alt_ref_pending) {
+ gf_group->update_type[frame_index] = ARF_UPDATE;
+ gf_group->rf_level[frame_index] = GF_ARF_STD;
+ gf_group->layer_depth[frame_index] = layer_depth;
+ gf_group->arf_src_offset[frame_index] =
+ (unsigned char)(rc->baseline_gf_interval - 1);
+ gf_group->frame_gop_index[frame_index] = rc->baseline_gf_interval;
+ gf_group->max_layer_depth = 1;
+ ++frame_index;
+ ++layer_depth;
+ gf_group->allowed_max_layer_depth = cpi->oxcf.enable_auto_arf;
+ }
+
+ find_arf_order(cpi, gf_group, &frame_index, layer_depth, 1, gop_frames);
+
+ set_gf_overlay_frame_type(gf_group, frame_index, rc->source_alt_ref_pending);
+ gf_group->arf_src_offset[frame_index] = 0;
+ gf_group->frame_gop_index[frame_index] = rc->baseline_gf_interval;
+
+ // Record the number of frame ops in the group.
+ gf_group->gf_group_size = frame_index;
+}
+
static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
int gf_arf_bits) {
VP9EncoderConfig *const oxcf = &cpi->oxcf;
@@ -2198,17 +2249,12 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
GF_GROUP *const gf_group = &twopass->gf_group;
FIRSTPASS_STATS frame_stats;
int i;
- int frame_index = 1;
+ int frame_index = 0;
int target_frame_size;
int key_frame;
const int max_bits = frame_max_bits(&cpi->rc, oxcf);
int64_t total_group_bits = gf_group_bits;
- int mid_boost_bits = 0;
int mid_frame_idx;
- unsigned char arf_buffer_indices[MAX_ACTIVE_ARFS];
- int alt_frame_index = frame_index;
- int has_temporal_layers =
- is_two_pass_svc(cpi) && cpi->svc.number_temporal_layers > 1;
int normal_frames;
int normal_frame_bits;
int last_frame_reduction = 0;
@@ -2216,71 +2262,32 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
double tot_norm_frame_score = 1.0;
double this_frame_score = 1.0;
- // Only encode alt reference frame in temporal base layer.
- if (has_temporal_layers) alt_frame_index = cpi->svc.number_temporal_layers;
+ // The GF group structure has already been defined; fetch its length.
+ int gop_frames = gf_group->gf_group_size;
- key_frame =
- cpi->common.frame_type == KEY_FRAME || vp9_is_upper_layer_key_frame(cpi);
-
- get_arf_buffer_indices(arf_buffer_indices);
+ key_frame = cpi->common.frame_type == KEY_FRAME;
// For key frames the frame target rate is already set and it
// is also the golden frame.
+ // === [frame_index == 0] ===
if (!key_frame) {
- if (rc->source_alt_ref_active) {
- gf_group->update_type[0] = OVERLAY_UPDATE;
- gf_group->rf_level[0] = INTER_NORMAL;
- gf_group->bit_allocation[0] = 0;
- } else {
- gf_group->update_type[0] = GF_UPDATE;
- gf_group->rf_level[0] = GF_ARF_STD;
- gf_group->bit_allocation[0] = gf_arf_bits;
- }
- gf_group->arf_update_idx[0] = arf_buffer_indices[0];
- gf_group->arf_ref_idx[0] = arf_buffer_indices[0];
-
- // Step over the golden frame / overlay frame
- if (EOF == input_stats(twopass, &frame_stats)) return;
+ gf_group->bit_allocation[frame_index] =
+ rc->source_alt_ref_active ? 0 : gf_arf_bits;
}
// Deduct the boost bits for arf (or gf if it is not a key frame)
// from the group total.
if (rc->source_alt_ref_pending || !key_frame) total_group_bits -= gf_arf_bits;
+ ++frame_index;
+
+ // === [frame_index == 1] ===
// Store the bits to spend on the ARF if there is one.
if (rc->source_alt_ref_pending) {
- gf_group->update_type[alt_frame_index] = ARF_UPDATE;
- gf_group->rf_level[alt_frame_index] = GF_ARF_STD;
- gf_group->bit_allocation[alt_frame_index] = gf_arf_bits;
-
- if (has_temporal_layers)
- gf_group->arf_src_offset[alt_frame_index] =
- (unsigned char)(rc->baseline_gf_interval -
- cpi->svc.number_temporal_layers);
- else
- gf_group->arf_src_offset[alt_frame_index] =
- (unsigned char)(rc->baseline_gf_interval - 1);
-
- gf_group->arf_update_idx[alt_frame_index] = arf_buffer_indices[0];
- gf_group->arf_ref_idx[alt_frame_index] =
- arf_buffer_indices[cpi->multi_arf_last_grp_enabled &&
- rc->source_alt_ref_active];
- if (!has_temporal_layers) ++frame_index;
-
- if (cpi->multi_arf_enabled) {
- // Set aside a slot for a level 1 arf.
- gf_group->update_type[frame_index] = ARF_UPDATE;
- gf_group->rf_level[frame_index] = GF_ARF_LOW;
- gf_group->arf_src_offset[frame_index] =
- (unsigned char)((rc->baseline_gf_interval >> 1) - 1);
- gf_group->arf_update_idx[frame_index] = arf_buffer_indices[1];
- gf_group->arf_ref_idx[frame_index] = arf_buffer_indices[0];
- ++frame_index;
- }
- }
+ gf_group->bit_allocation[frame_index] = gf_arf_bits;
- // Note index of the first normal inter frame int eh group (not gf kf arf)
- gf_group->first_inter_index = frame_index;
+ ++frame_index;
+ }
// Define middle frame
mid_frame_idx = frame_index + (rc->baseline_gf_interval >> 1) - 1;
@@ -2291,6 +2298,61 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
else
normal_frame_bits = (int)total_group_bits;
+ gf_group->gfu_boost[1] = rc->gfu_boost;
+
+ if (cpi->multi_layer_arf) {
+ int idx;
+ int arf_depth_bits[MAX_ARF_LAYERS] = { 0 };
+ int arf_depth_count[MAX_ARF_LAYERS] = { 0 };
+ int arf_depth_boost[MAX_ARF_LAYERS] = { 0 };
+ int total_arfs = 1; // Account for the base layer ARF.
+
+ for (idx = 0; idx < gop_frames; ++idx) {
+ if (gf_group->update_type[idx] == ARF_UPDATE) {
+ arf_depth_boost[gf_group->layer_depth[idx]] += gf_group->gfu_boost[idx];
+ ++arf_depth_count[gf_group->layer_depth[idx]];
+ }
+ }
+
+ for (idx = 2; idx < MAX_ARF_LAYERS; ++idx) {
+ if (arf_depth_boost[idx] == 0) break;
+ arf_depth_bits[idx] = calculate_boost_bits(
+ rc->baseline_gf_interval - total_arfs - arf_depth_count[idx],
+ arf_depth_boost[idx], total_group_bits);
+
+ total_group_bits -= arf_depth_bits[idx];
+ total_arfs += arf_depth_count[idx];
+ }
+
+ // Exclude the extra ARFs; the base layer ARF is already accounted for.
+ normal_frames -= (total_arfs - 1);
+ if (normal_frames > 1)
+ normal_frame_bits = (int)(total_group_bits / normal_frames);
+ else
+ normal_frame_bits = (int)total_group_bits;
+
+ target_frame_size = normal_frame_bits;
+ target_frame_size =
+ clamp(target_frame_size, 0, VPXMIN(max_bits, (int)total_group_bits));
+
+ // The base layer ARF already has its bit allocation assigned above;
+ // fill in the rest of the group.
+ for (idx = frame_index; idx < gop_frames; ++idx) {
+ switch (gf_group->update_type[idx]) {
+ case ARF_UPDATE:
+ gf_group->bit_allocation[idx] =
+ (int)((arf_depth_bits[gf_group->layer_depth[idx]] *
+ gf_group->gfu_boost[idx]) /
+ arf_depth_boost[gf_group->layer_depth[idx]]);
+ break;
+ case USE_BUF_FRAME: gf_group->bit_allocation[idx] = 0; break;
+ default: gf_group->bit_allocation[idx] = target_frame_size; break;
+ }
+ }
+ gf_group->bit_allocation[idx] = 0;
+
+ return;
+ }
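
The multi-layer path above reserves a bit pool per ARF layer depth and then splits each pool among that depth's ARFs in proportion to their individual gfu_boost values. A minimal standalone sketch of that arithmetic (illustrative numbers, not taken from the encoder; the division is widened to avoid overflow in the sketch):

#include <stdint.h>
#include <stdio.h>

int main(void) {
  /* Two ARFs at the same layer depth share that depth's bit pool in
     proportion to their boost, as in allocate_gf_group_bits(). */
  const int arf_depth_bits = 120000; /* bits reserved for this depth */
  const int gfu_boost[2] = { 900, 300 };
  const int arf_depth_boost = 900 + 300;
  int i;
  for (i = 0; i < 2; ++i) {
    const int bits =
        (int)((int64_t)arf_depth_bits * gfu_boost[i] / arf_depth_boost);
    printf("ARF %d gets %d bits\n", i, bits); /* 90000 and 30000 */
  }
  return 0;
}
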
+
if (oxcf->vbr_corpus_complexity) {
av_score = get_distribution_av_err(cpi, twopass);
tot_norm_frame_score = calculate_group_score(cpi, av_score, normal_frames);
@@ -2298,13 +2360,7 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
// Allocate bits to the other frames in the group.
for (i = 0; i < normal_frames; ++i) {
- int arf_idx = 0;
if (EOF == input_stats(twopass, &frame_stats)) break;
-
- if (has_temporal_layers && frame_index == alt_frame_index) {
- ++frame_index;
- }
-
if (oxcf->vbr_corpus_complexity) {
this_frame_score = calculate_norm_frame_score(cpi, twopass, oxcf,
&frame_stats, av_score);
@@ -2318,21 +2374,9 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
target_frame_size -= last_frame_reduction;
}
- if (rc->source_alt_ref_pending && cpi->multi_arf_enabled) {
- mid_boost_bits += (target_frame_size >> 4);
- target_frame_size -= (target_frame_size >> 4);
-
- if (frame_index <= mid_frame_idx) arf_idx = 1;
- }
- gf_group->arf_update_idx[frame_index] = arf_buffer_indices[arf_idx];
- gf_group->arf_ref_idx[frame_index] = arf_buffer_indices[arf_idx];
-
target_frame_size =
clamp(target_frame_size, 0, VPXMIN(max_bits, (int)total_group_bits));
- gf_group->update_type[frame_index] = LF_UPDATE;
- gf_group->rf_level[frame_index] = INTER_NORMAL;
-
gf_group->bit_allocation[frame_index] = target_frame_size;
++frame_index;
}
@@ -2344,27 +2388,6 @@ static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
// We need to configure the frame at the end of the sequence + 1 that will be
// the start frame for the next group. Otherwise prior to the call to
// vp9_rc_get_second_pass_params() the data will be undefined.
- gf_group->arf_update_idx[frame_index] = arf_buffer_indices[0];
- gf_group->arf_ref_idx[frame_index] = arf_buffer_indices[0];
-
- if (rc->source_alt_ref_pending) {
- gf_group->update_type[frame_index] = OVERLAY_UPDATE;
- gf_group->rf_level[frame_index] = INTER_NORMAL;
-
- // Final setup for second arf and its overlay.
- if (cpi->multi_arf_enabled) {
- gf_group->bit_allocation[2] =
- gf_group->bit_allocation[mid_frame_idx] + mid_boost_bits;
- gf_group->update_type[mid_frame_idx] = OVERLAY_UPDATE;
- gf_group->bit_allocation[mid_frame_idx] = 0;
- }
- } else {
- gf_group->update_type[frame_index] = GF_UPDATE;
- gf_group->rf_level[frame_index] = GF_ARF_STD;
- }
-
- // Note whether multi-arf was enabled this group for next time.
- cpi->multi_arf_last_grp_enabled = cpi->multi_arf_enabled;
}
// Adjusts the ARNF filter for a GF group.
@@ -2382,9 +2405,9 @@ static void adjust_group_arnr_filter(VP9_COMP *cpi, double section_noise,
}
// Analyse and define a gf/arf group.
-#define ARF_DECAY_BREAKOUT 0.10
#define ARF_ABS_ZOOM_THRESH 4.0
+#define MAX_GF_BOOST 5400
static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
VP9_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
@@ -2426,6 +2449,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
const int is_key_frame = frame_is_intra_only(cm);
const int arf_active_or_kf = is_key_frame || rc->source_alt_ref_active;
+ double gop_intra_factor = 1.0;
+
// Reset the GF group data structures unless this is a key
// frame in which case it will already have been done.
if (is_key_frame == 0) {
@@ -2465,36 +2490,49 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
{
int int_max_q = (int)(vp9_convert_qindex_to_q(twopass->active_worst_quality,
cpi->common.bit_depth));
- int int_lbq = (int)(vp9_convert_qindex_to_q(rc->last_boosted_qindex,
- cpi->common.bit_depth));
+ int q_term = (cm->current_video_frame == 0)
+ ? int_max_q / 32
+ : (int)(vp9_convert_qindex_to_q(rc->last_boosted_qindex,
+ cpi->common.bit_depth) /
+ 6);
active_min_gf_interval =
rc->min_gf_interval + arf_active_or_kf + VPXMIN(2, int_max_q / 200);
active_min_gf_interval =
VPXMIN(active_min_gf_interval, rc->max_gf_interval + arf_active_or_kf);
- if (cpi->multi_arf_allowed) {
- active_max_gf_interval = rc->max_gf_interval;
- } else {
- // The value chosen depends on the active Q range. At low Q we have
- // bits to spare and are better with a smaller interval and smaller boost.
- // At high Q when there are few bits to spare we are better with a longer
- // interval to spread the cost of the GF.
- active_max_gf_interval = 12 + arf_active_or_kf + VPXMIN(4, (int_lbq / 6));
-
- // We have: active_min_gf_interval <=
- // rc->max_gf_interval + arf_active_or_kf.
- if (active_max_gf_interval < active_min_gf_interval) {
- active_max_gf_interval = active_min_gf_interval;
- } else {
- active_max_gf_interval = VPXMIN(active_max_gf_interval,
- rc->max_gf_interval + arf_active_or_kf);
- }
+ // The value chosen depends on the active Q range. At low Q we have
+ // bits to spare and are better with a smaller interval and smaller boost.
+ // At high Q when there are few bits to spare we are better with a longer
+ // interval to spread the cost of the GF.
+ active_max_gf_interval = 11 + arf_active_or_kf + VPXMIN(5, q_term);
- // Would the active max drop us out just before the near the next kf?
- if ((active_max_gf_interval <= rc->frames_to_key) &&
- (active_max_gf_interval >= (rc->frames_to_key - rc->min_gf_interval)))
- active_max_gf_interval = rc->frames_to_key / 2;
+ // Force max GF interval to be odd.
+ active_max_gf_interval = active_max_gf_interval | 0x01;
+
+ // We have: active_min_gf_interval <=
+ // rc->max_gf_interval + arf_active_or_kf.
+ if (active_max_gf_interval < active_min_gf_interval) {
+ active_max_gf_interval = active_min_gf_interval;
+ } else {
+ active_max_gf_interval = VPXMIN(active_max_gf_interval,
+ rc->max_gf_interval + arf_active_or_kf);
}
+
+ // Would the active max drop us out just before the next kf?
+ if ((active_max_gf_interval <= rc->frames_to_key) &&
+ (active_max_gf_interval >= (rc->frames_to_key - rc->min_gf_interval)))
+ active_max_gf_interval = rc->frames_to_key / 2;
+ }
+
+ if (cpi->multi_layer_arf) {
+ int layers = 0;
+ int max_layers = VPXMIN(MAX_ARF_LAYERS, cpi->oxcf.enable_auto_arf);
+
+ // Adapt the intra_error factor to active_max_gf_interval limit.
+ for (i = active_max_gf_interval; i > 0; i >>= 1) ++layers;
+
+ layers = VPXMIN(max_layers, layers);
+ gop_intra_factor += (layers * 0.25);
}
i = 0;
@@ -2523,15 +2561,17 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
&next_frame, &this_frame_mv_in_out, &mv_in_out_accumulator,
&abs_mv_in_out_accumulator, &mv_ratio_accumulator);
+ // Monitor for static sections.
+ if ((rc->frames_since_key + i - 1) > 1) {
+ zero_motion_accumulator = VPXMIN(
+ zero_motion_accumulator, get_zero_motion_factor(cpi, &next_frame));
+ }
+
// Accumulate the effect of prediction quality decay.
if (!flash_detected) {
last_loop_decay_rate = loop_decay_rate;
loop_decay_rate = get_prediction_decay_rate(cpi, &next_frame);
- // Monitor for static sections.
- zero_motion_accumulator = VPXMIN(
- zero_motion_accumulator, get_zero_motion_factor(cpi, &next_frame));
-
// Break clause to detect very still sections after motion. For example,
// a static image after a fade or other transition.
if (detect_transition_to_still(cpi, i, 5, loop_decay_rate,
@@ -2551,18 +2591,27 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
}
// Break out conditions.
- if (
- // Break at active_max_gf_interval unless almost totally static.
- ((i >= active_max_gf_interval) && (zero_motion_accumulator < 0.995)) ||
+ // Break at the active_max_gf_interval limit unless almost totally static.
+ //
+ // Note that the addition of a test of rc->source_alt_ref_active is
+ // deliberate. The effect of this is that after a normal altref group even
+ // if the material is static there will be one normal length GF group
+ // before allowing longer GF groups. The reason for this is that in cases
+ // such as slide shows where slides are separated by a complex transition
+ // such as a fade, the arf group spanning the transition may not be coded
+ // at a very high quality and hence this frame (with its overlay) is a
+ // poor golden frame to use for an extended group.
+ if (((i >= active_max_gf_interval) &&
+ ((zero_motion_accumulator < 0.995) || (rc->source_alt_ref_active))) ||
(
// Don't break out with a very short interval.
(i >= active_min_gf_interval) &&
// If possible dont break very close to a kf
- ((rc->frames_to_key - i) >= rc->min_gf_interval) &&
+ ((rc->frames_to_key - i) >= rc->min_gf_interval) && (i & 0x01) &&
(!flash_detected) &&
((mv_ratio_accumulator > mv_ratio_accumulator_thresh) ||
(abs_mv_in_out_accumulator > abs_mv_in_out_thresh) ||
- (sr_accumulator > next_frame.intra_error)))) {
+ (sr_accumulator > gop_intra_factor * next_frame.intra_error)))) {
break;
}
@@ -2573,8 +2622,9 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
rc->constrained_gf_group = (i >= rc->frames_to_key) ? 1 : 0;
// Should we use the alternate reference frame.
- if (allow_alt_ref && (i < cpi->oxcf.lag_in_frames) &&
- (i >= rc->min_gf_interval)) {
+ if ((zero_motion_accumulator < 0.995) && allow_alt_ref &&
+ (twopass->kf_zeromotion_pct < STATIC_KF_GROUP_THRESH) &&
+ (i < cpi->oxcf.lag_in_frames) && (i >= rc->min_gf_interval)) {
const int forward_frames = (rc->frames_to_key - i >= i - 1)
? i - 1
: VPXMAX(0, rc->frames_to_key - i);
@@ -2582,15 +2632,8 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// Calculate the boost for alt ref.
rc->gfu_boost = calc_arf_boost(cpi, forward_frames, (i - 1));
rc->source_alt_ref_pending = 1;
-
- // Test to see if multi arf is appropriate.
- cpi->multi_arf_enabled =
- (cpi->multi_arf_allowed && (rc->baseline_gf_interval >= 6) &&
- (zero_motion_accumulator < 0.995))
- ? 1
- : 0;
} else {
- rc->gfu_boost = calc_arf_boost(cpi, 0, (i - 1));
+ rc->gfu_boost = VPXMIN(MAX_GF_BOOST, calc_arf_boost(cpi, 0, (i - 1)));
rc->source_alt_ref_pending = 0;
}
@@ -2601,31 +2644,7 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
rc->gfu_boost = VPXMIN((int)rc->gfu_boost, i * 200);
#endif
- // Set the interval until the next gf.
- rc->baseline_gf_interval = i - (is_key_frame || rc->source_alt_ref_pending);
-
- // Only encode alt reference frame in temporal base layer. So
- // baseline_gf_interval should be multiple of a temporal layer group
- // (typically the frame distance between two base layer frames)
- if (is_two_pass_svc(cpi) && cpi->svc.number_temporal_layers > 1) {
- int count = (1 << (cpi->svc.number_temporal_layers - 1)) - 1;
- int new_gf_interval = (rc->baseline_gf_interval + count) & (~count);
- int j;
- for (j = 0; j < new_gf_interval - rc->baseline_gf_interval; ++j) {
- if (EOF == input_stats(twopass, this_frame)) break;
- gf_group_err +=
- calculate_norm_frame_score(cpi, twopass, oxcf, this_frame, av_err);
- gf_group_raw_error += this_frame->coded_error;
- gf_group_noise += this_frame->frame_noise_energy;
- gf_group_skip_pct += this_frame->intra_skip_pct;
- gf_group_inactive_zone_rows += this_frame->inactive_zone_rows;
- gf_group_inter += this_frame->pcnt_inter;
- gf_group_motion += this_frame->pcnt_motion;
- }
- rc->baseline_gf_interval = new_gf_interval;
- }
-
- rc->frames_till_gf_update_due = rc->baseline_gf_interval;
+ rc->baseline_gf_interval = i - rc->source_alt_ref_pending;
// Reset the file position.
reset_fpf_position(twopass, start_pos);
@@ -2671,12 +2690,15 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
}
// Calculate the extra bits to be used for boosted frame(s)
- gf_arf_bits = calculate_boost_bits(rc->baseline_gf_interval, rc->gfu_boost,
- gf_group_bits);
+ gf_arf_bits = calculate_boost_bits((rc->baseline_gf_interval - 1),
+ rc->gfu_boost, gf_group_bits);
// Adjust KF group bits and error remaining.
twopass->kf_group_error_left -= gf_group_err;
+ // Decide GOP structure.
+ define_gf_group_structure(cpi);
+
// Allocate bits to each of the frames in the GF group.
allocate_gf_group_bits(cpi, gf_group_bits, gf_arf_bits);
@@ -2700,17 +2722,31 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
#endif
}
-// Threshold for use of the lagging second reference frame. High second ref
-// usage may point to a transient event like a flash or occlusion rather than
-// a real scene cut.
-#define SECOND_REF_USEAGE_THRESH 0.1
+// Threshold below which the intra/inter error ratio is considered very low.
+#define VERY_LOW_II 1.5
+// For clean slide transitions we expect a sharp single frame spike in error.
+#define ERROR_SPIKE 5.0
+
+// Slide show transition detection.
+// Tests for the case where there is very low error on either side of the
+// current frame but a much higher error just for this frame. This can help
+// detect key frames in slide shows even where the slides are pictures of
+// different sizes.
+// Also requires that intra and inter errors are very similar to help eliminate
+// harmful false positives.
+// It will not help if the transition is a fade or other multi-frame effect.
+static int slide_transition(const FIRSTPASS_STATS *this_frame,
+ const FIRSTPASS_STATS *last_frame,
+ const FIRSTPASS_STATS *next_frame) {
+ return (this_frame->intra_error < (this_frame->coded_error * VERY_LOW_II)) &&
+ (this_frame->coded_error > (last_frame->coded_error * ERROR_SPIKE)) &&
+ (this_frame->coded_error > (next_frame->coded_error * ERROR_SPIKE));
+}
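
For intuition: with the thresholds above, a frame is flagged when its coded error spikes to at least 5x both neighbours' while staying within 1.5x of its own intra error. A standalone check with made-up stats (the struct is a simplified stand-in for the FIRSTPASS_STATS fields used):

#include <stdio.h>

#define VERY_LOW_II 1.5
#define ERROR_SPIKE 5.0

struct stats { double intra_error, coded_error; };

static int slide_transition(const struct stats *this_frame,
                            const struct stats *last_frame,
                            const struct stats *next_frame) {
  return (this_frame->intra_error < this_frame->coded_error * VERY_LOW_II) &&
         (this_frame->coded_error > last_frame->coded_error * ERROR_SPIKE) &&
         (this_frame->coded_error > next_frame->coded_error * ERROR_SPIKE);
}

int main(void) {
  const struct stats last = { 900.0, 100.0 };
  const struct stats cur = { 1200.0, 1000.0 }; /* sharp single-frame spike */
  const struct stats next = { 950.0, 120.0 };
  printf("slide transition: %d\n", slide_transition(&cur, &last, &next));
  /* prints 1: 1200 < 1500, 1000 > 500, 1000 > 600 */
  return 0;
}
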
+
// Minimum % intra coding observed in first pass (1.0 = 100%)
#define MIN_INTRA_LEVEL 0.25
-// Minimum ratio between the % of intra coding and inter coding in the first
-// pass after discounting neutral blocks (discounting neutral blocks in this
-// way helps catch scene cuts in clips with very flat areas or letter box
-// format clips with image padding.
-#define INTRA_VS_INTER_THRESH 2.0
+// Threshold for use of the lagging second reference frame. Scene cuts do not
+// usually have a high second ref usage.
+#define SECOND_REF_USEAGE_THRESH 0.125
// Hard threshold where the first pass chooses intra for almost all blocks.
// In such a case even if the frame is not a scene cut coding a key frame
// may be a good option.
@@ -2718,12 +2754,6 @@ static void define_gf_group(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// Maximum threshold for the relative ratio of intra error score vs best
// inter error score.
#define KF_II_ERR_THRESHOLD 2.5
-// In real scene cuts there is almost always a sharp change in the intra
-// or inter error score.
-#define ERR_CHANGE_THRESHOLD 0.4
-// For real scene cuts we expect an improvment in the intra inter error
-// ratio in the next frame.
-#define II_IMPROVEMENT_THRESHOLD 3.5
#define KF_II_MAX 128.0
#define II_FACTOR 12.5
// Test for very low intra complexity which could cause false key frames
@@ -2735,29 +2765,21 @@ static int test_candidate_kf(TWO_PASS *twopass,
const FIRSTPASS_STATS *next_frame) {
int is_viable_kf = 0;
double pcnt_intra = 1.0 - this_frame->pcnt_inter;
- double modified_pcnt_inter =
- this_frame->pcnt_inter - this_frame->pcnt_neutral;
// Does the frame satisfy the primary criteria of a key frame?
// See above for an explanation of the test criteria.
// If so, then examine how well it predicts subsequent frames.
- if ((this_frame->pcnt_second_ref < SECOND_REF_USEAGE_THRESH) &&
- (next_frame->pcnt_second_ref < SECOND_REF_USEAGE_THRESH) &&
+ if (!detect_flash(twopass, -1) && !detect_flash(twopass, 0) &&
+ (this_frame->pcnt_second_ref < SECOND_REF_USEAGE_THRESH) &&
((this_frame->pcnt_inter < VERY_LOW_INTER_THRESH) ||
- ((pcnt_intra > MIN_INTRA_LEVEL) &&
- (pcnt_intra > (INTRA_VS_INTER_THRESH * modified_pcnt_inter)) &&
+ (slide_transition(this_frame, last_frame, next_frame)) ||
+ (((this_frame->coded_error > (next_frame->coded_error * 1.1)) &&
+ (this_frame->coded_error > (last_frame->coded_error * 1.1))) &&
+ (pcnt_intra > MIN_INTRA_LEVEL) &&
+ ((pcnt_intra + this_frame->pcnt_neutral) > 0.5) &&
((this_frame->intra_error /
DOUBLE_DIVIDE_CHECK(this_frame->coded_error)) <
- KF_II_ERR_THRESHOLD) &&
- ((fabs(last_frame->coded_error - this_frame->coded_error) /
- DOUBLE_DIVIDE_CHECK(this_frame->coded_error) >
- ERR_CHANGE_THRESHOLD) ||
- (fabs(last_frame->intra_error - this_frame->intra_error) /
- DOUBLE_DIVIDE_CHECK(this_frame->intra_error) >
- ERR_CHANGE_THRESHOLD) ||
- ((next_frame->intra_error /
- DOUBLE_DIVIDE_CHECK(next_frame->coded_error)) >
- II_IMPROVEMENT_THRESHOLD))))) {
+ KF_II_ERR_THRESHOLD)))) {
int i;
const FIRSTPASS_STATS *start_pos = twopass->stats_in;
FIRSTPASS_STATS local_next_frame = *next_frame;
@@ -2815,6 +2837,7 @@ static int test_candidate_kf(TWO_PASS *twopass,
#define FRAMES_TO_CHECK_DECAY 8
#define MIN_KF_TOT_BOOST 300
#define KF_BOOST_SCAN_MAX_FRAMES 32
+#define KF_ABS_ZOOM_THRESH 6.0
#ifdef AGGRESSIVE_VBR
#define KF_MAX_FRAME_BOOST 80.0
@@ -2839,13 +2862,16 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
double zero_motion_accumulator = 1.0;
double boost_score = 0.0;
double kf_mod_err = 0.0;
+ double kf_raw_err = 0.0;
double kf_group_err = 0.0;
double recent_loop_decay[FRAMES_TO_CHECK_DECAY];
double sr_accumulator = 0.0;
+ double abs_mv_in_out_accumulator = 0.0;
const double av_err = get_distribution_av_err(cpi, twopass);
vp9_zero(next_frame);
cpi->common.frame_type = KEY_FRAME;
+ rc->frames_since_key = 0;
// Reset the GF group data structures.
vp9_zero(*gf_group);
@@ -2856,7 +2882,6 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// Clear the alt ref active flag and last group multi arf flags as they
// can never be set for a key frame.
rc->source_alt_ref_active = 0;
- cpi->multi_arf_last_grp_enabled = 0;
// KF is always a GF so clear frames till next gf counter.
rc->frames_till_gf_update_due = 0;
@@ -2866,6 +2891,7 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
twopass->kf_group_bits = 0; // Total bits available to kf group
twopass->kf_group_error_left = 0.0; // Group modified error score.
+ kf_raw_err = this_frame->intra_error;
kf_mod_err =
calculate_norm_frame_score(cpi, twopass, oxcf, this_frame, av_err);
@@ -2950,18 +2976,6 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
rc->next_key_frame_forced = 0;
}
- if (is_two_pass_svc(cpi) && cpi->svc.number_temporal_layers > 1) {
- int count = (1 << (cpi->svc.number_temporal_layers - 1)) - 1;
- int new_frame_to_key = (rc->frames_to_key + count) & (~count);
- int j;
- for (j = 0; j < new_frame_to_key - rc->frames_to_key; ++j) {
- if (EOF == input_stats(twopass, this_frame)) break;
- kf_group_err +=
- calculate_norm_frame_score(cpi, twopass, oxcf, this_frame, av_err);
- }
- rc->frames_to_key = new_frame_to_key;
- }
-
// Special case for the last key frame of the file.
if (twopass->stats_in >= twopass->stats_in_end) {
// Accumulate kf group error.
@@ -3001,13 +3015,22 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
for (i = 0; i < (rc->frames_to_key - 1); ++i) {
if (EOF == input_stats(twopass, &next_frame)) break;
- if (i <= KF_BOOST_SCAN_MAX_FRAMES) {
+ // The zero motion test here ensures that if we mark a kf group as static
+ // it is static throughout, not just for the first KF_BOOST_SCAN_MAX_FRAMES.
+ // It also allows for a larger boost on long static groups.
+ if ((i <= KF_BOOST_SCAN_MAX_FRAMES) || (zero_motion_accumulator >= 0.99)) {
double frame_boost;
double zm_factor;
// Monitor for static sections.
- zero_motion_accumulator = VPXMIN(
- zero_motion_accumulator, get_zero_motion_factor(cpi, &next_frame));
+ // For the first frame in the kf group the second ref indicator is invalid.
+ if (i > 0) {
+ zero_motion_accumulator = VPXMIN(
+ zero_motion_accumulator, get_zero_motion_factor(cpi, &next_frame));
+ } else {
+ zero_motion_accumulator =
+ next_frame.pcnt_inter - next_frame.pcnt_motion;
+ }
// Factor 0.75-1.25 based on how much of frame is static.
zm_factor = (0.75 + (zero_motion_accumulator / 2.0));
@@ -3021,7 +3044,15 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
KF_MAX_FRAME_BOOST * zm_factor);
boost_score += frame_boost;
- if (frame_boost < 25.00) break;
+
+ // Measure of zoom. A large zoom tends to warrant a reduced boost.
+ abs_mv_in_out_accumulator +=
+ fabs(next_frame.mv_in_out_count * next_frame.pcnt_motion);
+
+ if ((frame_boost < 25.00) ||
+ (abs_mv_in_out_accumulator > KF_ABS_ZOOM_THRESH) ||
+ (sr_accumulator > (kf_raw_err * 1.50)))
+ break;
} else {
break;
}
@@ -3036,10 +3067,16 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
twopass->section_intra_rating = calculate_section_intra_ratio(
start_position, twopass->stats_in_end, rc->frames_to_key);
- // Apply various clamps for min and max boost
- rc->kf_boost = VPXMAX((int)boost_score, (rc->frames_to_key * 3));
- rc->kf_boost = VPXMAX(rc->kf_boost, MIN_KF_TOT_BOOST);
- rc->kf_boost = VPXMIN(rc->kf_boost, MAX_KF_TOT_BOOST);
+ // Special case for static / slide show content, but don't apply it
+ // if the kf group is very short.
+ if ((zero_motion_accumulator > 0.99) && (rc->frames_to_key > 8)) {
+ rc->kf_boost = MAX_KF_TOT_BOOST;
+ } else {
+ // Apply various clamps for min and max boost
+ rc->kf_boost = VPXMAX((int)boost_score, (rc->frames_to_key * 3));
+ rc->kf_boost = VPXMAX(rc->kf_boost, MIN_KF_TOT_BOOST);
+ rc->kf_boost = VPXMIN(rc->kf_boost, MAX_KF_TOT_BOOST);
+ }
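
In the non-static path the boost ends up as max(boost_score, 3 * frames_to_key), clamped to [MIN_KF_TOT_BOOST, MAX_KF_TOT_BOOST]. A quick sketch of the clamping (MAX_KF_TOT_BOOST's value is assumed here for illustration; MIN_KF_TOT_BOOST is the 300 defined in this patch):

#include <stdio.h>

#define MIN_KF_TOT_BOOST 300
#define MAX_KF_TOT_BOOST 5400 /* assumed value for the sketch */

static int clamp_kf_boost(double boost_score, int frames_to_key) {
  int kf_boost = (int)boost_score;
  if (kf_boost < frames_to_key * 3) kf_boost = frames_to_key * 3;
  if (kf_boost < MIN_KF_TOT_BOOST) kf_boost = MIN_KF_TOT_BOOST;
  if (kf_boost > MAX_KF_TOT_BOOST) kf_boost = MAX_KF_TOT_BOOST;
  return kf_boost;
}

int main(void) {
  printf("%d\n", clamp_kf_boost(80.0, 20));    /* 300: floor wins */
  printf("%d\n", clamp_kf_boost(9000.0, 100)); /* 5400: ceiling wins */
  return 0;
}
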
// Work out how many bits to allocate for the key frame itself.
kf_bits = calculate_boost_bits((rc->frames_to_key - 1), rc->kf_boost,
@@ -3066,60 +3103,12 @@ static void find_next_key_frame(VP9_COMP *cpi, FIRSTPASS_STATS *this_frame) {
}
}
-// Define the reference buffers that will be updated post encode.
-static void configure_buffer_updates(VP9_COMP *cpi) {
- TWO_PASS *const twopass = &cpi->twopass;
-
- cpi->rc.is_src_frame_alt_ref = 0;
- switch (twopass->gf_group.update_type[twopass->gf_group.index]) {
- case KF_UPDATE:
- cpi->refresh_last_frame = 1;
- cpi->refresh_golden_frame = 1;
- cpi->refresh_alt_ref_frame = 1;
- break;
- case LF_UPDATE:
- cpi->refresh_last_frame = 1;
- cpi->refresh_golden_frame = 0;
- cpi->refresh_alt_ref_frame = 0;
- break;
- case GF_UPDATE:
- cpi->refresh_last_frame = 1;
- cpi->refresh_golden_frame = 1;
- cpi->refresh_alt_ref_frame = 0;
- break;
- case OVERLAY_UPDATE:
- cpi->refresh_last_frame = 0;
- cpi->refresh_golden_frame = 1;
- cpi->refresh_alt_ref_frame = 0;
- cpi->rc.is_src_frame_alt_ref = 1;
- break;
- case ARF_UPDATE:
- cpi->refresh_last_frame = 0;
- cpi->refresh_golden_frame = 0;
- cpi->refresh_alt_ref_frame = 1;
- break;
- default: assert(0); break;
- }
- if (is_two_pass_svc(cpi)) {
- if (cpi->svc.temporal_layer_id > 0) {
- cpi->refresh_last_frame = 0;
- cpi->refresh_golden_frame = 0;
- }
- if (cpi->svc.layer_context[cpi->svc.spatial_layer_id].gold_ref_idx < 0)
- cpi->refresh_golden_frame = 0;
- if (cpi->alt_ref_source == NULL) cpi->refresh_alt_ref_frame = 0;
- }
-}
-
static int is_skippable_frame(const VP9_COMP *cpi) {
// If the current frame does not have non-zero motion vector detected in the
// first pass, and so do its previous and forward frames, then this frame
// can be skipped for partition check, and the partition size is assigned
// according to the variance
- const SVC *const svc = &cpi->svc;
- const TWO_PASS *const twopass =
- is_two_pass_svc(cpi) ? &svc->layer_context[svc->spatial_layer_id].twopass
- : &cpi->twopass;
+ const TWO_PASS *const twopass = &cpi->twopass;
return (!frame_is_intra_only(&cpi->common) &&
twopass->stats_in - 2 > twopass->stats_in_start &&
@@ -3140,38 +3129,25 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
GF_GROUP *const gf_group = &twopass->gf_group;
FIRSTPASS_STATS this_frame;
- int target_rate;
- LAYER_CONTEXT *const lc =
- is_two_pass_svc(cpi) ? &cpi->svc.layer_context[cpi->svc.spatial_layer_id]
- : 0;
-
if (!twopass->stats_in) return;
// If this is an arf frame then we dont want to read the stats file or
// advance the input pointer as we already have what we need.
if (gf_group->update_type[gf_group->index] == ARF_UPDATE) {
int target_rate;
- configure_buffer_updates(cpi);
+
+ vp9_configure_buffer_updates(cpi, gf_group->index);
+
target_rate = gf_group->bit_allocation[gf_group->index];
target_rate = vp9_rc_clamp_pframe_target_size(cpi, target_rate);
rc->base_frame_target = target_rate;
cm->frame_type = INTER_FRAME;
- if (lc != NULL) {
- if (cpi->svc.spatial_layer_id == 0) {
- lc->is_key_frame = 0;
- } else {
- lc->is_key_frame = cpi->svc.layer_context[0].is_key_frame;
-
- if (lc->is_key_frame) cpi->ref_frame_flags &= (~VP9_LAST_FLAG);
- }
- }
-
// Do the firstpass stats indicate that this frame is skippable for the
// partition search?
if (cpi->sf.allow_partition_search_skip && cpi->oxcf.pass == 2 &&
- (!cpi->use_svc || is_two_pass_svc(cpi))) {
+ !cpi->use_svc) {
cpi->partition_search_skippable_frame = is_skippable_frame(cpi);
}
@@ -3182,12 +3158,9 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
if (cpi->oxcf.rc_mode == VPX_Q) {
twopass->active_worst_quality = cpi->oxcf.cq_level;
- } else if (cm->current_video_frame == 0 ||
- (lc != NULL && lc->current_video_frame_in_layer == 0)) {
+ } else if (cm->current_video_frame == 0) {
const int frames_left =
- (int)(twopass->total_stats.count -
- ((lc != NULL) ? lc->current_video_frame_in_layer
- : cm->current_video_frame));
+ (int)(twopass->total_stats.count - cm->current_video_frame);
// Special case code for first frame.
const int section_target_bandwidth =
(int)(twopass->bits_left / frames_left);
@@ -3236,59 +3209,36 @@ void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
cm->frame_type = INTER_FRAME;
}
- if (lc != NULL) {
- if (cpi->svc.spatial_layer_id == 0) {
- lc->is_key_frame = (cm->frame_type == KEY_FRAME);
- if (lc->is_key_frame) {
- cpi->ref_frame_flags &=
- (~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG);
- lc->frames_from_key_frame = 0;
- // Encode an intra only empty frame since we have a key frame.
- cpi->svc.encode_intra_empty_frame = 1;
- }
- } else {
- cm->frame_type = INTER_FRAME;
- lc->is_key_frame = cpi->svc.layer_context[0].is_key_frame;
-
- if (lc->is_key_frame) {
- cpi->ref_frame_flags &= (~VP9_LAST_FLAG);
- lc->frames_from_key_frame = 0;
- }
- }
- }
-
// Define a new GF/ARF group. (Should always enter here for key frames).
if (rc->frames_till_gf_update_due == 0) {
define_gf_group(cpi, &this_frame);
rc->frames_till_gf_update_due = rc->baseline_gf_interval;
- if (lc != NULL) cpi->refresh_golden_frame = 1;
#if ARF_STATS_OUTPUT
{
FILE *fpfile;
fpfile = fopen("arf.stt", "a");
++arf_count;
- fprintf(fpfile, "%10d %10ld %10d %10d %10ld\n", cm->current_video_frame,
- rc->frames_till_gf_update_due, rc->kf_boost, arf_count,
- rc->gfu_boost);
+ fprintf(fpfile, "%10d %10d %10d %10d %10d %10d\n",
+ cm->current_video_frame, rc->frames_till_gf_update_due,
+ rc->kf_boost, arf_count, rc->gfu_boost, cm->frame_type);
fclose(fpfile);
}
#endif
}
- configure_buffer_updates(cpi);
+ vp9_configure_buffer_updates(cpi, gf_group->index);
// Do the firstpass stats indicate that this frame is skippable for the
// partition search?
if (cpi->sf.allow_partition_search_skip && cpi->oxcf.pass == 2 &&
- (!cpi->use_svc || is_two_pass_svc(cpi))) {
+ !cpi->use_svc) {
cpi->partition_search_skippable_frame = is_skippable_frame(cpi);
}
- target_rate = gf_group->bit_allocation[gf_group->index];
- rc->base_frame_target = target_rate;
+ rc->base_frame_target = gf_group->bit_allocation[gf_group->index];
// The multiplication by 256 reverses a scaling factor of (>> 8)
// applied when combining MB error values for the frame.
@@ -3329,8 +3279,7 @@ void vp9_twopass_postencode_update(VP9_COMP *cpi) {
rc->rate_error_estimate = 0;
}
- if (cpi->common.frame_type != KEY_FRAME &&
- !vp9_is_upper_layer_key_frame(cpi)) {
+ if (cpi->common.frame_type != KEY_FRAME) {
twopass->kf_group_bits -= bits_used;
twopass->last_kfgroup_zeromotion_pct = twopass->kf_zeromotion_pct;
}
diff --git a/libvpx/vp9/encoder/vp9_firstpass.h b/libvpx/vp9/encoder/vp9_firstpass.h
index 000ecd779..0807097ac 100644
--- a/libvpx/vp9/encoder/vp9_firstpass.h
+++ b/libvpx/vp9/encoder/vp9_firstpass.h
@@ -8,8 +8,10 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_ENCODER_VP9_FIRSTPASS_H_
-#define VP9_ENCODER_VP9_FIRSTPASS_H_
+#ifndef VPX_VP9_ENCODER_VP9_FIRSTPASS_H_
+#define VPX_VP9_ENCODER_VP9_FIRSTPASS_H_
+
+#include <assert.h>
#include "vp9/encoder/vp9_lookahead.h"
#include "vp9/encoder/vp9_ratectrl.h"
@@ -41,6 +43,8 @@ typedef struct {
#define INVALID_ROW -1
+#define MAX_ARF_LAYERS 6
+
typedef struct {
double frame_mb_intra_factor;
double frame_mb_brightness_factor;
@@ -107,7 +111,9 @@ typedef enum {
GF_UPDATE = 2,
ARF_UPDATE = 3,
OVERLAY_UPDATE = 4,
- FRAME_UPDATE_TYPES = 5
+ MID_OVERLAY_UPDATE = 5,
+ USE_BUF_FRAME = 6, // Use show existing frame, no ref buffer update
+ FRAME_UPDATE_TYPES = 7
} FRAME_UPDATE_TYPE;
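
The removed configure_buffer_updates() elsewhere in this patch documents which reference buffers each classic update type refreshes; the sketch below restates that mapping and extends it with the behaviour one would expect for the two new entries (MID_OVERLAY_UPDATE refreshing golden like an overlay, USE_BUF_FRAME refreshing nothing). The new-type behaviour is an assumption for illustration, not confirmed by this patch:

#include <stdio.h>

typedef enum {
  KF_UPDATE = 0, LF_UPDATE = 1, GF_UPDATE = 2, ARF_UPDATE = 3,
  OVERLAY_UPDATE = 4, MID_OVERLAY_UPDATE = 5, USE_BUF_FRAME = 6
} UPDATE_TYPE_SKETCH;

static void refresh_flags(UPDATE_TYPE_SKETCH t, int *last, int *gld, int *alt) {
  /* Per the old configure_buffer_updates(); new types are assumptions. */
  *last = (t == KF_UPDATE || t == LF_UPDATE || t == GF_UPDATE);
  *gld = (t == KF_UPDATE || t == GF_UPDATE || t == OVERLAY_UPDATE ||
          t == MID_OVERLAY_UPDATE);
  *alt = (t == KF_UPDATE || t == ARF_UPDATE);
}

int main(void) {
  int l, g, a;
  refresh_flags(ARF_UPDATE, &l, &g, &a);
  printf("ARF: last=%d gld=%d alt=%d\n", l, g, a); /* 0 0 1 */
  return 0;
}
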
#define FC_ANIMATION_THRESH 0.15
@@ -119,13 +125,23 @@ typedef enum {
typedef struct {
unsigned char index;
- unsigned char first_inter_index;
- RATE_FACTOR_LEVEL rf_level[(MAX_LAG_BUFFERS * 2) + 1];
- FRAME_UPDATE_TYPE update_type[(MAX_LAG_BUFFERS * 2) + 1];
- unsigned char arf_src_offset[(MAX_LAG_BUFFERS * 2) + 1];
- unsigned char arf_update_idx[(MAX_LAG_BUFFERS * 2) + 1];
- unsigned char arf_ref_idx[(MAX_LAG_BUFFERS * 2) + 1];
- int bit_allocation[(MAX_LAG_BUFFERS * 2) + 1];
+ RATE_FACTOR_LEVEL rf_level[MAX_STATIC_GF_GROUP_LENGTH + 2];
+ FRAME_UPDATE_TYPE update_type[MAX_STATIC_GF_GROUP_LENGTH + 2];
+ unsigned char arf_src_offset[MAX_STATIC_GF_GROUP_LENGTH + 2];
+ unsigned char layer_depth[MAX_STATIC_GF_GROUP_LENGTH + 2];
+ unsigned char frame_gop_index[MAX_STATIC_GF_GROUP_LENGTH + 2];
+ int bit_allocation[MAX_STATIC_GF_GROUP_LENGTH + 2];
+ int gfu_boost[MAX_STATIC_GF_GROUP_LENGTH + 2];
+
+ int frame_start;
+ int frame_end;
+ // TODO(jingning): The array size of arf_stack could be reduced.
+ int arf_index_stack[MAX_LAG_BUFFERS * 2];
+ int top_arf_idx;
+ int stack_size;
+ int gf_group_size;
+ int max_layer_depth;
+ int allowed_max_layer_depth;
} GF_GROUP;
typedef struct {
@@ -182,7 +198,6 @@ struct ThreadData;
struct TileDataEnc;
void vp9_init_first_pass(struct VP9_COMP *cpi);
-void vp9_rc_get_first_pass_params(struct VP9_COMP *cpi);
void vp9_first_pass(struct VP9_COMP *cpi, const struct lookahead_entry *source);
void vp9_end_first_pass(struct VP9_COMP *cpi);
@@ -194,7 +209,6 @@ void vp9_first_pass_encode_tile_mb_row(struct VP9_COMP *cpi,
void vp9_init_second_pass(struct VP9_COMP *cpi);
void vp9_rc_get_second_pass_params(struct VP9_COMP *cpi);
-void vp9_twopass_postencode_update(struct VP9_COMP *cpi);
// Post encode update of the rate control parameters for 2-pass
void vp9_twopass_postencode_update(struct VP9_COMP *cpi);
@@ -206,4 +220,4 @@ void calculate_coded_size(struct VP9_COMP *cpi, int *scaled_frame_width,
} // extern "C"
#endif
-#endif // VP9_ENCODER_VP9_FIRSTPASS_H_
+#endif // VPX_VP9_ENCODER_VP9_FIRSTPASS_H_
diff --git a/libvpx/vp9/encoder/vp9_job_queue.h b/libvpx/vp9/encoder/vp9_job_queue.h
index 89c08f207..ad09c1119 100644
--- a/libvpx/vp9/encoder/vp9_job_queue.h
+++ b/libvpx/vp9/encoder/vp9_job_queue.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_ENCODER_VP9_JOB_QUEUE_H_
-#define VP9_ENCODER_VP9_JOB_QUEUE_H_
+#ifndef VPX_VP9_ENCODER_VP9_JOB_QUEUE_H_
+#define VPX_VP9_ENCODER_VP9_JOB_QUEUE_H_
typedef enum {
FIRST_PASS_JOB,
@@ -43,4 +43,4 @@ typedef struct {
int num_jobs_acquired;
} JobQueueHandle;
-#endif // VP9_ENCODER_VP9_JOB_QUEUE_H_
+#endif // VPX_VP9_ENCODER_VP9_JOB_QUEUE_H_
diff --git a/libvpx/vp9/encoder/vp9_lookahead.h b/libvpx/vp9/encoder/vp9_lookahead.h
index 88be0ffcd..c627bede2 100644
--- a/libvpx/vp9/encoder/vp9_lookahead.h
+++ b/libvpx/vp9/encoder/vp9_lookahead.h
@@ -8,17 +8,13 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_ENCODER_VP9_LOOKAHEAD_H_
-#define VP9_ENCODER_VP9_LOOKAHEAD_H_
+#ifndef VPX_VP9_ENCODER_VP9_LOOKAHEAD_H_
+#define VPX_VP9_ENCODER_VP9_LOOKAHEAD_H_
#include "vpx_scale/yv12config.h"
#include "vpx/vpx_encoder.h"
#include "vpx/vpx_integer.h"
-#if CONFIG_SPATIAL_SVC
-#include "vpx/vp8cx.h"
-#endif
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -115,4 +111,4 @@ unsigned int vp9_lookahead_depth(struct lookahead_ctx *ctx);
} // extern "C"
#endif
-#endif // VP9_ENCODER_VP9_LOOKAHEAD_H_
+#endif // VPX_VP9_ENCODER_VP9_LOOKAHEAD_H_
diff --git a/libvpx/vp9/encoder/vp9_mbgraph.c b/libvpx/vp9/encoder/vp9_mbgraph.c
index 46d626def..831c79c17 100644
--- a/libvpx/vp9/encoder/vp9_mbgraph.c
+++ b/libvpx/vp9/encoder/vp9_mbgraph.c
@@ -57,11 +57,12 @@ static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, const MV *ref_mv,
{
uint32_t distortion;
uint32_t sse;
+ // TODO(yunqing): may use higher tap interp filter than 2 taps if needed.
cpi->find_fractional_mv_step(
x, dst_mv, ref_mv, cpi->common.allow_high_precision_mv, x->errorperbit,
- &v_fn_ptr, 0, mv_sf->subpel_iters_per_step,
+ &v_fn_ptr, 0, mv_sf->subpel_search_level,
cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0,
- 0);
+ 0, USE_2_TAPS);
}
xd->mi[0]->mode = NEWMV;
diff --git a/libvpx/vp9/encoder/vp9_mbgraph.h b/libvpx/vp9/encoder/vp9_mbgraph.h
index df2fb98ef..7b629861d 100644
--- a/libvpx/vp9/encoder/vp9_mbgraph.h
+++ b/libvpx/vp9/encoder/vp9_mbgraph.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_ENCODER_VP9_MBGRAPH_H_
-#define VP9_ENCODER_VP9_MBGRAPH_H_
+#ifndef VPX_VP9_ENCODER_VP9_MBGRAPH_H_
+#define VPX_VP9_ENCODER_VP9_MBGRAPH_H_
#ifdef __cplusplus
extern "C" {
@@ -25,7 +25,9 @@ typedef struct {
} ref[MAX_REF_FRAMES];
} MBGRAPH_MB_STATS;
-typedef struct { MBGRAPH_MB_STATS *mb_stats; } MBGRAPH_FRAME_STATS;
+typedef struct {
+ MBGRAPH_MB_STATS *mb_stats;
+} MBGRAPH_FRAME_STATS;
struct VP9_COMP;
@@ -35,4 +37,4 @@ void vp9_update_mbgraph_stats(struct VP9_COMP *cpi);
} // extern "C"
#endif
-#endif // VP9_ENCODER_VP9_MBGRAPH_H_
+#endif // VPX_VP9_ENCODER_VP9_MBGRAPH_H_
diff --git a/libvpx/vp9/encoder/vp9_mcomp.c b/libvpx/vp9/encoder/vp9_mcomp.c
index 44f01be25..5a6717ab2 100644
--- a/libvpx/vp9/encoder/vp9_mcomp.c
+++ b/libvpx/vp9/encoder/vp9_mcomp.c
@@ -263,27 +263,6 @@ static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) {
} \
}
-// TODO(yunqingwang): SECOND_LEVEL_CHECKS_BEST was a rewrote of
-// SECOND_LEVEL_CHECKS, and SECOND_LEVEL_CHECKS should be rewritten
-// later in the same way.
-#define SECOND_LEVEL_CHECKS_BEST \
- { \
- unsigned int second; \
- int br0 = br; \
- int bc0 = bc; \
- assert(tr == br || tc == bc); \
- if (tr == br && tc != bc) { \
- kc = bc - tc; \
- } else if (tr != br && tc == bc) { \
- kr = br - tr; \
- } \
- CHECK_BETTER(second, br0 + kr, bc0); \
- CHECK_BETTER(second, br0, bc0 + kc); \
- if (br0 != br || bc0 != bc) { \
- CHECK_BETTER(second, br0 + kr, bc0 + kc); \
- } \
- }
-
#define SETUP_SUBPEL_SEARCH \
const uint8_t *const z = x->plane[0].src.buf; \
const int src_stride = x->plane[0].src.stride; \
@@ -329,8 +308,8 @@ static unsigned int setup_center_error(
if (second_pred != NULL) {
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
DECLARE_ALIGNED(16, uint16_t, comp_pred16[64 * 64]);
- vpx_highbd_comp_avg_pred(comp_pred16, second_pred, w, h, y + offset,
- y_stride);
+ vpx_highbd_comp_avg_pred(comp_pred16, CONVERT_TO_SHORTPTR(second_pred), w,
+ h, CONVERT_TO_SHORTPTR(y + offset), y_stride);
besterr =
vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src, src_stride, sse1);
} else {
@@ -388,14 +367,12 @@ static void get_cost_surf_min(int *cost_list, int *ir, int *ic, int bits) {
*ir = (int)divide_and_round(x1 * b, y1);
}
-uint32_t vp9_skip_sub_pixel_tree(const MACROBLOCK *x, MV *bestmv,
- const MV *ref_mv, int allow_hp,
- int error_per_bit,
- const vp9_variance_fn_ptr_t *vfp,
- int forced_stop, int iters_per_step,
- int *cost_list, int *mvjcost, int *mvcost[2],
- uint32_t *distortion, uint32_t *sse1,
- const uint8_t *second_pred, int w, int h) {
+uint32_t vp9_skip_sub_pixel_tree(
+ const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
+ int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
+ int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
+ uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
+ int h, int use_accurate_subpel_search) {
SETUP_SUBPEL_SEARCH;
besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
src_stride, y, y_stride, second_pred, w, h,
@@ -418,6 +395,7 @@ uint32_t vp9_skip_sub_pixel_tree(const MACROBLOCK *x, MV *bestmv,
(void)sse;
(void)thismse;
(void)cost_list;
+ (void)use_accurate_subpel_search;
return besterr;
}
@@ -427,7 +405,7 @@ uint32_t vp9_find_best_sub_pixel_tree_pruned_evenmore(
int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
- int h) {
+ int h, int use_accurate_subpel_search) {
SETUP_SUBPEL_SEARCH;
besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
src_stride, y, y_stride, second_pred, w, h,
@@ -439,6 +417,7 @@ uint32_t vp9_find_best_sub_pixel_tree_pruned_evenmore(
(void)allow_hp;
(void)forced_stop;
(void)hstep;
+ (void)use_accurate_subpel_search;
if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
@@ -492,8 +471,10 @@ uint32_t vp9_find_best_sub_pixel_tree_pruned_more(
int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
- int h) {
+ int h, int use_accurate_subpel_search) {
SETUP_SUBPEL_SEARCH;
+ (void)use_accurate_subpel_search;
+
besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
src_stride, y, y_stride, second_pred, w, h,
offset, mvjcost, mvcost, sse1, distortion);
@@ -552,8 +533,10 @@ uint32_t vp9_find_best_sub_pixel_tree_pruned(
int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
- int h) {
+ int h, int use_accurate_subpel_search) {
SETUP_SUBPEL_SEARCH;
+ (void)use_accurate_subpel_search;
+
besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
src_stride, y, y_stride, second_pred, w, h,
offset, mvjcost, mvcost, sse1, distortion);
@@ -638,12 +621,119 @@ static const MV search_step_table[12] = {
};
/* clang-format on */
+static int accurate_sub_pel_search(
+ const MACROBLOCKD *xd, const MV *this_mv, const struct scale_factors *sf,
+ const InterpKernel *kernel, const vp9_variance_fn_ptr_t *vfp,
+ const uint8_t *const src_address, const int src_stride,
+ const uint8_t *const pre_address, int y_stride, const uint8_t *second_pred,
+ int w, int h, uint32_t *sse) {
+#if CONFIG_VP9_HIGHBITDEPTH
+ uint64_t besterr;
+ assert(sf->x_step_q4 == 16 && sf->y_step_q4 == 16);
+ assert(w != 0 && h != 0);
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ DECLARE_ALIGNED(16, uint16_t, pred16[64 * 64]);
+ vp9_highbd_build_inter_predictor(CONVERT_TO_SHORTPTR(pre_address), y_stride,
+ pred16, w, this_mv, sf, w, h, 0, kernel,
+ MV_PRECISION_Q3, 0, 0, xd->bd);
+ if (second_pred != NULL) {
+ DECLARE_ALIGNED(16, uint16_t, comp_pred16[64 * 64]);
+ vpx_highbd_comp_avg_pred(comp_pred16, CONVERT_TO_SHORTPTR(second_pred), w,
+ h, pred16, w);
+ besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src_address,
+ src_stride, sse);
+ } else {
+ besterr =
+ vfp->vf(CONVERT_TO_BYTEPTR(pred16), w, src_address, src_stride, sse);
+ }
+ } else {
+ DECLARE_ALIGNED(16, uint8_t, pred[64 * 64]);
+ vp9_build_inter_predictor(pre_address, y_stride, pred, w, this_mv, sf, w, h,
+ 0, kernel, MV_PRECISION_Q3, 0, 0);
+ if (second_pred != NULL) {
+ DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]);
+ vpx_comp_avg_pred(comp_pred, second_pred, w, h, pred, w);
+ besterr = vfp->vf(comp_pred, w, src_address, src_stride, sse);
+ } else {
+ besterr = vfp->vf(pred, w, src_address, src_stride, sse);
+ }
+ }
+ if (besterr >= UINT_MAX) return UINT_MAX;
+ return (int)besterr;
+#else
+ int besterr;
+ DECLARE_ALIGNED(16, uint8_t, pred[64 * 64]);
+ assert(sf->x_step_q4 == 16 && sf->y_step_q4 == 16);
+ assert(w != 0 && h != 0);
+ (void)xd;
+
+ vp9_build_inter_predictor(pre_address, y_stride, pred, w, this_mv, sf, w, h,
+ 0, kernel, MV_PRECISION_Q3, 0, 0);
+ if (second_pred != NULL) {
+ DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]);
+ vpx_comp_avg_pred(comp_pred, second_pred, w, h, pred, w);
+ besterr = vfp->vf(comp_pred, w, src_address, src_stride, sse);
+ } else {
+ besterr = vfp->vf(pred, w, src_address, src_stride, sse);
+ }
+ return besterr;
+#endif // CONFIG_VP9_HIGHBITDEPTH
+}
+
+// TODO(yunqing): this part can be further refactored.
+#if CONFIG_VP9_HIGHBITDEPTH
+/* checks if (r, c) has better score than previous best */
+#define CHECK_BETTER1(v, r, c) \
+ if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
+ int64_t tmpmse; \
+ const MV mv = { r, c }; \
+ const MV ref_mv = { rr, rc }; \
+ thismse = \
+ accurate_sub_pel_search(xd, &mv, x->me_sf, kernel, vfp, z, src_stride, \
+ y, y_stride, second_pred, w, h, &sse); \
+ tmpmse = thismse; \
+ tmpmse += mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit); \
+ if (tmpmse >= INT_MAX) { \
+ v = INT_MAX; \
+ } else if ((v = (uint32_t)tmpmse) < besterr) { \
+ besterr = v; \
+ br = r; \
+ bc = c; \
+ *distortion = thismse; \
+ *sse1 = sse; \
+ } \
+ } else { \
+ v = INT_MAX; \
+ }
+#else
+/* checks if (r, c) has better score than previous best */
+#define CHECK_BETTER1(v, r, c) \
+ if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \
+ const MV mv = { r, c }; \
+ const MV ref_mv = { rr, rc }; \
+ thismse = \
+ accurate_sub_pel_search(xd, &mv, x->me_sf, kernel, vfp, z, src_stride, \
+ y, y_stride, second_pred, w, h, &sse); \
+ if ((v = mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit) + \
+ thismse) < besterr) { \
+ besterr = v; \
+ br = r; \
+ bc = c; \
+ *distortion = thismse; \
+ *sse1 = sse; \
+ } \
+ } else { \
+ v = INT_MAX; \
+ }
+
+#endif
+
uint32_t vp9_find_best_sub_pixel_tree(
const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
- int h) {
+ int h, int use_accurate_subpel_search) {
const uint8_t *const z = x->plane[0].src.buf;
const uint8_t *const src_address = z;
const int src_stride = x->plane[0].src.stride;
@@ -671,6 +761,17 @@ uint32_t vp9_find_best_sub_pixel_tree(
int kr, kc;
MvLimits subpel_mv_limits;
+ // TODO(yunqing): need to add 4-tap filter optimization to speed up the
+ // encoder.
+ const InterpKernel *kernel =
+ (use_accurate_subpel_search > 0)
+ ? ((use_accurate_subpel_search == USE_4_TAPS)
+ ? vp9_filter_kernels[FOURTAP]
+ : ((use_accurate_subpel_search == USE_8_TAPS)
+ ? vp9_filter_kernels[EIGHTTAP]
+ : vp9_filter_kernels[EIGHTTAP_SHARP]))
+ : vp9_filter_kernels[BILINEAR];
+
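
The nested conditional above maps the subpel search level to an interpolation kernel. An equivalent, more explicit form as a sketch, assuming USE_2_TAPS == 0 and that the only other positive level is USE_8_TAPS_SHARP (that constant is an assumption here, not shown in this patch):

/* Sketch: the same kernel selection written as a switch. */
static const InterpKernel *select_subpel_kernel(int level) {
  switch (level) {
    case USE_4_TAPS: return vp9_filter_kernels[FOURTAP];
    case USE_8_TAPS: return vp9_filter_kernels[EIGHTTAP];
    case USE_2_TAPS: return vp9_filter_kernels[BILINEAR]; /* level 0 */
    default: return vp9_filter_kernels[EIGHTTAP_SHARP];
  }
}
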
vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv);
minc = subpel_mv_limits.col_min;
maxc = subpel_mv_limits.col_max;
@@ -695,16 +796,25 @@ uint32_t vp9_find_best_sub_pixel_tree(
tr = br + search_step[idx].row;
tc = bc + search_step[idx].col;
if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
- const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
MV this_mv;
this_mv.row = tr;
this_mv.col = tc;
- if (second_pred == NULL)
- thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address,
- src_stride, &sse);
- else
- thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
- src_address, src_stride, &sse, second_pred);
+
+ if (use_accurate_subpel_search) {
+ thismse = accurate_sub_pel_search(xd, &this_mv, x->me_sf, kernel, vfp,
+ src_address, src_stride, y,
+ y_stride, second_pred, w, h, &sse);
+ } else {
+ const uint8_t *const pre_address =
+ y + (tr >> 3) * y_stride + (tc >> 3);
+ if (second_pred == NULL)
+ thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr),
+ src_address, src_stride, &sse);
+ else
+ thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
+ src_address, src_stride, &sse, second_pred);
+ }
+
cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost,
mvcost, error_per_bit);
@@ -726,14 +836,21 @@ uint32_t vp9_find_best_sub_pixel_tree(
tc = bc + kc;
tr = br + kr;
if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) {
- const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
MV this_mv = { tr, tc };
- if (second_pred == NULL)
- thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address,
- src_stride, &sse);
- else
- thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr), src_address,
- src_stride, &sse, second_pred);
+ if (use_accurate_subpel_search) {
+ thismse = accurate_sub_pel_search(xd, &this_mv, x->me_sf, kernel, vfp,
+ src_address, src_stride, y, y_stride,
+ second_pred, w, h, &sse);
+ } else {
+ const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3);
+ if (second_pred == NULL)
+ thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address,
+ src_stride, &sse);
+ else
+ thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr),
+ src_address, src_stride, &sse, second_pred);
+ }
+
cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost,
error_per_bit);
@@ -755,10 +872,48 @@ uint32_t vp9_find_best_sub_pixel_tree(
bc = tc;
}
- if (iters_per_step > 1 && best_idx != -1) SECOND_LEVEL_CHECKS_BEST;
+ if (iters_per_step > 0 && best_idx != -1) {
+ unsigned int second;
+ const int br0 = br;
+ const int bc0 = bc;
+ assert(tr == br || tc == bc);
+
+ if (tr == br && tc != bc) {
+ kc = bc - tc;
+ if (iters_per_step == 1) {
+ if (use_accurate_subpel_search) {
+ CHECK_BETTER1(second, br0, bc0 + kc);
+ } else {
+ CHECK_BETTER(second, br0, bc0 + kc);
+ }
+ }
+ } else if (tr != br && tc == bc) {
+ kr = br - tr;
+ if (iters_per_step == 1) {
+ if (use_accurate_subpel_search) {
+ CHECK_BETTER1(second, br0 + kr, bc0);
+ } else {
+ CHECK_BETTER(second, br0 + kr, bc0);
+ }
+ }
+ }
- tr = br;
- tc = bc;
+ if (iters_per_step > 1) {
+ if (use_accurate_subpel_search) {
+ CHECK_BETTER1(second, br0 + kr, bc0);
+ CHECK_BETTER1(second, br0, bc0 + kc);
+ if (br0 != br || bc0 != bc) {
+ CHECK_BETTER1(second, br0 + kr, bc0 + kc);
+ }
+ } else {
+ CHECK_BETTER(second, br0 + kr, bc0);
+ CHECK_BETTER(second, br0, bc0 + kc);
+ if (br0 != br || bc0 != bc) {
+ CHECK_BETTER(second, br0 + kr, bc0 + kc);
+ }
+ }
+ }
+ }
search_step += 4;
hstep >>= 1;
@@ -780,6 +935,7 @@ uint32_t vp9_find_best_sub_pixel_tree(
}
#undef CHECK_BETTER
+#undef CHECK_BETTER1
static INLINE int check_bounds(const MvLimits *mv_limits, int row, int col,
int range) {
@@ -1576,6 +1732,183 @@ static int exhuastive_mesh_search(const MACROBLOCK *x, MV *ref_mv, MV *best_mv,
return best_sad;
}
+#if CONFIG_NON_GREEDY_MV
+double vp9_nb_mvs_inconsistency(const MV *mv, const int_mv *nb_mvs,
+ int mv_num) {
+ int i;
+ int update = 0;
+ double best_cost = 0;
+ vpx_clear_system_state();
+ for (i = 0; i < mv_num; ++i) {
+ if (nb_mvs[i].as_int != INVALID_MV) {
+ MV nb_mv = nb_mvs[i].as_mv;
+ const double row_diff = mv->row - nb_mv.row;
+ const double col_diff = mv->col - nb_mv.col;
+ double cost = row_diff * row_diff + col_diff * col_diff;
+ cost = log2(1 + cost);
+ if (update == 0) {
+ best_cost = cost;
+ update = 1;
+ } else {
+ best_cost = cost < best_cost ? cost : best_cost;
+ }
+ }
+ }
+ return best_cost;
+}
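
The cost above is the minimum, over the valid neighbours, of log2(1 + d^2), where d is the Euclidean distance between the candidate MV and a neighbour MV. A self-contained check with hypothetical MVs:

#include <math.h>
#include <stdio.h>

int main(void) {
  /* Candidate MV (row, col) and two neighbour MVs. */
  const double mv[2] = { 7.0, 8.0 };
  const double nb[2][2] = { { 4.0, 4.0 }, { 0.0, 0.0 } };
  double best = -1.0;
  int i;
  for (i = 0; i < 2; ++i) {
    const double dr = mv[0] - nb[i][0], dc = mv[1] - nb[i][1];
    const double cost = log2(1.0 + dr * dr + dc * dc);
    if (best < 0.0 || cost < best) best = cost;
  }
  printf("inconsistency = %f\n", best);
  /* min(log2(26), log2(114)) = log2(26), about 4.70 */
  return 0;
}
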
+
+double vp9_diamond_search_sad_new(const MACROBLOCK *x,
+ const search_site_config *cfg,
+ const MV *init_full_mv, MV *best_full_mv,
+ double *best_mv_dist, double *best_mv_cost,
+ int search_param, double lambda, int *num00,
+ const vp9_variance_fn_ptr_t *fn_ptr,
+ const int_mv *nb_full_mvs, int full_mv_num) {
+ int i, j, step;
+
+ const MACROBLOCKD *const xd = &x->e_mbd;
+ uint8_t *what = x->plane[0].src.buf;
+ const int what_stride = x->plane[0].src.stride;
+ const uint8_t *in_what;
+ const int in_what_stride = xd->plane[0].pre[0].stride;
+ const uint8_t *best_address;
+
+ double bestsad;
+ int best_site = -1;
+ int last_site = -1;
+
+ // search_param determines the length of the initial step and hence the number
+ // of iterations.
+ // 0 = initial step (MAX_FIRST_STEP) pel
+ // 1 = (MAX_FIRST_STEP/2) pel,
+ // 2 = (MAX_FIRST_STEP/4) pel...
+ // const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step];
+ const MV *ss_mv = &cfg->ss_mv[search_param * cfg->searches_per_step];
+ const intptr_t *ss_os = &cfg->ss_os[search_param * cfg->searches_per_step];
+ const int tot_steps = cfg->total_steps - search_param;
+ vpx_clear_system_state();
+
+ *best_full_mv = *init_full_mv;
+ clamp_mv(best_full_mv, x->mv_limits.col_min, x->mv_limits.col_max,
+ x->mv_limits.row_min, x->mv_limits.row_max);
+ *num00 = 0;
+
+ // Work out the start point for the search
+ in_what = xd->plane[0].pre[0].buf + best_full_mv->row * in_what_stride +
+ best_full_mv->col;
+ best_address = in_what;
+
+ // Check the starting position
+ *best_mv_dist = fn_ptr->sdf(what, what_stride, in_what, in_what_stride);
+ *best_mv_cost =
+ vp9_nb_mvs_inconsistency(best_full_mv, nb_full_mvs, full_mv_num);
+ bestsad = (*best_mv_dist) + lambda * (*best_mv_cost);
+
+ i = 0;
+
+ for (step = 0; step < tot_steps; step++) {
+ int all_in = 1, t;
+
+ // all_in is true if every one of the points we are checking is within
+ // the bounds of the image.
+ all_in &= ((best_full_mv->row + ss_mv[i].row) > x->mv_limits.row_min);
+ all_in &= ((best_full_mv->row + ss_mv[i + 1].row) < x->mv_limits.row_max);
+ all_in &= ((best_full_mv->col + ss_mv[i + 2].col) > x->mv_limits.col_min);
+ all_in &= ((best_full_mv->col + ss_mv[i + 3].col) < x->mv_limits.col_max);
+
+ // If all the pixels are within the bounds we don't check whether the
+ // search point is valid in this loop, otherwise we check each point
+ // for validity.
+ if (all_in) {
+ unsigned int sad_array[4];
+
+ for (j = 0; j < cfg->searches_per_step; j += 4) {
+ unsigned char const *block_offset[4];
+
+ for (t = 0; t < 4; t++) block_offset[t] = ss_os[i + t] + best_address;
+
+ fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride,
+ sad_array);
+
+ for (t = 0; t < 4; t++, i++) {
+ const MV this_mv = { best_full_mv->row + ss_mv[i].row,
+ best_full_mv->col + ss_mv[i].col };
+ const double mv_dist = sad_array[t];
+ const double mv_cost =
+ vp9_nb_mvs_inconsistency(&this_mv, nb_full_mvs, full_mv_num);
+ double thissad = mv_dist + lambda * mv_cost;
+ if (thissad < bestsad) {
+ bestsad = thissad;
+ *best_mv_dist = mv_dist;
+ *best_mv_cost = mv_cost;
+ best_site = i;
+ }
+ }
+ }
+ } else {
+ for (j = 0; j < cfg->searches_per_step; j++) {
+ // Trap illegal vectors
+ const MV this_mv = { best_full_mv->row + ss_mv[i].row,
+ best_full_mv->col + ss_mv[i].col };
+
+ if (is_mv_in(&x->mv_limits, &this_mv)) {
+ const uint8_t *const check_here = ss_os[i] + best_address;
+ const double mv_dist =
+ fn_ptr->sdf(what, what_stride, check_here, in_what_stride);
+ const double mv_cost =
+ vp9_nb_mvs_inconsistency(&this_mv, nb_full_mvs, full_mv_num);
+ double thissad = mv_dist + lambda * mv_cost;
+ if (thissad < bestsad) {
+ bestsad = thissad;
+ *best_mv_dist = mv_dist;
+ *best_mv_cost = mv_cost;
+ best_site = i;
+ }
+ }
+ i++;
+ }
+ }
+ if (best_site != last_site) {
+ best_full_mv->row += ss_mv[best_site].row;
+ best_full_mv->col += ss_mv[best_site].col;
+ best_address += ss_os[best_site];
+ last_site = best_site;
+ } else if (best_address == in_what) {
+ (*num00)++;
+ }
+ }
+ return bestsad;
+}
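
Each candidate above is scored as mv_dist + lambda * mv_cost, so lambda controls how strongly the search is pulled toward the neighbours' motion. A tiny sketch with made-up numbers showing the trade-off flip:

#include <stdio.h>

int main(void) {
  /* Candidate 0 has the lower SAD; candidate 1 is more consistent with
     neighbouring MVs. lambda decides which one wins. */
  const double dist[2] = { 1000.0, 1100.0 };
  const double cost[2] = { 6.8, 4.7 };
  double lambda;
  for (lambda = 0.0; lambda <= 100.0; lambda += 100.0) {
    const double s0 = dist[0] + lambda * cost[0];
    const double s1 = dist[1] + lambda * cost[1];
    printf("lambda=%5.1f -> pick candidate %d\n", lambda, s1 < s0 ? 1 : 0);
  }
  /* lambda=0 picks 0 (1000 < 1100); lambda=100 picks 1 (1570 < 1680). */
  return 0;
}
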
+
+void vp9_prepare_nb_full_mvs(const TplDepFrame *tpl_frame, int mi_row,
+ int mi_col, int rf_idx, BLOCK_SIZE bsize,
+ int_mv *nb_full_mvs) {
+ const int mi_width = num_8x8_blocks_wide_lookup[bsize];
+ const int mi_height = num_8x8_blocks_high_lookup[bsize];
+ const int dirs[NB_MVS_NUM][2] = { { -1, 0 }, { 0, -1 }, { 1, 0 }, { 0, 1 } };
+ int i;
+ for (i = 0; i < NB_MVS_NUM; ++i) {
+ int r = dirs[i][0] * mi_height;
+ int c = dirs[i][1] * mi_width;
+ if (mi_row + r >= 0 && mi_row + r < tpl_frame->mi_rows && mi_col + c >= 0 &&
+ mi_col + c < tpl_frame->mi_cols) {
+ const TplDepStats *tpl_ptr =
+ &tpl_frame
+ ->tpl_stats_ptr[(mi_row + r) * tpl_frame->stride + mi_col + c];
+ int_mv *mv =
+ get_pyramid_mv(tpl_frame, rf_idx, bsize, mi_row + r, mi_col + c);
+ if (tpl_ptr->ready[rf_idx]) {
+ nb_full_mvs[i].as_mv = get_full_mv(&mv->as_mv);
+ } else {
+ nb_full_mvs[i].as_int = INVALID_MV;
+ }
+ } else {
+ nb_full_mvs[i].as_int = INVALID_MV;
+ }
+ }
+}
+#endif // CONFIG_NON_GREEDY_MV
+
int vp9_diamond_search_sad_c(const MACROBLOCK *x, const search_site_config *cfg,
MV *ref_mv, MV *best_mv, int search_param,
int sad_per_bit, int *num00,
@@ -1785,12 +2118,15 @@ static int vector_match(int16_t *ref, int16_t *src, int bwl) {
}
static const MV search_pos[4] = {
- { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 },
+ { -1, 0 },
+ { 0, -1 },
+ { 0, 1 },
+ { 1, 0 },
};
unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, int mi_row,
- int mi_col) {
+ int mi_col, const MV *ref_mv) {
MACROBLOCKD *xd = &x->e_mbd;
MODE_INFO *mi = xd->mi[0];
struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0 } };
@@ -1812,6 +2148,7 @@ unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x,
const int norm_factor = 3 + (bw >> 5);
const YV12_BUFFER_CONFIG *scaled_ref_frame =
vp9_get_scaled_ref_frame(cpi, mi->ref_frame[0]);
+ MvLimits subpel_mv_limits;
if (scaled_ref_frame) {
int i;
@@ -1876,7 +2213,10 @@ unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x,
{
const uint8_t *const pos[4] = {
- ref_buf - ref_stride, ref_buf - 1, ref_buf + 1, ref_buf + ref_stride,
+ ref_buf - ref_stride,
+ ref_buf - 1,
+ ref_buf + 1,
+ ref_buf + ref_stride,
};
cpi->fn_ptr[bsize].sdx4df(src_buf, src_stride, pos, ref_stride, this_sad);
@@ -1911,6 +2251,10 @@ unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x,
tmp_mv->row *= 8;
tmp_mv->col *= 8;
+ vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv);
+ clamp_mv(tmp_mv, subpel_mv_limits.col_min, subpel_mv_limits.col_max,
+ subpel_mv_limits.row_min, subpel_mv_limits.row_max);
+
if (scaled_ref_frame) {
int i;
for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i];
@@ -1919,6 +2263,74 @@ unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x,
return best_sad;
}
+#if CONFIG_NON_GREEDY_MV
+// Runs sequence of diamond searches in smaller steps for RD.
+/* do_refine: If last step (1-away) of n-step search doesn't pick the center
+ point as the best match, we will do a final 1-away diamond
+ refining search */
+double vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x,
+ MV *mvp_full, int step_param, double lambda,
+ int do_refine,
+ const vp9_variance_fn_ptr_t *fn_ptr,
+ const int_mv *nb_full_mvs, int full_mv_num,
+ MV *best_mv, double *best_mv_dist,
+ double *best_mv_cost) {
+ int n, num00 = 0;
+ double thissme;
+ double bestsme;
+ const int further_steps = MAX_MVSEARCH_STEPS - 1 - step_param;
+ vpx_clear_system_state();
+ bestsme = vp9_diamond_search_sad_new(
+ x, &cpi->ss_cfg, mvp_full, best_mv, best_mv_dist, best_mv_cost,
+ step_param, lambda, &n, fn_ptr, nb_full_mvs, full_mv_num);
+
+ // If there will be no more n-step searches, check whether the refining
+ // search is needed.
+ if (n > further_steps) do_refine = 0;
+
+ while (n < further_steps) {
+ ++n;
+ if (num00) {
+ num00--;
+ } else {
+ MV temp_mv;
+ double mv_dist;
+ double mv_cost;
+ thissme = vp9_diamond_search_sad_new(
+ x, &cpi->ss_cfg, mvp_full, &temp_mv, &mv_dist, &mv_cost,
+ step_param + n, lambda, &num00, fn_ptr, nb_full_mvs, full_mv_num);
+ // Check whether the refining search is needed.
+ if (num00 > further_steps - n) do_refine = 0;
+
+ if (thissme < bestsme) {
+ bestsme = thissme;
+ *best_mv = temp_mv;
+ *best_mv_dist = mv_dist;
+ *best_mv_cost = mv_cost;
+ }
+ }
+ }
+
+ // Final 1-away diamond refining search.
+ if (do_refine) {
+ const int search_range = 8;
+ MV temp_mv = *best_mv;
+ double mv_dist;
+ double mv_cost;
+ thissme = vp9_refining_search_sad_new(x, &temp_mv, &mv_dist, &mv_cost,
+ lambda, search_range, fn_ptr,
+ nb_full_mvs, full_mv_num);
+ if (thissme < bestsme) {
+ bestsme = thissme;
+ *best_mv = temp_mv;
+ *best_mv_dist = mv_dist;
+ *best_mv_cost = mv_cost;
+ }
+ }
+ return bestsme;
+}
+#endif // CONFIG_NON_GREEDY_MV
+
// Runs sequence of diamond searches in smaller steps for RD.
/* do_refine: If last step (1-away) of n-step search doesn't pick the center
point as the best match, we will do a final 1-away diamond
@@ -2042,6 +2454,90 @@ static int full_pixel_exhaustive(VP9_COMP *cpi, MACROBLOCK *x,
return bestsme;
}
+#if CONFIG_NON_GREEDY_MV
+double vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv,
+ double *best_mv_dist, double *best_mv_cost,
+ double lambda, int search_range,
+ const vp9_variance_fn_ptr_t *fn_ptr,
+ const int_mv *nb_full_mvs, int full_mv_num) {
+ const MACROBLOCKD *const xd = &x->e_mbd;
+ const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } };
+ const struct buf_2d *const what = &x->plane[0].src;
+ const struct buf_2d *const in_what = &xd->plane[0].pre[0];
+ const uint8_t *best_address = get_buf_from_mv(in_what, best_full_mv);
+ double best_sad;
+ int i, j;
+ vpx_clear_system_state();
+ *best_mv_dist =
+ fn_ptr->sdf(what->buf, what->stride, best_address, in_what->stride);
+ *best_mv_cost =
+ vp9_nb_mvs_inconsistency(best_full_mv, nb_full_mvs, full_mv_num);
+ best_sad = (*best_mv_dist) + lambda * (*best_mv_cost);
+
+ for (i = 0; i < search_range; i++) {
+ int best_site = -1;
+ const int all_in = ((best_full_mv->row - 1) > x->mv_limits.row_min) &
+ ((best_full_mv->row + 1) < x->mv_limits.row_max) &
+ ((best_full_mv->col - 1) > x->mv_limits.col_min) &
+ ((best_full_mv->col + 1) < x->mv_limits.col_max);
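+    // When all four 1-away candidates lie inside the MV limits, score them
+    // with a single sdx4df call instead of four separate sdf calls.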
+
+ if (all_in) {
+ unsigned int sads[4];
+ const uint8_t *const positions[4] = { best_address - in_what->stride,
+ best_address - 1, best_address + 1,
+ best_address + in_what->stride };
+
+ fn_ptr->sdx4df(what->buf, what->stride, positions, in_what->stride, sads);
+
+ for (j = 0; j < 4; ++j) {
+ const MV mv = { best_full_mv->row + neighbors[j].row,
+ best_full_mv->col + neighbors[j].col };
+ const double mv_dist = sads[j];
+ const double mv_cost =
+ vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num);
+ const double thissad = mv_dist + lambda * mv_cost;
+ if (thissad < best_sad) {
+ best_sad = thissad;
+ *best_mv_dist = mv_dist;
+ *best_mv_cost = mv_cost;
+ best_site = j;
+ }
+ }
+ } else {
+ for (j = 0; j < 4; ++j) {
+ const MV mv = { best_full_mv->row + neighbors[j].row,
+ best_full_mv->col + neighbors[j].col };
+
+ if (is_mv_in(&x->mv_limits, &mv)) {
+ const double mv_dist =
+ fn_ptr->sdf(what->buf, what->stride,
+ get_buf_from_mv(in_what, &mv), in_what->stride);
+ const double mv_cost =
+ vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num);
+ const double thissad = mv_dist + lambda * mv_cost;
+ if (thissad < best_sad) {
+ best_sad = thissad;
+ *best_mv_dist = mv_dist;
+ *best_mv_cost = mv_cost;
+ best_site = j;
+ }
+ }
+ }
+ }
+
+ if (best_site == -1) {
+ break;
+ } else {
+ best_full_mv->row += neighbors[best_site].row;
+ best_full_mv->col += neighbors[best_site].col;
+ best_address = get_buf_from_mv(in_what, best_full_mv);
+ }
+ }
+
+ return best_sad;
+}
+#endif // CONFIG_NON_GREEDY_MV
+
int vp9_refining_search_sad(const MACROBLOCK *x, MV *ref_mv, int error_per_bit,
int search_range,
const vp9_variance_fn_ptr_t *fn_ptr,
@@ -2175,6 +2671,8 @@ int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
const SEARCH_METHODS method = (SEARCH_METHODS)search_method;
vp9_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize];
int var = 0;
+ int run_exhaustive_search = 0;
+
if (cost_list) {
cost_list[0] = INT_MAX;
cost_list[1] = INT_MAX;
@@ -2205,35 +2703,38 @@ int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
fn_ptr, 1, ref_mv, tmp_mv);
break;
case NSTEP:
+ case MESH:
var = full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit,
MAX_MVSEARCH_STEPS - 1 - step_param, 1,
cost_list, fn_ptr, ref_mv, tmp_mv);
-
- // Should we allow a follow on exhaustive search?
- if ((sf->exhaustive_searches_thresh < INT_MAX) &&
- !cpi->rc.is_src_frame_alt_ref) {
- int64_t exhuastive_thr = sf->exhaustive_searches_thresh;
- exhuastive_thr >>=
- 8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
-
- // Threshold variance for an exhaustive full search.
- if (var > exhuastive_thr) {
- int var_ex;
- MV tmp_mv_ex;
- var_ex = full_pixel_exhaustive(cpi, x, tmp_mv, error_per_bit,
- cost_list, fn_ptr, ref_mv, &tmp_mv_ex);
-
- if (var_ex < var) {
- var = var_ex;
- *tmp_mv = tmp_mv_ex;
- }
- }
- }
break;
- default: assert(0 && "Invalid search method.");
+ default: assert(0 && "Unknown search method");
+ }
+
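+ // Decide whether to follow up with the exhaustive (mesh) search: NSTEP
+ // escalates only when the diamond result exceeds a block-size-scaled
+ // threshold; MESH always runs it.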
+ if (method == NSTEP) {
+ if (sf->exhaustive_searches_thresh < INT_MAX &&
+ !cpi->rc.is_src_frame_alt_ref) {
+ const int64_t exhaustive_thr =
+ sf->exhaustive_searches_thresh >>
+ (8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]));
+ if (var > exhaustive_thr) run_exhaustive_search = 1;
+ }
+ } else if (method == MESH) {
+ run_exhaustive_search = 1;
}
- if (method != NSTEP && rd && var < var_max)
+ if (run_exhaustive_search) {
+ int var_ex;
+ MV tmp_mv_ex;
+ var_ex = full_pixel_exhaustive(cpi, x, tmp_mv, error_per_bit, cost_list,
+ fn_ptr, ref_mv, &tmp_mv_ex);
+ if (var_ex < var) {
+ var = var_ex;
+ *tmp_mv = tmp_mv_ex;
+ }
+ }
+
+ if (method != NSTEP && method != MESH && rd && var < var_max)
var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, fn_ptr, 1);
return var;
@@ -2274,7 +2775,8 @@ int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
(void)tc; \
(void)sse; \
(void)thismse; \
- (void)cost_list;
+ (void)cost_list; \
+ (void)use_accurate_subpel_search;
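+
+// Note: the (void) casts in COMMON_MV_TEST exist only to silence
+// unused-parameter warnings in the min/max stub searchers below.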
// Return the maximum MV.
uint32_t vp9_return_max_sub_pixel_mv(
@@ -2282,7 +2784,7 @@ uint32_t vp9_return_max_sub_pixel_mv(
int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
- int h) {
+ int h, int use_accurate_subpel_search) {
COMMON_MV_TEST;
(void)minr;
@@ -2304,7 +2806,7 @@ uint32_t vp9_return_min_sub_pixel_mv(
int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop,
int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
- int h) {
+ int h, int use_accurate_subpel_search) {
COMMON_MV_TEST;
(void)maxr;
diff --git a/libvpx/vp9/encoder/vp9_mcomp.h b/libvpx/vp9/encoder/vp9_mcomp.h
index b8db2c353..ab69afdcd 100644
--- a/libvpx/vp9/encoder/vp9_mcomp.h
+++ b/libvpx/vp9/encoder/vp9_mcomp.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_ENCODER_VP9_MCOMP_H_
-#define VP9_ENCODER_VP9_MCOMP_H_
+#ifndef VPX_VP9_ENCODER_VP9_MCOMP_H_
+#define VPX_VP9_ENCODER_VP9_MCOMP_H_
#include "vp9/encoder/vp9_block.h"
#include "vpx_dsp/variance.h"
@@ -59,14 +59,15 @@ struct SPEED_FEATURES;
int vp9_init_search_range(int size);
int vp9_refining_search_sad(const struct macroblock *x, struct mv *ref_mv,
- int sad_per_bit, int distance,
+ int error_per_bit, int search_range,
const struct vp9_variance_vtable *fn_ptr,
const struct mv *center_mv);
// Perform integral projection based motion estimation.
unsigned int vp9_int_pro_motion_estimation(const struct VP9_COMP *cpi,
MACROBLOCK *x, BLOCK_SIZE bsize,
- int mi_row, int mi_col);
+ int mi_row, int mi_col,
+ const MV *ref_mv);
typedef uint32_t(fractional_mv_step_fp)(
const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp,
@@ -74,7 +75,7 @@ typedef uint32_t(fractional_mv_step_fp)(
int forced_stop, // 0 - full, 1 - qtr only, 2 - half only
int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w,
- int h);
+ int h, int use_accurate_subpel_search);
extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree;
extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree_pruned;
@@ -106,6 +107,9 @@ int vp9_refining_search_8p_c(const MACROBLOCK *x, MV *ref_mv, int error_per_bit,
struct VP9_COMP;
+// "mvp_full" is the MV search starting point;
+// "ref_mv" is the context reference MV;
+// "tmp_mv" is the searched best MV.
int vp9_full_pixel_search(struct VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
MV *mvp_full, int step_param, int search_method,
int error_per_bit, int *cost_list, const MV *ref_mv,
@@ -115,8 +119,38 @@ void vp9_set_subpel_mv_search_range(MvLimits *subpel_mv_limits,
const MvLimits *umv_window_limits,
const MV *ref_mv);
+#if CONFIG_NON_GREEDY_MV
+#define NB_MVS_NUM 4
+struct TplDepStats;
+double vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv,
+ double *best_mv_dist, double *best_mv_cost,
+ double lambda, int search_range,
+ const vp9_variance_fn_ptr_t *fn_ptr,
+ const int_mv *nb_full_mvs, int full_mv_num);
+
+double vp9_full_pixel_diamond_new(const struct VP9_COMP *cpi, MACROBLOCK *x,
+ MV *mvp_full, int step_param, double lambda,
+ int do_refine,
+ const vp9_variance_fn_ptr_t *fn_ptr,
+ const int_mv *nb_full_mvs, int full_mv_num,
+ MV *best_mv, double *best_mv_dist,
+ double *best_mv_cost);
+
+double vp9_nb_mvs_inconsistency(const MV *mv, const int_mv *nb_mvs, int mv_num);
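+
+// MV components are stored in 1/8-pel units, so shifting right by 3
+// converts them to full-pel resolution for the integer searches.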
+static INLINE MV get_full_mv(const MV *mv) {
+ MV out_mv;
+ out_mv.row = mv->row >> 3;
+ out_mv.col = mv->col >> 3;
+ return out_mv;
+}
+
+struct TplDepFrame;
+void vp9_prepare_nb_full_mvs(const struct TplDepFrame *tpl_frame, int mi_row,
+ int mi_col, int rf_idx, BLOCK_SIZE bsize,
+ int_mv *nb_full_mvs);
+#endif // CONFIG_NON_GREEDY_MV
#ifdef __cplusplus
} // extern "C"
#endif
-#endif // VP9_ENCODER_VP9_MCOMP_H_
+#endif // VPX_VP9_ENCODER_VP9_MCOMP_H_
diff --git a/libvpx/vp9/encoder/vp9_multi_thread.c b/libvpx/vp9/encoder/vp9_multi_thread.c
index da06fb151..c66c03549 100644
--- a/libvpx/vp9/encoder/vp9_multi_thread.c
+++ b/libvpx/vp9/encoder/vp9_multi_thread.c
@@ -13,6 +13,7 @@
#include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_ethread.h"
#include "vp9/encoder/vp9_multi_thread.h"
+#include "vp9/encoder/vp9_temporal_filter.h"
void *vp9_enc_grp_get_next_job(MultiThreadHandle *multi_thread_ctxt,
int tile_id) {
@@ -50,6 +51,20 @@ void *vp9_enc_grp_get_next_job(MultiThreadHandle *multi_thread_ctxt,
return job_info;
}
+void vp9_row_mt_alloc_rd_thresh(VP9_COMP *const cpi,
+ TileDataEnc *const this_tile) {
+ VP9_COMMON *const cm = &cpi->common;
+ const int sb_rows =
+ (mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2) + 1;
+ int i;
+
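+ // One RD threshold factor per (SB row, block size, mode) triple,
+ // initialized to RD_THRESH_INIT_FACT.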
+ this_tile->row_base_thresh_freq_fact =
+ (int *)vpx_calloc(sb_rows * BLOCK_SIZES * MAX_MODES,
+ sizeof(*(this_tile->row_base_thresh_freq_fact)));
+ for (i = 0; i < sb_rows * BLOCK_SIZES * MAX_MODES; i++)
+ this_tile->row_base_thresh_freq_fact[i] = RD_THRESH_INIT_FACT;
+}
+
void vp9_row_mt_mem_alloc(VP9_COMP *cpi) {
struct VP9Common *cm = &cpi->common;
MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
@@ -59,6 +74,8 @@ void vp9_row_mt_mem_alloc(VP9_COMP *cpi) {
const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
int jobs_per_tile_col, total_jobs;
+ // Allocate memory that is large enough for all row_mt stages. The first
+ // pass uses a 16x16 block size.
jobs_per_tile_col = VPXMAX(cm->mb_rows, sb_rows);
// Calculate the total number of jobs
total_jobs = jobs_per_tile_col * tile_cols;
@@ -83,14 +100,11 @@ void vp9_row_mt_mem_alloc(VP9_COMP *cpi) {
TileDataEnc *this_tile = &cpi->tile_data[tile_col];
vp9_row_mt_sync_mem_alloc(&this_tile->row_mt_sync, cm, jobs_per_tile_col);
if (cpi->sf.adaptive_rd_thresh_row_mt) {
- const int sb_rows =
- (mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2) + 1;
- int i;
- this_tile->row_base_thresh_freq_fact =
- (int *)vpx_calloc(sb_rows * BLOCK_SIZES * MAX_MODES,
- sizeof(*(this_tile->row_base_thresh_freq_fact)));
- for (i = 0; i < sb_rows * BLOCK_SIZES * MAX_MODES; i++)
- this_tile->row_base_thresh_freq_fact[i] = RD_THRESH_INIT_FACT;
+ if (this_tile->row_base_thresh_freq_fact != NULL) {
+ vpx_free(this_tile->row_base_thresh_freq_fact);
+ this_tile->row_base_thresh_freq_fact = NULL;
+ }
+ vp9_row_mt_alloc_rd_thresh(cpi, this_tile);
}
}
@@ -146,11 +160,9 @@ void vp9_row_mt_mem_dealloc(VP9_COMP *cpi) {
TileDataEnc *this_tile =
&cpi->tile_data[tile_row * multi_thread_ctxt->allocated_tile_cols +
tile_col];
- if (cpi->sf.adaptive_rd_thresh_row_mt) {
- if (this_tile->row_base_thresh_freq_fact != NULL) {
- vpx_free(this_tile->row_base_thresh_freq_fact);
- this_tile->row_base_thresh_freq_fact = NULL;
- }
+ if (this_tile->row_base_thresh_freq_fact != NULL) {
+ vpx_free(this_tile->row_base_thresh_freq_fact);
+ this_tile->row_base_thresh_freq_fact = NULL;
}
}
}
@@ -219,11 +231,19 @@ void vp9_prepare_job_queue(VP9_COMP *cpi, JOB_TYPE job_type) {
MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
JobQueue *job_queue = multi_thread_ctxt->job_queue;
const int tile_cols = 1 << cm->log2_tile_cols;
- int job_row_num, jobs_per_tile, jobs_per_tile_col, total_jobs;
+ int job_row_num, jobs_per_tile, jobs_per_tile_col = 0, total_jobs;
const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
int tile_col, i;
- jobs_per_tile_col = (job_type != ENCODE_JOB) ? cm->mb_rows : sb_rows;
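+ // Jobs per tile column depend on the pass: encoding operates on SB rows,
+ // the first pass on MB rows, and ARNR filtering on groups of
+ // (1 << TF_SHIFT) MI rows.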
+ switch (job_type) {
+ case ENCODE_JOB: jobs_per_tile_col = sb_rows; break;
+ case FIRST_PASS_JOB: jobs_per_tile_col = cm->mb_rows; break;
+ case ARNR_JOB:
+ jobs_per_tile_col = ((cm->mi_rows + TF_ROUND) >> TF_SHIFT);
+ break;
+ default: assert(0);
+ }
+
total_jobs = jobs_per_tile_col * tile_cols;
multi_thread_ctxt->jobs_per_tile_col = jobs_per_tile_col;
diff --git a/libvpx/vp9/encoder/vp9_multi_thread.h b/libvpx/vp9/encoder/vp9_multi_thread.h
index bfc0c0ae4..a2276f4fe 100644
--- a/libvpx/vp9/encoder/vp9_multi_thread.h
+++ b/libvpx/vp9/encoder/vp9_multi_thread.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_ENCODER_VP9_MULTI_THREAD_H
-#define VP9_ENCODER_VP9_MULTI_THREAD_H
+#ifndef VPX_VP9_ENCODER_VP9_MULTI_THREAD_H_
+#define VPX_VP9_ENCODER_VP9_MULTI_THREAD_H_
#include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_job_queue.h"
@@ -29,10 +29,13 @@ void vp9_multi_thread_tile_init(VP9_COMP *cpi);
void vp9_row_mt_mem_alloc(VP9_COMP *cpi);
+void vp9_row_mt_alloc_rd_thresh(VP9_COMP *const cpi,
+ TileDataEnc *const this_tile);
+
void vp9_row_mt_mem_dealloc(VP9_COMP *cpi);
int vp9_get_tiles_proc_status(MultiThreadHandle *multi_thread_ctxt,
int *tile_completion_status, int *cur_tile_id,
int tile_cols);
-#endif // VP9_ENCODER_VP9_MULTI_THREAD_H
+#endif // VPX_VP9_ENCODER_VP9_MULTI_THREAD_H_
diff --git a/libvpx/vp9/encoder/vp9_noise_estimate.c b/libvpx/vp9/encoder/vp9_noise_estimate.c
index 276a0c785..fc189dbb1 100644
--- a/libvpx/vp9/encoder/vp9_noise_estimate.c
+++ b/libvpx/vp9/encoder/vp9_noise_estimate.c
@@ -148,7 +148,9 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) {
ne->last_h = cm->height;
}
return;
- } else if (cm->current_video_frame > 60 &&
+ } else if (frame_counter > 60 && cpi->svc.num_encoded_top_layer > 1 &&
+ cpi->rc.frames_since_key > cpi->svc.number_spatial_layers &&
+ cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1 &&
cpi->rc.avg_frame_low_motion < (low_res ? 70 : 50)) {
// Force noise estimation to 0 and denoiser off if content has high motion.
ne->level = kLowLow;
@@ -157,7 +159,7 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) {
#if CONFIG_VP9_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi) &&
cpi->svc.current_superframe > 1) {
- vp9_denoiser_set_noise_level(&cpi->denoiser, ne->level);
+ vp9_denoiser_set_noise_level(cpi, ne->level);
copy_frame(&cpi->denoiser.last_source, cpi->Source);
}
#endif
@@ -258,7 +260,7 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) {
// Normalize.
avg_est = avg_est / num_samples;
// Update noise estimate.
- ne->value = (int)((15 * ne->value + avg_est) >> 4);
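+      // Exponentially smoothed update: keep 3/4 of the previous estimate
+      // (previously 15/16) so the noise estimate adapts faster.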
+ ne->value = (int)((3 * ne->value + avg_est) >> 2);
ne->count++;
if (ne->count == ne->num_frames_estimate) {
// Reset counter and check noise level condition.
@@ -267,7 +269,7 @@ void vp9_update_noise_estimate(VP9_COMP *const cpi) {
ne->level = vp9_noise_estimate_extract_level(ne);
#if CONFIG_VP9_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi))
- vp9_denoiser_set_noise_level(&cpi->denoiser, ne->level);
+ vp9_denoiser_set_noise_level(cpi, ne->level);
#endif
}
}
diff --git a/libvpx/vp9/encoder/vp9_noise_estimate.h b/libvpx/vp9/encoder/vp9_noise_estimate.h
index 335cdbe64..574b7c337 100644
--- a/libvpx/vp9/encoder/vp9_noise_estimate.h
+++ b/libvpx/vp9/encoder/vp9_noise_estimate.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_ENCODER_NOISE_ESTIMATE_H_
-#define VP9_ENCODER_NOISE_ESTIMATE_H_
+#ifndef VPX_VP9_ENCODER_VP9_NOISE_ESTIMATE_H_
+#define VPX_VP9_ENCODER_VP9_NOISE_ESTIMATE_H_
#include "vp9/encoder/vp9_block.h"
#include "vp9/encoder/vp9_skin_detection.h"
@@ -48,4 +48,4 @@ void vp9_update_noise_estimate(struct VP9_COMP *const cpi);
} // extern "C"
#endif
-#endif // VP9_ENCODER_NOISE_ESTIMATE_H_
+#endif // VPX_VP9_ENCODER_VP9_NOISE_ESTIMATE_H_
diff --git a/libvpx/vp9/encoder/vp9_partition_models.h b/libvpx/vp9/encoder/vp9_partition_models.h
new file mode 100644
index 000000000..904d21400
--- /dev/null
+++ b/libvpx/vp9/encoder/vp9_partition_models.h
@@ -0,0 +1,1172 @@
+/*
+ * Copyright (c) 2018 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_VP9_ENCODER_VP9_PARTITION_MODELS_H_
+#define VPX_VP9_ENCODER_VP9_PARTITION_MODELS_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define NN_MAX_HIDDEN_LAYERS 10
+#define NN_MAX_NODES_PER_LAYER 128
+
+// Neural net model config. It defines the layout of a neural net model: the
+// number of inputs/outputs, the number of layers, the number of nodes in
+// each layer, and the weights and bias of each node.
+typedef struct {
+ int num_inputs; // Number of input nodes, i.e. features.
+ int num_outputs; // Number of output nodes.
+ int num_hidden_layers; // Number of hidden layers, maximum 10.
+ // Number of nodes for each hidden layer.
+ int num_hidden_nodes[NN_MAX_HIDDEN_LAYERS];
+ // Weight parameters, indexed by layer.
+ const float *weights[NN_MAX_HIDDEN_LAYERS + 1];
+ // Bias parameters, indexed by layer.
+ const float *bias[NN_MAX_HIDDEN_LAYERS + 1];
+} NN_CONFIG;
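+
+// Evaluation sketch (illustrative only; the encoder supplies its own predict
+// routine): each hidden layer computes, for node j,
+//   out[j] = max(0, bias[layer][j] +
+//                   sum_i in[i] * weights[layer][j * num_in + i])
+// and the final layer applies the same affine map without the ReLU.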
+
+// Partition search breakout model.
+#define FEATURES 4
+#define Q_CTX 3
+#define RESOLUTION_CTX 2
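+// Each row holds FEATURES linear weights followed by a bias term; the bias
+// entries are written as "trained value - offset", apparently to keep the
+// hand-tuned adjustment visible.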
+static const float
+ vp9_partition_breakout_weights_64[RESOLUTION_CTX][Q_CTX][FEATURES + 1] = {
+ {
+ {
+ -0.016673f,
+ -0.001025f,
+ -0.000032f,
+ 0.000833f,
+ 1.94261885f - 2.1f,
+ },
+ {
+ -0.160867f,
+ -0.002101f,
+ 0.000011f,
+ 0.002448f,
+ 1.65738142f - 2.5f,
+ },
+ {
+ -0.628934f,
+ -0.011459f,
+ -0.000009f,
+ 0.013833f,
+ 1.47982645f - 1.6f,
+ },
+ },
+ {
+ {
+ -0.064309f,
+ -0.006121f,
+ 0.000232f,
+ 0.005778f,
+ 0.7989465f - 5.0f,
+ },
+ {
+ -0.314957f,
+ -0.009346f,
+ -0.000225f,
+ 0.010072f,
+ 2.80695581f - 5.5f,
+ },
+ {
+ -0.635535f,
+ -0.015135f,
+ 0.000091f,
+ 0.015247f,
+ 2.90381241f - 5.0f,
+ },
+ },
+ };
+
+static const float
+ vp9_partition_breakout_weights_32[RESOLUTION_CTX][Q_CTX][FEATURES + 1] = {
+ {
+ {
+ -0.010554f,
+ -0.003081f,
+ -0.000134f,
+ 0.004491f,
+ 1.68445992f - 3.5f,
+ },
+ {
+ -0.051489f,
+ -0.007609f,
+ 0.000016f,
+ 0.009792f,
+ 1.28089404f - 2.5f,
+ },
+ {
+ -0.163097f,
+ -0.013081f,
+ 0.000022f,
+ 0.019006f,
+ 1.36129403f - 3.2f,
+ },
+ },
+ {
+ {
+ -0.024629f,
+ -0.006492f,
+ -0.000254f,
+ 0.004895f,
+ 1.27919173f - 4.5f,
+ },
+ {
+ -0.083936f,
+ -0.009827f,
+ -0.000200f,
+ 0.010399f,
+ 2.73731065f - 4.5f,
+ },
+ {
+ -0.279052f,
+ -0.013334f,
+ 0.000289f,
+ 0.023203f,
+ 2.43595719f - 3.5f,
+ },
+ },
+ };
+
+static const float
+ vp9_partition_breakout_weights_16[RESOLUTION_CTX][Q_CTX][FEATURES + 1] = {
+ {
+ {
+ -0.013154f,
+ -0.002404f,
+ -0.000977f,
+ 0.008450f,
+ 2.57404566f - 5.5f,
+ },
+ {
+ -0.019146f,
+ -0.004018f,
+ 0.000064f,
+ 0.008187f,
+ 2.15043926f - 2.5f,
+ },
+ {
+ -0.075755f,
+ -0.010858f,
+ 0.000030f,
+ 0.024505f,
+ 2.06848121f - 2.5f,
+ },
+ },
+ {
+ {
+ -0.007636f,
+ -0.002751f,
+ -0.000682f,
+ 0.005968f,
+ 0.19225763f - 4.5f,
+ },
+ {
+ -0.047306f,
+ -0.009113f,
+ -0.000518f,
+ 0.016007f,
+ 2.61068869f - 4.0f,
+ },
+ {
+ -0.069336f,
+ -0.010448f,
+ -0.001120f,
+ 0.023083f,
+ 1.47591054f - 5.5f,
+ },
+ },
+ };
+
+static const float vp9_partition_breakout_weights_8[RESOLUTION_CTX][Q_CTX]
+ [FEATURES + 1] = {
+ {
+ {
+ -0.011807f,
+ -0.009873f,
+ -0.000931f,
+ 0.034768f,
+ 1.32254851f - 2.0f,
+ },
+ {
+ -0.003861f,
+ -0.002701f,
+ 0.000100f,
+ 0.013876f,
+ 1.96755111f - 1.5f,
+ },
+ {
+ -0.013522f,
+ -0.008677f,
+ -0.000562f,
+ 0.034468f,
+ 1.53440356f - 1.5f,
+ },
+ },
+ {
+ {
+ -0.003221f,
+ -0.002125f,
+ 0.000993f,
+ 0.012768f,
+ 0.03541421f - 2.0f,
+ },
+ {
+ -0.006069f,
+ -0.007335f,
+ 0.000229f,
+ 0.026104f,
+ 0.17135315f - 1.5f,
+ },
+ {
+ -0.039894f,
+ -0.011419f,
+ 0.000070f,
+ 0.061817f,
+ 0.6739977f - 1.5f,
+ },
+ },
+ };
+#undef FEATURES
+#undef Q_CTX
+#undef RESOLUTION_CTX
+
+// Rectangular partition search pruning model.
+#define FEATURES 17
+#define LABELS 4
+static const float vp9_rect_part_nn_weights_16_layer0[FEATURES * 32] = {
+ 1.262885f, -0.533345f, -0.161280f, 0.106098f, 0.194799f, 0.003600f,
+ 0.394783f, -0.053954f, 0.264474f, -0.016651f, 0.376765f, 0.221471f,
+ 0.489799f, 0.054924f, 0.018292f, 0.037633f, -0.053430f, 1.092426f,
+ 0.205791f, -0.055661f, -0.227335f, 0.301274f, -0.169917f, 0.100426f,
+ 0.254388f, 0.103465f, 0.189560f, 0.116479f, 1.647195f, -0.667044f,
+ 0.067795f, -0.044580f, 0.019428f, 0.072938f, -0.797569f, -0.077539f,
+ -0.225636f, 0.262883f, -1.048009f, 0.210118f, -0.416156f, -0.143741f,
+ -0.296985f, 0.205918f, -0.517383f, -0.118527f, -0.396606f, -0.113128f,
+ -0.279468f, 0.096141f, -0.342051f, -0.337036f, 0.143222f, -0.860280f,
+ 0.137169f, 0.339767f, -0.336076f, 0.071988f, 0.251557f, -0.004068f,
+ 0.170734f, 0.237283f, -0.332443f, 0.073643f, 0.375357f, 0.220407f,
+ 0.150708f, -0.176979f, 0.265786f, -0.105878f, -0.337465f, -0.000491f,
+ 0.234308f, -0.098973f, 0.129038f, -0.205936f, -0.034793f, -0.106981f,
+ 0.009974f, 0.037861f, -0.282874f, -0.354414f, 0.023021f, -0.266749f,
+ -0.041762f, -0.721725f, 0.182262f, -0.273945f, 0.123722f, -0.036749f,
+ -0.788645f, -0.081560f, -0.472226f, 0.004654f, -0.756766f, -0.132186f,
+ 1.085412f, -0.221324f, -0.072577f, -0.172834f, -0.104831f, -1.391641f,
+ -0.345893f, 0.194442f, -0.306583f, -0.041813f, -0.267635f, -0.218568f,
+ -0.178452f, 0.044421f, -0.128042f, -0.094797f, -0.253724f, 0.273931f,
+ 0.144843f, -0.401416f, -0.014354f, -0.348929f, 0.123550f, 0.494504f,
+ -0.007050f, -0.143830f, 0.111292f, 0.211057f, -1.579988f, 0.117744f,
+ -1.732487f, 0.009320f, -1.162696f, 0.176687f, -0.705609f, 0.524827f,
+ 0.089822f, 0.082976f, -0.023681f, 0.006120f, -0.907175f, -0.026273f,
+ 0.019027f, 0.027170f, -0.462563f, -0.535335f, 0.202231f, 0.709803f,
+ -0.112251f, -1.213869f, 0.225714f, 0.323785f, -0.518254f, -0.014235f,
+ -0.070790f, -0.369589f, 0.373399f, 0.002738f, 0.175113f, 0.084529f,
+ -0.101586f, -0.018978f, 0.773392f, -0.673230f, -0.549279f, 0.790196f,
+ 0.658609f, -0.826831f, -0.514211f, 0.575341f, -0.711311f, 0.276289f,
+ -0.435715f, 0.392986f, -0.079298f, -0.318719f, 0.188429f, -0.114366f,
+ 0.172527f, -0.261721f, -0.216761f, 0.163822f, -0.189374f, -0.391901f,
+ 0.142013f, -0.135046f, 0.144419f, 0.053887f, 0.074673f, -0.290791f,
+ -0.039560f, -0.103830f, -0.330263f, -0.042091f, 0.050646f, -0.057466f,
+ -0.069064f, -0.412864f, 0.071097f, 0.126693f, 0.175397f, -0.168485f,
+ 0.018129f, -0.419188f, -0.272024f, -0.436859f, -0.425711f, -0.024382f,
+ 0.248042f, -0.169090f, -0.346878f, -0.070926f, 0.292278f, -0.197610f,
+ -0.218286f, 0.290846f, 0.297843f, 0.247394f, -0.160736f, 0.110314f,
+ 0.276000f, -0.301676f, -0.232816f, -0.127576f, -0.174457f, -0.124503f,
+ 0.264880f, -0.332379f, 0.012659f, -0.197333f, 0.604700f, 0.801582f,
+ 0.758702f, 0.691880f, 0.440917f, 0.773548f, 0.064242f, 1.147508f,
+ -0.127543f, -0.189628f, -0.122994f, -0.226776f, -0.053531f, -0.187548f,
+ 0.226554f, -0.273451f, 0.011751f, 0.009133f, 0.185091f, 0.003031f,
+ 0.000525f, 0.221829f, 0.331550f, -0.202558f, -0.286550f, 0.100683f,
+ 0.268818f, 0.179971f, -0.050016f, 0.579665f, 0.015911f, 0.033068f,
+ 0.077768f, -0.017757f, -1.411251f, 0.051519f, -1.745767f, 0.011258f,
+ -1.947372f, 0.111396f, -1.112755f, -0.008989f, -0.006211f, -0.002098f,
+ -0.015236f, -0.095697f, -0.095820f, 0.044622f, -0.112096f, 0.060000f,
+ 0.138957f, -0.462708f, 0.590790f, -0.021405f, -0.283744f, -1.141749f,
+ 0.213121f, -0.332311f, -0.314090f, -0.789311f, 0.157605f, -0.438019f,
+ 0.642189f, -0.340764f, -0.996025f, 0.109871f, 0.106128f, -0.010505f,
+ -0.117233f, -0.223194f, 0.344105f, -0.308754f, 0.386020f, -0.305270f,
+ -0.538281f, -0.270720f, -0.101688f, 0.207580f, 0.237153f, -0.055730f,
+ 0.842779f, 0.393543f, 0.007886f, -0.318167f, 0.603768f, 0.388241f,
+ 0.421536f, 0.632080f, 0.423965f, 0.371472f, 0.456827f, 0.488134f,
+ 0.358997f, 0.032621f, -0.017104f, 0.032198f, 0.113266f, -0.312277f,
+ 0.178189f, 0.234180f, 0.134271f, -0.414889f, 0.774141f, -0.225043f,
+ 0.614052f, -0.279921f, 1.329141f, -0.140827f, 0.797267f, -0.171361f,
+ 0.066205f, 0.339976f, 0.015223f, 0.193725f, -0.245067f, -0.035578f,
+ -0.084043f, 0.086756f, 0.029478f, -0.845370f, 0.388613f, -1.215236f,
+ 0.304573f, -0.439884f, -0.293969f, -0.107988f, -0.267837f, -0.695339f,
+ -0.702099f, 0.359047f, 0.511730f, 1.429516f, 0.216959f, -0.313828f,
+ 0.068062f, -0.124917f, -0.648327f, -0.308411f, -0.378467f, -0.429288f,
+ -0.032415f, -0.357005f, 0.170068f, 0.161167f, -0.250280f, -0.320468f,
+ -0.408987f, -0.201496f, -0.155996f, 0.021067f, 0.141083f, -0.202733f,
+ -0.130953f, -0.278148f, -0.042051f, 0.070576f, 0.009982f, -0.044326f,
+ -0.346851f, -0.255397f, -0.346456f, 0.281781f, 0.001618f, 0.120648f,
+ 0.297140f, 0.198343f, 0.186104f, 0.183548f, -0.344482f, 0.182258f,
+ 0.291003f, -0.330228f, -0.048174f, 0.133694f, 0.264582f, 0.229671f,
+ -0.167251f, -0.316040f, 0.191829f, 0.153417f, -0.345158f, -0.212790f,
+ -0.878872f, -0.313099f, -0.028368f, 0.065869f, -0.695388f, 1.102812f,
+ -0.605539f, 0.400680f, -0.350120f, -0.432965f, 0.034553f, -0.693476f,
+ -0.045708f, 0.492409f, -0.043825f, -0.430522f, 0.071159f, -0.317376f,
+ -1.164842f, 0.112394f, 0.034137f, -0.611882f, 0.251020f, -0.245113f,
+ 0.286093f, -0.187883f, 0.340263f, -0.211592f, -0.065706f, -0.332148f,
+ 0.104026f, -0.003206f, 0.036397f, 0.206499f, 0.161962f, 0.037663f,
+ -0.313039f, -0.199837f, 0.117952f, -0.182145f, -0.343724f, 0.017625f,
+ 0.033427f, -0.288075f, -0.101873f, -0.083378f, 0.147870f, 0.049598f,
+ -0.241824f, 0.070494f, 0.140942f, -0.013795f, 0.020023f, -0.192213f,
+ -0.320505f, -0.193072f, 0.147260f, 0.311352f, 0.053486f, 0.183716f,
+ 0.142535f, 0.294333f, -0.054853f, 0.293314f, -0.025398f, 0.190815f,
+ -0.137574f, -0.191864f, -0.190950f, -0.205988f, -0.199046f, -0.017582f,
+ -0.149347f, 0.131040f, 0.006854f, -0.350732f, 0.113301f, -0.194371f,
+ -0.296885f, -0.249199f, -0.193946f, 0.116150f, -0.310411f, -0.325851f,
+ -0.053275f, -0.063419f, 0.204170f, -0.091940f, -0.146229f, 0.298173f,
+ 0.053349f, -0.368540f, 0.235629f, -0.317825f, -0.107304f, -0.114618f,
+ 0.058709f, -0.272070f, 0.076224f, 0.110668f, -0.193282f, -0.135440f,
+ -0.267950f, -0.102285f, 0.102699f, -0.159082f, 0.262721f, -0.263227f,
+ 0.094509f, -0.113405f, 0.069888f, -0.169665f, 0.070800f, 0.035432f,
+ 0.054243f, 0.264229f, 0.117416f, 0.091568f, -0.022069f, -0.069214f,
+ 0.124543f, 0.070413f, -0.039343f, 0.082823f, -0.838348f, 0.153727f,
+ -0.000947f, 0.270348f, -1.404952f, -0.159680f, -0.234320f, 0.061023f,
+ 0.271660f, -0.541834f, 0.570828f, -0.277254f,
+};
+
+static const float vp9_rect_part_nn_bias_16_layer0[32] = {
+ 0.045740f, 0.292685f, -0.754007f, -0.150412f, -0.006171f, 0.005915f,
+ 0.000167f, 0.322797f, -0.381793f, 0.349786f, 0.003878f, -0.307203f,
+ 0.000000f, 0.029122f, 0.000000f, 0.625494f, 0.302105f, -0.362807f,
+ -0.034002f, -0.573278f, 0.240021f, 0.083965f, 0.000000f, -0.018979f,
+ -0.147739f, -0.036990f, 0.000000f, 0.000000f, -0.026790f, -0.000036f,
+ -0.073448f, 0.398328f,
+};
+
+static const float vp9_rect_part_nn_weights_16_layer1[32 * LABELS] = {
+ 0.095090f, 0.831754f, 0.484433f, 0.472945f, 0.086165f, -0.442388f,
+ 0.176263f, -0.760247f, 0.419932f, -0.131377f, 0.075814f, 0.089844f,
+ -0.294718f, 0.299808f, -0.318435f, -0.623205f, -0.346703f, 0.494356f,
+ 0.949221f, 0.524653f, 0.044095f, 0.428540f, 0.402571f, -0.216920f,
+ 0.423915f, 1.023334f, -0.366449f, 0.395057f, 0.057576f, 0.094019f,
+ 0.247685f, -0.007200f, -0.420023f, -0.728965f, -0.063040f, -0.071321f,
+ 0.209298f, 0.486625f, -0.244375f, 0.263219f, -0.250463f, -0.260301f,
+ 0.068579f, 0.177644f, -0.155311f, -0.027606f, -0.101614f, 0.553046f,
+ -0.462729f, -0.237568f, -0.589316f, 0.045182f, 0.551759f, -0.196872f,
+ 0.183040f, 0.054341f, 0.252784f, -0.536486f, -0.024425f, 0.154942f,
+ -0.086636f, 0.360416f, 0.214773f, -0.170876f, -0.363522f, -0.464099f,
+ 0.145494f, -0.099329f, 0.343718f, 0.286427f, 0.085540f, -0.105182f,
+ 0.155543f, 0.290939f, -0.067069f, 0.228399f, 0.178247f, 0.113031f,
+ -0.067336f, 0.441062f, 0.132364f, -0.263403f, -0.263925f, -0.083613f,
+ -0.268577f, -0.204442f, 0.052526f, 0.334787f, -0.064285f, -0.197875f,
+ 0.296405f, 0.396440f, 0.033231f, 0.229087f, 0.118289f, 0.490894f,
+ -0.527582f, -0.897206f, -0.325708f, -0.433018f, -0.053989f, 0.223814f,
+ -0.352319f, 0.772440f, -0.108648f, -0.082859f, -0.342718f, 0.033022f,
+ -0.309199f, -0.560337f, 0.208476f, 0.520309f, -0.241035f, -0.560391f,
+ -1.268968f, -0.267567f, 0.129461f, -0.385547f, 0.080142f, 0.065785f,
+ -0.159324f, -0.580704f, -0.315150f, -0.224900f, -0.110807f, -0.230163f,
+ 0.307266f, 0.153446f,
+};
+
+static const float vp9_rect_part_nn_bias_16_layer1[LABELS] = {
+ -0.455437f,
+ 0.255310f,
+ 0.452974f,
+ -0.278733f,
+};
+
+static const NN_CONFIG vp9_rect_part_nnconfig_16 = {
+ FEATURES, // num_inputs
+ LABELS, // num_outputs
+ 1, // num_hidden_layers
+ {
+ 32,
+ }, // num_hidden_nodes
+ {
+ vp9_rect_part_nn_weights_16_layer0,
+ vp9_rect_part_nn_weights_16_layer1,
+ },
+ {
+ vp9_rect_part_nn_bias_16_layer0,
+ vp9_rect_part_nn_bias_16_layer1,
+ },
+};
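+
+// Hypothetical usage sketch (assumes an nn_predict-style helper in the
+// encoder; illustrative only, not part of this change):
+//   float features[FEATURES];  // 17 inputs gathered by the caller
+//   float scores[LABELS];      // 4 outputs, one per rectangular partition
+//   nn_predict(features, &vp9_rect_part_nnconfig_16, scores);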
+
+static const float vp9_rect_part_nn_weights_32_layer0[FEATURES * 32] = {
+ 0.735110f, -0.238477f, 0.101978f, 0.311671f, -0.123833f, 1.596506f,
+ -0.341982f, -0.480170f, -0.247587f, 0.613159f, -0.279899f, -0.740856f,
+ 0.499051f, 0.039041f, 0.056763f, 0.258874f, 0.470812f, -0.121635f,
+ -0.318852f, -0.098677f, -0.214714f, -0.159974f, -0.305400f, -0.344477f,
+ -0.260653f, -0.007737f, -0.053016f, -0.158079f, 0.151911f, -0.057685f,
+ -0.230948f, -0.165940f, -0.127591f, -0.192084f, 1.890390f, -0.315123f,
+ -0.714531f, -0.015355f, 0.186437f, 0.305504f, 0.035343f, -0.556783f,
+ 0.239364f, -0.297789f, 0.202735f, -0.707576f, 0.710250f, 0.223346f,
+ -0.291511f, 0.235778f, 0.455338f, -0.059402f, 0.084530f, -0.115117f,
+ -0.103696f, -0.192821f, 0.114579f, -0.223487f, 0.306864f, 0.021887f,
+ -0.028040f, 0.087866f, 0.038870f, -0.081742f, -0.056052f, -0.130837f,
+ 0.201058f, 0.293391f, 1.880344f, 0.339162f, 0.040928f, -0.503942f,
+ 0.476333f, 0.259272f, 0.629416f, 0.869369f, 0.622841f, 1.012843f,
+ 0.715795f, 1.958844f, -1.697462f, 0.071334f, 0.074189f, 0.014585f,
+ -0.002536f, 0.021900f, 0.151883f, 0.169501f, -0.333018f, -0.247512f,
+ -0.418575f, -0.473960f, -0.004501f, -0.280939f, -0.162188f, -0.355632f,
+ 0.136654f, -0.100967f, -0.350435f, -0.135386f, 0.037237f, 0.136982f,
+ -0.084157f, -0.073248f, 0.021792f, 0.077429f, -0.083042f, -3.169569f,
+ 0.016261f, -3.351328f, 0.021120f, -3.572247f, 0.023870f, -4.312754f,
+ 0.040973f, -0.038328f, -0.015052f, 0.017702f, 0.101427f, 0.115458f,
+ -0.304792f, 0.021826f, -0.157998f, 0.341022f, -0.013465f, 0.105076f,
+ -0.261465f, 0.318730f, 0.065701f, 0.314879f, -0.064785f, 0.282824f,
+ 0.100542f, 0.057260f, -0.003756f, -0.026214f, -0.264641f, 0.275545f,
+ -0.049201f, -0.283015f, -0.057363f, 0.183570f, 0.243161f, -0.255764f,
+ 0.099747f, -0.156157f, -0.262494f, 0.231521f, -0.262617f, -0.186096f,
+ 0.171720f, 0.018983f, -0.145545f, 0.197662f, -0.001502f, -0.267526f,
+ 0.001960f, 0.003260f, 0.045237f, -0.377174f, -0.042499f, -0.015278f,
+ -0.196779f, -0.262797f, -0.318427f, -0.126092f, -0.339723f, 0.205288f,
+ -0.544284f, -0.507896f, -0.316622f, -0.090312f, -0.250917f, -0.337263f,
+ -0.220199f, -0.296591f, -0.116816f, 0.052381f, 0.145681f, 0.016521f,
+ -0.093549f, -0.097822f, 0.023140f, -0.010346f, 0.036181f, 0.145826f,
+ -0.139123f, -0.462638f, -0.007315f, 0.156533f, -0.102787f, 0.143586f,
+ -0.092094f, -0.144220f, -0.168994f, -0.045833f, 0.021628f, -0.421794f,
+ -0.055857f, 0.217931f, -0.061937f, -0.028768f, -0.078250f, -0.426939f,
+ -0.223118f, -0.230080f, -0.194988f, -0.197673f, -0.020918f, 0.139945f,
+ 0.186951f, -0.071317f, -0.084007f, -0.138597f, 0.101950f, 0.093870f,
+ 0.153226f, 0.017799f, -0.088539f, -0.037796f, 0.340412f, 0.183305f,
+ 0.391880f, -1.127417f, 0.132762f, -0.228565f, 0.399035f, 0.017483f,
+ -0.041619f, 0.017849f, 0.092340f, 0.054204f, 0.681185f, 0.421034f,
+ 0.112520f, -0.040618f, -0.040148f, -0.360647f, 0.053555f, 0.192854f,
+ 0.076968f, -0.179224f, -0.081617f, -0.287661f, -0.191072f, -0.310227f,
+ -0.332226f, -0.039786f, -0.247795f, -0.232201f, -0.333533f, -0.077995f,
+ -0.471732f, 0.051829f, 0.090488f, 0.142465f, -0.120490f, -0.286151f,
+ -0.049117f, -0.251082f, 0.211884f, -0.223366f, 0.063565f, 0.229938f,
+ -0.059348f, -0.029573f, -0.064303f, -0.156148f, 0.086958f, -0.297613f,
+ -0.125107f, 0.062718f, 0.339137f, -0.218896f, -0.057290f, -0.236670f,
+ -0.143783f, -0.119429f, 0.242320f, -0.323464f, -0.178377f, 0.238275f,
+ -0.025042f, 0.074798f, 0.111329f, -0.299773f, -0.151748f, -0.261607f,
+ 0.215626f, 0.202243f, -0.121896f, -0.024283f, -0.293854f, -0.018232f,
+ -0.012629f, -0.199297f, -0.060595f, 0.432339f, -0.158735f, -0.028380f,
+ 0.326639f, 0.222546f, -0.218135f, -0.495955f, -0.015055f, -0.104206f,
+ -0.268823f, 0.116765f, 0.041769f, -0.187095f, 0.225090f, 0.198195f,
+ 0.001502f, -0.219212f, -0.244779f, -0.017690f, -0.033197f, -0.339813f,
+ -0.325453f, 0.002499f, -0.066113f, 0.043235f, 0.324275f, -0.630642f,
+ -1.440551f, 0.174527f, 0.124619f, -1.187345f, 1.372693f, -0.278393f,
+ -0.058673f, -0.286338f, 1.708757f, -0.325094f, -0.543172f, -0.229411f,
+ 0.169927f, 0.175064f, 0.198321f, 0.117351f, 0.220882f, 0.138078f,
+ -0.158000f, -0.286708f, 0.096046f, -0.321788f, 0.206949f, -0.014473f,
+ -0.321234f, 0.100033f, -0.108266f, 0.166824f, 0.032904f, -0.065760f,
+ -0.303896f, 0.180342f, -0.301145f, -0.352554f, 0.149089f, 0.013277f,
+ 0.256019f, -0.109770f, 1.832588f, -0.132568f, 1.527658f, -0.164252f,
+ -0.857880f, -0.242694f, -0.553797f, 0.334023f, -0.332759f, -0.166203f,
+ -0.223175f, 0.007953f, -0.175865f, -0.134590f, -0.023858f, -0.011983f,
+ 0.054403f, -0.147054f, -0.176901f, -0.166893f, -0.292662f, -0.010569f,
+ -0.041744f, -0.060398f, -0.237584f, 0.154246f, -0.083270f, -0.314016f,
+ -0.374736f, 0.100063f, 0.048401f, -0.061952f, -0.178816f, 0.157243f,
+ 0.221991f, -0.065035f, 0.098517f, -0.190704f, -0.210613f, -0.274884f,
+ -0.341442f, -0.205281f, 0.073644f, 0.130667f, 0.149194f, -0.018172f,
+ 1.796154f, -1.017806f, -0.169655f, 0.104239f, 0.344313f, 0.643042f,
+ 0.730177f, 0.270776f, 0.581631f, -1.090649f, 0.707472f, 1.411035f,
+ 0.268739f, 0.178860f, -0.062251f, -0.118611f, -0.215759f, 0.023485f,
+ -0.105320f, 0.036396f, -0.059604f, 0.090024f, 0.095224f, -0.053497f,
+ -0.084040f, 0.055836f, 0.111678f, 0.014886f, -0.178380f, 0.079662f,
+ -0.123580f, 0.057379f, -0.409844f, -0.305386f, -0.987808f, -0.291094f,
+ 0.063966f, 0.263709f, -0.337221f, 0.720093f, 0.105030f, 0.848950f,
+ 0.071835f, 0.228972f, 0.057705f, -2.154561f, -0.201303f, -0.058856f,
+ -0.020081f, 0.029375f, 0.234837f, -0.001063f, 0.042527f, 0.014567f,
+ -0.299420f, -0.289117f, 0.275219f, 0.263596f, -0.186026f, -0.111364f,
+ -0.118393f, -0.318778f, 0.010710f, -0.286836f, -0.070330f, -0.049497f,
+ 0.093162f, -0.298085f, 0.204761f, -0.206633f, -0.009057f, -0.235372f,
+ 0.185300f, -0.271814f, 0.281732f, 0.268149f, -0.018967f, 0.162748f,
+ -0.086694f, -0.063839f, -0.097473f, -0.280120f, 0.324688f, 0.157911f,
+ -0.064794f, -0.266017f, -0.305608f, -0.196854f, -0.185767f, 0.199455f,
+ 0.102264f, 0.070866f, 0.172045f, 0.266433f, -0.176167f, 0.251657f,
+ -0.239220f, 0.229667f, 0.156115f, -0.221345f, 0.270720f, 0.109367f,
+ 0.230352f, -0.384561f, -0.026329f, 0.005928f, -0.087685f, -0.097995f,
+ -0.153864f, 0.117211f, -0.226492f, -0.379832f, -0.201714f, 0.049707f,
+ -0.292120f, 0.114074f, -0.085307f, -0.485356f, -0.347405f, 0.089361f,
+ -0.419273f, -0.320764f, -0.107254f, -0.274615f, -0.292991f, 0.095602f,
+ -0.078789f, 0.138927f, 0.270813f, 0.205814f, 0.065003f, 0.169171f,
+ 0.056142f, -0.005792f, 0.059483f, 0.060149f,
+};
+
+static const float vp9_rect_part_nn_bias_32_layer0[32] = {
+ -1.749808f, 0.000000f, 0.239736f, -0.000424f, 0.431792f, -0.150833f,
+ 2.866760f, 0.000000f, 0.000000f, -0.281434f, 0.000000f, -0.150086f,
+ 0.000000f, -0.008346f, -0.204104f, -0.006581f, 0.000000f, -0.197006f,
+ 0.000000f, -0.735287f, -0.028345f, -1.180116f, -0.106524f, 0.000000f,
+ 0.075879f, -0.150966f, -2.438914f, 0.000000f, -0.011775f, -0.024204f,
+ -0.138235f, -0.123763f,
+};
+
+static const float vp9_rect_part_nn_weights_32_layer1[32 * LABELS] = {
+ 0.622235f, 0.264894f, -0.424216f, 0.103989f, 1.401192f, -0.063838f,
+ -5.216846f, 0.329234f, -0.293113f, 0.457519f, -0.271899f, 0.043771f,
+ -0.203823f, 0.573535f, -0.192703f, 0.054939f, 0.163019f, 0.124803f,
+ 0.160664f, 0.385406f, -0.091403f, 0.320204f, 0.101181f, -0.157792f,
+ -0.095555f, -0.255011f, 1.326614f, -0.138076f, -0.082434f, -0.342442f,
+ 0.184067f, -0.076395f, 0.050263f, 0.251065f, 0.291743f, 0.197838f,
+ -0.950922f, 0.280202f, 2.904905f, -0.219434f, 0.284386f, 0.375005f,
+ 0.193817f, -0.298663f, -0.255364f, -0.297545f, 0.030518f, -0.023892f,
+ -0.396120f, -0.253027f, 0.237235f, -0.550249f, -0.076817f, -0.201374f,
+ 0.292708f, 0.341936f, -0.532215f, 0.180634f, -0.943291f, -0.217179f,
+ 0.251611f, -0.306310f, 0.229054f, -0.350337f, -0.192707f, 0.146781f,
+ 0.409007f, 0.279088f, -0.307357f, 0.199059f, 2.780962f, 0.163723f,
+ -0.226445f, 0.242830f, 0.220356f, -0.057621f, 0.196677f, -0.179975f,
+ -0.314636f, 0.218271f, -0.278653f, -0.226286f, 0.034275f, -0.320149f,
+ 0.154779f, 0.074937f, -0.015650f, -0.281735f, -0.495227f, -0.075036f,
+ -0.871024f, -0.350643f, 0.343468f, 0.095665f, 0.447121f, -0.059040f,
+ 0.244757f, 0.223122f, 0.272544f, 0.129678f, -1.700183f, 0.254869f,
+ 2.528983f, 0.217362f, 0.327765f, -0.129369f, -0.003560f, -0.532537f,
+ 0.080216f, -0.739488f, -0.299813f, 0.185421f, 0.265994f, 0.152268f,
+ -0.401829f, -0.901380f, 0.347747f, -0.524845f, -0.201163f, 0.063585f,
+ -0.517479f, -0.077816f, -0.735739f, -0.161411f, -0.113607f, -0.306188f,
+ 0.190817f, -0.362567f,
+};
+
+static const float vp9_rect_part_nn_bias_32_layer1[LABELS] = {
+ -0.833530f,
+ 0.860502f,
+ 0.708645f,
+ -1.083700f,
+};
+
+static const NN_CONFIG vp9_rect_part_nnconfig_32 = {
+ FEATURES, // num_inputs
+ LABELS, // num_outputs
+ 1, // num_hidden_layers
+ {
+ 32,
+ }, // num_hidden_nodes
+ {
+ vp9_rect_part_nn_weights_32_layer0,
+ vp9_rect_part_nn_weights_32_layer1,
+ },
+ {
+ vp9_rect_part_nn_bias_32_layer0,
+ vp9_rect_part_nn_bias_32_layer1,
+ },
+};
+
+static const float vp9_rect_part_nn_weights_64_layer0[FEATURES * 32] = {
+ 0.029424f, -0.295893f, -0.313259f, -0.090484f, -0.104946f, 0.121361f,
+ 0.137971f, -0.137984f, -0.328158f, -0.137280f, -0.276995f, -0.153118f,
+ 0.187893f, 0.105787f, -0.236591f, -0.114325f, -0.000708f, 1.936191f,
+ 0.048491f, -0.026048f, -0.206916f, 0.830237f, -0.152354f, 0.074191f,
+ -0.153813f, 0.148942f, -0.103457f, 0.028252f, 1.758264f, -2.123016f,
+ 0.120182f, 0.049954f, 0.110450f, -0.199360f, 0.642198f, 0.040225f,
+ -0.140886f, 0.091833f, -0.122788f, 1.172115f, -0.833333f, -0.505218f,
+ 0.736050f, -0.109958f, -0.839030f, -0.399916f, 1.029718f, 0.408977f,
+ -0.836882f, 0.389683f, -1.134413f, -1.529672f, -0.146351f, 0.089298f,
+ 0.083772f, -0.697869f, 1.683311f, -0.882446f, 0.494428f, -0.122128f,
+ 0.659819f, -0.057178f, -0.915390f, -0.192412f, 0.046613f, 0.010697f,
+ 0.040782f, 0.110807f, -0.225332f, -0.327730f, -0.114825f, 0.063511f,
+ 0.050503f, 0.023602f, 0.006524f, -0.274547f, -0.607145f, -0.143812f,
+ -0.327689f, -0.333072f, -0.017138f, -0.183992f, -0.200622f, -0.262463f,
+ -0.132799f, -0.018155f, -0.534214f, -0.385994f, 0.116278f, -0.752879f,
+ -0.090734f, -0.249152f, 0.071716f, 0.029603f, -0.382456f, -0.122894f,
+ 1.349552f, -0.885192f, 0.257903f, -0.265945f, -0.045579f, 0.112247f,
+ -0.122810f, -0.258285f, -0.145427f, -0.127442f, 0.072778f, 0.072549f,
+ 0.182149f, 0.239403f, 0.167205f, -0.291616f, -0.281237f, 0.335735f,
+ 0.208511f, -0.239628f, -0.022236f, -0.177370f, 0.207808f, 0.023535f,
+ 0.137455f, 0.016406f, -0.138685f, 0.188732f, 0.205513f, 0.209787f,
+ 0.060592f, 0.239954f, -0.128341f, -0.291585f, 0.022141f, -0.311201f,
+ -0.010199f, -0.314224f, -0.351915f, -0.079775f, -0.260028f, -0.015953f,
+ 0.007404f, 0.051589f, 0.019771f, -2.337926f, 0.024596f, -2.512399f,
+ -0.023138f, -2.421380f, 0.016515f, -3.269775f, 0.026844f, -0.053660f,
+ -0.013213f, -0.029248f, 0.114357f, 0.259100f, -0.141749f, -0.106802f,
+ -0.117323f, -0.294698f, -0.316012f, -0.328013f, 0.016459f, 0.136175f,
+ 0.223327f, 0.322312f, -0.297297f, 0.118286f, -0.317197f, -0.116692f,
+ 0.262236f, -0.032443f, -0.392128f, -0.199989f, -0.383621f, 0.008347f,
+ -0.079302f, -0.005529f, 0.049261f, 0.145948f, -0.263592f, -0.317109f,
+ 0.260015f, -0.499341f, -0.171764f, -0.017815f, 0.149186f, 0.178294f,
+ -0.492198f, 0.016956f, 0.008067f, -0.057734f, -0.189979f, -0.131489f,
+ -0.163303f, 0.121378f, -0.172272f, 0.125891f, 0.120654f, 0.071314f,
+ 0.117423f, -0.242167f, 0.047170f, 0.234302f, -0.355370f, -0.336112f,
+ -0.255471f, -0.267792f, -0.135367f, -0.284411f, 0.254592f, 0.098749f,
+ 0.224989f, 0.258450f, -0.306878f, 0.153551f, -0.175806f, -0.244459f,
+ -0.274922f, 0.254346f, 0.110309f, 0.036054f, 0.095133f, -0.589646f,
+ 0.080543f, 0.154155f, 0.133797f, -0.401518f, 0.798127f, 0.066742f,
+ 1.449216f, 0.282498f, 1.210638f, -0.280643f, 0.572386f, -0.308133f,
+ -0.053143f, 0.008437f, 0.269565f, 0.347616f, 0.087180f, -0.771104f,
+ 0.200800f, 0.157578f, 0.474128f, -0.971488f, 0.193451f, 0.340339f,
+ -0.123425f, 0.560754f, -0.139621f, -0.281721f, -0.100162f, 0.250926f,
+ 0.281100f, 0.197680f, 0.138629f, 1.045823f, 0.339047f, 0.036698f,
+ -0.159210f, 0.727869f, -1.371850f, 0.116241f, -2.180194f, 0.214055f,
+ -0.213691f, 0.447957f, -1.129966f, 0.543598f, 0.147599f, 0.060034f,
+ -0.049415f, -0.095858f, 0.290599f, 0.059512f, 0.198343f, -0.211903f,
+ 0.158736f, -0.090220f, -0.221992f, 0.198320f, 0.028632f, -0.408238f,
+ -0.368266f, -0.218740f, -0.379023f, -0.173573f, -0.035179f, 0.240176f,
+ 0.237714f, -0.417132f, -0.184989f, 0.046818f, -0.016965f, -0.524012f,
+ -0.094848f, -0.225678f, 0.021766f, -0.028366f, 0.072343f, -0.039980f,
+ 0.023334f, -0.392397f, 0.164450f, -0.201650f, -0.519754f, -0.023352f,
+ -4.559466f, -0.115996f, 0.135844f, 0.152599f, -0.111570f, 1.870310f,
+ 0.003522f, 1.893098f, -0.134055f, 1.850787f, 0.085160f, -2.203354f,
+ 0.380799f, -0.074047f, 0.023760f, 0.077310f, 0.273381f, -1.163135f,
+ -0.024976f, 0.093252f, 0.011445f, -0.129009f, -2.200677f, -0.013703f,
+ -1.964109f, -0.027246f, -2.135679f, 0.049465f, -3.879032f, 0.195114f,
+ -0.018085f, 0.016755f, 0.036330f, 0.169138f, 0.003548f, -0.028565f,
+ -0.178196f, -0.020577f, -0.104330f, -0.270961f, -0.282822f, -0.228735f,
+ -0.292561f, 0.271648f, 0.129171f, 0.376168f, -0.265005f, -0.093002f,
+ -0.185514f, 0.025598f, 0.055265f, -0.212784f, -0.249005f, 0.051507f,
+ -0.267868f, 0.162227f, -0.237365f, 0.267479f, -0.051543f, -0.288800f,
+ -0.246119f, 0.216296f, 0.226888f, -0.123005f, 0.068040f, -0.096630f,
+ -0.100500f, 0.161640f, -0.349187f, -0.061229f, 0.042915f, 0.024949f,
+ -0.083086f, -0.407249f, -0.428306f, -0.381137f, -0.508822f, 0.354796f,
+ -0.612346f, -0.230076f, -0.734103f, -0.550571f, -0.318788f, -0.300091f,
+ -0.336045f, -0.494406f, -0.206900f, 0.079942f, 0.149065f, -0.533360f,
+ 0.940431f, -0.078860f, 1.418633f, -0.117527f, 1.349170f, 0.242658f,
+ 0.559328f, 0.258770f, -0.014508f, -0.204775f, -0.292631f, 0.498345f,
+ -0.274918f, 0.051670f, 0.157748f, -0.179721f, -0.183330f, -0.393550f,
+ -0.208848f, 0.060742f, -0.159654f, 0.047757f, -0.400256f, -0.084606f,
+ -0.080619f, -0.359664f, -0.078305f, -0.455653f, 0.227624f, -0.385606f,
+ -0.060326f, -0.209831f, -0.077008f, 0.148862f, 0.209908f, 0.047655f,
+ -0.342292f, -0.088375f, -0.115465f, 0.082700f, 0.036465f, -0.001792f,
+ -0.285730f, 0.114632f, 0.239254f, -0.348543f, 0.044916f, -0.299003f,
+ -0.244756f, -0.180802f, 0.314253f, -0.127788f, -0.221512f, 0.034787f,
+ -0.208388f, 0.349156f, 0.265975f, -0.068335f, 0.261372f, 0.146705f,
+ -0.098729f, 0.293699f, -0.111342f, 0.207402f, -0.038772f, 0.124135f,
+ -0.237450f, -0.191511f, -0.052240f, -0.237151f, 0.005013f, 0.139441f,
+ -0.153634f, -0.021596f, -0.036220f, -0.077873f, -0.085995f, -0.254555f,
+ -0.204382f, -0.082362f, 0.941796f, 0.253800f, -0.957468f, 0.095795f,
+ 0.122046f, -0.310364f, 0.087301f, 0.012704f, 0.193265f, -0.058303f,
+ 0.250452f, 0.835269f, 0.507383f, 0.109957f, -0.145028f, -0.114419f,
+ -0.225618f, 0.132387f, -0.063335f, -0.325776f, -0.346173f, -0.006653f,
+ -0.133534f, -0.085549f, -0.050177f, 0.173103f, 0.025421f, 0.105512f,
+ 0.258036f, 0.153116f, 0.290202f, -0.333699f, -0.072405f, -0.124069f,
+ -0.241933f, -0.313318f, 0.013623f, -0.237440f, -0.232228f, -0.170850f,
+ -0.039212f, 0.162468f, -0.330162f, -0.218462f, -0.287064f, -0.181673f,
+ -0.161059f, 0.024664f, -0.108642f, -0.231707f, 0.217994f, -1.128878f,
+ 0.093010f, 0.101513f, 0.055895f, -0.354538f, 0.844174f, 0.254335f,
+ 1.920298f, -0.230777f, 0.798144f, 0.206425f, 0.580655f, -0.177645f,
+ -0.412061f, 0.112629f, -0.476438f, 0.209436f,
+};
+
+static const float vp9_rect_part_nn_bias_64_layer0[32] = {
+ 0.000000f, 0.345406f, -0.499542f, -1.718246f, -0.147443f, -0.408843f,
+ -0.008997f, -0.107946f, 2.117510f, 0.000000f, -0.141830f, -0.049079f,
+ 0.000000f, -1.331136f, -1.417843f, -0.485054f, -0.100856f, -0.230750f,
+ -2.574372f, 2.310627f, -0.030363f, 0.000000f, -0.310119f, -1.314316f,
+ -0.108766f, -0.107918f, 0.000000f, 0.000000f, 0.093643f, 0.000000f,
+ 0.000000f, -0.902343f,
+};
+
+static const float vp9_rect_part_nn_weights_64_layer1[32 * LABELS] = {
+ 0.404567f, 1.168492f, 0.051714f, 0.827941f, 0.135334f, 0.456922f,
+ -0.370524f, 0.062865f, -3.076300f, -0.290613f, 0.280029f, -0.101778f,
+ 0.250216f, 0.347721f, 0.466400f, 0.030845f, 0.114570f, 0.089456f,
+ 1.519938f, -3.493788f, 0.264212f, -0.109125f, 0.306644f, 0.368206f,
+ -0.052168f, -0.229630f, -0.339932f, -0.080472f, 0.319845f, 0.143818f,
+ -0.172595f, 0.372777f, -0.082072f, -0.505781f, -0.288321f, -0.473028f,
+ -0.027567f, -0.034329f, -0.291965f, -0.063262f, 1.721741f, 0.118914f,
+ 0.183681f, 0.041611f, 0.266371f, 0.005896f, -0.484705f, 0.665535f,
+ -0.240945f, -0.017963f, -1.409440f, 2.031976f, 0.240327f, -0.116604f,
+ 0.273245f, -0.170570f, -0.085491f, -0.340315f, -0.209651f, -0.217460f,
+ -0.249373f, 0.009193f, 0.009467f, -0.272909f, 0.308472f, -0.551173f,
+ 0.168374f, -0.583229f, 0.140082f, -0.585715f, -0.010929f, 0.159779f,
+ 1.438104f, 0.293111f, -0.053339f, -0.101828f, -0.280573f, -0.211265f,
+ -0.323605f, -0.540908f, 0.101366f, -0.005288f, -1.517046f, 2.078767f,
+ 0.215597f, 0.144012f, 0.315888f, -0.251324f, 0.150482f, -0.137871f,
+ 0.235116f, -0.194202f, -0.153475f, -0.312384f, -0.375510f, 0.336488f,
+ -0.379837f, -1.004979f, -0.312587f, -0.406174f, 0.154290f, -0.539766f,
+ -0.230074f, 0.303564f, 0.719439f, -0.235108f, -0.204978f, 0.399229f,
+ 0.290222f, -0.278713f, -0.667069f, -0.420550f, 0.164893f, -0.459689f,
+ -1.035368f, 0.818909f, 0.275137f, -0.291006f, -0.061505f, 0.052737f,
+ -0.084871f, -0.348335f, 0.312544f, 0.120753f, -0.707222f, -0.010050f,
+ -0.137148f, -0.351765f,
+};
+
+static const float vp9_rect_part_nn_bias_64_layer1[LABELS] = {
+ -0.926768f,
+ 0.765832f,
+ 0.663683f,
+ -0.621865f,
+};
+
+static const NN_CONFIG vp9_rect_part_nnconfig_64 = {
+ FEATURES, // num_inputs
+ LABELS, // num_outputs
+ 1, // num_hidden_layers
+ {
+ 32,
+ }, // num_hidden_nodes
+ {
+ vp9_rect_part_nn_weights_64_layer0,
+ vp9_rect_part_nn_weights_64_layer1,
+ },
+ {
+ vp9_rect_part_nn_bias_64_layer0,
+ vp9_rect_part_nn_bias_64_layer1,
+ },
+};
+#undef FEATURES
+#undef LABELS
+
+#define FEATURES 7
+// Partition pruning model (neural nets).
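+// Each model maps FEATURES inputs to a single score, which presumably is
+// thresholded to decide whether further partition search can be pruned.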
+static const float vp9_partition_nn_weights_64x64_layer0[FEATURES * 8] = {
+ -3.571348f, 0.014835f, -3.255393f, -0.098090f, -0.013120f, 0.000221f,
+ 0.056273f, 0.190179f, -0.268130f, -1.828242f, -0.010655f, 0.937244f,
+ -0.435120f, 0.512125f, 1.610679f, 0.190816f, -0.799075f, -0.377348f,
+ -0.144232f, 0.614383f, -0.980388f, 1.754150f, -0.185603f, -0.061854f,
+ -0.807172f, 1.240177f, 1.419531f, -0.438544f, -5.980774f, 0.139045f,
+ -0.032359f, -0.068887f, -1.237918f, 0.115706f, 0.003164f, 2.924212f,
+ 1.246838f, -0.035833f, 0.810011f, -0.805894f, 0.010966f, 0.076463f,
+ -4.226380f, -2.437764f, -0.010619f, -0.020935f, -0.451494f, 0.300079f,
+ -0.168961f, -3.326450f, -2.731094f, 0.002518f, 0.018840f, -1.656815f,
+ 0.068039f, 0.010586f,
+};
+
+static const float vp9_partition_nn_bias_64x64_layer0[8] = {
+ -3.469882f, 0.683989f, 0.194010f, 0.313782f,
+ -3.153335f, 2.245849f, -1.946190f, -3.740020f,
+};
+
+static const float vp9_partition_nn_weights_64x64_layer1[8] = {
+ -8.058566f, 0.108306f, -0.280620f, -0.818823f,
+ -6.445117f, 0.865364f, -1.127127f, -8.808660f,
+};
+
+static const float vp9_partition_nn_bias_64x64_layer1[1] = {
+ 6.46909416f,
+};
+
+static const NN_CONFIG vp9_partition_nnconfig_64x64 = {
+ FEATURES, // num_inputs
+ 1, // num_outputs
+ 1, // num_hidden_layers
+ {
+ 8,
+ }, // num_hidden_nodes
+ {
+ vp9_partition_nn_weights_64x64_layer0,
+ vp9_partition_nn_weights_64x64_layer1,
+ },
+ {
+ vp9_partition_nn_bias_64x64_layer0,
+ vp9_partition_nn_bias_64x64_layer1,
+ },
+};
+
+static const float vp9_partition_nn_weights_32x32_layer0[FEATURES * 8] = {
+ -0.295437f, -4.002648f, -0.205399f, -0.060919f, 0.708037f, 0.027221f,
+ -0.039137f, -0.907724f, -3.151662f, 0.007106f, 0.018726f, -0.534928f,
+ 0.022744f, 0.000159f, -1.717189f, -3.229031f, -0.027311f, 0.269863f,
+ -0.400747f, -0.394366f, -0.108878f, 0.603027f, 0.455369f, -0.197170f,
+ 1.241746f, -1.347820f, -0.575636f, -0.462879f, -2.296426f, 0.196696f,
+ -0.138347f, -0.030754f, -0.200774f, 0.453795f, 0.055625f, -3.163116f,
+ -0.091003f, -0.027028f, -0.042984f, -0.605185f, 0.143240f, -0.036439f,
+ -0.801228f, 0.313409f, -0.159942f, 0.031267f, 0.886454f, -1.531644f,
+ -0.089655f, 0.037683f, -0.163441f, -0.130454f, -0.058344f, 0.060011f,
+ 0.275387f, 1.552226f,
+};
+
+static const float vp9_partition_nn_bias_32x32_layer0[8] = {
+ -0.838372f, -2.609089f, -0.055763f, 1.329485f,
+ -1.297638f, -2.636622f, -0.826909f, 1.012644f,
+};
+
+static const float vp9_partition_nn_weights_32x32_layer1[8] = {
+ -1.792632f, -7.322353f, -0.683386f, 0.676564f,
+ -1.488118f, -7.527719f, 1.240163f, 0.614309f,
+};
+
+static const float vp9_partition_nn_bias_32x32_layer1[1] = {
+ 4.97422546f,
+};
+
+static const NN_CONFIG vp9_partition_nnconfig_32x32 = {
+ FEATURES, // num_inputs
+ 1, // num_outputs
+ 1, // num_hidden_layers
+ {
+ 8,
+ }, // num_hidden_nodes
+ {
+ vp9_partition_nn_weights_32x32_layer0,
+ vp9_partition_nn_weights_32x32_layer1,
+ },
+ {
+ vp9_partition_nn_bias_32x32_layer0,
+ vp9_partition_nn_bias_32x32_layer1,
+ },
+};
+
+static const float vp9_partition_nn_weights_16x16_layer0[FEATURES * 8] = {
+ -1.717673f, -4.718130f, -0.125725f, -0.183427f, -0.511764f, 0.035328f,
+ 0.130891f, -3.096753f, 0.174968f, -0.188769f, -0.640796f, 1.305661f,
+ 1.700638f, -0.073806f, -4.006781f, -1.630999f, -0.064863f, -0.086410f,
+ -0.148617f, 0.172733f, -0.018619f, 2.152595f, 0.778405f, -0.156455f,
+ 0.612995f, -0.467878f, 0.152022f, -0.236183f, 0.339635f, -0.087119f,
+ -3.196610f, -1.080401f, -0.637704f, -0.059974f, 1.706298f, -0.793705f,
+ -6.399260f, 0.010624f, -0.064199f, -0.650621f, 0.338087f, -0.001531f,
+ 1.023655f, -3.700272f, -0.055281f, -0.386884f, 0.375504f, -0.898678f,
+ 0.281156f, -0.314611f, 0.863354f, -0.040582f, -0.145019f, 0.029329f,
+ -2.197880f, -0.108733f,
+};
+
+static const float vp9_partition_nn_bias_16x16_layer0[8] = {
+ 0.411516f, -2.143737f, -3.693192f, 2.123142f,
+ -1.356910f, -3.561016f, -0.765045f, -2.417082f,
+};
+
+static const float vp9_partition_nn_weights_16x16_layer1[8] = {
+ -0.619755f, -2.202391f, -4.337171f, 0.611319f,
+ 0.377677f, -4.998723f, -1.052235f, 1.949922f,
+};
+
+static const float vp9_partition_nn_bias_16x16_layer1[1] = {
+ 3.20981717f,
+};
+
+static const NN_CONFIG vp9_partition_nnconfig_16x16 = {
+ FEATURES, // num_inputs
+ 1, // num_outputs
+ 1, // num_hidden_layers
+ {
+ 8,
+ }, // num_hidden_nodes
+ {
+ vp9_partition_nn_weights_16x16_layer0,
+ vp9_partition_nn_weights_16x16_layer1,
+ },
+ {
+ vp9_partition_nn_bias_16x16_layer0,
+ vp9_partition_nn_bias_16x16_layer1,
+ },
+};
+#undef FEATURES
+
+#if CONFIG_ML_VAR_PARTITION
+#define FEATURES 6
+static const float vp9_var_part_nn_weights_64_layer0[FEATURES * 8] = {
+ -0.249572f, 0.205532f, -2.175608f, 1.094836f, -2.986370f, 0.193160f,
+ -0.143823f, 0.378511f, -1.997788f, -2.166866f, -1.930158f, -1.202127f,
+ -0.611875f, -0.506422f, -0.432487f, 0.071205f, 0.578172f, -0.154285f,
+ -0.051830f, 0.331681f, -1.457177f, -2.443546f, -2.000302f, -1.389283f,
+ 0.372084f, -0.464917f, 2.265235f, 2.385787f, 2.312722f, 2.127868f,
+ -0.403963f, -0.177860f, -0.436751f, -0.560539f, 0.254903f, 0.193976f,
+ -0.305611f, 0.256632f, 0.309388f, -0.437439f, 1.702640f, -5.007069f,
+ -0.323450f, 0.294227f, 1.267193f, 1.056601f, 0.387181f, -0.191215f,
+};
+
+static const float vp9_var_part_nn_bias_64_layer0[8] = {
+ -0.044396f, -0.938166f, 0.000000f, -0.916375f,
+ 1.242299f, 0.000000f, -0.405734f, 0.014206f,
+};
+
+static const float vp9_var_part_nn_weights_64_layer1[8] = {
+ 1.635945f, 0.979557f, 0.455315f, 1.197199f,
+ -2.251024f, -0.464953f, 1.378676f, -0.111927f,
+};
+
+static const float vp9_var_part_nn_bias_64_layer1[1] = {
+ -0.37972447f,
+};
+
+static const NN_CONFIG vp9_var_part_nnconfig_64 = {
+ FEATURES, // num_inputs
+ 1, // num_outputs
+ 1, // num_hidden_layers
+ {
+ 8,
+ }, // num_hidden_nodes
+ {
+ vp9_var_part_nn_weights_64_layer0,
+ vp9_var_part_nn_weights_64_layer1,
+ },
+ {
+ vp9_var_part_nn_bias_64_layer0,
+ vp9_var_part_nn_bias_64_layer1,
+ },
+};
+
+static const float vp9_var_part_nn_weights_32_layer0[FEATURES * 8] = {
+ 0.067243f, -0.083598f, -2.191159f, 2.726434f, -3.324013f, 3.477977f,
+ 0.323736f, -0.510199f, 2.960693f, 2.937661f, 2.888476f, 2.938315f,
+ -0.307602f, -0.503353f, -0.080725f, -0.473909f, -0.417162f, 0.457089f,
+ 0.665153f, -0.273210f, 0.028279f, 0.972220f, -0.445596f, 1.756611f,
+ -0.177892f, -0.091758f, 0.436661f, -0.521506f, 0.133786f, 0.266743f,
+ 0.637367f, -0.160084f, -1.396269f, 1.020841f, -1.112971f, 0.919496f,
+ -0.235883f, 0.651954f, 0.109061f, -0.429463f, 0.740839f, -0.962060f,
+ 0.299519f, -0.386298f, 1.550231f, 2.464915f, 1.311969f, 2.561612f,
+};
+
+static const float vp9_var_part_nn_bias_32_layer0[8] = {
+ 0.368242f, 0.736617f, 0.000000f, 0.757287f,
+ 0.000000f, 0.613248f, -0.776390f, 0.928497f,
+};
+
+static const float vp9_var_part_nn_weights_32_layer1[8] = {
+ 0.939884f, -2.420850f, -0.410489f, -0.186690f,
+ 0.063287f, -0.522011f, 0.484527f, -0.639625f,
+};
+
+static const float vp9_var_part_nn_bias_32_layer1[1] = {
+ -0.6455006f,
+};
+
+static const NN_CONFIG vp9_var_part_nnconfig_32 = {
+ FEATURES, // num_inputs
+ 1, // num_outputs
+ 1, // num_hidden_layers
+ {
+ 8,
+ }, // num_hidden_nodes
+ {
+ vp9_var_part_nn_weights_32_layer0,
+ vp9_var_part_nn_weights_32_layer1,
+ },
+ {
+ vp9_var_part_nn_bias_32_layer0,
+ vp9_var_part_nn_bias_32_layer1,
+ },
+};
+
+static const float vp9_var_part_nn_weights_16_layer0[FEATURES * 8] = {
+ 0.742567f, -0.580624f, -0.244528f, 0.331661f, -0.113949f, -0.559295f,
+ -0.386061f, 0.438653f, 1.467463f, 0.211589f, 0.513972f, 1.067855f,
+ -0.876679f, 0.088560f, -0.687483f, -0.380304f, -0.016412f, 0.146380f,
+ 0.015318f, 0.000351f, -2.764887f, 3.269717f, 2.752428f, -2.236754f,
+ 0.561539f, -0.852050f, -0.084667f, 0.202057f, 0.197049f, 0.364922f,
+ -0.463801f, 0.431790f, 1.872096f, -0.091887f, -0.055034f, 2.443492f,
+ -0.156958f, -0.189571f, -0.542424f, -0.589804f, -0.354422f, 0.401605f,
+ 0.642021f, -0.875117f, 2.040794f, 1.921070f, 1.792413f, 1.839727f,
+};
+
+static const float vp9_var_part_nn_bias_16_layer0[8] = {
+ 2.901234f, -1.940932f, -0.198970f, -0.406524f,
+ 0.059422f, -1.879207f, -0.232340f, 2.979821f,
+};
+
+static const float vp9_var_part_nn_weights_16_layer1[8] = {
+ -0.528731f, 0.375234f, -0.088422f, 0.668629f,
+ 0.870449f, 0.578735f, 0.546103f, -1.957207f,
+};
+
+static const float vp9_var_part_nn_bias_16_layer1[1] = {
+ -1.95769405f,
+};
+
+static const NN_CONFIG vp9_var_part_nnconfig_16 = {
+ FEATURES, // num_inputs
+ 1, // num_outputs
+ 1, // num_hidden_layers
+ {
+ 8,
+ }, // num_hidden_nodes
+ {
+ vp9_var_part_nn_weights_16_layer0,
+ vp9_var_part_nn_weights_16_layer1,
+ },
+ {
+ vp9_var_part_nn_bias_16_layer0,
+ vp9_var_part_nn_bias_16_layer1,
+ },
+};
+#undef FEATURES
+#endif // CONFIG_ML_VAR_PARTITION
+
+#define FEATURES 6
+#define LABELS 1
+static const float vp9_var_rd_part_nn_weights_64_layer0[FEATURES * 8] = {
+ -0.100129f, 0.128867f, -1.375086f, -2.268096f, -1.470368f, -2.296274f,
+ 0.034445f, -0.062993f, -2.151904f, 0.523215f, 1.611269f, 1.530051f,
+ 0.418182f, -1.330239f, 0.828388f, 0.386546f, -0.026188f, -0.055459f,
+ -0.474437f, 0.861295f, -2.208743f, -0.652991f, -2.985873f, -1.728956f,
+ 0.388052f, -0.420720f, 2.015495f, 1.280342f, 3.040914f, 1.760749f,
+ -0.009062f, 0.009623f, 1.579270f, -2.012891f, 1.629662f, -1.796016f,
+ -0.279782f, -0.288359f, 1.875618f, 1.639855f, 0.903020f, 0.906438f,
+ 0.553394f, -1.621589f, 0.185063f, 0.605207f, -0.133560f, 0.588689f,
+};
+
+static const float vp9_var_rd_part_nn_bias_64_layer0[8] = {
+ 0.659717f, 0.120912f, 0.329894f, -1.586385f,
+ 1.715839f, 0.085754f, 2.038774f, 0.268119f,
+};
+
+static const float vp9_var_rd_part_nn_weights_64_layer1[8 * LABELS] = {
+ -3.445586f, 2.375620f, 1.236970f, 0.804030f,
+ -2.448384f, 2.827254f, 2.291478f, 0.790252f,
+};
+
+static const float vp9_var_rd_part_nn_bias_64_layer1[LABELS] = {
+ -1.16608453f,
+};
+
+static const NN_CONFIG vp9_var_rd_part_nnconfig_64 = {
+ FEATURES, // num_inputs
+ LABELS, // num_outputs
+ 1, // num_hidden_layers
+ {
+ 8,
+ }, // num_hidden_nodes
+ {
+ vp9_var_rd_part_nn_weights_64_layer0,
+ vp9_var_rd_part_nn_weights_64_layer1,
+ },
+ {
+ vp9_var_rd_part_nn_bias_64_layer0,
+ vp9_var_rd_part_nn_bias_64_layer1,
+ },
+};
+
+static const float vp9_var_rd_part_nn_weights_32_layer0[FEATURES * 8] = {
+ 0.022420f, -0.032201f, 1.228065f, -2.767655f, 1.928743f, 0.566863f,
+ 0.459229f, 0.422048f, 0.833395f, 0.822960f, -0.232227f, 0.586895f,
+ 0.442856f, -0.018564f, 0.227672f, -1.291306f, 0.119428f, -0.776563f,
+ -0.042947f, 0.183129f, 0.592231f, 1.174859f, -0.503868f, 0.270102f,
+ -0.330537f, -0.036340f, 1.144630f, 1.783710f, 1.216929f, 2.038085f,
+ 0.373782f, -0.430258f, 1.957002f, 1.383908f, 2.012261f, 1.585693f,
+ -0.394399f, -0.337523f, -0.238335f, 0.007819f, -0.368294f, 0.437875f,
+ -0.318923f, -0.242000f, 2.276263f, 1.501432f, 0.645706f, 0.344774f,
+};
+
+static const float vp9_var_rd_part_nn_bias_32_layer0[8] = {
+ -0.023846f, -1.348117f, 1.365007f, -1.644164f,
+ 0.062992f, 1.257980f, -0.098642f, 1.388472f,
+};
+
+static const float vp9_var_rd_part_nn_weights_32_layer1[8 * LABELS] = {
+ 3.016729f, 0.622684f, -1.021302f, 1.490383f,
+ 1.702046f, -2.964618f, 0.689045f, 1.711754f,
+};
+
+static const float vp9_var_rd_part_nn_bias_32_layer1[LABELS] = {
+ -1.28798676f,
+};
+
+static const NN_CONFIG vp9_var_rd_part_nnconfig_32 = {
+ FEATURES, // num_inputs
+ LABELS, // num_outputs
+ 1, // num_hidden_layers
+ {
+ 8,
+ }, // num_hidden_nodes
+ {
+ vp9_var_rd_part_nn_weights_32_layer0,
+ vp9_var_rd_part_nn_weights_32_layer1,
+ },
+ {
+ vp9_var_rd_part_nn_bias_32_layer0,
+ vp9_var_rd_part_nn_bias_32_layer1,
+ },
+};
+
+static const float vp9_var_rd_part_nn_weights_16_layer0[FEATURES * 8] = {
+ -0.726813f, -0.026748f, 1.376946f, 1.467961f, 1.961810f, 1.690412f,
+ 0.596484f, -0.261486f, -0.310905f, -0.366311f, -1.300086f, -0.534336f,
+ 0.040520f, -0.032391f, -1.194214f, 2.438063f, -3.915334f, 1.997270f,
+ 0.673696f, -0.676393f, 1.654886f, 1.553838f, 1.129691f, 1.360201f,
+ 0.255001f, 0.336442f, -0.487759f, -0.634555f, 0.479170f, -0.110475f,
+ -0.661852f, -0.158872f, -0.350243f, -0.303957f, -0.045018f, 0.586151f,
+ -0.262463f, 0.228079f, -1.688776f, -1.594502f, -2.261078f, -1.802535f,
+ 0.034748f, -0.028476f, 2.713258f, 0.212446f, -1.529202f, -2.560178f,
+};
+
+static const float vp9_var_rd_part_nn_bias_16_layer0[8] = {
+ 0.495983f, 1.858545f, 0.162974f, 1.992247f,
+ -2.698863f, 0.110020f, 0.550830f, 0.420941f,
+};
+
+static const float vp9_var_rd_part_nn_weights_16_layer1[8 * LABELS] = {
+ 1.768409f, -1.394240f, 1.076846f, -1.762808f,
+ 1.517405f, 0.535195f, -0.426827f, 1.002272f,
+};
+
+static const float vp9_var_rd_part_nn_bias_16_layer1[LABELS] = {
+ -1.65894794f,
+};
+
+static const NN_CONFIG vp9_var_rd_part_nnconfig_16 = {
+ FEATURES, // num_inputs
+ LABELS, // num_outputs
+ 1, // num_hidden_layers
+ {
+ 8,
+ }, // num_hidden_nodes
+ {
+ vp9_var_rd_part_nn_weights_16_layer0,
+ vp9_var_rd_part_nn_weights_16_layer1,
+ },
+ {
+ vp9_var_rd_part_nn_bias_16_layer0,
+ vp9_var_rd_part_nn_bias_16_layer1,
+ },
+};
+
+static const float vp9_var_rd_part_nn_weights_8_layer0[FEATURES * 8] = {
+ -0.804900f, -1.214983f, 0.840202f, 0.686566f, 0.155804f, 0.025542f,
+ -1.244635f, -0.368403f, 0.364150f, 1.081073f, 0.552387f, 0.452715f,
+ 0.652968f, -0.293058f, 0.048967f, 0.021240f, -0.662981f, 0.424700f,
+ 0.008293f, -0.013088f, 0.747007f, -1.453907f, -1.498226f, 1.593252f,
+ -0.239557f, -0.143766f, 0.064311f, 1.320998f, -0.477411f, 0.026374f,
+ 0.730884f, -0.675124f, 0.965521f, 0.863658f, 0.809186f, 0.812280f,
+ 0.513131f, 0.185102f, 0.211354f, 0.793666f, 0.121714f, -0.015383f,
+ -0.650980f, -0.046581f, 0.911141f, 0.806319f, 0.974773f, 0.815893f,
+};
+
+static const float vp9_var_rd_part_nn_bias_8_layer0[8] = {
+ 0.176134f, 0.651308f, 2.007761f, 0.068812f,
+ 1.061517f, 1.487161f, -2.308147f, 1.099828f,
+};
+
+static const float vp9_var_rd_part_nn_weights_8_layer1[8 * LABELS] = {
+ 0.683032f, 1.326393f, -1.661539f, 1.438920f,
+ 1.118023f, -2.237380f, 1.518468f, 2.010416f,
+};
+
+static const float vp9_var_rd_part_nn_bias_8_layer1[LABELS] = {
+ -1.65423989f,
+};
+
+static const NN_CONFIG vp9_var_rd_part_nnconfig_8 = {
+ FEATURES, // num_inputs
+ LABELS, // num_outputs
+ 1, // num_hidden_layers
+ {
+ 8,
+ }, // num_hidden_nodes
+ {
+ vp9_var_rd_part_nn_weights_8_layer0,
+ vp9_var_rd_part_nn_weights_8_layer1,
+ },
+ {
+ vp9_var_rd_part_nn_bias_8_layer0,
+ vp9_var_rd_part_nn_bias_8_layer1,
+ },
+};
+#undef FEATURES
+#undef LABELS
+
+// Partition pruning model (linear).
+static const float vp9_partition_feature_mean[24] = {
+ 303501.697372f, 3042630.372158f, 24.694696f, 1.392182f,
+ 689.413511f, 162.027012f, 1.478213f, 0.0f,
+ 135382.260230f, 912738.513263f, 28.845217f, 1.515230f,
+ 544.158492f, 131.807995f, 1.436863f, 0.0f,
+ 43682.377587f, 208131.711766f, 28.084737f, 1.356677f,
+ 138.254122f, 119.522553f, 1.252322f, 0.0f,
+};
+
+static const float vp9_partition_feature_std[24] = {
+ 673689.212982f, 5996652.516628f, 0.024449f, 1.989792f,
+ 985.880847f, 0.014638f, 2.001898f, 0.0f,
+ 208798.775332f, 1812548.443284f, 0.018693f, 1.838009f,
+ 396.986910f, 0.015657f, 1.332541f, 0.0f,
+ 55888.847031f, 448587.962714f, 0.017900f, 1.904776f,
+ 98.652832f, 0.016598f, 1.320992f, 0.0f,
+};
+
+// Error tolerance: 0.01%-0.05%-0.1%
+static const float vp9_partition_linear_weights[24] = {
+ 0.111736f, 0.289977f, 0.042219f, 0.204765f, 0.120410f, -0.143863f,
+ 0.282376f, 0.847811f, 0.637161f, 0.131570f, 0.018636f, 0.202134f,
+ 0.112797f, 0.028162f, 0.182450f, 1.124367f, 0.386133f, 0.083700f,
+ 0.050028f, 0.150873f, 0.061119f, 0.109318f, 0.127255f, 0.625211f,
+};
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // VPX_VP9_ENCODER_VP9_PARTITION_MODELS_H_
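/*
 * The linear pruning model above pairs each of the 24 weights with the
 * matching mean/std entry, in three groups of 8 (presumably one group per
 * block size). A hypothetical sketch of applying it -- z-score each feature,
 * then dot with the weights; the grouping and the thresholding against the
 * listed error tolerances are assumptions, not the library's confirmed usage.
 */
static float linear_prune_score_sketch(const float *features, int group) {
  float score = 0.0f;
  int i;
  for (i = 0; i < 8; ++i) {
    const int k = group * 8 + i; /* group 0/1/2 by block size (assumed) */
    const float std = vp9_partition_feature_std[k];
    if (std > 0.0f) /* the last slot of each group is zero padding */
      score += vp9_partition_linear_weights[k] *
               (features[i] - vp9_partition_feature_mean[k]) / std;
  }
  return score;
}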
diff --git a/libvpx/vp9/encoder/vp9_picklpf.c b/libvpx/vp9/encoder/vp9_picklpf.c
index 1c2c55b9e..f3c11700f 100644
--- a/libvpx/vp9/encoder/vp9_picklpf.c
+++ b/libvpx/vp9/encoder/vp9_picklpf.c
@@ -150,7 +150,7 @@ void vp9_pick_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
VP9_COMMON *const cm = &cpi->common;
struct loopfilter *const lf = &cm->lf;
- lf->sharpness_level = cm->frame_type == KEY_FRAME ? 0 : cpi->oxcf.sharpness;
+ lf->sharpness_level = 0;
if (method == LPF_PICK_MINIMAL_LPF && lf->filter_level) {
lf->filter_level = 0;
@@ -169,14 +169,10 @@ void vp9_pick_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
case VPX_BITS_10:
filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 4060632, 20);
break;
- case VPX_BITS_12:
+ default:
+ assert(cm->bit_depth == VPX_BITS_12);
filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 16242526, 22);
break;
- default:
- assert(0 &&
- "bit_depth should be VPX_BITS_8, VPX_BITS_10 "
- "or VPX_BITS_12");
- return;
}
#else
int filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 1015158, 18);
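/*
 * ROUND_POWER_OF_TWO(value, n) in libvpx is ((value) + (1 << ((n) - 1))) >> n,
 * i.e. division by 2^n with rounding. The three filt_guess constants are the
 * same linear fit expressed at each bit depth's quantizer scale. Worked
 * example, 8-bit path with q = 100:
 *   filt_guess = (100 * 20723 + 1015158 + (1 << 17)) >> 18
 *              = 3218530 >> 18 = 12
 * so the search starts from filter level 12 and refines from there.
 */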
diff --git a/libvpx/vp9/encoder/vp9_picklpf.h b/libvpx/vp9/encoder/vp9_picklpf.h
index cecca058b..8881b44da 100644
--- a/libvpx/vp9/encoder/vp9_picklpf.h
+++ b/libvpx/vp9/encoder/vp9_picklpf.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_ENCODER_VP9_PICKLPF_H_
-#define VP9_ENCODER_VP9_PICKLPF_H_
+#ifndef VPX_VP9_ENCODER_VP9_PICKLPF_H_
+#define VPX_VP9_ENCODER_VP9_PICKLPF_H_
#ifdef __cplusplus
extern "C" {
@@ -26,4 +26,4 @@ void vp9_pick_filter_level(const struct yv12_buffer_config *sd,
} // extern "C"
#endif
-#endif // VP9_ENCODER_VP9_PICKLPF_H_
+#endif // VPX_VP9_ENCODER_VP9_PICKLPF_H_
diff --git a/libvpx/vp9/encoder/vp9_pickmode.c b/libvpx/vp9/encoder/vp9_pickmode.c
index f2f323a28..a3240513f 100644
--- a/libvpx/vp9/encoder/vp9_pickmode.c
+++ b/libvpx/vp9/encoder/vp9_pickmode.c
@@ -41,6 +41,17 @@ typedef struct {
int in_use;
} PRED_BUFFER;
+typedef struct {
+ PRED_BUFFER *best_pred;
+ PREDICTION_MODE best_mode;
+ TX_SIZE best_tx_size;
+ TX_SIZE best_intra_tx_size;
+ MV_REFERENCE_FRAME best_ref_frame;
+ MV_REFERENCE_FRAME best_second_ref_frame;
+ uint8_t best_mode_skip_txfm;
+ INTERP_FILTER best_pred_filter;
+} BEST_PICKMODE;
+
static const int pos_shift_16x16[4][4] = {
{ 9, 10, 13, 14 }, { 11, 12, 15, 16 }, { 17, 18, 21, 22 }, { 19, 20, 23, 24 }
};
@@ -222,13 +233,22 @@ static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x,
}
if (rv && search_subpel) {
- int subpel_force_stop = cpi->sf.mv.subpel_force_stop;
- if (use_base_mv && cpi->sf.base_mv_aggressive) subpel_force_stop = 2;
+ SUBPEL_FORCE_STOP subpel_force_stop = cpi->sf.mv.subpel_force_stop;
+ if (use_base_mv && cpi->sf.base_mv_aggressive) subpel_force_stop = HALF_PEL;
+ if (cpi->sf.mv.enable_adaptive_subpel_force_stop) {
+ const int mv_thresh = cpi->sf.mv.adapt_subpel_force_stop.mv_thresh;
+ if (abs(tmp_mv->as_mv.row) >= mv_thresh ||
+ abs(tmp_mv->as_mv.col) >= mv_thresh)
+ subpel_force_stop = cpi->sf.mv.adapt_subpel_force_stop.force_stop_above;
+ else
+ subpel_force_stop = cpi->sf.mv.adapt_subpel_force_stop.force_stop_below;
+ }
cpi->find_fractional_mv_step(
x, &tmp_mv->as_mv, &ref_mv, cpi->common.allow_high_precision_mv,
x->errorperbit, &cpi->fn_ptr[bsize], subpel_force_stop,
- cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
- x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, 0, 0);
+ cpi->sf.mv.subpel_search_level, cond_cost_list(cpi, cost_list),
+ x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, 0, 0,
+ cpi->sf.use_accurate_subpel_search);
*rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv, x->nmvjointcost,
x->mvcost, MV_COST_WEIGHT);
}
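/*
 * The adaptive rule added above trades subpel precision for speed based on
 * motion magnitude. A sketch of the decision in isolation; force_stop_above /
 * force_stop_below come from the speed features, and of the SUBPEL_FORCE_STOP
 * enumerators only HALF_PEL is confirmed by this patch:
 */
static SUBPEL_FORCE_STOP adapt_force_stop_sketch(const MV *mv, int mv_thresh,
                                                 SUBPEL_FORCE_STOP above,
                                                 SUBPEL_FORCE_STOP below) {
  /* fast-moving blocks: subpel accuracy matters less, so stop earlier */
  return (abs(mv->row) >= mv_thresh || abs(mv->col) >= mv_thresh) ? above
                                                                  : below;
}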
@@ -326,6 +346,35 @@ static int ac_thr_factor(const int speed, const int width, const int height,
return 1;
}
+static TX_SIZE calculate_tx_size(VP9_COMP *const cpi, BLOCK_SIZE bsize,
+ MACROBLOCKD *const xd, unsigned int var,
+ unsigned int sse, int64_t ac_thr) {
+ TX_SIZE tx_size;
+ if (cpi->common.tx_mode == TX_MODE_SELECT) {
+ if (sse > (var << 2))
+ tx_size = VPXMIN(max_txsize_lookup[bsize],
+ tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
+ else
+ tx_size = TX_8X8;
+
+ if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
+ cyclic_refresh_segment_id_boosted(xd->mi[0]->segment_id))
+ tx_size = TX_8X8;
+ else if (tx_size > TX_16X16)
+ tx_size = TX_16X16;
+
+ // For screen content, force 4X4 tx_size over 8X8 for large variance.
+ if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && tx_size == TX_8X8 &&
+ bsize <= BLOCK_16X16 && var > (ac_thr << 6))
+ tx_size = TX_4X4;
+ } else {
+ tx_size = VPXMIN(max_txsize_lookup[bsize],
+ tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
+ }
+
+ return tx_size;
+}
+
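/*
 * On the sse > (var << 2) test in calculate_tx_size(): block variance is
 * var = sse - n * mean^2, so sse far exceeding var means the residual is
 * mostly DC energy, which a large transform codes cheaply. For example,
 * sse = 1000, var = 200: 1000 > (200 << 2) = 800, so the largest allowed
 * tx_size is chosen; otherwise the code falls back to TX_8X8.
 */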
static void model_rd_for_sb_y_large(VP9_COMP *cpi, BLOCK_SIZE bsize,
MACROBLOCK *x, MACROBLOCKD *xd,
int *out_rate_sum, int64_t *out_dist_sum,
@@ -342,7 +391,7 @@ static void model_rd_for_sb_y_large(VP9_COMP *cpi, BLOCK_SIZE bsize,
struct macroblockd_plane *const pd = &xd->plane[0];
const uint32_t dc_quant = pd->dequant[0];
const uint32_t ac_quant = pd->dequant[1];
- const int64_t dc_thr = dc_quant * dc_quant >> 6;
+ int64_t dc_thr = dc_quant * dc_quant >> 6;
int64_t ac_thr = ac_quant * ac_quant >> 6;
unsigned int var;
int sum;
@@ -386,26 +435,16 @@ static void model_rd_for_sb_y_large(VP9_COMP *cpi, BLOCK_SIZE bsize,
cpi->common.height, abs(sum) >> (bw + bh));
#endif
- if (cpi->common.tx_mode == TX_MODE_SELECT) {
- if (sse > (var << 2))
- tx_size = VPXMIN(max_txsize_lookup[bsize],
- tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
- else
- tx_size = TX_8X8;
-
- if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
- cyclic_refresh_segment_id_boosted(xd->mi[0]->segment_id))
- tx_size = TX_8X8;
- else if (tx_size > TX_16X16)
- tx_size = TX_16X16;
- } else {
- tx_size = VPXMIN(max_txsize_lookup[bsize],
- tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
- }
-
- assert(tx_size >= TX_8X8);
+ tx_size = calculate_tx_size(cpi, bsize, xd, var, sse, ac_thr);
+ // The code below for setting the skip flag assumes a transform size of at
+ // least 8x8, so force this lower limit on the transform.
+ if (tx_size < TX_8X8) tx_size = TX_8X8;
xd->mi[0]->tx_size = tx_size;
+ if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && x->zero_temp_sad_source &&
+ x->source_variance == 0)
+ dc_thr = dc_thr << 1;
+
// Evaluate if the partition block is a skippable block in Y plane.
{
unsigned int sse16x16[16] = { 0 };
@@ -563,24 +602,7 @@ static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x,
*var_y = var;
*sse_y = sse;
- if (cpi->common.tx_mode == TX_MODE_SELECT) {
- if (sse > (var << 2))
- xd->mi[0]->tx_size =
- VPXMIN(max_txsize_lookup[bsize],
- tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
- else
- xd->mi[0]->tx_size = TX_8X8;
-
- if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
- cyclic_refresh_segment_id_boosted(xd->mi[0]->segment_id))
- xd->mi[0]->tx_size = TX_8X8;
- else if (xd->mi[0]->tx_size > TX_16X16)
- xd->mi[0]->tx_size = TX_16X16;
- } else {
- xd->mi[0]->tx_size =
- VPXMIN(max_txsize_lookup[bsize],
- tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
- }
+ xd->mi[0]->tx_size = calculate_tx_size(cpi, bsize, xd, var, sse, ac_thr);
// Evaluate if the partition block is a skippable block in Y plane.
{
@@ -726,13 +748,13 @@ static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *this_rdc,
qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan,
scan_order->iscan);
break;
- case TX_4X4:
+ default:
+ assert(tx_size == TX_4X4);
x->fwd_txfm4x4(src_diff, coeff, diff_stride);
vp9_quantize_fp(coeff, 16, x->skip_block, p->round_fp, p->quant_fp,
qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan,
scan_order->iscan);
break;
- default: assert(0); break;
}
*skippable &= (*eob == 0);
eob_cost += 1;
@@ -876,6 +898,7 @@ static void encode_breakout_test(
// Skipping threshold for dc.
unsigned int thresh_dc;
int motion_low = 1;
+
if (cpi->use_svc && ref_frame == GOLDEN_FRAME) return;
if (mi->mv[0].as_mv.row > 64 || mi->mv[0].as_mv.row < -64 ||
mi->mv[0].as_mv.col > 64 || mi->mv[0].as_mv.col < -64)
@@ -1292,18 +1315,16 @@ static void vp9_pickmode_ctx_den_update(
VP9_PICKMODE_CTX_DEN *ctx_den, int64_t zero_last_cost_orig,
int ref_frame_cost[MAX_REF_FRAMES],
int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES], int reuse_inter_pred,
- TX_SIZE best_tx_size, PREDICTION_MODE best_mode,
- MV_REFERENCE_FRAME best_ref_frame, INTERP_FILTER best_pred_filter,
- uint8_t best_mode_skip_txfm) {
+ BEST_PICKMODE *bp) {
ctx_den->zero_last_cost_orig = zero_last_cost_orig;
ctx_den->ref_frame_cost = ref_frame_cost;
ctx_den->frame_mv = frame_mv;
ctx_den->reuse_inter_pred = reuse_inter_pred;
- ctx_den->best_tx_size = best_tx_size;
- ctx_den->best_mode = best_mode;
- ctx_den->best_ref_frame = best_ref_frame;
- ctx_den->best_pred_filter = best_pred_filter;
- ctx_den->best_mode_skip_txfm = best_mode_skip_txfm;
+ ctx_den->best_tx_size = bp->best_tx_size;
+ ctx_den->best_mode = bp->best_mode;
+ ctx_den->best_ref_frame = bp->best_ref_frame;
+ ctx_den->best_pred_filter = bp->best_pred_filter;
+ ctx_den->best_mode_skip_txfm = bp->best_mode_skip_txfm;
}
static void recheck_zeromv_after_denoising(
@@ -1332,6 +1353,7 @@ static void recheck_zeromv_after_denoising(
mi->ref_frame[1] = NONE;
mi->mv[0].as_int = 0;
mi->interp_filter = EIGHTTAP;
+ if (cpi->sf.default_interp_filter == BILINEAR) mi->interp_filter = BILINEAR;
xd->plane[0].pre[0] = yv12_mb[LAST_FRAME][0];
vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist, &var_y, &sse_y);
@@ -1416,27 +1438,200 @@ static INLINE int get_force_skip_low_temp_var(uint8_t *variance_low, int mi_row,
return force_skip_low_temp_var;
}
+static void search_filter_ref(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *this_rdc,
+ int mi_row, int mi_col, PRED_BUFFER *tmp,
+ BLOCK_SIZE bsize, int reuse_inter_pred,
+ PRED_BUFFER **this_mode_pred, unsigned int *var_y,
+ unsigned int *sse_y) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MODE_INFO *const mi = xd->mi[0];
+ struct macroblockd_plane *const pd = &xd->plane[0];
+ const int bw = num_4x4_blocks_wide_lookup[bsize] << 2;
+
+ int pf_rate[3] = { 0 };
+ int64_t pf_dist[3] = { 0 };
+ int curr_rate[3] = { 0 };
+ unsigned int pf_var[3] = { 0 };
+ unsigned int pf_sse[3] = { 0 };
+ TX_SIZE pf_tx_size[3] = { 0 };
+ int64_t best_cost = INT64_MAX;
+ INTERP_FILTER best_filter = SWITCHABLE, filter;
+ PRED_BUFFER *current_pred = *this_mode_pred;
+ uint8_t skip_txfm = SKIP_TXFM_NONE;
+
+ for (filter = EIGHTTAP; filter <= EIGHTTAP_SMOOTH; ++filter) {
+ int64_t cost;
+ mi->interp_filter = filter;
+ vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
+ model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[filter], &pf_dist[filter],
+ &pf_var[filter], &pf_sse[filter]);
+ curr_rate[filter] = pf_rate[filter];
+ pf_rate[filter] += vp9_get_switchable_rate(cpi, xd);
+ cost = RDCOST(x->rdmult, x->rddiv, pf_rate[filter], pf_dist[filter]);
+ pf_tx_size[filter] = mi->tx_size;
+ if (cost < best_cost) {
+ best_filter = filter;
+ best_cost = cost;
+ skip_txfm = x->skip_txfm[0];
+
+ if (reuse_inter_pred) {
+ if (*this_mode_pred != current_pred) {
+ free_pred_buffer(*this_mode_pred);
+ *this_mode_pred = current_pred;
+ }
+ current_pred = &tmp[get_pred_buffer(tmp, 3)];
+ pd->dst.buf = current_pred->data;
+ pd->dst.stride = bw;
+ }
+ }
+ }
+
+ if (reuse_inter_pred && *this_mode_pred != current_pred)
+ free_pred_buffer(current_pred);
+
+ mi->interp_filter = best_filter;
+ mi->tx_size = pf_tx_size[best_filter];
+ this_rdc->rate = curr_rate[best_filter];
+ this_rdc->dist = pf_dist[best_filter];
+ *var_y = pf_var[best_filter];
+ *sse_y = pf_sse[best_filter];
+ x->skip_txfm[0] = skip_txfm;
+ if (reuse_inter_pred) {
+ pd->dst.buf = (*this_mode_pred)->data;
+ pd->dst.stride = (*this_mode_pred)->stride;
+ }
+}
+
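/*
 * search_filter_ref() above loops over the candidate interpolation filters
 * (EIGHTTAP through EIGHTTAP_SMOOTH) and keeps the one with the lowest
 * rate-distortion cost. Conceptually (libvpx's RDCOST macro is fixed-point;
 * this is illustration only):
 *   J(filter) = lambda * (rate + switchable_filter_rate) + distortion
 * The winner's tx_size and skip_txfm state are restored along with it.
 */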
+static int search_new_mv(VP9_COMP *cpi, MACROBLOCK *x,
+ int_mv frame_mv[][MAX_REF_FRAMES],
+ MV_REFERENCE_FRAME ref_frame, int gf_temporal_ref,
+ BLOCK_SIZE bsize, int mi_row, int mi_col,
+ int best_pred_sad, int *rate_mv,
+ unsigned int best_sse_sofar, RD_COST *best_rdc) {
+ SVC *const svc = &cpi->svc;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MODE_INFO *const mi = xd->mi[0];
+ SPEED_FEATURES *const sf = &cpi->sf;
+
+ if (ref_frame > LAST_FRAME && gf_temporal_ref &&
+ cpi->oxcf.rc_mode == VPX_CBR) {
+ int tmp_sad;
+ uint32_t dis;
+ int cost_list[5] = { INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX };
+
+ if (bsize < BLOCK_16X16) return -1;
+
+ tmp_sad = vp9_int_pro_motion_estimation(
+ cpi, x, bsize, mi_row, mi_col,
+ &x->mbmi_ext->ref_mvs[ref_frame][0].as_mv);
+
+ if (tmp_sad > x->pred_mv_sad[LAST_FRAME]) return -1;
+ if (tmp_sad + (num_pels_log2_lookup[bsize] << 4) > best_pred_sad) return -1;
+
+ frame_mv[NEWMV][ref_frame].as_int = mi->mv[0].as_int;
+ *rate_mv = vp9_mv_bit_cost(&frame_mv[NEWMV][ref_frame].as_mv,
+ &x->mbmi_ext->ref_mvs[ref_frame][0].as_mv,
+ x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
+ frame_mv[NEWMV][ref_frame].as_mv.row >>= 3;
+ frame_mv[NEWMV][ref_frame].as_mv.col >>= 3;
+
+ cpi->find_fractional_mv_step(
+ x, &frame_mv[NEWMV][ref_frame].as_mv,
+ &x->mbmi_ext->ref_mvs[ref_frame][0].as_mv,
+ cpi->common.allow_high_precision_mv, x->errorperbit,
+ &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
+ cpi->sf.mv.subpel_search_level, cond_cost_list(cpi, cost_list),
+ x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref_frame], NULL, 0, 0,
+ cpi->sf.use_accurate_subpel_search);
+ } else if (svc->use_base_mv && svc->spatial_layer_id) {
+ if (frame_mv[NEWMV][ref_frame].as_int != INVALID_MV) {
+ const int pre_stride = xd->plane[0].pre[0].stride;
+ unsigned int base_mv_sse = UINT_MAX;
+ int scale = (cpi->rc.avg_frame_low_motion > 60) ? 2 : 4;
+ const uint8_t *const pre_buf =
+ xd->plane[0].pre[0].buf +
+ (frame_mv[NEWMV][ref_frame].as_mv.row >> 3) * pre_stride +
+ (frame_mv[NEWMV][ref_frame].as_mv.col >> 3);
+ cpi->fn_ptr[bsize].vf(x->plane[0].src.buf, x->plane[0].src.stride,
+ pre_buf, pre_stride, &base_mv_sse);
+
+ // Exit NEWMV search if base_mv is (0,0) and bsize < BLOCK_16X16,
+ // for SVC encoding.
+ if (cpi->use_svc && svc->use_base_mv && bsize < BLOCK_16X16 &&
+ frame_mv[NEWMV][ref_frame].as_mv.row == 0 &&
+ frame_mv[NEWMV][ref_frame].as_mv.col == 0)
+ return -1;
+
+ // Exit NEWMV search if base_mv_sse is large.
+ if (sf->base_mv_aggressive && base_mv_sse > (best_sse_sofar << scale))
+ return -1;
+ if (base_mv_sse < (best_sse_sofar << 1)) {
+ // Base layer mv is good.
+ // Exit NEWMV search if the base_mv is (0, 0) and sse is low, since
+ // (0, 0) mode is already tested.
+ unsigned int base_mv_sse_normalized =
+ base_mv_sse >>
+ (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
+ if (sf->base_mv_aggressive && base_mv_sse <= best_sse_sofar &&
+ base_mv_sse_normalized < 400 &&
+ frame_mv[NEWMV][ref_frame].as_mv.row == 0 &&
+ frame_mv[NEWMV][ref_frame].as_mv.col == 0)
+ return -1;
+ if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
+ &frame_mv[NEWMV][ref_frame], rate_mv,
+ best_rdc->rdcost, 1)) {
+ return -1;
+ }
+ } else if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
+ &frame_mv[NEWMV][ref_frame], rate_mv,
+ best_rdc->rdcost, 0)) {
+ return -1;
+ }
+ } else if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
+ &frame_mv[NEWMV][ref_frame], rate_mv,
+ best_rdc->rdcost, 0)) {
+ return -1;
+ }
+ } else if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
+ &frame_mv[NEWMV][ref_frame], rate_mv,
+ best_rdc->rdcost, 0)) {
+ return -1;
+ }
+
+ return 0;
+}
+
+static INLINE void init_best_pickmode(BEST_PICKMODE *bp) {
+ bp->best_mode = ZEROMV;
+ bp->best_ref_frame = LAST_FRAME;
+ bp->best_tx_size = TX_SIZES;
+ bp->best_intra_tx_size = TX_SIZES;
+ bp->best_pred_filter = EIGHTTAP;
+ bp->best_mode_skip_txfm = SKIP_TXFM_NONE;
+ bp->best_second_ref_frame = NONE;
+ bp->best_pred = NULL;
+}
+
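/*
 * BEST_PICKMODE bundles what used to be half a dozen scattered best_* locals
 * so that helpers (e.g. vp9_pickmode_ctx_den_update() below) can take one
 * pointer. Its intended life cycle in vp9_pick_inter_mode(), as a sketch:
 *   BEST_PICKMODE bp;
 *   init_best_pickmode(&bp);          // defaults: ZEROMV on LAST_FRAME
 *   // for each candidate mode/ref: on a better rd cost,
 *   //   bp.best_mode = this_mode; bp.best_ref_frame = ref_frame; ...
 *   // finally commit the winner:
 *   mi->mode = bp.best_mode; mi->tx_size = bp.best_tx_size; ...
 */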
void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
int mi_row, int mi_col, RD_COST *rd_cost,
BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) {
VP9_COMMON *const cm = &cpi->common;
SPEED_FEATURES *const sf = &cpi->sf;
- const SVC *const svc = &cpi->svc;
+ SVC *const svc = &cpi->svc;
MACROBLOCKD *const xd = &x->e_mbd;
MODE_INFO *const mi = xd->mi[0];
struct macroblockd_plane *const pd = &xd->plane[0];
- PREDICTION_MODE best_mode = ZEROMV;
- MV_REFERENCE_FRAME ref_frame, best_ref_frame = LAST_FRAME;
+
+ BEST_PICKMODE best_pickmode;
+
+ MV_REFERENCE_FRAME ref_frame;
MV_REFERENCE_FRAME usable_ref_frame, second_ref_frame;
- TX_SIZE best_tx_size = TX_SIZES;
- INTERP_FILTER best_pred_filter = EIGHTTAP;
int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
uint8_t mode_checked[MB_MODE_COUNT][MAX_REF_FRAMES];
struct buf_2d yv12_mb[4][MAX_MB_PLANE];
static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
VP9_ALT_FLAG };
RD_COST this_rdc, best_rdc;
- uint8_t skip_txfm = SKIP_TXFM_NONE, best_mode_skip_txfm = SKIP_TXFM_NONE;
// var_y and sse_y are saved to be used in skipping checking
unsigned int var_y = UINT_MAX;
unsigned int sse_y = UINT_MAX;
@@ -1472,7 +1667,6 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
DECLARE_ALIGNED(16, uint16_t, pred_buf_16[3 * 64 * 64]);
#endif
struct buf_2d orig_dst = pd->dst;
- PRED_BUFFER *best_pred = NULL;
PRED_BUFFER *this_mode_pred = NULL;
const int pixels_in_block = bh * bw;
int reuse_inter_pred = cpi->sf.reuse_inter_pred_sby && ctx->pred_pixel_ready;
@@ -1488,22 +1682,79 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
int skip_ref_find_pred[4] = { 0 };
unsigned int sse_zeromv_normalized = UINT_MAX;
unsigned int best_sse_sofar = UINT_MAX;
- unsigned int thresh_svc_skip_golden = 500;
+ int gf_temporal_ref = 0;
#if CONFIG_VP9_TEMPORAL_DENOISING
VP9_PICKMODE_CTX_DEN ctx_den;
int64_t zero_last_cost_orig = INT64_MAX;
int denoise_svc_pickmode = 1;
#endif
INTERP_FILTER filter_gf_svc = EIGHTTAP;
- MV_REFERENCE_FRAME best_second_ref_frame = NONE;
+ MV_REFERENCE_FRAME inter_layer_ref = GOLDEN_FRAME;
+ const struct segmentation *const seg = &cm->seg;
int comp_modes = 0;
int num_inter_modes = (cpi->use_svc) ? RT_INTER_MODES_SVC : RT_INTER_MODES;
int flag_svc_subpel = 0;
int svc_mv_col = 0;
int svc_mv_row = 0;
+ int no_scaling = 0;
+ unsigned int thresh_svc_skip_golden = 500;
+ int scene_change_detected =
+ cpi->rc.high_source_sad ||
+ (cpi->use_svc && cpi->svc.high_source_sad_superframe);
- init_ref_frame_cost(cm, xd, ref_frame_cost);
+ init_best_pickmode(&best_pickmode);
+
+ x->encode_breakout = seg->enabled
+ ? cpi->segment_encode_breakout[mi->segment_id]
+ : cpi->encode_breakout;
+
+ x->source_variance = UINT_MAX;
+ if (cpi->sf.default_interp_filter == BILINEAR) {
+ best_pickmode.best_pred_filter = BILINEAR;
+ filter_gf_svc = BILINEAR;
+ }
+ if (cpi->use_svc && svc->spatial_layer_id > 0) {
+ int layer =
+ LAYER_IDS_TO_IDX(svc->spatial_layer_id - 1, svc->temporal_layer_id,
+ svc->number_temporal_layers);
+ LAYER_CONTEXT *const lc = &svc->layer_context[layer];
+ if (lc->scaling_factor_num == lc->scaling_factor_den) no_scaling = 1;
+ }
+ if (svc->spatial_layer_id > 0 &&
+ (svc->high_source_sad_superframe || no_scaling))
+ thresh_svc_skip_golden = 0;
+ // Lower the skip threshold if the lower spatial layer is better quality
+ // relative to the current layer.
+ else if (svc->spatial_layer_id > 0 && cm->base_qindex > 150 &&
+ cm->base_qindex > svc->lower_layer_qindex + 15)
+ thresh_svc_skip_golden = 100;
+ // Increase the skip threshold if the lower spatial layer is lower quality
+ // relative to the current layer.
+ else if (svc->spatial_layer_id > 0 && cm->base_qindex < 140 &&
+ cm->base_qindex < svc->lower_layer_qindex - 20)
+ thresh_svc_skip_golden = 1000;
+
+ if (!cpi->use_svc ||
+ (svc->use_gf_temporal_ref_current_layer &&
+ !svc->layer_context[svc->temporal_layer_id].is_key_frame)) {
+ struct scale_factors *const sf_last = &cm->frame_refs[LAST_FRAME - 1].sf;
+ struct scale_factors *const sf_golden =
+ &cm->frame_refs[GOLDEN_FRAME - 1].sf;
+ gf_temporal_ref = 1;
+ // For temporal long-term prediction, check that the golden reference
+ // is the same scale as the last reference; otherwise disable it.
+ if ((sf_last->x_scale_fp != sf_golden->x_scale_fp) ||
+ (sf_last->y_scale_fp != sf_golden->y_scale_fp)) {
+ gf_temporal_ref = 0;
+ } else {
+ if (cpi->rc.avg_frame_low_motion > 70)
+ thresh_svc_skip_golden = 500;
+ else
+ thresh_svc_skip_golden = 0;
+ }
+ }
+ init_ref_frame_cost(cm, xd, ref_frame_cost);
memset(&mode_checked[0][0], 0, MB_MODE_COUNT * MAX_REF_FRAMES);
if (reuse_inter_pred) {
@@ -1532,12 +1783,13 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
// filter_ref, we use a less strict condition on assigning filter_ref.
// This is to reduce the probability of entering the flow of not assigning
// filter_ref and then skipping the filter search.
- if (xd->above_mi && is_inter_block(xd->above_mi))
- filter_ref = xd->above_mi->interp_filter;
- else if (xd->left_mi && is_inter_block(xd->left_mi))
- filter_ref = xd->left_mi->interp_filter;
- else
- filter_ref = cm->interp_filter;
+ filter_ref = cm->interp_filter;
+ if (cpi->sf.default_interp_filter != BILINEAR) {
+ if (xd->above_mi && is_inter_block(xd->above_mi))
+ filter_ref = xd->above_mi->interp_filter;
+ else if (xd->left_mi && is_inter_block(xd->left_mi))
+ filter_ref = xd->left_mi->interp_filter;
+ }
// initialize mode decisions
vp9_rd_cost_reset(&best_rdc);
@@ -1558,23 +1810,23 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
#endif // CONFIG_VP9_HIGHBITDEPTH
x->source_variance =
vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
+
+ if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && mi->segment_id > 0 &&
+ x->zero_temp_sad_source && x->source_variance == 0) {
+ mi->segment_id = 0;
+ vp9_init_plane_quantizers(cpi, x);
+ }
}
#if CONFIG_VP9_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity > 0) {
- if (cpi->use_svc) {
- int layer = LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id,
- cpi->svc.temporal_layer_id,
- cpi->svc.number_temporal_layers);
- LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer];
- denoise_svc_pickmode = denoise_svc(cpi) && !lc->is_key_frame;
- }
+ if (cpi->use_svc) denoise_svc_pickmode = vp9_denoise_svc_non_key(cpi);
if (cpi->denoiser.denoising_level > kDenLowLow && denoise_svc_pickmode)
vp9_denoiser_reset_frame_stats(ctx);
}
#endif
- if (cpi->rc.frames_since_golden == 0 && !cpi->use_svc &&
+ if (cpi->rc.frames_since_golden == 0 && gf_temporal_ref &&
!cpi->rc.alt_ref_gf_group && !cpi->rc.last_frame_is_src_altref) {
usable_ref_frame = LAST_FRAME;
} else {
@@ -1601,14 +1853,20 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
// For svc mode, on spatial_layer_id > 0: if the reference has different scale
// constrain the inter mode to only test zero motion.
if (cpi->use_svc && svc->force_zero_mode_spatial_ref &&
- cpi->svc.spatial_layer_id > 0) {
+ svc->spatial_layer_id > 0 && !gf_temporal_ref) {
if (cpi->ref_frame_flags & flag_list[LAST_FRAME]) {
struct scale_factors *const sf = &cm->frame_refs[LAST_FRAME - 1].sf;
- if (vp9_is_scaled(sf)) svc_force_zero_mode[LAST_FRAME - 1] = 1;
+ if (vp9_is_scaled(sf)) {
+ svc_force_zero_mode[LAST_FRAME - 1] = 1;
+ inter_layer_ref = LAST_FRAME;
+ }
}
if (cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) {
struct scale_factors *const sf = &cm->frame_refs[GOLDEN_FRAME - 1].sf;
- if (vp9_is_scaled(sf)) svc_force_zero_mode[GOLDEN_FRAME - 1] = 1;
+ if (vp9_is_scaled(sf)) {
+ svc_force_zero_mode[GOLDEN_FRAME - 1] = 1;
+ inter_layer_ref = GOLDEN_FRAME;
+ }
}
}
@@ -1624,6 +1882,10 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
}
}
+ if (sf->disable_golden_ref && (x->content_state_sb != kVeryHighSad ||
+ cpi->rc.avg_frame_low_motion < 60))
+ usable_ref_frame = LAST_FRAME;
+
if (!((cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) &&
!svc_force_zero_mode[GOLDEN_FRAME - 1] && !force_skip_low_temp_var))
use_golden_nonzeromv = 0;
@@ -1638,7 +1900,21 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
cpi->sf.use_compound_nonrd_pickmode && usable_ref_frame == ALTREF_FRAME)
comp_modes = 2;
+ // If the segment reference frame feature is enabled and set to the GOLDEN
+ // reference, make sure we don't skip checking GOLDEN; this prevents the
+ // possibility of not picking any mode.
+ if (segfeature_active(seg, mi->segment_id, SEG_LVL_REF_FRAME) &&
+ get_segdata(seg, mi->segment_id, SEG_LVL_REF_FRAME) == GOLDEN_FRAME) {
+ usable_ref_frame = GOLDEN_FRAME;
+ skip_ref_find_pred[GOLDEN_FRAME] = 0;
+ thresh_svc_skip_golden = 0;
+ }
+
for (ref_frame = LAST_FRAME; ref_frame <= usable_ref_frame; ++ref_frame) {
+ // Skip find_predictor if the reference frame is not in the
+ // ref_frame_flags (i.e., not used as a reference for this frame).
+ skip_ref_find_pred[ref_frame] =
+ !(cpi->ref_frame_flags & flag_list[ref_frame]);
if (!skip_ref_find_pred[ref_frame]) {
find_predictors(cpi, x, ref_frame, frame_mv, const_motion,
&ref_frame_skip_mask, flag_list, tile_data, mi_row,
@@ -1652,11 +1928,12 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
// Set the flag_svc_subpel to 1 for SVC if the lower spatial layer used
// an averaging filter for downsampling (phase = 8). If so, we will test
- // a nonzero motion mode on the spatial (goldeen) reference.
+ // a nonzero motion mode on the spatial reference.
// The nonzero motion is half pixel shifted to left and top (-4, -4).
- if (cpi->use_svc && cpi->svc.spatial_layer_id > 0 &&
- svc_force_zero_mode[GOLDEN_FRAME - 1] &&
- cpi->svc.downsample_filter_phase[cpi->svc.spatial_layer_id - 1] == 8) {
+ if (cpi->use_svc && svc->spatial_layer_id > 0 &&
+ svc_force_zero_mode[inter_layer_ref - 1] &&
+ svc->downsample_filter_phase[svc->spatial_layer_id - 1] == 8 &&
+ !gf_temporal_ref) {
svc_mv_col = -4;
svc_mv_row = -4;
flag_svc_subpel = 1;
@@ -1675,7 +1952,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
int inter_mv_mode = 0;
int skip_this_mv = 0;
int comp_pred = 0;
- int force_gf_mv = 0;
+ int force_mv_inter_layer = 0;
PREDICTION_MODE this_mode;
second_ref_frame = NONE;
@@ -1699,8 +1976,19 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
if (ref_frame > usable_ref_frame) continue;
if (skip_ref_find_pred[ref_frame]) continue;
- if (flag_svc_subpel && ref_frame == GOLDEN_FRAME) {
- force_gf_mv = 1;
+ if (svc->previous_frame_is_intra_only) {
+ if (ref_frame != LAST_FRAME || frame_mv[this_mode][ref_frame].as_int != 0)
+ continue;
+ }
+
+ // If the segment reference frame feature is enabled, skip this mode if the
+ // current ref frame is not allowed.
+ if (segfeature_active(seg, mi->segment_id, SEG_LVL_REF_FRAME) &&
+ get_segdata(seg, mi->segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame)
+ continue;
+
+ if (flag_svc_subpel && ref_frame == inter_layer_ref) {
+ force_mv_inter_layer = 1;
// Only test mode if NEARESTMV/NEARMV is (svc_mv_col, svc_mv_row),
// otherwise set NEWMV to (svc_mv_col, svc_mv_row).
if (this_mode == NEWMV) {
@@ -1713,7 +2001,6 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
}
if (comp_pred) {
- const struct segmentation *const seg = &cm->seg;
if (!cpi->allow_comp_inter_inter) continue;
// Skip compound inter modes if ARF is not available.
if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) continue;
@@ -1728,8 +2015,12 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
sse_zeromv_normalized < thresh_svc_skip_golden)
continue;
+ if (!(cpi->ref_frame_flags & flag_list[ref_frame])) continue;
+
if (sf->short_circuit_flat_blocks && x->source_variance == 0 &&
- this_mode != NEARESTMV) {
+ (frame_mv[this_mode][ref_frame].as_int != 0 ||
+ (cpi->oxcf.content == VP9E_CONTENT_SCREEN && !svc->spatial_layer_id &&
+ !x->zero_temp_sad_source))) {
continue;
}
@@ -1759,14 +2050,13 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
continue;
}
- if (!(cpi->ref_frame_flags & flag_list[ref_frame])) continue;
-
if (const_motion[ref_frame] && this_mode == NEARMV) continue;
// Skip non-zeromv mode search for golden frame if force_skip_low_temp_var
// is set. If nearestmv for golden frame is 0, zeromv mode will be skipped
// later.
- if (!force_gf_mv && force_skip_low_temp_var && ref_frame == GOLDEN_FRAME &&
+ if (!force_mv_inter_layer && force_skip_low_temp_var &&
+ ref_frame == GOLDEN_FRAME &&
frame_mv[this_mode][ref_frame].as_int != 0) {
continue;
}
@@ -1780,34 +2070,39 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
}
if (cpi->use_svc) {
- if (!force_gf_mv && svc_force_zero_mode[ref_frame - 1] &&
+ if (!force_mv_inter_layer && svc_force_zero_mode[ref_frame - 1] &&
frame_mv[this_mode][ref_frame].as_int != 0)
continue;
}
- if (sf->reference_masking &&
- !(frame_mv[this_mode][ref_frame].as_int == 0 &&
- ref_frame == LAST_FRAME)) {
- if (usable_ref_frame < ALTREF_FRAME) {
- if (!force_skip_low_temp_var && usable_ref_frame > LAST_FRAME) {
- i = (ref_frame == LAST_FRAME) ? GOLDEN_FRAME : LAST_FRAME;
- if ((cpi->ref_frame_flags & flag_list[i]))
- if (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[i] << 1))
- ref_frame_skip_mask |= (1 << ref_frame);
+ // Disable this drop-out case if the ref frame segment-level feature is
+ // enabled for this segment; this prevents the possibility of ending up
+ // unable to pick any mode.
+ if (!segfeature_active(seg, mi->segment_id, SEG_LVL_REF_FRAME)) {
+ if (sf->reference_masking &&
+ !(frame_mv[this_mode][ref_frame].as_int == 0 &&
+ ref_frame == LAST_FRAME)) {
+ if (usable_ref_frame < ALTREF_FRAME) {
+ if (!force_skip_low_temp_var && usable_ref_frame > LAST_FRAME) {
+ i = (ref_frame == LAST_FRAME) ? GOLDEN_FRAME : LAST_FRAME;
+ if ((cpi->ref_frame_flags & flag_list[i]))
+ if (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[i] << 1))
+ ref_frame_skip_mask |= (1 << ref_frame);
+ }
+ } else if (!cpi->rc.is_src_frame_alt_ref &&
+ !(frame_mv[this_mode][ref_frame].as_int == 0 &&
+ ref_frame == ALTREF_FRAME)) {
+ int ref1 = (ref_frame == GOLDEN_FRAME) ? LAST_FRAME : GOLDEN_FRAME;
+ int ref2 = (ref_frame == ALTREF_FRAME) ? LAST_FRAME : ALTREF_FRAME;
+ if (((cpi->ref_frame_flags & flag_list[ref1]) &&
+ (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[ref1] << 1))) ||
+ ((cpi->ref_frame_flags & flag_list[ref2]) &&
+ (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[ref2] << 1))))
+ ref_frame_skip_mask |= (1 << ref_frame);
}
- } else if (!cpi->rc.is_src_frame_alt_ref &&
- !(frame_mv[this_mode][ref_frame].as_int == 0 &&
- ref_frame == ALTREF_FRAME)) {
- int ref1 = (ref_frame == GOLDEN_FRAME) ? LAST_FRAME : GOLDEN_FRAME;
- int ref2 = (ref_frame == ALTREF_FRAME) ? LAST_FRAME : ALTREF_FRAME;
- if (((cpi->ref_frame_flags & flag_list[ref1]) &&
- (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[ref1] << 1))) ||
- ((cpi->ref_frame_flags & flag_list[ref2]) &&
- (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[ref2] << 1))))
- ref_frame_skip_mask |= (1 << ref_frame);
}
+ if (ref_frame_skip_mask & (1 << ref_frame)) continue;
}
- if (ref_frame_skip_mask & (1 << ref_frame)) continue;
// Select prediction reference frames.
for (i = 0; i < MAX_MB_PLANE; i++) {
@@ -1820,8 +2115,9 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
mode_index = mode_idx[ref_frame][INTER_OFFSET(this_mode)];
- mode_rd_thresh = best_mode_skip_txfm ? rd_threshes[mode_index] << 1
- : rd_threshes[mode_index];
+ mode_rd_thresh = best_pickmode.best_mode_skip_txfm
+ ? rd_threshes[mode_index] << 1
+ : rd_threshes[mode_index];
// Increase mode_rd_thresh value for GOLDEN_FRAME for improved encoding
// speed with little/no subjective quality loss.
@@ -1835,92 +2131,13 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
(!cpi->sf.adaptive_rd_thresh_row_mt &&
rd_less_than_thresh(best_rdc.rdcost, mode_rd_thresh,
&rd_thresh_freq_fact[mode_index])))
- continue;
+ if (frame_mv[this_mode][ref_frame].as_int != 0) continue;
- if (this_mode == NEWMV && !force_gf_mv) {
- if (ref_frame > LAST_FRAME && !cpi->use_svc &&
- cpi->oxcf.rc_mode == VPX_CBR) {
- int tmp_sad;
- uint32_t dis;
- int cost_list[5] = { INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX };
-
- if (bsize < BLOCK_16X16) continue;
-
- tmp_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col);
-
- if (tmp_sad > x->pred_mv_sad[LAST_FRAME]) continue;
- if (tmp_sad + (num_pels_log2_lookup[bsize] << 4) > best_pred_sad)
- continue;
-
- frame_mv[NEWMV][ref_frame].as_int = mi->mv[0].as_int;
- rate_mv = vp9_mv_bit_cost(&frame_mv[NEWMV][ref_frame].as_mv,
- &x->mbmi_ext->ref_mvs[ref_frame][0].as_mv,
- x->nmvjointcost, x->mvcost, MV_COST_WEIGHT);
- frame_mv[NEWMV][ref_frame].as_mv.row >>= 3;
- frame_mv[NEWMV][ref_frame].as_mv.col >>= 3;
-
- cpi->find_fractional_mv_step(
- x, &frame_mv[NEWMV][ref_frame].as_mv,
- &x->mbmi_ext->ref_mvs[ref_frame][0].as_mv,
- cpi->common.allow_high_precision_mv, x->errorperbit,
- &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
- cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
- x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref_frame], NULL, 0,
- 0);
- } else if (svc->use_base_mv && svc->spatial_layer_id) {
- if (frame_mv[NEWMV][ref_frame].as_int != INVALID_MV) {
- const int pre_stride = xd->plane[0].pre[0].stride;
- unsigned int base_mv_sse = UINT_MAX;
- int scale = (cpi->rc.avg_frame_low_motion > 60) ? 2 : 4;
- const uint8_t *const pre_buf =
- xd->plane[0].pre[0].buf +
- (frame_mv[NEWMV][ref_frame].as_mv.row >> 3) * pre_stride +
- (frame_mv[NEWMV][ref_frame].as_mv.col >> 3);
- cpi->fn_ptr[bsize].vf(x->plane[0].src.buf, x->plane[0].src.stride,
- pre_buf, pre_stride, &base_mv_sse);
-
- // Exit NEWMV search if base_mv is (0,0) && bsize < BLOCK_16x16,
- // for SVC encoding.
- if (cpi->use_svc && cpi->svc.use_base_mv && bsize < BLOCK_16X16 &&
- frame_mv[NEWMV][ref_frame].as_mv.row == 0 &&
- frame_mv[NEWMV][ref_frame].as_mv.col == 0)
- continue;
-
- // Exit NEWMV search if base_mv_sse is large.
- if (sf->base_mv_aggressive && base_mv_sse > (best_sse_sofar << scale))
- continue;
- if (base_mv_sse < (best_sse_sofar << 1)) {
- // Base layer mv is good.
- // Exit NEWMV search if the base_mv is (0, 0) and sse is low, since
- // (0, 0) mode is already tested.
- unsigned int base_mv_sse_normalized =
- base_mv_sse >>
- (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
- if (sf->base_mv_aggressive && base_mv_sse <= best_sse_sofar &&
- base_mv_sse_normalized < 400 &&
- frame_mv[NEWMV][ref_frame].as_mv.row == 0 &&
- frame_mv[NEWMV][ref_frame].as_mv.col == 0)
- continue;
- if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
- &frame_mv[NEWMV][ref_frame], &rate_mv,
- best_rdc.rdcost, 1)) {
- continue;
- }
- } else if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
- &frame_mv[NEWMV][ref_frame],
- &rate_mv, best_rdc.rdcost, 0)) {
- continue;
- }
- } else if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
- &frame_mv[NEWMV][ref_frame],
- &rate_mv, best_rdc.rdcost, 0)) {
- continue;
- }
- } else if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col,
- &frame_mv[NEWMV][ref_frame], &rate_mv,
- best_rdc.rdcost, 0)) {
+ if (this_mode == NEWMV && !force_mv_inter_layer) {
+ if (search_new_mv(cpi, x, frame_mv, ref_frame, gf_temporal_ref, bsize,
+ mi_row, mi_col, best_pred_sad, &rate_mv, best_sse_sofar,
+ &best_rdc))
continue;
- }
}
// TODO(jianj): Skipping the testing of (duplicate) non-zero motion vector
@@ -1978,61 +2195,12 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
if ((this_mode == NEWMV || filter_ref == SWITCHABLE) &&
pred_filter_search &&
(ref_frame == LAST_FRAME ||
- (ref_frame == GOLDEN_FRAME && !force_gf_mv &&
+ (ref_frame == GOLDEN_FRAME && !force_mv_inter_layer &&
(cpi->use_svc || cpi->oxcf.rc_mode == VPX_VBR))) &&
(((mi->mv[0].as_mv.row | mi->mv[0].as_mv.col) & 0x07) != 0)) {
- int pf_rate[3];
- int64_t pf_dist[3];
- int curr_rate[3];
- unsigned int pf_var[3];
- unsigned int pf_sse[3];
- TX_SIZE pf_tx_size[3];
- int64_t best_cost = INT64_MAX;
- INTERP_FILTER best_filter = SWITCHABLE, filter;
- PRED_BUFFER *current_pred = this_mode_pred;
rd_computed = 1;
-
- for (filter = EIGHTTAP; filter <= EIGHTTAP_SMOOTH; ++filter) {
- int64_t cost;
- mi->interp_filter = filter;
- vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize);
- model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[filter], &pf_dist[filter],
- &pf_var[filter], &pf_sse[filter]);
- curr_rate[filter] = pf_rate[filter];
- pf_rate[filter] += vp9_get_switchable_rate(cpi, xd);
- cost = RDCOST(x->rdmult, x->rddiv, pf_rate[filter], pf_dist[filter]);
- pf_tx_size[filter] = mi->tx_size;
- if (cost < best_cost) {
- best_filter = filter;
- best_cost = cost;
- skip_txfm = x->skip_txfm[0];
-
- if (reuse_inter_pred) {
- if (this_mode_pred != current_pred) {
- free_pred_buffer(this_mode_pred);
- this_mode_pred = current_pred;
- }
- current_pred = &tmp[get_pred_buffer(tmp, 3)];
- pd->dst.buf = current_pred->data;
- pd->dst.stride = bw;
- }
- }
- }
-
- if (reuse_inter_pred && this_mode_pred != current_pred)
- free_pred_buffer(current_pred);
-
- mi->interp_filter = best_filter;
- mi->tx_size = pf_tx_size[best_filter];
- this_rdc.rate = curr_rate[best_filter];
- this_rdc.dist = pf_dist[best_filter];
- var_y = pf_var[best_filter];
- sse_y = pf_sse[best_filter];
- x->skip_txfm[0] = skip_txfm;
- if (reuse_inter_pred) {
- pd->dst.buf = this_mode_pred->data;
- pd->dst.stride = this_mode_pred->stride;
- }
+ search_filter_ref(cpi, x, &this_rdc, mi_row, mi_col, tmp, bsize,
+ reuse_inter_pred, &this_mode_pred, &var_y, &sse_y);
} else {
// For low motion content use x->sb_is_skin in addition to VeryHighSad
// for setting large_block.
@@ -2138,7 +2306,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
// Skipping checking: test to see if this block can be reconstructed by
// prediction only.
- if (cpi->allow_encode_breakout) {
+ if (cpi->allow_encode_breakout && !xd->lossless && !scene_change_detected) {
encode_breakout_test(cpi, x, bsize, mi_row, mi_col, ref_frame, this_mode,
var_y, sse_y, yv12_mb, &this_rdc.rate,
&this_rdc.dist, flag_preduv_computed);
@@ -2165,17 +2333,17 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
if (this_rdc.rdcost < best_rdc.rdcost || x->skip) {
best_rdc = this_rdc;
- best_mode = this_mode;
- best_pred_filter = mi->interp_filter;
- best_tx_size = mi->tx_size;
- best_ref_frame = ref_frame;
- best_mode_skip_txfm = x->skip_txfm[0];
best_early_term = this_early_term;
- best_second_ref_frame = second_ref_frame;
+ best_pickmode.best_mode = this_mode;
+ best_pickmode.best_pred_filter = mi->interp_filter;
+ best_pickmode.best_tx_size = mi->tx_size;
+ best_pickmode.best_ref_frame = ref_frame;
+ best_pickmode.best_mode_skip_txfm = x->skip_txfm[0];
+ best_pickmode.best_second_ref_frame = second_ref_frame;
if (reuse_inter_pred) {
- free_pred_buffer(best_pred);
- best_pred = this_mode_pred;
+ free_pred_buffer(best_pickmode.best_pred);
+ best_pickmode.best_pred = this_mode_pred;
}
} else {
if (reuse_inter_pred) free_pred_buffer(this_mode_pred);
@@ -2185,38 +2353,50 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
// If early termination flag is 1 and at least 2 modes are checked,
// the mode search is terminated.
- if (best_early_term && idx > 0) {
+ if (best_early_term && idx > 0 && !scene_change_detected) {
x->skip = 1;
break;
}
}
- mi->mode = best_mode;
- mi->interp_filter = best_pred_filter;
- mi->tx_size = best_tx_size;
- mi->ref_frame[0] = best_ref_frame;
- mi->mv[0].as_int = frame_mv[best_mode][best_ref_frame].as_int;
+ mi->mode = best_pickmode.best_mode;
+ mi->interp_filter = best_pickmode.best_pred_filter;
+ mi->tx_size = best_pickmode.best_tx_size;
+ mi->ref_frame[0] = best_pickmode.best_ref_frame;
+ mi->mv[0].as_int =
+ frame_mv[best_pickmode.best_mode][best_pickmode.best_ref_frame].as_int;
xd->mi[0]->bmi[0].as_mv[0].as_int = mi->mv[0].as_int;
- x->skip_txfm[0] = best_mode_skip_txfm;
- mi->ref_frame[1] = best_second_ref_frame;
+ x->skip_txfm[0] = best_pickmode.best_mode_skip_txfm;
+ mi->ref_frame[1] = best_pickmode.best_second_ref_frame;
// For spatial enhancement layer: perform intra prediction only if base
// layer is chosen as the reference. Always perform intra prediction if
- // LAST is the only reference or is_key_frame is set.
- if (cpi->svc.spatial_layer_id) {
+ // LAST is the only reference, or is_key_frame is set, or on base
+ // temporal layer.
+ if (svc->spatial_layer_id && !gf_temporal_ref) {
perform_intra_pred =
- cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame ||
+ svc->temporal_layer_id == 0 ||
+ svc->layer_context[svc->temporal_layer_id].is_key_frame ||
!(cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) ||
- (!cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame &&
- svc_force_zero_mode[best_ref_frame - 1]);
+ (!svc->layer_context[svc->temporal_layer_id].is_key_frame &&
+ svc_force_zero_mode[best_pickmode.best_ref_frame - 1]);
inter_mode_thresh = (inter_mode_thresh << 1) + inter_mode_thresh;
}
- if (cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR &&
- cpi->rc.is_src_frame_alt_ref)
+ if ((cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR &&
+ cpi->rc.is_src_frame_alt_ref) ||
+ svc->previous_frame_is_intra_only)
perform_intra_pred = 0;
+
+ // If the segment reference frame feature is enabled and set then
+ // skip the intra prediction.
+ if (segfeature_active(seg, mi->segment_id, SEG_LVL_REF_FRAME) &&
+ get_segdata(seg, mi->segment_id, SEG_LVL_REF_FRAME) > 0)
+ perform_intra_pred = 0;
+
// Perform intra prediction search if the best SAD is above a certain
// threshold.
if (best_rdc.rdcost == INT64_MAX ||
+ (scene_change_detected && perform_intra_pred) ||
((!force_skip_low_temp_var || bsize < BLOCK_32X32 ||
x->content_state_sb == kVeryHighSad) &&
perform_intra_pred && !x->skip && best_rdc.rdcost > inter_mode_thresh &&
@@ -2224,7 +2404,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
!x->lowvar_highsumdiff)) {
struct estimate_block_intra_args args = { cpi, x, DC_PRED, 1, 0 };
int i;
- TX_SIZE best_intra_tx_size = TX_SIZES;
+ PRED_BUFFER *const best_pred = best_pickmode.best_pred;
TX_SIZE intra_tx_size =
VPXMIN(max_txsize_lookup[bsize],
tx_mode_to_biggest_tx_size[cpi->common.tx_mode]);
@@ -2249,7 +2429,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
this_mode_pred->data, this_mode_pred->stride, NULL, 0,
0, 0, 0, bw, bh);
#endif // CONFIG_VP9_HIGHBITDEPTH
- best_pred = this_mode_pred;
+ best_pickmode.best_pred = this_mode_pred;
}
}
pd->dst = orig_dst;
@@ -2309,36 +2489,37 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
if (this_rdc.rdcost < best_rdc.rdcost) {
best_rdc = this_rdc;
- best_mode = this_mode;
- best_intra_tx_size = mi->tx_size;
- best_ref_frame = INTRA_FRAME;
- best_second_ref_frame = NONE;
+ best_pickmode.best_mode = this_mode;
+ best_pickmode.best_intra_tx_size = mi->tx_size;
+ best_pickmode.best_ref_frame = INTRA_FRAME;
+ best_pickmode.best_second_ref_frame = NONE;
mi->uv_mode = this_mode;
mi->mv[0].as_int = INVALID_MV;
mi->mv[1].as_int = INVALID_MV;
- best_mode_skip_txfm = x->skip_txfm[0];
+ best_pickmode.best_mode_skip_txfm = x->skip_txfm[0];
}
}
// Reset mb_mode_info to the best inter mode.
- if (best_ref_frame != INTRA_FRAME) {
- mi->tx_size = best_tx_size;
+ if (best_pickmode.best_ref_frame != INTRA_FRAME) {
+ mi->tx_size = best_pickmode.best_tx_size;
} else {
- mi->tx_size = best_intra_tx_size;
+ mi->tx_size = best_pickmode.best_intra_tx_size;
}
}
pd->dst = orig_dst;
- mi->mode = best_mode;
- mi->ref_frame[0] = best_ref_frame;
- mi->ref_frame[1] = best_second_ref_frame;
- x->skip_txfm[0] = best_mode_skip_txfm;
+ mi->mode = best_pickmode.best_mode;
+ mi->ref_frame[0] = best_pickmode.best_ref_frame;
+ mi->ref_frame[1] = best_pickmode.best_second_ref_frame;
+ x->skip_txfm[0] = best_pickmode.best_mode_skip_txfm;
if (!is_inter_block(mi)) {
mi->interp_filter = SWITCHABLE_FILTERS;
}
- if (reuse_inter_pred && best_pred != NULL) {
+ if (reuse_inter_pred && best_pickmode.best_pred != NULL) {
+ PRED_BUFFER *const best_pred = best_pickmode.best_pred;
if (best_pred->data != orig_dst.buf && is_inter_mode(mi->mode)) {
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth)
@@ -2367,25 +2548,26 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
// Remove this condition when the issue is resolved.
if (x->sb_pickmode_part) ctx->sb_skip_denoising = 1;
vp9_pickmode_ctx_den_update(&ctx_den, zero_last_cost_orig, ref_frame_cost,
- frame_mv, reuse_inter_pred, best_tx_size,
- best_mode, best_ref_frame, best_pred_filter,
- best_mode_skip_txfm);
- vp9_denoiser_denoise(cpi, x, mi_row, mi_col, bsize, ctx, &decision);
+ frame_mv, reuse_inter_pred, &best_pickmode);
+ vp9_denoiser_denoise(cpi, x, mi_row, mi_col, bsize, ctx, &decision,
+ gf_temporal_ref);
recheck_zeromv_after_denoising(cpi, mi, x, xd, decision, &ctx_den, yv12_mb,
&best_rdc, bsize, mi_row, mi_col);
- best_ref_frame = ctx_den.best_ref_frame;
+ best_pickmode.best_ref_frame = ctx_den.best_ref_frame;
}
#endif
- if (best_ref_frame == ALTREF_FRAME || best_second_ref_frame == ALTREF_FRAME)
+ if (best_pickmode.best_ref_frame == ALTREF_FRAME ||
+ best_pickmode.best_second_ref_frame == ALTREF_FRAME)
x->arf_frame_usage++;
- else if (best_ref_frame != INTRA_FRAME)
+ else if (best_pickmode.best_ref_frame != INTRA_FRAME)
x->lastgolden_frame_usage++;
if (cpi->sf.adaptive_rd_thresh) {
- THR_MODES best_mode_idx = mode_idx[best_ref_frame][mode_offset(mi->mode)];
+ THR_MODES best_mode_idx =
+ mode_idx[best_pickmode.best_ref_frame][mode_offset(mi->mode)];
- if (best_ref_frame == INTRA_FRAME) {
+ if (best_pickmode.best_ref_frame == INTRA_FRAME) {
// Only consider the modes that are included in the intra_mode_list.
int intra_modes = sizeof(intra_mode_list) / sizeof(PREDICTION_MODE);
int i;
@@ -2405,7 +2587,7 @@ void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data,
} else {
for (ref_frame = LAST_FRAME; ref_frame <= GOLDEN_FRAME; ++ref_frame) {
PREDICTION_MODE this_mode;
- if (best_ref_frame != ref_frame) continue;
+ if (best_pickmode.best_ref_frame != ref_frame) continue;
for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
if (cpi->sf.adaptive_rd_thresh_row_mt)
update_thresh_freq_fact_row_mt(cpi, tile_data, x->source_variance,
@@ -2585,9 +2767,10 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, int mi_row,
x, &tmp_mv, &mbmi_ext->ref_mvs[ref_frame][0].as_mv,
cpi->common.allow_high_precision_mv, x->errorperbit,
&cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
- cpi->sf.mv.subpel_iters_per_step,
- cond_cost_list(cpi, cost_list), x->nmvjointcost, x->mvcost,
- &dummy_dist, &x->pred_sse[ref_frame], NULL, 0, 0);
+ cpi->sf.mv.subpel_search_level, cond_cost_list(cpi, cost_list),
+ x->nmvjointcost, x->mvcost, &dummy_dist,
+ &x->pred_sse[ref_frame], NULL, 0, 0,
+ cpi->sf.use_accurate_subpel_search);
xd->mi[0]->bmi[i].as_mv[0].as_mv = tmp_mv;
} else {
diff --git a/libvpx/vp9/encoder/vp9_pickmode.h b/libvpx/vp9/encoder/vp9_pickmode.h
index 9aa00c4fa..15207e6cf 100644
--- a/libvpx/vp9/encoder/vp9_pickmode.h
+++ b/libvpx/vp9/encoder/vp9_pickmode.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_ENCODER_VP9_PICKMODE_H_
-#define VP9_ENCODER_VP9_PICKMODE_H_
+#ifndef VPX_VP9_ENCODER_VP9_PICKMODE_H_
+#define VPX_VP9_ENCODER_VP9_PICKMODE_H_
#include "vp9/encoder/vp9_encoder.h"
@@ -32,4 +32,4 @@ void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, int mi_row,
} // extern "C"
#endif
-#endif // VP9_ENCODER_VP9_PICKMODE_H_
+#endif // VPX_VP9_ENCODER_VP9_PICKMODE_H_
diff --git a/libvpx/vp9/encoder/vp9_quantize.c b/libvpx/vp9/encoder/vp9_quantize.c
index 09f61ead2..75f3a8ba7 100644
--- a/libvpx/vp9/encoder/vp9_quantize.c
+++ b/libvpx/vp9/encoder/vp9_quantize.c
@@ -204,10 +204,9 @@ static int get_qzbin_factor(int q, vpx_bit_depth_t bit_depth) {
switch (bit_depth) {
case VPX_BITS_8: return q == 0 ? 64 : (quant < 148 ? 84 : 80);
case VPX_BITS_10: return q == 0 ? 64 : (quant < 592 ? 84 : 80);
- case VPX_BITS_12: return q == 0 ? 64 : (quant < 2368 ? 84 : 80);
default:
- assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
- return -1;
+ assert(bit_depth == VPX_BITS_12);
+ return q == 0 ? 64 : (quant < 2368 ? 84 : 80);
}
#else
(void)bit_depth;
@@ -221,13 +220,20 @@ void vp9_init_quantizer(VP9_COMP *cpi) {
int i, q, quant;
for (q = 0; q < QINDEX_RANGE; q++) {
- const int qzbin_factor = get_qzbin_factor(q, cm->bit_depth);
- const int qrounding_factor = q == 0 ? 64 : 48;
+ int qzbin_factor = get_qzbin_factor(q, cm->bit_depth);
+ int qrounding_factor = q == 0 ? 64 : 48;
+ const int sharpness_adjustment = 16 * (7 - cpi->oxcf.sharpness) / 7;
+
+ if (cpi->oxcf.sharpness > 0 && q > 0) {
+ qzbin_factor = 64 + sharpness_adjustment;
+ qrounding_factor = 64 - sharpness_adjustment;
+ }
for (i = 0; i < 2; ++i) {
int qrounding_factor_fp = i == 0 ? 48 : 42;
if (q == 0) qrounding_factor_fp = 64;
-
+ if (cpi->oxcf.sharpness > 0)
+ qrounding_factor_fp = 64 - sharpness_adjustment;
// y
quant = i == 0 ? vp9_dc_quant(q, cm->y_dc_delta_q, cm->bit_depth)
: vp9_ac_quant(q, 0, cm->bit_depth);
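
The sharpness hook added above maps oxcf.sharpness (1..7) onto the zero-bin and rounding factors. A minimal standalone sketch of just that integer arithmetic (not the encoder's tables or API), with all values taken from the hunk:

#include <stdio.h>

/* Standalone sketch (not the encoder API) of the sharpness mapping in
 * vp9_init_quantizer() above: for sharpness 1..7 the zero-bin factor
 * tightens toward 64 and the rounding factor rises toward 64, so fewer
 * small coefficients are forced to zero at higher sharpness settings. */
int main(void) {
  int sharpness;
  for (sharpness = 1; sharpness <= 7; ++sharpness) {
    const int adj = 16 * (7 - sharpness) / 7; /* 13 at sharpness 1, 0 at 7 */
    printf("sharpness=%d qzbin=%d qrounding=%d\n", sharpness, 64 + adj,
           64 - adj);
  }
  return 0;
}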
diff --git a/libvpx/vp9/encoder/vp9_quantize.h b/libvpx/vp9/encoder/vp9_quantize.h
index 61320361b..ed9b84958 100644
--- a/libvpx/vp9/encoder/vp9_quantize.h
+++ b/libvpx/vp9/encoder/vp9_quantize.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_ENCODER_VP9_QUANTIZE_H_
-#define VP9_ENCODER_VP9_QUANTIZE_H_
+#ifndef VPX_VP9_ENCODER_VP9_QUANTIZE_H_
+#define VPX_VP9_ENCODER_VP9_QUANTIZE_H_
#include "./vpx_config.h"
#include "vp9/encoder/vp9_block.h"
@@ -59,4 +59,4 @@ int vp9_qindex_to_quantizer(int qindex);
} // extern "C"
#endif
-#endif // VP9_ENCODER_VP9_QUANTIZE_H_
+#endif // VPX_VP9_ENCODER_VP9_QUANTIZE_H_
diff --git a/libvpx/vp9/encoder/vp9_ratectrl.c b/libvpx/vp9/encoder/vp9_ratectrl.c
index b7f3a0e89..5ad68e2e5 100644
--- a/libvpx/vp9/encoder/vp9_ratectrl.c
+++ b/libvpx/vp9/encoder/vp9_ratectrl.c
@@ -31,10 +31,13 @@
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/encoder/vp9_ratectrl.h"
-// Max rate target for 1080P and below encodes under normal circumstances
-// (1920 * 1080 / (16 * 16)) * MAX_MB_RATE bits per MB
+// Max rate per frame for 1080P and below encodes if no level requirement
+// is given. For larger formats limit to MAX_MB_RATE bits per MB.
+// 4Mbits is derived from the level requirement for level 4 (1080P 30),
+// which requires that HW can sustain a rate of 16Mbits over a 4-frame group.
+// If a lower level requirement is specified then it may override this value.
#define MAX_MB_RATE 250
-#define MAXRATE_1080P 2025000
+#define MAXRATE_1080P 4000000
#define DEFAULT_KF_BOOST 2000
#define DEFAULT_GF_BOOST 2000
@@ -45,18 +48,16 @@
#define MAX_BPB_FACTOR 50
#if CONFIG_VP9_HIGHBITDEPTH
-#define ASSIGN_MINQ_TABLE(bit_depth, name) \
- do { \
- switch (bit_depth) { \
- case VPX_BITS_8: name = name##_8; break; \
- case VPX_BITS_10: name = name##_10; break; \
- case VPX_BITS_12: name = name##_12; break; \
- default: \
- assert(0 && \
- "bit_depth should be VPX_BITS_8, VPX_BITS_10" \
- " or VPX_BITS_12"); \
- name = NULL; \
- } \
+#define ASSIGN_MINQ_TABLE(bit_depth, name) \
+ do { \
+ switch (bit_depth) { \
+ case VPX_BITS_8: name = name##_8; break; \
+ case VPX_BITS_10: name = name##_10; break; \
+ default: \
+ assert(bit_depth == VPX_BITS_12); \
+ name = name##_12; \
+ break; \
+ } \
} while (0)
#else
#define ASSIGN_MINQ_TABLE(bit_depth, name) \
@@ -97,8 +98,8 @@ static int kf_low = 400;
#else
static int gf_high = 2000;
static int gf_low = 400;
-static int kf_high = 5000;
-static int kf_low = 400;
+static int kf_high = 4800;
+static int kf_low = 300;
#endif
// Functions to compute the active minq lookup table entries based on a
@@ -128,7 +129,7 @@ static void init_minq_luts(int *kf_low_m, int *kf_high_m, int *arfgf_low,
for (i = 0; i < QINDEX_RANGE; i++) {
const double maxq = vp9_convert_qindex_to_q(i, bit_depth);
kf_low_m[i] = get_minq_index(maxq, 0.000001, -0.0004, 0.150, bit_depth);
- kf_high_m[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.55, bit_depth);
+ kf_high_m[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.45, bit_depth);
#ifdef AGGRESSIVE_VBR
arfgf_low[i] = get_minq_index(maxq, 0.0000015, -0.0009, 0.275, bit_depth);
inter[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.80, bit_depth);
@@ -164,10 +165,9 @@ double vp9_convert_qindex_to_q(int qindex, vpx_bit_depth_t bit_depth) {
switch (bit_depth) {
case VPX_BITS_8: return vp9_ac_quant(qindex, 0, bit_depth) / 4.0;
case VPX_BITS_10: return vp9_ac_quant(qindex, 0, bit_depth) / 16.0;
- case VPX_BITS_12: return vp9_ac_quant(qindex, 0, bit_depth) / 64.0;
default:
- assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
- return -1.0;
+ assert(bit_depth == VPX_BITS_12);
+ return vp9_ac_quant(qindex, 0, bit_depth) / 64.0;
}
#else
return vp9_ac_quant(qindex, 0, bit_depth) / 4.0;
@@ -247,20 +247,65 @@ int vp9_rc_clamp_iframe_target_size(const VP9_COMP *const cpi, int target) {
return target;
}
+// Update the buffer level before encoding with the per-frame-bandwidth.
+static void update_buffer_level_preencode(VP9_COMP *cpi) {
+ RATE_CONTROL *const rc = &cpi->rc;
+ rc->bits_off_target += rc->avg_frame_bandwidth;
+ // Clip the buffer level to the maximum specified buffer size.
+ rc->bits_off_target = VPXMIN(rc->bits_off_target, rc->maximum_buffer_size);
+ rc->buffer_level = rc->bits_off_target;
+}
+
+// Update the buffer level before encoding with the per-frame-bandwidth
+// for SVC. The current and all upper temporal layers are updated, needed
+// for the layered rate control which involves cumulative buffer levels for
+// the temporal layers. Allow for using the timestamp (pts) delta for the
+// framerate when set_ref_frame_config is used.
+static void update_buffer_level_svc_preencode(VP9_COMP *cpi) {
+ SVC *const svc = &cpi->svc;
+ int i;
+ // Set this to 1 to use timestamp delta for "framerate" under
+ // ref_frame_config usage.
+ int use_timestamp = 1;
+ const int64_t ts_delta =
+ svc->time_stamp_superframe - svc->time_stamp_prev[svc->spatial_layer_id];
+ for (i = svc->temporal_layer_id; i < svc->number_temporal_layers; ++i) {
+ const int layer =
+ LAYER_IDS_TO_IDX(svc->spatial_layer_id, i, svc->number_temporal_layers);
+ LAYER_CONTEXT *const lc = &svc->layer_context[layer];
+ RATE_CONTROL *const lrc = &lc->rc;
+ if (use_timestamp && cpi->svc.use_set_ref_frame_config &&
+ svc->number_temporal_layers == 1 && ts_delta > 0 &&
+ svc->current_superframe > 0) {
+ // TODO(marpan): This may need to be modified for temporal layers.
+ const double framerate_pts = 10000000.0 / ts_delta;
+ lrc->bits_off_target += (int)(lc->target_bandwidth / framerate_pts);
+ } else {
+ lrc->bits_off_target += (int)(lc->target_bandwidth / lc->framerate);
+ }
+ // Clip buffer level to maximum buffer size for the layer.
+ lrc->bits_off_target =
+ VPXMIN(lrc->bits_off_target, lrc->maximum_buffer_size);
+ lrc->buffer_level = lrc->bits_off_target;
+ if (i == svc->temporal_layer_id) {
+ cpi->rc.bits_off_target = lrc->bits_off_target;
+ cpi->rc.buffer_level = lrc->buffer_level;
+ }
+ }
+}
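
The timestamp branch above derives an effective framerate from the pts delta instead of the configured layer framerate. A small sketch of that computation, assuming (as the 10000000.0 constant implies) timestamps on a 10 MHz clock and an illustrative 1 Mbps layer target:

#include <stdint.h>
#include <stdio.h>

/* Sketch of the timestamp-based per-frame budget above. The 10000000.0
 * constant implies timestamps on a 10 MHz clock, so ts_delta = 333333
 * ticks is ~1/30 s. The 1 Mbps layer target is an assumed value. */
int main(void) {
  const int64_t ts_delta = 333333;
  const double target_bandwidth = 1000000.0; /* bits per second */
  const double framerate_pts = 10000000.0 / (double)ts_delta;
  printf("framerate=%.2f fps, per-frame budget=%d bits\n", framerate_pts,
         (int)(target_bandwidth / framerate_pts));
  return 0; /* ~30.00 fps, ~33333 bits */
}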
+
// Update the buffer level for higher temporal layers, given the encoded
// size of the current temporal layer.
-static void update_layer_buffer_level(SVC *svc, int encoded_frame_size) {
+static void update_layer_buffer_level_postencode(SVC *svc,
+ int encoded_frame_size) {
int i = 0;
- int current_temporal_layer = svc->temporal_layer_id;
+ const int current_temporal_layer = svc->temporal_layer_id;
for (i = current_temporal_layer + 1; i < svc->number_temporal_layers; ++i) {
const int layer =
LAYER_IDS_TO_IDX(svc->spatial_layer_id, i, svc->number_temporal_layers);
LAYER_CONTEXT *lc = &svc->layer_context[layer];
RATE_CONTROL *lrc = &lc->rc;
- int bits_off_for_this_layer =
- (int)(lc->target_bandwidth / lc->framerate - encoded_frame_size);
- lrc->bits_off_target += bits_off_for_this_layer;
-
+ lrc->bits_off_target -= encoded_frame_size;
// Clip buffer level to maximum buffer size for the layer.
lrc->bits_off_target =
VPXMIN(lrc->bits_off_target, lrc->maximum_buffer_size);
@@ -268,21 +313,13 @@ static void update_layer_buffer_level(SVC *svc, int encoded_frame_size) {
}
}
-// Update the buffer level: leaky bucket model.
-static void update_buffer_level(VP9_COMP *cpi, int encoded_frame_size) {
- const VP9_COMMON *const cm = &cpi->common;
+// Update the buffer level after encoding with encoded frame size.
+static void update_buffer_level_postencode(VP9_COMP *cpi,
+ int encoded_frame_size) {
RATE_CONTROL *const rc = &cpi->rc;
-
- // Non-viewable frames are a special case and are treated as pure overhead.
- if (!cm->show_frame) {
- rc->bits_off_target -= encoded_frame_size;
- } else {
- rc->bits_off_target += rc->avg_frame_bandwidth - encoded_frame_size;
- }
-
+ rc->bits_off_target -= encoded_frame_size;
// Clip the buffer level to the maximum specified buffer size.
rc->bits_off_target = VPXMIN(rc->bits_off_target, rc->maximum_buffer_size);
-
// For screen-content mode, and if frame-dropper is off, don't let buffer
// level go below threshold, given here as -rc->maximum_buffer_size.
if (cpi->oxcf.content == VP9E_CONTENT_SCREEN &&
@@ -292,7 +329,7 @@ static void update_buffer_level(VP9_COMP *cpi, int encoded_frame_size) {
rc->buffer_level = rc->bits_off_target;
if (is_one_pass_cbr_svc(cpi)) {
- update_layer_buffer_level(&cpi->svc, encoded_frame_size);
+ update_layer_buffer_level_postencode(&cpi->svc, encoded_frame_size);
}
}
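
Taken together with update_buffer_level_preencode() above, the model is a leaky bucket split across the encode: credit the frame's bandwidth share before encoding, debit the actual encoded size after, clipping at the maximum buffer size each time. A self-contained sketch with illustrative numbers:

#include <stdint.h>
#include <stdio.h>

/* Self-contained leaky-bucket sketch of the pre/post split: credit the
 * per-frame bandwidth before encoding, debit the encoded size after,
 * clipping at the maximum buffer size both times. Numbers are
 * illustrative, not encoder defaults. */
static int64_t clip_max(int64_t v, int64_t mx) { return v < mx ? v : mx; }

int main(void) {
  int64_t bits_off_target = 0;
  const int64_t maximum_buffer_size = 1000000;
  const int avg_frame_bandwidth = 33333; /* target bits per frame */
  const int sizes[3] = { 30000, 50000, 20000 }; /* encoded sizes in bits */
  int i;
  for (i = 0; i < 3; ++i) {
    bits_off_target += avg_frame_bandwidth; /* preencode credit */
    bits_off_target = clip_max(bits_off_target, maximum_buffer_size);
    bits_off_target -= sizes[i]; /* postencode debit */
    bits_off_target = clip_max(bits_off_target, maximum_buffer_size);
    printf("frame %d: buffer level %lld\n", i, (long long)bits_off_target);
  }
  return 0; /* 3333, -13334, -1: undershoot raises it, overshoot sinks it */
}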
@@ -355,6 +392,9 @@ void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) {
rc->high_source_sad = 0;
rc->reset_high_source_sad = 0;
rc->high_source_sad_lagindex = -1;
+ rc->high_num_blocks_with_motion = 0;
+ rc->hybrid_intra_scene_change = 0;
+ rc->re_encode_maxq_scene_change = 0;
rc->alt_ref_gf_group = 0;
rc->last_frame_is_src_altref = 0;
rc->fac_active_worst_inter = 150;
@@ -377,6 +417,7 @@ void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) {
for (i = 0; i < RATE_FACTOR_LEVELS; ++i) {
rc->rate_correction_factors[i] = 1.0;
+ rc->damped_adjustment[i] = 0;
}
rc->min_gf_interval = oxcf->min_gf_interval;
@@ -388,27 +429,110 @@ void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) {
rc->max_gf_interval = vp9_rc_get_default_max_gf_interval(
oxcf->init_framerate, rc->min_gf_interval);
rc->baseline_gf_interval = (rc->min_gf_interval + rc->max_gf_interval) / 2;
+
+ rc->force_max_q = 0;
+ rc->last_post_encode_dropped_scene_change = 0;
+ rc->use_post_encode_drop = 0;
+ rc->ext_use_post_encode_drop = 0;
}
-int vp9_rc_drop_frame(VP9_COMP *cpi) {
+static int check_buffer_above_thresh(VP9_COMP *cpi, int drop_mark) {
+ SVC *svc = &cpi->svc;
+ if (!cpi->use_svc || cpi->svc.framedrop_mode != FULL_SUPERFRAME_DROP) {
+ RATE_CONTROL *const rc = &cpi->rc;
+ return (rc->buffer_level > drop_mark);
+ } else {
+ int i;
+    // For SVC in FULL_SUPERFRAME_DROP mode: the condition on the
+    // buffer (if it's above threshold, so no drop) is checked on current and
+    // upper spatial layers. If any spatial layer is not above threshold then
+    // we return 0.
+ for (i = svc->spatial_layer_id; i < svc->number_spatial_layers; ++i) {
+ const int layer = LAYER_IDS_TO_IDX(i, svc->temporal_layer_id,
+ svc->number_temporal_layers);
+ LAYER_CONTEXT *lc = &svc->layer_context[layer];
+ RATE_CONTROL *lrc = &lc->rc;
+ // Exclude check for layer whose bitrate is 0.
+ if (lc->target_bandwidth > 0) {
+ const int drop_mark_layer = (int)(cpi->svc.framedrop_thresh[i] *
+ lrc->optimal_buffer_level / 100);
+ if (!(lrc->buffer_level > drop_mark_layer)) return 0;
+ }
+ }
+ return 1;
+ }
+}
+
+static int check_buffer_below_thresh(VP9_COMP *cpi, int drop_mark) {
+ SVC *svc = &cpi->svc;
+ if (!cpi->use_svc || cpi->svc.framedrop_mode == LAYER_DROP) {
+ RATE_CONTROL *const rc = &cpi->rc;
+ return (rc->buffer_level <= drop_mark);
+ } else {
+ int i;
+ // For SVC in the constrained framedrop mode (svc->framedrop_mode =
+ // CONSTRAINED_LAYER_DROP or FULL_SUPERFRAME_DROP): the condition on
+    // buffer (if it's below threshold, so drop the frame) is checked on current
+ // and upper spatial layers. For FULL_SUPERFRAME_DROP mode if any
+ // spatial layer is <= threshold, then we return 1 (drop).
+ for (i = svc->spatial_layer_id; i < svc->number_spatial_layers; ++i) {
+ const int layer = LAYER_IDS_TO_IDX(i, svc->temporal_layer_id,
+ svc->number_temporal_layers);
+ LAYER_CONTEXT *lc = &svc->layer_context[layer];
+ RATE_CONTROL *lrc = &lc->rc;
+ // Exclude check for layer whose bitrate is 0.
+ if (lc->target_bandwidth > 0) {
+ const int drop_mark_layer = (int)(cpi->svc.framedrop_thresh[i] *
+ lrc->optimal_buffer_level / 100);
+ if (cpi->svc.framedrop_mode == FULL_SUPERFRAME_DROP) {
+ if (lrc->buffer_level <= drop_mark_layer) return 1;
+ } else {
+ if (!(lrc->buffer_level <= drop_mark_layer)) return 0;
+ }
+ }
+ }
+ if (cpi->svc.framedrop_mode == FULL_SUPERFRAME_DROP)
+ return 0;
+ else
+ return 1;
+ }
+}
+
+static int drop_frame(VP9_COMP *cpi) {
const VP9EncoderConfig *oxcf = &cpi->oxcf;
RATE_CONTROL *const rc = &cpi->rc;
- if (!oxcf->drop_frames_water_mark ||
- (is_one_pass_cbr_svc(cpi) &&
- cpi->svc.spatial_layer_id > cpi->svc.first_spatial_layer_to_encode)) {
+ SVC *svc = &cpi->svc;
+ int drop_frames_water_mark = oxcf->drop_frames_water_mark;
+ if (cpi->use_svc) {
+ // If we have dropped max_consec_drop frames, then we don't
+ // drop this spatial layer, and reset counter to 0.
+ if (svc->drop_count[svc->spatial_layer_id] == svc->max_consec_drop) {
+ svc->drop_count[svc->spatial_layer_id] = 0;
+ return 0;
+ } else {
+ drop_frames_water_mark = svc->framedrop_thresh[svc->spatial_layer_id];
+ }
+ }
+ if (!drop_frames_water_mark ||
+ (svc->spatial_layer_id > 0 &&
+ svc->framedrop_mode == FULL_SUPERFRAME_DROP)) {
return 0;
} else {
- if (rc->buffer_level < 0) {
+ if ((rc->buffer_level < 0 && svc->framedrop_mode != FULL_SUPERFRAME_DROP) ||
+ (check_buffer_below_thresh(cpi, -1) &&
+ svc->framedrop_mode == FULL_SUPERFRAME_DROP)) {
// Always drop if buffer is below 0.
return 1;
} else {
// If buffer is below drop_mark, for now just drop every other frame
// (starting with the next frame) until it increases back over drop_mark.
int drop_mark =
- (int)(oxcf->drop_frames_water_mark * rc->optimal_buffer_level / 100);
- if ((rc->buffer_level > drop_mark) && (rc->decimation_factor > 0)) {
+ (int)(drop_frames_water_mark * rc->optimal_buffer_level / 100);
+ if (check_buffer_above_thresh(cpi, drop_mark) &&
+ (rc->decimation_factor > 0)) {
--rc->decimation_factor;
- } else if (rc->buffer_level <= drop_mark && rc->decimation_factor == 0) {
+ } else if (check_buffer_below_thresh(cpi, drop_mark) &&
+ rc->decimation_factor == 0) {
rc->decimation_factor = 1;
}
if (rc->decimation_factor > 0) {
@@ -427,11 +551,129 @@ int vp9_rc_drop_frame(VP9_COMP *cpi) {
}
}
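
A compact sketch of the watermark logic above, with assumed buffer levels; the real decimation counter handling (drop every other frame while armed) is elided:

#include <stdint.h>
#include <stdio.h>

/* Watermark sketch: drop_mark is a percentage of the optimal buffer
 * level; falling to it arms a decimation factor (drop every other
 * frame), recovering above it disarms. Buffer levels are assumed. */
int main(void) {
  const int64_t optimal_buffer_level = 600000;
  const int water_mark_pct = 30; /* => drop_mark = 180000 bits */
  const int drop_mark = (int)(water_mark_pct * optimal_buffer_level / 100);
  const int64_t levels[4] = { 200000, 170000, 165000, 190000 };
  int decimation_factor = 0;
  int i;
  for (i = 0; i < 4; ++i) {
    if (levels[i] > drop_mark && decimation_factor > 0)
      --decimation_factor;
    else if (levels[i] <= drop_mark && decimation_factor == 0)
      decimation_factor = 1;
    printf("level=%lld decimate=%d\n", (long long)levels[i],
           decimation_factor);
  }
  return 0; /* decimation arms on frame 1 and disarms on frame 3 */
}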
+int post_encode_drop_cbr(VP9_COMP *cpi, size_t *size) {
+ size_t frame_size = *size << 3;
+ int64_t new_buffer_level =
+ cpi->rc.buffer_level + cpi->rc.avg_frame_bandwidth - (int64_t)frame_size;
+
+  // For now we drop if the new buffer level (given the encoded frame size)
+  // goes below 0.
+ if (new_buffer_level < 0) {
+ *size = 0;
+ vp9_rc_postencode_update_drop_frame(cpi);
+ // Update flag to use for next frame.
+ if (cpi->rc.high_source_sad ||
+ (cpi->use_svc && cpi->svc.high_source_sad_superframe))
+ cpi->rc.last_post_encode_dropped_scene_change = 1;
+    // Force max_q on next frame.
+ cpi->rc.force_max_q = 1;
+ cpi->rc.avg_frame_qindex[INTER_FRAME] = cpi->rc.worst_quality;
+ cpi->last_frame_dropped = 1;
+ cpi->ext_refresh_frame_flags_pending = 0;
+ if (cpi->use_svc) {
+ SVC *svc = &cpi->svc;
+ int sl = 0;
+ int tl = 0;
+ svc->last_layer_dropped[svc->spatial_layer_id] = 1;
+ svc->drop_spatial_layer[svc->spatial_layer_id] = 1;
+ svc->drop_count[svc->spatial_layer_id]++;
+ svc->skip_enhancement_layer = 1;
+      // Post-encode drop is only checked on the base spatial layer; for now,
+      // if max-q is set on the base we force it on all layers.
+ for (sl = 0; sl < svc->number_spatial_layers; ++sl) {
+ for (tl = 0; tl < svc->number_temporal_layers; ++tl) {
+ const int layer =
+ LAYER_IDS_TO_IDX(sl, tl, svc->number_temporal_layers);
+ LAYER_CONTEXT *lc = &svc->layer_context[layer];
+ RATE_CONTROL *lrc = &lc->rc;
+ lrc->force_max_q = 1;
+ lrc->avg_frame_qindex[INTER_FRAME] = cpi->rc.worst_quality;
+ }
+ }
+ }
+ return 1;
+ }
+
+ cpi->rc.force_max_q = 0;
+ cpi->rc.last_post_encode_dropped_scene_change = 0;
+ return 0;
+}
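
The post-encode test reduces to one inequality: would crediting the frame's bandwidth share and debiting the actual encoded size (bytes << 3 = bits) drive the buffer negative? A worked sketch with assumed values:

#include <stdint.h>
#include <stdio.h>

/* Sketch of the post_encode_drop_cbr() test above: drop the frame if
 * the buffer would go negative after accounting for the encoded size. */
int main(void) {
  const int64_t buffer_level = 20000;    /* bits */
  const int avg_frame_bandwidth = 33333; /* bits per frame */
  const size_t encoded_bytes = 8000;     /* 64000 bits */
  const int64_t new_level =
      buffer_level + avg_frame_bandwidth - (int64_t)(encoded_bytes << 3);
  printf("new_buffer_level=%lld -> %s\n", (long long)new_level,
         new_level < 0 ? "drop" : "keep");
  return 0; /* 20000 + 33333 - 64000 = -10667 -> drop */
}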
+
+int vp9_rc_drop_frame(VP9_COMP *cpi) {
+ SVC *svc = &cpi->svc;
+ int svc_prev_layer_dropped = 0;
+ // In the constrained or full_superframe framedrop mode for svc
+ // (framedrop_mode != LAYER_DROP), if the previous spatial layer was
+ // dropped, drop the current spatial layer.
+ if (cpi->use_svc && svc->spatial_layer_id > 0 &&
+ svc->drop_spatial_layer[svc->spatial_layer_id - 1])
+ svc_prev_layer_dropped = 1;
+ if ((svc_prev_layer_dropped && svc->framedrop_mode != LAYER_DROP) ||
+ drop_frame(cpi)) {
+ vp9_rc_postencode_update_drop_frame(cpi);
+ cpi->ext_refresh_frame_flags_pending = 0;
+ cpi->last_frame_dropped = 1;
+ if (cpi->use_svc) {
+ svc->last_layer_dropped[svc->spatial_layer_id] = 1;
+ svc->drop_spatial_layer[svc->spatial_layer_id] = 1;
+ svc->drop_count[svc->spatial_layer_id]++;
+ svc->skip_enhancement_layer = 1;
+ if (svc->framedrop_mode == LAYER_DROP ||
+ svc->drop_spatial_layer[0] == 0) {
+        // For the case of constrained drop mode where the base is dropped
+        // (drop_spatial_layer[0] == 1), i.e. the full superframe is dropped,
+        // we don't increment the svc frame counters. In particular the
+        // temporal layer counter (incremented in vp9_inc_frame_in_layer())
+        // won't advance, so on a dropped frame we retry the same
+        // temporal_layer_id on the next incoming frame. This avoids an
+        // issue with temporal alignment under full superframe dropping.
+ vp9_inc_frame_in_layer(cpi);
+ }
+ if (svc->spatial_layer_id == svc->number_spatial_layers - 1) {
+ int i;
+ int all_layers_drop = 1;
+ for (i = 0; i < svc->spatial_layer_id; i++) {
+ if (svc->drop_spatial_layer[i] == 0) {
+ all_layers_drop = 0;
+ break;
+ }
+ }
+ if (all_layers_drop == 1) svc->skip_enhancement_layer = 0;
+ }
+ }
+ return 1;
+ }
+ return 0;
+}
+
+static int adjust_q_cbr(const VP9_COMP *cpi, int q) {
+ // This makes sure q is between oscillating Qs to prevent resonance.
+ if (!cpi->rc.reset_high_source_sad &&
+ (!cpi->oxcf.gf_cbr_boost_pct ||
+ !(cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame)) &&
+ (cpi->rc.rc_1_frame * cpi->rc.rc_2_frame == -1) &&
+ cpi->rc.q_1_frame != cpi->rc.q_2_frame) {
+ int qclamp = clamp(q, VPXMIN(cpi->rc.q_1_frame, cpi->rc.q_2_frame),
+ VPXMAX(cpi->rc.q_1_frame, cpi->rc.q_2_frame));
+ // If the previous frame had overshoot and the current q needs to increase
+ // above the clamped value, reduce the clamp for faster reaction to
+ // overshoot.
+ if (cpi->rc.rc_1_frame == -1 && q > qclamp)
+ q = (q + qclamp) >> 1;
+ else
+ q = qclamp;
+ }
+ if (cpi->oxcf.content == VP9E_CONTENT_SCREEN)
+ vp9_cyclic_refresh_limit_q(cpi, &q);
+ return q;
+}
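
A standalone sketch of the clamp above, with assumed frame history; note the half-relaxation when the regulator wants to climb past the clamp right after an overshoot (rc_1_frame == -1):

#include <stdio.h>

static int clampi(int v, int lo, int hi) {
  return v < lo ? lo : (v > hi ? hi : v);
}

/* Sketch of the oscillation clamp in adjust_q_cbr() with assumed
 * history: the last two corrections went opposite ways
 * (rc_1 * rc_2 == -1), so q is clamped between the last two frame q
 * values; since the last frame overshot (rc_1 == -1) and the regulator
 * asked for a q above the clamp, only half the excess is given back. */
int main(void) {
  const int q_1_frame = 40, q_2_frame = 52;
  const int rc_1_frame = -1, rc_2_frame = 1;
  int q = 60; /* what the regulator asked for */
  if (rc_1_frame * rc_2_frame == -1 && q_1_frame != q_2_frame) {
    const int lo = q_1_frame < q_2_frame ? q_1_frame : q_2_frame;
    const int hi = q_1_frame < q_2_frame ? q_2_frame : q_1_frame;
    const int qclamp = clampi(q, lo, hi);
    q = (rc_1_frame == -1 && q > qclamp) ? (q + qclamp) >> 1 : qclamp;
  }
  printf("q=%d\n", q); /* (60 + 52) >> 1 = 56 instead of a hard 52 */
  return 0;
}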
+
static double get_rate_correction_factor(const VP9_COMP *cpi) {
const RATE_CONTROL *const rc = &cpi->rc;
+ const VP9_COMMON *const cm = &cpi->common;
double rcf;
- if (cpi->common.frame_type == KEY_FRAME) {
+ if (frame_is_intra_only(cm)) {
rcf = rc->rate_correction_factors[KF_STD];
} else if (cpi->oxcf.pass == 2) {
RATE_FACTOR_LEVEL rf_lvl =
@@ -451,13 +693,14 @@ static double get_rate_correction_factor(const VP9_COMP *cpi) {
static void set_rate_correction_factor(VP9_COMP *cpi, double factor) {
RATE_CONTROL *const rc = &cpi->rc;
+ const VP9_COMMON *const cm = &cpi->common;
// Normalize RCF to account for the size-dependent scaling factor.
factor /= rcf_mult[cpi->rc.frame_size_selector];
factor = fclamp(factor, MIN_BPB_FACTOR, MAX_BPB_FACTOR);
- if (cpi->common.frame_type == KEY_FRAME) {
+ if (frame_is_intra_only(cm)) {
rc->rate_correction_factors[KF_STD] = factor;
} else if (cpi->oxcf.pass == 2) {
RATE_FACTOR_LEVEL rf_lvl =
@@ -478,6 +721,8 @@ void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi) {
int correction_factor = 100;
double rate_correction_factor = get_rate_correction_factor(cpi);
double adjustment_limit;
+ RATE_FACTOR_LEVEL rf_lvl =
+ cpi->twopass.gf_group.rf_level[cpi->twopass.gf_group.index];
int projected_size_based_on_q = 0;
@@ -494,8 +739,9 @@ void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi) {
projected_size_based_on_q =
vp9_cyclic_refresh_estimate_bits_at_q(cpi, rate_correction_factor);
} else {
+ FRAME_TYPE frame_type = cm->intra_only ? KEY_FRAME : cm->frame_type;
projected_size_based_on_q =
- vp9_estimate_bits_at_q(cpi->common.frame_type, cm->base_qindex, cm->MBs,
+ vp9_estimate_bits_at_q(frame_type, cm->base_qindex, cm->MBs,
rate_correction_factor, cm->bit_depth);
}
// Work out a size correction factor.
@@ -503,10 +749,16 @@ void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi) {
correction_factor = (int)((100 * (int64_t)cpi->rc.projected_frame_size) /
projected_size_based_on_q);
- // More heavily damped adjustment used if we have been oscillating either side
- // of target.
- adjustment_limit =
- 0.25 + 0.5 * VPXMIN(1, fabs(log10(0.01 * correction_factor)));
+  // Do not use damped adjustment for the first frame of each frame type.
+ if (!cpi->rc.damped_adjustment[rf_lvl]) {
+ adjustment_limit = 1.0;
+ cpi->rc.damped_adjustment[rf_lvl] = 1;
+ } else {
+ // More heavily damped adjustment used if we have been oscillating either
+ // side of target.
+ adjustment_limit =
+ 0.25 + 0.5 * VPXMIN(1, fabs(log10(0.01 * correction_factor)));
+ }
cpi->rc.q_2_frame = cpi->rc.q_1_frame;
cpi->rc.q_1_frame = cm->base_qindex;
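
The damping rule is easiest to see numerically: on target (100%) the limit is 0.25, at a 2x miss it is roughly 0.40, and it saturates at 0.75 for a 10x miss, while the first frame at each rate-factor level skips damping entirely (limit 1.0). A worked sketch:

#include <math.h>
#include <stdio.h>

/* Worked sketch of the damping limit above: correction_factor is the
 * projected size as a percent of target. Prints ~0.250, ~0.401, 0.750
 * for 100%, 200% and 1000%. Compile with -lm. */
int main(void) {
  const int factors[3] = { 100, 200, 1000 };
  int i;
  for (i = 0; i < 3; ++i) {
    const double mag = fabs(log10(0.01 * factors[i]));
    const double limit = 0.25 + 0.5 * (mag < 1.0 ? mag : 1.0);
    printf("correction=%d%% limit=%.3f\n", factors[i], limit);
  }
  return 0;
}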
@@ -569,8 +821,9 @@ int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame,
bits_per_mb_at_this_q =
(int)vp9_cyclic_refresh_rc_bits_per_mb(cpi, i, correction_factor);
} else {
+ FRAME_TYPE frame_type = cm->intra_only ? KEY_FRAME : cm->frame_type;
bits_per_mb_at_this_q = (int)vp9_rc_bits_per_mb(
- cm->frame_type, i, correction_factor, cm->bit_depth);
+ frame_type, i, correction_factor, cm->bit_depth);
}
if (bits_per_mb_at_this_q <= target_bits_per_mb) {
@@ -585,16 +838,9 @@ int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame,
}
} while (++i <= active_worst_quality);
- // In CBR mode, this makes sure q is between oscillating Qs to prevent
- // resonance.
- if (cpi->oxcf.rc_mode == VPX_CBR && !cpi->rc.reset_high_source_sad &&
- (!cpi->oxcf.gf_cbr_boost_pct ||
- !(cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame)) &&
- (cpi->rc.rc_1_frame * cpi->rc.rc_2_frame == -1) &&
- cpi->rc.q_1_frame != cpi->rc.q_2_frame) {
- q = clamp(q, VPXMIN(cpi->rc.q_1_frame, cpi->rc.q_2_frame),
- VPXMAX(cpi->rc.q_1_frame, cpi->rc.q_2_frame));
- }
+ // Adjustment to q for CBR mode.
+ if (cpi->oxcf.rc_mode == VPX_CBR) return adjust_q_cbr(cpi, q);
+
return q;
}
@@ -623,13 +869,19 @@ static int get_kf_active_quality(const RATE_CONTROL *const rc, int q,
kf_low_motion_minq, kf_high_motion_minq);
}
-static int get_gf_active_quality(const RATE_CONTROL *const rc, int q,
+static int get_gf_active_quality(const VP9_COMP *const cpi, int q,
vpx_bit_depth_t bit_depth) {
+ const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
+ const RATE_CONTROL *const rc = &cpi->rc;
+
int *arfgf_low_motion_minq;
int *arfgf_high_motion_minq;
+ const int gfu_boost = cpi->multi_layer_arf
+ ? gf_group->gfu_boost[gf_group->index]
+ : rc->gfu_boost;
ASSIGN_MINQ_TABLE(bit_depth, arfgf_low_motion_minq);
ASSIGN_MINQ_TABLE(bit_depth, arfgf_high_motion_minq);
- return get_active_quality(q, rc->gfu_boost, gf_low, gf_high,
+ return get_active_quality(q, gfu_boost, gf_low, gf_high,
arfgf_low_motion_minq, arfgf_high_motion_minq);
}
@@ -674,7 +926,7 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) {
int active_worst_quality;
int ambient_qp;
unsigned int num_frames_weight_key = 5 * cpi->svc.number_temporal_layers;
- if (cm->frame_type == KEY_FRAME || rc->reset_high_source_sad)
+ if (frame_is_intra_only(cm) || rc->reset_high_source_sad || rc->force_max_q)
return rc->worst_quality;
// For ambient_qp we use minimum of avg_frame_qindex[KEY_FRAME/INTER_FRAME]
// for the first few frames following key frame. These are both initialized
@@ -685,6 +937,7 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) {
? VPXMIN(rc->avg_frame_qindex[INTER_FRAME],
rc->avg_frame_qindex[KEY_FRAME])
: rc->avg_frame_qindex[INTER_FRAME];
+ active_worst_quality = VPXMIN(rc->worst_quality, (ambient_qp * 5) >> 2);
// For SVC if the current base spatial layer was key frame, use the QP from
// that base layer for ambient_qp.
if (cpi->use_svc && cpi->svc.spatial_layer_id > 0) {
@@ -694,13 +947,15 @@ static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) {
if (lc->is_key_frame) {
const RATE_CONTROL *lrc = &lc->rc;
ambient_qp = VPXMIN(ambient_qp, lrc->last_q[KEY_FRAME]);
+ active_worst_quality = VPXMIN(rc->worst_quality, (ambient_qp * 9) >> 3);
}
}
- active_worst_quality = VPXMIN(rc->worst_quality, ambient_qp * 5 >> 2);
if (rc->buffer_level > rc->optimal_buffer_level) {
// Adjust down.
- // Maximum limit for down adjustment, ~30%.
+ // Maximum limit for down adjustment ~30%; make it lower for screen content.
int max_adjustment_down = active_worst_quality / 3;
+ if (cpi->oxcf.content == VP9E_CONTENT_SCREEN)
+ max_adjustment_down = active_worst_quality >> 3;
if (max_adjustment_down) {
buff_lvl_step = ((rc->maximum_buffer_size - rc->optimal_buffer_level) /
max_adjustment_down);
@@ -779,7 +1034,7 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi,
} else {
q = active_worst_quality;
}
- active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
+ active_best_quality = get_gf_active_quality(cpi, q, cm->bit_depth);
} else {
// Use the lower of active_worst_quality and recent/average Q.
if (cm->current_video_frame > 1) {
@@ -804,21 +1059,8 @@ static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi,
*top_index = active_worst_quality;
*bottom_index = active_best_quality;
-#if LIMIT_QRANGE_FOR_ALTREF_AND_KEY
- // Limit Q range for the adaptive loop.
- if (cm->frame_type == KEY_FRAME && !rc->this_key_frame_forced &&
- !(cm->current_video_frame == 0)) {
- int qdelta = 0;
- vpx_clear_system_state();
- qdelta = vp9_compute_qdelta_by_rate(
- &cpi->rc, cm->frame_type, active_worst_quality, 2.0, cm->bit_depth);
- *top_index = active_worst_quality + qdelta;
- *top_index = (*top_index > *bottom_index) ? *top_index : *bottom_index;
- }
-#endif
-
// Special case code to try and match quality with forced key frames
- if (cm->frame_type == KEY_FRAME && rc->this_key_frame_forced) {
+ if (frame_is_intra_only(cm) && rc->this_key_frame_forced) {
q = rc->last_boosted_qindex;
} else {
q = vp9_rc_regulate_q(cpi, rc->this_frame_target, active_best_quality,
@@ -939,7 +1181,7 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi,
if (oxcf->rc_mode == VPX_CQ) {
if (q < cq_level) q = cq_level;
- active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
+ active_best_quality = get_gf_active_quality(cpi, q, cm->bit_depth);
// Constrained quality use slightly lower active best.
active_best_quality = active_best_quality * 15 / 16;
@@ -954,7 +1196,7 @@ static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi,
delta_qindex = vp9_compute_qdelta(rc, q, q * 0.50, cm->bit_depth);
active_best_quality = VPXMAX(qindex + delta_qindex, rc->best_quality);
} else {
- active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
+ active_best_quality = get_gf_active_quality(cpi, q, cm->bit_depth);
}
} else {
if (oxcf->rc_mode == VPX_Q) {
@@ -1045,78 +1287,143 @@ int vp9_frame_type_qdelta(const VP9_COMP *cpi, int rf_level, int q) {
1.75, // GF_ARF_STD
2.00, // KF_STD
};
- static const FRAME_TYPE frame_type[RATE_FACTOR_LEVELS] = {
- INTER_FRAME, INTER_FRAME, INTER_FRAME, INTER_FRAME, KEY_FRAME
- };
const VP9_COMMON *const cm = &cpi->common;
- int qdelta =
- vp9_compute_qdelta_by_rate(&cpi->rc, frame_type[rf_level], q,
- rate_factor_deltas[rf_level], cm->bit_depth);
+
+ int qdelta = vp9_compute_qdelta_by_rate(
+ &cpi->rc, cm->frame_type, q, rate_factor_deltas[rf_level], cm->bit_depth);
return qdelta;
}
#define STATIC_MOTION_THRESH 95
-static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index,
- int *top_index) {
+
+static void pick_kf_q_bound_two_pass(const VP9_COMP *cpi, int *bottom_index,
+ int *top_index) {
const VP9_COMMON *const cm = &cpi->common;
const RATE_CONTROL *const rc = &cpi->rc;
- const VP9EncoderConfig *const oxcf = &cpi->oxcf;
- const GF_GROUP *gf_group = &cpi->twopass.gf_group;
- const int cq_level = get_active_cq_level_two_pass(&cpi->twopass, rc, oxcf);
int active_best_quality;
int active_worst_quality = cpi->twopass.active_worst_quality;
- int q;
- int *inter_minq;
- ASSIGN_MINQ_TABLE(cm->bit_depth, inter_minq);
- if (frame_is_intra_only(cm) || vp9_is_upper_layer_key_frame(cpi)) {
+ if (rc->this_key_frame_forced) {
// Handle the special case for key frames forced when we have reached
// the maximum key frame interval. Here force the Q to a range
// based on the ambient Q to reduce the risk of popping.
- if (rc->this_key_frame_forced) {
- double last_boosted_q;
- int delta_qindex;
- int qindex;
-
- if (cpi->twopass.last_kfgroup_zeromotion_pct >= STATIC_MOTION_THRESH) {
- qindex = VPXMIN(rc->last_kf_qindex, rc->last_boosted_qindex);
- active_best_quality = qindex;
- last_boosted_q = vp9_convert_qindex_to_q(qindex, cm->bit_depth);
- delta_qindex = vp9_compute_qdelta(rc, last_boosted_q,
- last_boosted_q * 1.25, cm->bit_depth);
- active_worst_quality =
- VPXMIN(qindex + delta_qindex, active_worst_quality);
- } else {
- qindex = rc->last_boosted_qindex;
- last_boosted_q = vp9_convert_qindex_to_q(qindex, cm->bit_depth);
- delta_qindex = vp9_compute_qdelta(rc, last_boosted_q,
- last_boosted_q * 0.75, cm->bit_depth);
- active_best_quality = VPXMAX(qindex + delta_qindex, rc->best_quality);
- }
+ double last_boosted_q;
+ int delta_qindex;
+ int qindex;
+
+ if (cpi->twopass.last_kfgroup_zeromotion_pct >= STATIC_MOTION_THRESH) {
+ qindex = VPXMIN(rc->last_kf_qindex, rc->last_boosted_qindex);
+ active_best_quality = qindex;
+ last_boosted_q = vp9_convert_qindex_to_q(qindex, cm->bit_depth);
+ delta_qindex = vp9_compute_qdelta(rc, last_boosted_q,
+ last_boosted_q * 1.25, cm->bit_depth);
+ active_worst_quality =
+ VPXMIN(qindex + delta_qindex, active_worst_quality);
} else {
- // Not forced keyframe.
- double q_adj_factor = 1.0;
- double q_val;
- // Baseline value derived from cpi->active_worst_quality and kf boost.
- active_best_quality =
- get_kf_active_quality(rc, active_worst_quality, cm->bit_depth);
+ qindex = rc->last_boosted_qindex;
+ last_boosted_q = vp9_convert_qindex_to_q(qindex, cm->bit_depth);
+ delta_qindex = vp9_compute_qdelta(rc, last_boosted_q,
+ last_boosted_q * 0.75, cm->bit_depth);
+ active_best_quality = VPXMAX(qindex + delta_qindex, rc->best_quality);
+ }
+ } else {
+ // Not forced keyframe.
+ double q_adj_factor = 1.0;
+ double q_val;
+ // Baseline value derived from cpi->active_worst_quality and kf boost.
+ active_best_quality =
+ get_kf_active_quality(rc, active_worst_quality, cm->bit_depth);
+ if (cpi->twopass.kf_zeromotion_pct >= STATIC_KF_GROUP_THRESH) {
+ active_best_quality /= 4;
+ }
- // Allow somewhat lower kf minq with small image formats.
- if ((cm->width * cm->height) <= (352 * 288)) {
- q_adj_factor -= 0.25;
- }
+    // Don't allow the active min to be lossless (q0) unless the max q
+ // already indicates lossless.
+ active_best_quality =
+ VPXMIN(active_worst_quality, VPXMAX(1, active_best_quality));
- // Make a further adjustment based on the kf zero motion measure.
- q_adj_factor += 0.05 - (0.001 * (double)cpi->twopass.kf_zeromotion_pct);
+ // Allow somewhat lower kf minq with small image formats.
+ if ((cm->width * cm->height) <= (352 * 288)) {
+ q_adj_factor -= 0.25;
+ }
- // Convert the adjustment factor to a qindex delta
- // on active_best_quality.
- q_val = vp9_convert_qindex_to_q(active_best_quality, cm->bit_depth);
- active_best_quality +=
- vp9_compute_qdelta(rc, q_val, q_val * q_adj_factor, cm->bit_depth);
+ // Make a further adjustment based on the kf zero motion measure.
+ q_adj_factor += 0.05 - (0.001 * (double)cpi->twopass.kf_zeromotion_pct);
+
+ // Convert the adjustment factor to a qindex delta
+ // on active_best_quality.
+ q_val = vp9_convert_qindex_to_q(active_best_quality, cm->bit_depth);
+ active_best_quality +=
+ vp9_compute_qdelta(rc, q_val, q_val * q_adj_factor, cm->bit_depth);
+ }
+ *top_index = active_worst_quality;
+ *bottom_index = active_best_quality;
+}
+
+static int rc_constant_q(const VP9_COMP *cpi, int *bottom_index, int *top_index,
+ int gf_group_index) {
+ const VP9_COMMON *const cm = &cpi->common;
+ const RATE_CONTROL *const rc = &cpi->rc;
+ const VP9EncoderConfig *const oxcf = &cpi->oxcf;
+ const GF_GROUP *gf_group = &cpi->twopass.gf_group;
+ const int is_intra_frame = frame_is_intra_only(cm);
+
+ const int cq_level = get_active_cq_level_two_pass(&cpi->twopass, rc, oxcf);
+
+ int q = cq_level;
+ int active_best_quality = cq_level;
+ int active_worst_quality = cq_level;
+
+ // Key frame qp decision
+ if (is_intra_frame && rc->frames_to_key > 1)
+ pick_kf_q_bound_two_pass(cpi, &active_best_quality, &active_worst_quality);
+
+ // ARF / GF qp decision
+ if (!is_intra_frame && !rc->is_src_frame_alt_ref &&
+ cpi->refresh_alt_ref_frame) {
+ active_best_quality = get_gf_active_quality(cpi, q, cm->bit_depth);
+
+ // Modify best quality for second level arfs. For mode VPX_Q this
+ // becomes the baseline frame q.
+ if (gf_group->rf_level[gf_group_index] == GF_ARF_LOW) {
+ const int layer_depth = gf_group->layer_depth[gf_group_index];
+      // Linearly interpolate the frame q based on the layer depth index
+      // from the base-layer ARF.
+ active_best_quality = ((layer_depth - 1) * cq_level +
+ active_best_quality + layer_depth / 2) /
+ layer_depth;
}
- } else if (!rc->is_src_frame_alt_ref &&
- (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
+ }
+
+ q = active_best_quality;
+ *top_index = active_worst_quality;
+ *bottom_index = active_best_quality;
+ return q;
+}
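
The GF_ARF_LOW branch above is a rounded weighted mean between the base-layer ARF quality and cq_level, with weight 1/layer_depth on the ARF term, so deeper frames sit closer to cq_level. A worked sketch with assumed q values:

#include <stdio.h>

/* Worked sketch of the second-level ARF q interpolation above. The
 * cq_level and base-layer ARF q are assumed values for illustration. */
int main(void) {
  const int cq_level = 40;
  const int arf_best_quality = 24; /* q chosen for the base-layer ARF */
  int layer_depth;
  for (layer_depth = 1; layer_depth <= 4; ++layer_depth) {
    const int q = ((layer_depth - 1) * cq_level + arf_best_quality +
                   layer_depth / 2) /
                  layer_depth;
    printf("layer_depth=%d q=%d\n", layer_depth, q);
  }
  return 0; /* q: 24, 32, 35, 36 -- approaching cq_level with depth */
}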
+
+static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index,
+ int *top_index, int gf_group_index) {
+ const VP9_COMMON *const cm = &cpi->common;
+ const RATE_CONTROL *const rc = &cpi->rc;
+ const VP9EncoderConfig *const oxcf = &cpi->oxcf;
+ const GF_GROUP *gf_group = &cpi->twopass.gf_group;
+ const int cq_level = get_active_cq_level_two_pass(&cpi->twopass, rc, oxcf);
+ int active_best_quality;
+ int active_worst_quality = cpi->twopass.active_worst_quality;
+ int q;
+ int *inter_minq;
+ const int boost_frame =
+ !rc->is_src_frame_alt_ref &&
+ (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame);
+
+ ASSIGN_MINQ_TABLE(cm->bit_depth, inter_minq);
+
+ if (oxcf->rc_mode == VPX_Q)
+ return rc_constant_q(cpi, bottom_index, top_index, gf_group_index);
+
+ if (frame_is_intra_only(cm)) {
+ pick_kf_q_bound_two_pass(cpi, &active_best_quality, &active_worst_quality);
+ } else if (boost_frame) {
// Use the lower of active_worst_quality and recent
// average Q as basis for GF/ARF best Q limit unless last frame was
// a key frame.
@@ -1130,62 +1437,56 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index,
if (oxcf->rc_mode == VPX_CQ) {
if (q < cq_level) q = cq_level;
- active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
+ active_best_quality = get_gf_active_quality(cpi, q, cm->bit_depth);
// Constrained quality use slightly lower active best.
active_best_quality = active_best_quality * 15 / 16;
- } else if (oxcf->rc_mode == VPX_Q) {
- if (!cpi->refresh_alt_ref_frame) {
- active_best_quality = cq_level;
- } else {
- active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
-
- // Modify best quality for second level arfs. For mode VPX_Q this
- // becomes the baseline frame q.
- if (gf_group->rf_level[gf_group->index] == GF_ARF_LOW)
- active_best_quality = (active_best_quality + cq_level + 1) / 2;
+ // Modify best quality for second level arfs. For mode VPX_Q this
+ // becomes the baseline frame q.
+ if (gf_group->rf_level[gf_group_index] == GF_ARF_LOW) {
+ const int layer_depth = gf_group->layer_depth[gf_group_index];
+        // Linearly interpolate the frame q based on the layer depth index
+        // from the base-layer ARF.
+ active_best_quality =
+ ((layer_depth - 1) * q + active_best_quality + layer_depth / 2) /
+ layer_depth;
}
} else {
- active_best_quality = get_gf_active_quality(rc, q, cm->bit_depth);
+ active_best_quality = get_gf_active_quality(cpi, q, cm->bit_depth);
}
} else {
- if (oxcf->rc_mode == VPX_Q) {
- active_best_quality = cq_level;
- } else {
- active_best_quality = inter_minq[active_worst_quality];
+ active_best_quality = inter_minq[active_worst_quality];
- // For the constrained quality mode we don't want
- // q to fall below the cq level.
- if ((oxcf->rc_mode == VPX_CQ) && (active_best_quality < cq_level)) {
- active_best_quality = cq_level;
- }
+ // For the constrained quality mode we don't want
+ // q to fall below the cq level.
+ if ((oxcf->rc_mode == VPX_CQ) && (active_best_quality < cq_level)) {
+ active_best_quality = cq_level;
}
}
// Extension to max or min Q if undershoot or overshoot is outside
// the permitted range.
- if (cpi->oxcf.rc_mode != VPX_Q) {
- if (frame_is_intra_only(cm) ||
- (!rc->is_src_frame_alt_ref &&
- (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame))) {
- active_best_quality -=
- (cpi->twopass.extend_minq + cpi->twopass.extend_minq_fast);
- active_worst_quality += (cpi->twopass.extend_maxq / 2);
- } else {
- active_best_quality -=
- (cpi->twopass.extend_minq + cpi->twopass.extend_minq_fast) / 2;
- active_worst_quality += cpi->twopass.extend_maxq;
- }
+ if (frame_is_intra_only(cm) || boost_frame) {
+ active_best_quality -=
+ (cpi->twopass.extend_minq + cpi->twopass.extend_minq_fast);
+ active_worst_quality += (cpi->twopass.extend_maxq / 2);
+ } else {
+ active_best_quality -=
+ (cpi->twopass.extend_minq + cpi->twopass.extend_minq_fast) / 2;
+ active_worst_quality += cpi->twopass.extend_maxq;
+
+ // For normal frames do not allow an active minq lower than the q used for
+ // the last boosted frame.
+ active_best_quality = VPXMAX(active_best_quality, rc->last_boosted_qindex);
}
#if LIMIT_QRANGE_FOR_ALTREF_AND_KEY
vpx_clear_system_state();
// Static forced key frames Q restrictions dealt with elsewhere.
- if (!((frame_is_intra_only(cm) || vp9_is_upper_layer_key_frame(cpi))) ||
- !rc->this_key_frame_forced ||
- (cpi->twopass.last_kfgroup_zeromotion_pct < STATIC_MOTION_THRESH)) {
- int qdelta = vp9_frame_type_qdelta(cpi, gf_group->rf_level[gf_group->index],
+ if (!frame_is_intra_only(cm) || !rc->this_key_frame_forced ||
+ cpi->twopass.last_kfgroup_zeromotion_pct < STATIC_MOTION_THRESH) {
+ int qdelta = vp9_frame_type_qdelta(cpi, gf_group->rf_level[gf_group_index],
active_worst_quality);
active_worst_quality =
VPXMAX(active_worst_quality + qdelta, active_best_quality);
@@ -1205,11 +1506,7 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index,
active_worst_quality =
clamp(active_worst_quality, active_best_quality, rc->worst_quality);
- if (oxcf->rc_mode == VPX_Q) {
- q = active_best_quality;
- // Special case code to try and match quality with forced key frames.
- } else if ((frame_is_intra_only(cm) || vp9_is_upper_layer_key_frame(cpi)) &&
- rc->this_key_frame_forced) {
+ if (frame_is_intra_only(cm) && rc->this_key_frame_forced) {
// If static since last kf use better of last boosted and last kf q.
if (cpi->twopass.last_kfgroup_zeromotion_pct >= STATIC_MOTION_THRESH) {
q = VPXMIN(rc->last_kf_qindex, rc->last_boosted_qindex);
@@ -1242,13 +1539,15 @@ static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index,
int vp9_rc_pick_q_and_bounds(const VP9_COMP *cpi, int *bottom_index,
int *top_index) {
int q;
+ const int gf_group_index = cpi->twopass.gf_group.index;
if (cpi->oxcf.pass == 0) {
if (cpi->oxcf.rc_mode == VPX_CBR)
q = rc_pick_q_and_bounds_one_pass_cbr(cpi, bottom_index, top_index);
else
q = rc_pick_q_and_bounds_one_pass_vbr(cpi, bottom_index, top_index);
} else {
- q = rc_pick_q_and_bounds_two_pass(cpi, bottom_index, top_index);
+ q = rc_pick_q_and_bounds_two_pass(cpi, bottom_index, top_index,
+ gf_group_index);
}
if (cpi->sf.use_nonrd_pick_mode) {
if (cpi->sf.force_frame_boost == 1) q -= cpi->sf.max_delta_qindex;
@@ -1261,6 +1560,82 @@ int vp9_rc_pick_q_and_bounds(const VP9_COMP *cpi, int *bottom_index,
return q;
}
+void vp9_configure_buffer_updates(VP9_COMP *cpi, int gf_group_index) {
+ VP9_COMMON *cm = &cpi->common;
+ TWO_PASS *const twopass = &cpi->twopass;
+
+ cpi->rc.is_src_frame_alt_ref = 0;
+ cm->show_existing_frame = 0;
+ switch (twopass->gf_group.update_type[gf_group_index]) {
+ case KF_UPDATE:
+ cpi->refresh_last_frame = 1;
+ cpi->refresh_golden_frame = 1;
+ cpi->refresh_alt_ref_frame = 1;
+ break;
+ case LF_UPDATE:
+ cpi->refresh_last_frame = 1;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_alt_ref_frame = 0;
+ break;
+ case GF_UPDATE:
+ cpi->refresh_last_frame = 1;
+ cpi->refresh_golden_frame = 1;
+ cpi->refresh_alt_ref_frame = 0;
+ break;
+ case OVERLAY_UPDATE:
+ cpi->refresh_last_frame = 0;
+ cpi->refresh_golden_frame = 1;
+ cpi->refresh_alt_ref_frame = 0;
+ cpi->rc.is_src_frame_alt_ref = 1;
+ break;
+ case MID_OVERLAY_UPDATE:
+ cpi->refresh_last_frame = 1;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_alt_ref_frame = 0;
+ cpi->rc.is_src_frame_alt_ref = 1;
+ break;
+ case USE_BUF_FRAME:
+ cpi->refresh_last_frame = 0;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_alt_ref_frame = 0;
+ cpi->rc.is_src_frame_alt_ref = 1;
+ cm->show_existing_frame = 1;
+ cm->refresh_frame_context = 0;
+ break;
+ default:
+ assert(twopass->gf_group.update_type[gf_group_index] == ARF_UPDATE);
+ cpi->refresh_last_frame = 0;
+ cpi->refresh_golden_frame = 0;
+ cpi->refresh_alt_ref_frame = 1;
+ break;
+ }
+}
+
+void vp9_estimate_qp_gop(VP9_COMP *cpi) {
+ int gop_length = cpi->twopass.gf_group.gf_group_size;
+ int bottom_index, top_index;
+ int idx;
+ const int gf_index = cpi->twopass.gf_group.index;
+ const int is_src_frame_alt_ref = cpi->rc.is_src_frame_alt_ref;
+ const int refresh_frame_context = cpi->common.refresh_frame_context;
+
+ for (idx = 1; idx <= gop_length; ++idx) {
+ TplDepFrame *tpl_frame = &cpi->tpl_stats[idx];
+ int target_rate = cpi->twopass.gf_group.bit_allocation[idx];
+ cpi->twopass.gf_group.index = idx;
+ vp9_rc_set_frame_target(cpi, target_rate);
+ vp9_configure_buffer_updates(cpi, idx);
+ tpl_frame->base_qindex =
+ rc_pick_q_and_bounds_two_pass(cpi, &bottom_index, &top_index, idx);
+ tpl_frame->base_qindex = VPXMAX(tpl_frame->base_qindex, 1);
+ }
+  // Reset the actual index and frame update flags.
+ cpi->twopass.gf_group.index = gf_index;
+ cpi->rc.is_src_frame_alt_ref = is_src_frame_alt_ref;
+ cpi->common.refresh_frame_context = refresh_frame_context;
+ vp9_configure_buffer_updates(cpi, gf_index);
+}
+
void vp9_rc_compute_frame_size_bounds(const VP9_COMP *cpi, int frame_target,
int *frame_under_shoot_limit,
int *frame_over_shoot_limit) {
@@ -1367,7 +1742,8 @@ static void compute_frame_low_motion(VP9_COMP *const cpi) {
int cnt_zeromv = 0;
for (mi_row = 0; mi_row < rows; mi_row++) {
for (mi_col = 0; mi_col < cols; mi_col++) {
- if (abs(mi[0]->mv[0].as_mv.row) < 16 && abs(mi[0]->mv[0].as_mv.col) < 16)
+ if (mi[0]->ref_frame[0] == LAST_FRAME &&
+ abs(mi[0]->mv[0].as_mv.row) < 16 && abs(mi[0]->mv[0].as_mv.col) < 16)
cnt_zeromv++;
mi++;
}
@@ -1381,6 +1757,7 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
const VP9_COMMON *const cm = &cpi->common;
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
RATE_CONTROL *const rc = &cpi->rc;
+ SVC *const svc = &cpi->svc;
const int qindex = cm->base_qindex;
// Update rate control heuristics
@@ -1390,7 +1767,7 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
vp9_rc_update_rate_correction_factors(cpi);
// Keep a record of last Q and ambient average Q.
- if (cm->frame_type == KEY_FRAME) {
+ if (frame_is_intra_only(cm)) {
rc->last_q[KEY_FRAME] = qindex;
rc->avg_frame_qindex[KEY_FRAME] =
ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[KEY_FRAME] + qindex, 2);
@@ -1434,13 +1811,13 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
(cpi->refresh_golden_frame && !rc->is_src_frame_alt_ref)))) {
rc->last_boosted_qindex = qindex;
}
- if (cm->frame_type == KEY_FRAME) rc->last_kf_qindex = qindex;
+ if (frame_is_intra_only(cm)) rc->last_kf_qindex = qindex;
- update_buffer_level(cpi, rc->projected_frame_size);
+ update_buffer_level_postencode(cpi, rc->projected_frame_size);
// Rolling monitors of whether we are over or underspending used to help
// regulate min and Max Q in two pass.
- if (cm->frame_type != KEY_FRAME) {
+ if (!frame_is_intra_only(cm)) {
rc->rolling_target_bits = ROUND_POWER_OF_TWO(
rc->rolling_target_bits * 3 + rc->this_frame_target, 2);
rc->rolling_actual_bits = ROUND_POWER_OF_TWO(
@@ -1457,9 +1834,9 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
rc->total_target_vs_actual = rc->total_actual_bits - rc->total_target_bits;
- if (!cpi->use_svc || is_two_pass_svc(cpi)) {
+ if (!cpi->use_svc) {
if (is_altref_enabled(cpi) && cpi->refresh_alt_ref_frame &&
- (cm->frame_type != KEY_FRAME))
+ (!frame_is_intra_only(cm)))
// Update the alternate reference frame stats as appropriate.
update_alt_ref_frame_stats(cpi);
else
@@ -1467,7 +1844,28 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
update_golden_frame_stats(cpi);
}
- if (cm->frame_type == KEY_FRAME) rc->frames_since_key = 0;
+  // If a second (long-term) temporal reference is used for SVC,
+  // update the golden frame counter, only for the base temporal layer.
+ if (cpi->use_svc && svc->use_gf_temporal_ref_current_layer &&
+ svc->temporal_layer_id == 0) {
+ int i = 0;
+ if (cpi->refresh_golden_frame)
+ rc->frames_since_golden = 0;
+ else
+ rc->frames_since_golden++;
+    // Decrement the countdown till the next gf update.
+ if (rc->frames_till_gf_update_due > 0) rc->frames_till_gf_update_due--;
+ // Update the frames_since_golden for all upper temporal layers.
+ for (i = 1; i < svc->number_temporal_layers; ++i) {
+ const int layer = LAYER_IDS_TO_IDX(svc->spatial_layer_id, i,
+ svc->number_temporal_layers);
+ LAYER_CONTEXT *const lc = &svc->layer_context[layer];
+ RATE_CONTROL *const lrc = &lc->rc;
+ lrc->frames_since_golden = rc->frames_since_golden;
+ }
+ }
+
+ if (frame_is_intra_only(cm)) rc->frames_since_key = 0;
if (cm->show_frame) {
rc->frames_since_key++;
rc->frames_to_key--;
@@ -1481,24 +1879,51 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
}
if (oxcf->pass == 0) {
- if (cm->frame_type != KEY_FRAME) {
+ if (!frame_is_intra_only(cm) &&
+ (!cpi->use_svc ||
+ (cpi->use_svc &&
+ !svc->layer_context[svc->temporal_layer_id].is_key_frame &&
+ svc->spatial_layer_id == svc->number_spatial_layers - 1))) {
compute_frame_low_motion(cpi);
if (cpi->sf.use_altref_onepass) update_altref_usage(cpi);
}
+    // For SVC: propagate avg_frame_low_motion (only computed on the top
+    // spatial layer) to all lower spatial layers.
+ if (cpi->use_svc &&
+ svc->spatial_layer_id == svc->number_spatial_layers - 1) {
+ int i;
+ for (i = 0; i < svc->number_spatial_layers - 1; ++i) {
+ const int layer = LAYER_IDS_TO_IDX(i, svc->temporal_layer_id,
+ svc->number_temporal_layers);
+ LAYER_CONTEXT *const lc = &svc->layer_context[layer];
+ RATE_CONTROL *const lrc = &lc->rc;
+ lrc->avg_frame_low_motion = rc->avg_frame_low_motion;
+ }
+ }
cpi->rc.last_frame_is_src_altref = cpi->rc.is_src_frame_alt_ref;
}
- if (cm->frame_type != KEY_FRAME) rc->reset_high_source_sad = 0;
+ if (!frame_is_intra_only(cm)) rc->reset_high_source_sad = 0;
rc->last_avg_frame_bandwidth = rc->avg_frame_bandwidth;
+ if (cpi->use_svc && svc->spatial_layer_id < svc->number_spatial_layers - 1)
+ svc->lower_layer_qindex = cm->base_qindex;
}
void vp9_rc_postencode_update_drop_frame(VP9_COMP *cpi) {
- // Update buffer level with zero size, update frame counters, and return.
- update_buffer_level(cpi, 0);
+ cpi->common.current_video_frame++;
cpi->rc.frames_since_key++;
cpi->rc.frames_to_key--;
cpi->rc.rc_2_frame = 0;
cpi->rc.rc_1_frame = 0;
+ cpi->rc.last_avg_frame_bandwidth = cpi->rc.avg_frame_bandwidth;
+  // For SVC on a dropped frame when framedrop_mode != LAYER_DROP:
+  // in this mode the whole superframe may be dropped if only a single layer
+  // has buffer underflow (below threshold). Since this can then lead to
+  // increasing buffer levels/overflow for certain layers even though the
+  // whole superframe is dropped, we cap the buffer level if it's already stable.
+ if (cpi->use_svc && cpi->svc.framedrop_mode != LAYER_DROP &&
+ cpi->rc.buffer_level > cpi->rc.optimal_buffer_level)
+ cpi->rc.buffer_level = cpi->rc.optimal_buffer_level;
}
static int calc_pframe_target_size_one_pass_vbr(const VP9_COMP *const cpi) {
@@ -1544,10 +1969,9 @@ void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
int target;
- // TODO(yaowu): replace the "auto_key && 0" below with proper decision logic.
if (!cpi->refresh_alt_ref_frame &&
(cm->current_video_frame == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY) ||
- rc->frames_to_key == 0 || (cpi->oxcf.auto_key && 0))) {
+ rc->frames_to_key == 0)) {
cm->frame_type = KEY_FRAME;
rc->this_key_frame_forced =
cm->current_video_frame != 0 && rc->frames_to_key == 0;
@@ -1582,9 +2006,8 @@ void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) {
// Adjust boost and af_ratio based on avg_frame_low_motion, which varies
// between 0 and 100 (stationary, 100% zero/small motion).
rc->gfu_boost =
- VPXMAX(500,
- DEFAULT_GF_BOOST * (rc->avg_frame_low_motion << 1) /
- (rc->avg_frame_low_motion + 100));
+ VPXMAX(500, DEFAULT_GF_BOOST * (rc->avg_frame_low_motion << 1) /
+ (rc->avg_frame_low_motion + 100));
rc->af_ratio_onepass_vbr = VPXMIN(15, VPXMAX(5, 3 * rc->gfu_boost / 400));
}
adjust_gfint_frame_constraint(cpi, rc->frames_to_key);
@@ -1684,30 +2107,80 @@ static int calc_iframe_target_size_one_pass_cbr(const VP9_COMP *cpi) {
return vp9_rc_clamp_iframe_target_size(cpi, target);
}
+static void set_intra_only_frame(VP9_COMP *cpi) {
+ VP9_COMMON *const cm = &cpi->common;
+ SVC *const svc = &cpi->svc;
+ // Don't allow intra_only frame for bypass/flexible SVC mode, or if number
+ // of spatial layers is 1 or if number of spatial or temporal layers > 3.
+  // Also don't allow intra-only on the very first frame if the number of
+  // temporal layers > 1. This is because on an intra-only frame
+ // only 3 reference buffers can be updated, but for temporal layers > 1
+ // we generally need to use buffer slots 4 and 5.
+ if ((cm->current_video_frame == 0 && svc->number_temporal_layers > 1) ||
+ svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS ||
+ svc->number_spatial_layers > 3 || svc->number_temporal_layers > 3 ||
+ svc->number_spatial_layers == 1)
+ return;
+ cm->show_frame = 0;
+ cm->intra_only = 1;
+ cm->frame_type = INTER_FRAME;
+ cpi->ext_refresh_frame_flags_pending = 1;
+ cpi->ext_refresh_last_frame = 1;
+ cpi->ext_refresh_golden_frame = 1;
+ cpi->ext_refresh_alt_ref_frame = 1;
+ if (cm->current_video_frame == 0) {
+ cpi->lst_fb_idx = 0;
+ cpi->gld_fb_idx = 1;
+ cpi->alt_fb_idx = 2;
+ } else {
+ int i;
+ int count = 0;
+ cpi->lst_fb_idx = -1;
+ cpi->gld_fb_idx = -1;
+ cpi->alt_fb_idx = -1;
+ // For intra-only frame we need to refresh all slots that were
+ // being used for the base layer (fb_idx_base[i] == 1).
+    // Assign LAST first, then GOLDEN and then ALT.
+ for (i = 0; i < REF_FRAMES; ++i) {
+ if (svc->fb_idx_base[i] == 1) count++;
+ if (count == 1 && cpi->lst_fb_idx == -1) cpi->lst_fb_idx = i;
+ if (count == 2 && cpi->gld_fb_idx == -1) cpi->gld_fb_idx = i;
+ if (count == 3 && cpi->alt_fb_idx == -1) cpi->alt_fb_idx = i;
+ }
+ // If golden or alt is not being used for base layer, then set them
+ // to the lst_fb_idx.
+ if (cpi->gld_fb_idx == -1) cpi->gld_fb_idx = cpi->lst_fb_idx;
+ if (cpi->alt_fb_idx == -1) cpi->alt_fb_idx = cpi->lst_fb_idx;
+ }
+}
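
A standalone sketch of the slot scan above, with an assumed fb_idx_base mask: the first three base-layer slots become LAST, GOLDEN and ALTREF, and missing ones fall back to LAST.

#include <stdio.h>

#define REF_FRAMES 8

/* Sketch of the buffer-slot assignment in set_intra_only_frame().
 * The fb_idx_base mask (which slots the base layer uses) is assumed. */
int main(void) {
  const int fb_idx_base[REF_FRAMES] = { 1, 0, 1, 0, 0, 0, 0, 0 };
  int lst = -1, gld = -1, alt = -1, count = 0, i;
  for (i = 0; i < REF_FRAMES; ++i) {
    if (fb_idx_base[i] == 1) count++;
    if (count == 1 && lst == -1) lst = i;
    if (count == 2 && gld == -1) gld = i;
    if (count == 3 && alt == -1) alt = i;
  }
  /* Fall back to LAST for references the base layer never used. */
  if (gld == -1) gld = lst;
  if (alt == -1) alt = lst;
  printf("lst=%d gld=%d alt=%d\n", lst, gld, alt); /* lst=0 gld=2 alt=0 */
  return 0;
}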
+
void vp9_rc_get_svc_params(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
+ SVC *const svc = &cpi->svc;
int target = rc->avg_frame_bandwidth;
- int layer =
- LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id, cpi->svc.temporal_layer_id,
- cpi->svc.number_temporal_layers);
+ int layer = LAYER_IDS_TO_IDX(svc->spatial_layer_id, svc->temporal_layer_id,
+ svc->number_temporal_layers);
+ if (svc->first_spatial_layer_to_encode)
+ svc->layer_context[svc->temporal_layer_id].is_key_frame = 0;
// Periodic key frames are based on the super-frame counter
// (svc.current_superframe); also only the base spatial layer is a key frame.
- if ((cm->current_video_frame == 0) || (cpi->frame_flags & FRAMEFLAGS_KEY) ||
+  // Key frame is set for any of the following: very first frame, frame flags
+  // indicate a key frame, superframe counter hits the key frequency, or the
+  // (non-intra) sync flag is set for spatial layer 0.
+ if ((cm->current_video_frame == 0 && !svc->previous_frame_is_intra_only) ||
+ (cpi->frame_flags & FRAMEFLAGS_KEY) ||
(cpi->oxcf.auto_key &&
- (cpi->svc.current_superframe % cpi->oxcf.key_freq == 0) &&
- cpi->svc.spatial_layer_id == 0)) {
+ (svc->current_superframe % cpi->oxcf.key_freq == 0) &&
+ !svc->previous_frame_is_intra_only && svc->spatial_layer_id == 0) ||
+ (svc->spatial_layer_sync[0] == 1 && svc->spatial_layer_id == 0)) {
cm->frame_type = KEY_FRAME;
rc->source_alt_ref_active = 0;
- if (is_two_pass_svc(cpi)) {
- cpi->svc.layer_context[layer].is_key_frame = 1;
- cpi->ref_frame_flags &= (~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG);
- } else if (is_one_pass_cbr_svc(cpi)) {
- if (cm->current_video_frame > 0) vp9_svc_reset_key_frame(cpi);
- layer = LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id,
- cpi->svc.temporal_layer_id,
- cpi->svc.number_temporal_layers);
- cpi->svc.layer_context[layer].is_key_frame = 1;
+ if (is_one_pass_cbr_svc(cpi)) {
+ if (cm->current_video_frame > 0) vp9_svc_reset_temporal_layers(cpi, 1);
+ layer = LAYER_IDS_TO_IDX(svc->spatial_layer_id, svc->temporal_layer_id,
+ svc->number_temporal_layers);
+ svc->layer_context[layer].is_key_frame = 1;
cpi->ref_frame_flags &= (~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG);
// Assumption here is that LAST_FRAME is being updated for a keyframe.
// Thus no change in update flags.
@@ -1715,48 +2188,84 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) {
}
} else {
cm->frame_type = INTER_FRAME;
- if (is_two_pass_svc(cpi)) {
- LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer];
- if (cpi->svc.spatial_layer_id == 0) {
- lc->is_key_frame = 0;
- } else {
- lc->is_key_frame =
- cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame;
- if (lc->is_key_frame) cpi->ref_frame_flags &= (~VP9_LAST_FLAG);
- }
- cpi->ref_frame_flags &= (~VP9_ALT_FLAG);
- } else if (is_one_pass_cbr_svc(cpi)) {
- LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer];
- if (cpi->svc.spatial_layer_id == cpi->svc.first_spatial_layer_to_encode) {
- lc->is_key_frame = 0;
- } else {
- lc->is_key_frame =
- cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame;
- }
+ if (is_one_pass_cbr_svc(cpi)) {
+ LAYER_CONTEXT *lc = &svc->layer_context[layer];
+      // Add condition current_video_frame > 0 for the case where the first
+      // frame is intra-only followed by an overlay/copy frame. In this case
+      // we don't want to reset is_key_frame to 0 on the overlay/copy frame.
+ lc->is_key_frame =
+ (svc->spatial_layer_id == 0 && cm->current_video_frame > 0)
+ ? 0
+ : svc->layer_context[svc->temporal_layer_id].is_key_frame;
target = calc_pframe_target_size_one_pass_cbr(cpi);
}
}
+ // Check if superframe contains a sync layer request.
+ vp9_svc_check_spatial_layer_sync(cpi);
+
+  // If the long-term temporal reference feature is enabled, set the period
+  // of the update. The update/refresh of this reference frame is always on
+  // a base temporal layer frame.
+ if (svc->use_gf_temporal_ref_current_layer) {
+ // Only use gf long-term prediction on non-key superframes.
+ if (!svc->layer_context[svc->temporal_layer_id].is_key_frame) {
+ // Use golden for this reference, which will be used for prediction.
+ int index = svc->spatial_layer_id;
+ if (svc->number_spatial_layers == 3) index = svc->spatial_layer_id - 1;
+ assert(index >= 0);
+ cpi->gld_fb_idx = svc->buffer_gf_temporal_ref[index].idx;
+      // Enable prediction off LAST (last reference) and GOLDEN (which will
+      // generally be the further-behind, long-term reference).
+ cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
+ }
+ // Check for update/refresh of reference: only refresh on base temporal
+ // layer.
+ if (svc->temporal_layer_id == 0) {
+ if (svc->layer_context[svc->temporal_layer_id].is_key_frame) {
+        // On a key frame we update the buffer index used for long-term reference.
+ // Use the alt_ref since it is not used or updated on key frames.
+ int index = svc->spatial_layer_id;
+ if (svc->number_spatial_layers == 3) index = svc->spatial_layer_id - 1;
+ assert(index >= 0);
+ cpi->alt_fb_idx = svc->buffer_gf_temporal_ref[index].idx;
+ cpi->ext_refresh_alt_ref_frame = 1;
+ } else if (rc->frames_till_gf_update_due == 0) {
+        // Set the period of the next update. Make it a multiple of 10, as the
+        // cyclic refresh is typically ~10%, and we'd like the update to happen
+        // after a few cycles of the refresh (so it lands on a better quality
+        // frame). Note the cyclic refresh for SVC only operates on base
+        // temporal layer frames. Choose 20 as the period for now (2 cycles).
+ rc->baseline_gf_interval = 20;
+ rc->frames_till_gf_update_due = rc->baseline_gf_interval;
+ cpi->ext_refresh_golden_frame = 1;
+ rc->gfu_boost = DEFAULT_GF_BOOST;
+ }
+ }
+ } else if (!svc->use_gf_temporal_ref) {
+ rc->frames_till_gf_update_due = INT_MAX;
+ rc->baseline_gf_interval = INT_MAX;
+ }
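// A sketch of the layer indexing used throughout this function, assuming the
// usual libvpx definition from vp9_svc_layercontext.h:
//   #define LAYER_IDS_TO_IDX(sl, tl, num_tl) ((sl) * (num_tl) + (tl))
// e.g. with 3 temporal layers, (spatial 1, temporal 2) maps to
// layer_context[1 * 3 + 2] == layer_context[5].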
+ if (svc->set_intra_only_frame) {
+ set_intra_only_frame(cpi);
+ target = calc_iframe_target_size_one_pass_cbr(cpi);
+ }
// Any update/change of global cyclic refresh parameters (amount/delta-qp)
// should be done here, before the frame qp is selected.
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
vp9_cyclic_refresh_update_parameters(cpi);
vp9_rc_set_frame_target(cpi, target);
- rc->frames_till_gf_update_due = INT_MAX;
- rc->baseline_gf_interval = INT_MAX;
+ if (cm->show_frame) update_buffer_level_svc_preencode(cpi);
}
void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
int target;
- // TODO(yaowu): replace the "auto_key && 0" below with proper decision logic.
- if ((cm->current_video_frame == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY) ||
- rc->frames_to_key == 0 || (cpi->oxcf.auto_key && 0))) {
+ if ((cm->current_video_frame == 0) || (cpi->frame_flags & FRAMEFLAGS_KEY) ||
+ rc->frames_to_key == 0) {
cm->frame_type = KEY_FRAME;
- rc->this_key_frame_forced =
- cm->current_video_frame != 0 && rc->frames_to_key == 0;
rc->frames_to_key = cpi->oxcf.key_freq;
rc->kf_boost = DEFAULT_KF_BOOST;
rc->source_alt_ref_active = 0;
@@ -1782,12 +2291,15 @@ void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) {
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
vp9_cyclic_refresh_update_parameters(cpi);
- if (cm->frame_type == KEY_FRAME)
+ if (frame_is_intra_only(cm))
target = calc_iframe_target_size_one_pass_cbr(cpi);
else
target = calc_pframe_target_size_one_pass_cbr(cpi);
vp9_rc_set_frame_target(cpi, target);
+
+ if (cm->show_frame) update_buffer_level_preencode(cpi);
+
if (cpi->oxcf.resize_mode == RESIZE_DYNAMIC)
cpi->resize_pending = vp9_resize_one_pass_cbr(cpi);
else
@@ -1859,13 +2371,8 @@ void vp9_rc_set_gf_interval_range(const VP9_COMP *const cpi,
rc->max_gf_interval = vp9_rc_get_default_max_gf_interval(
cpi->framerate, rc->min_gf_interval);
- // Extended interval for genuinely static scenes
- rc->static_scene_max_gf_interval = MAX_LAG_BUFFERS * 2;
-
- if (is_altref_enabled(cpi)) {
- if (rc->static_scene_max_gf_interval > oxcf->lag_in_frames - 1)
- rc->static_scene_max_gf_interval = oxcf->lag_in_frames - 1;
- }
+ // Extended max interval for genuinely static scenes like slide shows.
+ rc->static_scene_max_gf_interval = MAX_STATIC_GF_GROUP_LENGTH;
if (rc->max_gf_interval > rc->static_scene_max_gf_interval)
rc->max_gf_interval = rc->static_scene_max_gf_interval;
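// Net effect of this change (sketch): for genuinely static content the GF
// interval may now grow to the fixed MAX_STATIC_GF_GROUP_LENGTH (250 frames,
// added to vp9_ratectrl.h later in this patch) regardless of lag_in_frames;
// non-static content is still bounded by max_gf_interval.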
@@ -1909,12 +2416,12 @@ void vp9_rc_update_framerate(VP9_COMP *cpi) {
VPXMAX(rc->min_frame_bandwidth, FRAME_OVERHEAD_BITS);
// A maximum bitrate for a frame is defined.
- // The baseline for this aligns with HW implementations that
- // can support decode of 1080P content up to a bitrate of MAX_MB_RATE bits
- // per 16x16 MB (averaged over a frame). However this limit is extended if
- // a very high rate is given on the command line or the the rate cannnot
- // be acheived because of a user specificed max q (e.g. when the user
- // specifies lossless encode.
+  // However this limit is extended if a very high rate is given on the
+  // command line or the rate cannot be achieved because of a user-specified
+  // max q (e.g. when the user specifies lossless encoding).
+  //
+  // If a level is specified that requires a lower maximum rate, then the
+  // level value takes precedence.
vbr_max_bits =
(int)(((int64_t)rc->avg_frame_bandwidth * oxcf->two_pass_vbrmax_section) /
100);
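// Worked example for the cap above (illustrative numbers): with
// avg_frame_bandwidth = 40000 bits and two_pass_vbrmax_section = 300
// (i.e. 300%), vbr_max_bits = 40000 * 300 / 100 = 120000 bits, so a single
// frame may spend up to 3x the average frame budget.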
@@ -2271,30 +2778,56 @@ static void adjust_gf_boost_lag_one_pass_vbr(VP9_COMP *cpi,
void vp9_scene_detection_onepass(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
+ YV12_BUFFER_CONFIG const *unscaled_src = cpi->un_scaled_source;
+ YV12_BUFFER_CONFIG const *unscaled_last_src = cpi->unscaled_last_source;
+ uint8_t *src_y;
+ int src_ystride;
+ int src_width;
+ int src_height;
+ uint8_t *last_src_y;
+ int last_src_ystride;
+ int last_src_width;
+ int last_src_height;
+ if (cpi->un_scaled_source == NULL || cpi->unscaled_last_source == NULL ||
+ (cpi->use_svc && cpi->svc.current_superframe == 0))
+ return;
+ src_y = unscaled_src->y_buffer;
+ src_ystride = unscaled_src->y_stride;
+ src_width = unscaled_src->y_width;
+ src_height = unscaled_src->y_height;
+ last_src_y = unscaled_last_src->y_buffer;
+ last_src_ystride = unscaled_last_src->y_stride;
+ last_src_width = unscaled_last_src->y_width;
+ last_src_height = unscaled_last_src->y_height;
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) return;
#endif
rc->high_source_sad = 0;
- if (cpi->Last_Source != NULL &&
- cpi->Last_Source->y_width == cpi->Source->y_width &&
- cpi->Last_Source->y_height == cpi->Source->y_height) {
+ rc->high_num_blocks_with_motion = 0;
+  // For SVC: scene detection is only checked on the first spatial layer of
+  // the superframe, using the original/unscaled resolutions.
+ if (cpi->svc.spatial_layer_id == cpi->svc.first_spatial_layer_to_encode &&
+ src_width == last_src_width && src_height == last_src_height) {
YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS] = { NULL };
- uint8_t *src_y = cpi->Source->y_buffer;
- int src_ystride = cpi->Source->y_stride;
- uint8_t *last_src_y = cpi->Last_Source->y_buffer;
- int last_src_ystride = cpi->Last_Source->y_stride;
+ int num_mi_cols = cm->mi_cols;
+ int num_mi_rows = cm->mi_rows;
int start_frame = 0;
int frames_to_buffer = 1;
int frame = 0;
int scene_cut_force_key_frame = 0;
+ int num_zero_temp_sad = 0;
uint64_t avg_sad_current = 0;
- uint32_t min_thresh = 4000;
+ uint32_t min_thresh = 10000;
float thresh = 8.0f;
uint32_t thresh_key = 140000;
if (cpi->oxcf.speed <= 5) thresh_key = 240000;
- if (cpi->oxcf.rc_mode == VPX_VBR) {
- min_thresh = 65000;
- thresh = 2.1f;
+ if (cpi->oxcf.content != VP9E_CONTENT_SCREEN) min_thresh = 65000;
+ if (cpi->oxcf.rc_mode == VPX_VBR) thresh = 2.1f;
+ if (cpi->use_svc && cpi->svc.number_spatial_layers > 1) {
+ const int aligned_width = ALIGN_POWER_OF_TWO(src_width, MI_SIZE_LOG2);
+ const int aligned_height = ALIGN_POWER_OF_TWO(src_height, MI_SIZE_LOG2);
+ num_mi_cols = aligned_width >> MI_SIZE_LOG2;
+ num_mi_rows = aligned_height >> MI_SIZE_LOG2;
}
if (cpi->oxcf.lag_in_frames > 0) {
frames_to_buffer = (cm->current_video_frame == 1)
@@ -2342,14 +2875,15 @@ void vp9_scene_detection_onepass(VP9_COMP *cpi) {
uint64_t avg_sad = 0;
uint64_t tmp_sad = 0;
int num_samples = 0;
- int sb_cols = (cm->mi_cols + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE;
- int sb_rows = (cm->mi_rows + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE;
+ int sb_cols = (num_mi_cols + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE;
+ int sb_rows = (num_mi_rows + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE;
if (cpi->oxcf.lag_in_frames > 0) {
src_y = frames[frame]->y_buffer;
src_ystride = frames[frame]->y_stride;
last_src_y = frames[frame + 1]->y_buffer;
last_src_ystride = frames[frame + 1]->y_stride;
}
+ num_zero_temp_sad = 0;
for (sbi_row = 0; sbi_row < sb_rows; ++sbi_row) {
for (sbi_col = 0; sbi_col < sb_cols; ++sbi_col) {
// Checker-board pattern, ignore boundary.
@@ -2361,6 +2895,7 @@ void vp9_scene_detection_onepass(VP9_COMP *cpi) {
last_src_ystride);
avg_sad += tmp_sad;
num_samples++;
+ if (tmp_sad == 0) num_zero_temp_sad++;
}
src_y += 64;
last_src_y += 64;
@@ -2377,7 +2912,8 @@ void vp9_scene_detection_onepass(VP9_COMP *cpi) {
if (avg_sad >
VPXMAX(min_thresh,
(unsigned int)(rc->avg_source_sad[0] * thresh)) &&
- rc->frames_since_key > 1)
+ rc->frames_since_key > 1 + cpi->svc.number_spatial_layers &&
+ num_zero_temp_sad < 3 * (num_samples >> 2))
rc->high_source_sad = 1;
else
rc->high_source_sad = 0;
@@ -2388,6 +2924,8 @@ void vp9_scene_detection_onepass(VP9_COMP *cpi) {
} else {
rc->avg_source_sad[lagframe_idx] = avg_sad;
}
+ if (num_zero_temp_sad < (num_samples >> 1))
+ rc->high_num_blocks_with_motion = 1;
}
}
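// The scene-cut test above, condensed into a helper (a sketch using this
// patch's names, not part of the patch): a cut is flagged only when the
// average SAD jumps well above its running average, enough frames have
// passed since the last key frame, and fewer than ~75% of the sampled
// superblocks were fully static (zero temporal SAD).
static int is_scene_cut(uint64_t avg_sad, uint64_t prev_avg_sad,
                        uint32_t min_thresh, float thresh,
                        int frames_since_key, int number_spatial_layers,
                        int num_zero_temp_sad, int num_samples) {
  return avg_sad > VPXMAX(min_thresh, (unsigned int)(prev_avg_sad * thresh)) &&
         frames_since_key > 1 + number_spatial_layers &&
         num_zero_temp_sad < 3 * (num_samples >> 2);
}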
// For CBR non-screen content mode, check if we should reset the rate
@@ -2407,6 +2945,19 @@ void vp9_scene_detection_onepass(VP9_COMP *cpi) {
if (cm->frame_type != KEY_FRAME && rc->reset_high_source_sad)
rc->this_frame_target = rc->avg_frame_bandwidth;
}
+ // For SVC the new (updated) avg_source_sad[0] for the current superframe
+ // updates the setting for all layers.
+ if (cpi->use_svc) {
+ int sl, tl;
+ SVC *const svc = &cpi->svc;
+ for (sl = 0; sl < svc->number_spatial_layers; ++sl)
+ for (tl = 0; tl < svc->number_temporal_layers; ++tl) {
+ int layer = LAYER_IDS_TO_IDX(sl, tl, svc->number_temporal_layers);
+ LAYER_CONTEXT *const lc = &svc->layer_context[layer];
+ RATE_CONTROL *const lrc = &lc->rc;
+ lrc->avg_source_sad[0] = rc->avg_source_sad[0];
+ }
+ }
// For VBR, under scene change/high content change, force golden refresh.
if (cpi->oxcf.rc_mode == VPX_VBR && cm->frame_type != KEY_FRAME &&
rc->high_source_sad && rc->frames_to_key > 3 &&
@@ -2437,12 +2988,26 @@ void vp9_scene_detection_onepass(VP9_COMP *cpi) {
// Test if encoded frame will significantly overshoot the target bitrate, and
// if so, set the QP, reset/adjust some rate control parameters, and return 1.
+// frame_size = -1 means the frame has not been encoded yet.
int vp9_encodedframe_overshoot(VP9_COMP *cpi, int frame_size, int *q) {
VP9_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
- int thresh_qp = 3 * (rc->worst_quality >> 2);
- int thresh_rate = rc->avg_frame_bandwidth * 10;
- if (cm->base_qindex < thresh_qp && frame_size > thresh_rate) {
+ SPEED_FEATURES *const sf = &cpi->sf;
+ int thresh_qp = 7 * (rc->worst_quality >> 3);
+ int thresh_rate = rc->avg_frame_bandwidth << 3;
+  // For video content, lower thresh_qp (there is more overshoot at lower Q)
+  // to be more conservative.
+ if (cpi->oxcf.content != VP9E_CONTENT_SCREEN)
+ thresh_qp = rc->worst_quality >> 1;
+  // If this decision is not based on an encoded frame size but just on
+  // scene/slide change detection (i.e., overshoot_detection_cbr_rt ==
+  // FAST_DETECTION_MAXQ), skip the (frame_size > thresh_rate) condition for
+  // now.
+ // TODO(marpan): Use a better size/rate condition for this case and
+ // adjust thresholds.
+ if ((sf->overshoot_detection_cbr_rt == FAST_DETECTION_MAXQ ||
+ frame_size > thresh_rate) &&
+ cm->base_qindex < thresh_qp) {
double rate_correction_factor =
cpi->rc.rate_correction_factors[INTER_NORMAL];
const int target_size = cpi->rc.avg_frame_bandwidth;
@@ -2452,6 +3017,29 @@ int vp9_encodedframe_overshoot(VP9_COMP *cpi, int frame_size, int *q) {
int enumerator;
// Force a re-encode, and for now use max-QP.
*q = cpi->rc.worst_quality;
+ cpi->cyclic_refresh->counter_encode_maxq_scene_change = 0;
+ cpi->rc.re_encode_maxq_scene_change = 1;
+ // If the frame_size is much larger than the threshold (big content change)
+    // and the encoded frame used a lot of intra modes, then force hybrid_intra
+ // encoding for the re-encode on this scene change. hybrid_intra will
+ // use rd-based intra mode selection for small blocks.
+ if (sf->overshoot_detection_cbr_rt == RE_ENCODE_MAXQ &&
+ frame_size > (thresh_rate << 1) && cpi->svc.spatial_layer_id == 0) {
+ MODE_INFO **mi = cm->mi_grid_visible;
+ int sum_intra_usage = 0;
+ int mi_row, mi_col;
+ int tot = 0;
+ for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) {
+ for (mi_col = 0; mi_col < cm->mi_cols; mi_col++) {
+ if (mi[0]->ref_frame[0] == INTRA_FRAME) sum_intra_usage++;
+ tot++;
+ mi++;
+ }
+ mi += 8;
+ }
+ sum_intra_usage = 100 * sum_intra_usage / (cm->mi_rows * cm->mi_cols);
+ if (sum_intra_usage > 60) cpi->rc.hybrid_intra_scene_change = 1;
+ }
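// Illustrative numbers for the intra-usage scan above: a 640x360 frame has
// mi_cols = 80 and mi_rows = 45 (one MODE_INFO per 8x8 unit), i.e. 3600
// units. If 2400 of them chose INTRA_FRAME, sum_intra_usage =
// 100 * 2400 / 3600 = 66 > 60, so hybrid intra is forced for the re-encode.
// The `mi += 8` after each row assumes the grid rows carry MI_BLOCK_SIZE (8)
// entries of padding, i.e. cm->mi_stride == cm->mi_cols + 8.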
// Adjust avg_frame_qindex, buffer_level, and rate correction factors, as
// these parameters will affect QP selection for subsequent frames. If they
// have settled down to a very different (low QP) state, then not adjusting
@@ -2479,21 +3067,27 @@ int vp9_encodedframe_overshoot(VP9_COMP *cpi, int frame_size, int *q) {
cpi->rc.rate_correction_factors[INTER_NORMAL] = rate_correction_factor;
}
  // For temporal layers, reset the rate control parameters across all
- // temporal layers.
+ // temporal layers. If the first_spatial_layer_to_encode > 0, then this
+ // superframe has skipped lower base layers. So in this case we should also
+ // reset and force max-q for spatial layers < first_spatial_layer_to_encode.
if (cpi->use_svc) {
- int i = 0;
+ int tl = 0;
+ int sl = 0;
SVC *svc = &cpi->svc;
- for (i = 0; i < svc->number_temporal_layers; ++i) {
- const int layer = LAYER_IDS_TO_IDX(svc->spatial_layer_id, i,
- svc->number_temporal_layers);
- LAYER_CONTEXT *lc = &svc->layer_context[layer];
- RATE_CONTROL *lrc = &lc->rc;
- lrc->avg_frame_qindex[INTER_FRAME] = *q;
- lrc->buffer_level = rc->optimal_buffer_level;
- lrc->bits_off_target = rc->optimal_buffer_level;
- lrc->rc_1_frame = 0;
- lrc->rc_2_frame = 0;
- lrc->rate_correction_factors[INTER_NORMAL] = rate_correction_factor;
+ for (sl = 0; sl < svc->first_spatial_layer_to_encode; ++sl) {
+ for (tl = 0; tl < svc->number_temporal_layers; ++tl) {
+ const int layer =
+ LAYER_IDS_TO_IDX(sl, tl, svc->number_temporal_layers);
+ LAYER_CONTEXT *lc = &svc->layer_context[layer];
+ RATE_CONTROL *lrc = &lc->rc;
+ lrc->avg_frame_qindex[INTER_FRAME] = *q;
+ lrc->buffer_level = lrc->optimal_buffer_level;
+ lrc->bits_off_target = lrc->optimal_buffer_level;
+ lrc->rc_1_frame = 0;
+ lrc->rc_2_frame = 0;
+ lrc->rate_correction_factors[INTER_NORMAL] = rate_correction_factor;
+ lrc->force_max_q = 1;
+ }
}
}
return 1;
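// Typical call pattern (a sketch; the encode-loop caller is outside this
// patch): after producing a frame of `frame_size` bits, the encoder asks
// whether to redo it at max Q:
//
//   int q;
//   if (vp9_encodedframe_overshoot(cpi, (int)frame_size, &q)) {
//     vp9_set_quantizer(cm, q);  // re-encode this frame at worst_quality
//     /* ... re-run the encode pass for this frame ... */
//   }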
diff --git a/libvpx/vp9/encoder/vp9_ratectrl.h b/libvpx/vp9/encoder/vp9_ratectrl.h
index c1b210677..a5c1f4cf0 100644
--- a/libvpx/vp9/encoder/vp9_ratectrl.h
+++ b/libvpx/vp9/encoder/vp9_ratectrl.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_ENCODER_VP9_RATECTRL_H_
-#define VP9_ENCODER_VP9_RATECTRL_H_
+#ifndef VPX_VP9_ENCODER_VP9_RATECTRL_H_
+#define VPX_VP9_ENCODER_VP9_RATECTRL_H_
#include "vpx/vpx_codec.h"
#include "vpx/vpx_integer.h"
@@ -34,6 +34,14 @@ extern "C" {
#define FRAME_OVERHEAD_BITS 200
+// Threshold used to define a KF group as static (e.g. a slide show).
+// Essentially this means that no frame in the group has more than 1% of MBs
+// that are not marked as coded with 0,0 motion in the first pass.
+#define STATIC_KF_GROUP_THRESH 99
+
+// The maximum duration of a GF group that is static (for example a slide show).
+#define MAX_STATIC_GF_GROUP_LENGTH 250
+
typedef enum {
INTER_NORMAL = 0,
INTER_HIGH = 1,
@@ -167,15 +175,28 @@ typedef struct {
uint64_t avg_source_sad[MAX_LAG_BUFFERS];
uint64_t prev_avg_source_sad_lag;
int high_source_sad_lagindex;
+ int high_num_blocks_with_motion;
int alt_ref_gf_group;
int last_frame_is_src_altref;
int high_source_sad;
int count_last_scene_change;
+ int hybrid_intra_scene_change;
+ int re_encode_maxq_scene_change;
int avg_frame_low_motion;
int af_ratio_onepass_vbr;
int force_qpmin;
int reset_high_source_sad;
double perc_arf_usage;
+ int force_max_q;
+ // Last frame was dropped post encode on scene change.
+ int last_post_encode_dropped_scene_change;
+ // Enable post encode frame dropping for screen content. Only enabled when
+ // ext_use_post_encode_drop is enabled by user.
+ int use_post_encode_drop;
+ // External flag to enable post encode frame dropping, controlled by user.
+ int ext_use_post_encode_drop;
+
+ int damped_adjustment[RATE_FACTOR_LEVELS];
} RATE_CONTROL;
struct VP9_COMP;
@@ -184,7 +205,7 @@ struct VP9EncoderConfig;
void vp9_rc_init(const struct VP9EncoderConfig *oxcf, int pass,
RATE_CONTROL *rc);
-int vp9_estimate_bits_at_q(FRAME_TYPE frame_kind, int q, int mbs,
+int vp9_estimate_bits_at_q(FRAME_TYPE frame_type, int q, int mbs,
double correction_factor, vpx_bit_depth_t bit_depth);
double vp9_convert_qindex_to_q(int qindex, vpx_bit_depth_t bit_depth);
@@ -195,9 +216,9 @@ void vp9_rc_init_minq_luts(void);
int vp9_rc_get_default_min_gf_interval(int width, int height, double framerate);
// Note vp9_rc_get_default_max_gf_interval() requires the min_gf_interval to
-// be passed in to ensure that the max_gf_interval returned is at least as bis
+// be passed in to ensure that the max_gf_interval returned is at least as big
// as that.
-int vp9_rc_get_default_max_gf_interval(double framerate, int min_frame_rate);
+int vp9_rc_get_default_max_gf_interval(double framerate, int min_gf_interval);
// Generally at the high level, the following flow is expected
// to be enforced for rate control:
@@ -237,13 +258,16 @@ void vp9_rc_postencode_update_drop_frame(struct VP9_COMP *cpi);
// Changes only the rate correction factors in the rate control structure.
void vp9_rc_update_rate_correction_factors(struct VP9_COMP *cpi);
+// Post encode drop for CBR mode.
+int post_encode_drop_cbr(struct VP9_COMP *cpi, size_t *size);
+
// Decide if we should drop this frame: For 1-pass CBR.
// Changes only the decimation count in the rate control structure
int vp9_rc_drop_frame(struct VP9_COMP *cpi);
// Computes frame size bounds.
void vp9_rc_compute_frame_size_bounds(const struct VP9_COMP *cpi,
- int this_frame_target,
+ int frame_target,
int *frame_under_shoot_limit,
int *frame_over_shoot_limit);
@@ -294,8 +318,12 @@ void vp9_scene_detection_onepass(struct VP9_COMP *cpi);
int vp9_encodedframe_overshoot(struct VP9_COMP *cpi, int frame_size, int *q);
+void vp9_configure_buffer_updates(struct VP9_COMP *cpi, int gf_group_index);
+
+void vp9_estimate_qp_gop(struct VP9_COMP *cpi);
+
#ifdef __cplusplus
} // extern "C"
#endif
-#endif // VP9_ENCODER_VP9_RATECTRL_H_
+#endif // VPX_VP9_ENCODER_VP9_RATECTRL_H_
diff --git a/libvpx/vp9/encoder/vp9_rd.c b/libvpx/vp9/encoder/vp9_rd.c
index 6b2306ce9..894b1497b 100644
--- a/libvpx/vp9/encoder/vp9_rd.c
+++ b/libvpx/vp9/encoder/vp9_rd.c
@@ -69,10 +69,12 @@ static void fill_mode_costs(VP9_COMP *cpi) {
const FRAME_CONTEXT *const fc = cpi->common.fc;
int i, j;
- for (i = 0; i < INTRA_MODES; ++i)
- for (j = 0; j < INTRA_MODES; ++j)
+ for (i = 0; i < INTRA_MODES; ++i) {
+ for (j = 0; j < INTRA_MODES; ++j) {
vp9_cost_tokens(cpi->y_mode_costs[i][j], vp9_kf_y_mode_prob[i][j],
vp9_intra_mode_tree);
+ }
+ }
vp9_cost_tokens(cpi->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree);
for (i = 0; i < INTRA_MODES; ++i) {
@@ -82,9 +84,28 @@ static void fill_mode_costs(VP9_COMP *cpi) {
fc->uv_mode_prob[i], vp9_intra_mode_tree);
}
- for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
+ for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) {
vp9_cost_tokens(cpi->switchable_interp_costs[i],
fc->switchable_interp_prob[i], vp9_switchable_interp_tree);
+ }
+
+ for (i = TX_8X8; i < TX_SIZES; ++i) {
+ for (j = 0; j < TX_SIZE_CONTEXTS; ++j) {
+ const vpx_prob *tx_probs = get_tx_probs(i, j, &fc->tx_probs);
+ int k;
+ for (k = 0; k <= i; ++k) {
+ int cost = 0;
+ int m;
+ for (m = 0; m <= k - (k == i); ++m) {
+ if (m == k)
+ cost += vp9_cost_zero(tx_probs[m]);
+ else
+ cost += vp9_cost_one(tx_probs[m]);
+ }
+ cpi->tx_size_cost[i - 1][j][k] = cost;
+ }
+ }
+ }
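// How the loop above prices a transform size (sketch): the tx-size signal is
// a unary code, so for max size i and chosen size k the bitstream carries k
// one-bits followed by a zero, except k == i, which needs no terminator. With
// i == TX_32X32: k=0 -> "0", k=1 -> "10", k=2 -> "110", k=3 -> "111".
// Caching these costs in cpi->tx_size_cost lets choose_tx_size_from_rd()
// (later in this patch) drop its per-call recomputation.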
}
static void fill_token_costs(vp9_coeff_cost *c,
@@ -143,40 +164,74 @@ void vp9_init_me_luts(void) {
static const int rd_boost_factor[16] = { 64, 32, 32, 32, 24, 16, 12, 12,
8, 8, 4, 4, 2, 2, 1, 0 };
-static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = { 128, 144, 128,
- 128, 144 };
-int64_t vp9_compute_rd_mult_based_on_qindex(const VP9_COMP *cpi, int qindex) {
- const int64_t q = vp9_dc_quant(qindex, 0, cpi->common.bit_depth);
+// Note that the element below for frame type "USE_BUF_FRAME" (which indicates
+// that the show-frame flag is set) should not be used, as no real frame is
+// encoded in that case and this code should not be reached. A dummy value is
+// nevertheless inserted so that the data structure has the right number of
+// entries.
+static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = { 128, 144, 128,
+ 128, 144, 144 };
+
+int vp9_compute_rd_mult_based_on_qindex(const VP9_COMP *cpi, int qindex) {
+ // largest dc_quant is 21387, therefore rdmult should always fit in int32_t
+ const int q = vp9_dc_quant(qindex, 0, cpi->common.bit_depth);
+ uint32_t rdmult = q * q;
+
+ if (cpi->common.frame_type != KEY_FRAME) {
+ if (qindex < 128)
+ rdmult = rdmult * 4;
+ else if (qindex < 190)
+ rdmult = rdmult * 4 + rdmult / 2;
+ else
+ rdmult = rdmult * 3;
+ } else {
+ if (qindex < 64)
+ rdmult = rdmult * 4;
+ else if (qindex <= 128)
+ rdmult = rdmult * 3 + rdmult / 2;
+ else if (qindex < 190)
+ rdmult = rdmult * 4 + rdmult / 2;
+ else
+ rdmult = rdmult * 7 + rdmult / 2;
+ }
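// Worked example for the scaling above: an inter frame at qindex 150 falls
// in the [128, 190) band, so rdmult = q*q*4 + (q*q)/2 = 4.5 * q^2 with
// q = vp9_dc_quant(150, 0, bit_depth), before any high-bit-depth
// normalization below.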
#if CONFIG_VP9_HIGHBITDEPTH
- int64_t rdmult = 0;
switch (cpi->common.bit_depth) {
- case VPX_BITS_8: rdmult = 88 * q * q / 24; break;
- case VPX_BITS_10: rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 4); break;
- case VPX_BITS_12: rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 8); break;
- default:
- assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
- return -1;
+ case VPX_BITS_10: rdmult = ROUND_POWER_OF_TWO(rdmult, 4); break;
+ case VPX_BITS_12: rdmult = ROUND_POWER_OF_TWO(rdmult, 8); break;
+ default: break;
}
-#else
- int64_t rdmult = 88 * q * q / 24;
#endif // CONFIG_VP9_HIGHBITDEPTH
- return rdmult;
+ return rdmult > 0 ? rdmult : 1;
}
-int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) {
- int64_t rdmult = vp9_compute_rd_mult_based_on_qindex(cpi, qindex);
-
+static int modulate_rdmult(const VP9_COMP *cpi, int rdmult) {
+ int64_t rdmult_64 = rdmult;
if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index];
- const int boost_index = VPXMIN(15, (cpi->rc.gfu_boost / 100));
+ const int gfu_boost = cpi->multi_layer_arf
+ ? gf_group->gfu_boost[gf_group->index]
+ : cpi->rc.gfu_boost;
+ const int boost_index = VPXMIN(15, (gfu_boost / 100));
- rdmult = (rdmult * rd_frame_type_factor[frame_type]) >> 7;
- rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
+ rdmult_64 = (rdmult_64 * rd_frame_type_factor[frame_type]) >> 7;
+ rdmult_64 += ((rdmult_64 * rd_boost_factor[boost_index]) >> 7);
}
- if (rdmult < 1) rdmult = 1;
- return (int)rdmult;
+ return (int)rdmult_64;
+}
+
+int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) {
+ int rdmult = vp9_compute_rd_mult_based_on_qindex(cpi, qindex);
+ return modulate_rdmult(cpi, rdmult);
+}
+
+int vp9_get_adaptive_rdmult(const VP9_COMP *cpi, double beta) {
+ int rdmult =
+ vp9_compute_rd_mult_based_on_qindex(cpi, cpi->common.base_qindex);
+ rdmult = (int)((double)rdmult / beta);
+ rdmult = rdmult > 0 ? rdmult : 1;
+ return modulate_rdmult(cpi, rdmult);
}
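// Note on beta (a sketch of the intended use; callers are outside this hunk):
// beta > 1 divides rdmult down, biasing the RD tradeoff toward spending more
// rate (higher quality) on the block being scored, while beta < 1 does the
// opposite; e.g. beta = 2.0 halves the effective Lagrange multiplier.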
static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) {
@@ -185,10 +240,10 @@ static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) {
switch (bit_depth) {
case VPX_BITS_8: q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0; break;
case VPX_BITS_10: q = vp9_dc_quant(qindex, 0, VPX_BITS_10) / 16.0; break;
- case VPX_BITS_12: q = vp9_dc_quant(qindex, 0, VPX_BITS_12) / 64.0; break;
default:
- assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
- return -1;
+ assert(bit_depth == VPX_BITS_12);
+ q = vp9_dc_quant(qindex, 0, VPX_BITS_12) / 64.0;
+ break;
}
#else
(void)bit_depth;
@@ -209,12 +264,11 @@ void vp9_initialize_me_consts(VP9_COMP *cpi, MACROBLOCK *x, int qindex) {
x->sadperbit16 = sad_per_bit16lut_10[qindex];
x->sadperbit4 = sad_per_bit4lut_10[qindex];
break;
- case VPX_BITS_12:
+ default:
+ assert(cpi->common.bit_depth == VPX_BITS_12);
x->sadperbit16 = sad_per_bit16lut_12[qindex];
x->sadperbit4 = sad_per_bit4lut_12[qindex];
break;
- default:
- assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
}
#else
(void)cpi;
@@ -471,13 +525,13 @@ void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
for (i = 0; i < num_4x4_h; i += 4)
t_left[i] = !!*(const uint32_t *)&left[i];
break;
- case TX_32X32:
+ default:
+ assert(tx_size == TX_32X32);
for (i = 0; i < num_4x4_w; i += 8)
t_above[i] = !!*(const uint64_t *)&above[i];
for (i = 0; i < num_4x4_h; i += 8)
t_left[i] = !!*(const uint64_t *)&left[i];
break;
- default: assert(0 && "Invalid transform size."); break;
}
}
@@ -493,8 +547,7 @@ void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
uint8_t *src_y_ptr = x->plane[0].src.buf;
uint8_t *ref_y_ptr;
const int num_mv_refs =
- MAX_MV_REF_CANDIDATES +
- (cpi->sf.adaptive_motion_search && block_size < x->max_partition_size);
+ MAX_MV_REF_CANDIDATES + (block_size < x->max_partition_size);
MV pred_mv[3];
pred_mv[0] = x->mbmi_ext->ref_mvs[ref_frame][0].as_mv;
@@ -504,11 +557,12 @@ void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
near_same_nearest = x->mbmi_ext->ref_mvs[ref_frame][0].as_int ==
x->mbmi_ext->ref_mvs[ref_frame][1].as_int;
+
// Get the sad for each candidate reference mv.
for (i = 0; i < num_mv_refs; ++i) {
const MV *this_mv = &pred_mv[i];
int fp_row, fp_col;
-
+ if (this_mv->row == INT16_MAX || this_mv->col == INT16_MAX) continue;
if (i == 1 && near_same_nearest) continue;
fp_row = (this_mv->row + 3 + (this_mv->row >= 0)) >> 3;
fp_col = (this_mv->col + 3 + (this_mv->col >= 0)) >> 3;
@@ -573,6 +627,7 @@ YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi,
const VP9_COMMON *const cm = &cpi->common;
const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1];
const int ref_idx = get_ref_frame_buf_idx(cpi, ref_frame);
+ assert(ref_frame >= LAST_FRAME && ref_frame <= ALTREF_FRAME);
return (scaled_idx != ref_idx && scaled_idx != INVALID_IDX)
? &cm->buffer_pool->frame_bufs[scaled_idx].buf
: NULL;
diff --git a/libvpx/vp9/encoder/vp9_rd.h b/libvpx/vp9/encoder/vp9_rd.h
index 59022c106..fa85f2176 100644
--- a/libvpx/vp9/encoder/vp9_rd.h
+++ b/libvpx/vp9/encoder/vp9_rd.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_ENCODER_VP9_RD_H_
-#define VP9_ENCODER_VP9_RD_H_
+#ifndef VPX_VP9_ENCODER_VP9_RD_H_
+#define VPX_VP9_ENCODER_VP9_RD_H_
#include <limits.h>
@@ -108,9 +108,14 @@ typedef struct RD_OPT {
int64_t prediction_type_threshes[MAX_REF_FRAMES][REFERENCE_MODES];
int64_t filter_threshes[MAX_REF_FRAMES][SWITCHABLE_FILTER_CONTEXTS];
+#if CONFIG_CONSISTENT_RECODE
+ int64_t prediction_type_threshes_prev[MAX_REF_FRAMES][REFERENCE_MODES];
+ int64_t filter_threshes_prev[MAX_REF_FRAMES][SWITCHABLE_FILTER_CONTEXTS];
+#endif
int RDMULT;
int RDDIV;
+ double r0;
} RD_OPT;
typedef struct RD_COST {
@@ -129,16 +134,17 @@ struct TileDataEnc;
struct VP9_COMP;
struct macroblock;
-int64_t vp9_compute_rd_mult_based_on_qindex(const struct VP9_COMP *cpi,
- int qindex);
+int vp9_compute_rd_mult_based_on_qindex(const struct VP9_COMP *cpi, int qindex);
int vp9_compute_rd_mult(const struct VP9_COMP *cpi, int qindex);
+int vp9_get_adaptive_rdmult(const struct VP9_COMP *cpi, double beta);
+
void vp9_initialize_rd_consts(struct VP9_COMP *cpi);
void vp9_initialize_me_consts(struct VP9_COMP *cpi, MACROBLOCK *x, int qindex);
-void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n,
+void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n_log2,
unsigned int qstep, int *rate, int64_t *dist);
void vp9_model_rd_from_var_lapndz_vec(unsigned int var[MAX_MB_PLANE],
@@ -169,8 +175,8 @@ void vp9_set_rd_speed_thresholds(struct VP9_COMP *cpi);
void vp9_set_rd_speed_thresholds_sub8x8(struct VP9_COMP *cpi);
-void vp9_update_rd_thresh_fact(int (*fact)[MAX_MODES], int rd_thresh, int bsize,
- int best_mode_index);
+void vp9_update_rd_thresh_fact(int (*factor_buf)[MAX_MODES], int rd_thresh,
+ int bsize, int best_mode_index);
static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh,
const int *const thresh_fact) {
@@ -212,4 +218,4 @@ unsigned int vp9_high_get_sby_perpixel_variance(struct VP9_COMP *cpi,
} // extern "C"
#endif
-#endif // VP9_ENCODER_VP9_RD_H_
+#endif // VPX_VP9_ENCODER_VP9_RD_H_
diff --git a/libvpx/vp9/encoder/vp9_rdopt.c b/libvpx/vp9/encoder/vp9_rdopt.c
index 2ba6378c5..debe88f9d 100644
--- a/libvpx/vp9/encoder/vp9_rdopt.c
+++ b/libvpx/vp9/encoder/vp9_rdopt.c
@@ -59,7 +59,9 @@ typedef struct {
MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;
-typedef struct { MV_REFERENCE_FRAME ref_frame[2]; } REF_DEFINITION;
+typedef struct {
+ MV_REFERENCE_FRAME ref_frame[2];
+} REF_DEFINITION;
struct rdcost_block_args {
const VP9_COMP *cpi;
@@ -541,8 +543,9 @@ static void dist_block(const VP9_COMP *cpi, MACROBLOCK *x, int plane,
MACROBLOCKD *const xd = &x->e_mbd;
const struct macroblock_plane *const p = &x->plane[plane];
const struct macroblockd_plane *const pd = &xd->plane[plane];
+ const int eob = p->eobs[block];
- if (x->block_tx_domain) {
+ if (x->block_tx_domain && eob) {
const int ss_txfrm_size = tx_size << 1;
int64_t this_sse;
const int shift = tx_size == TX_32X32 ? 0 : 2;
@@ -582,14 +585,13 @@ static void dist_block(const VP9_COMP *cpi, MACROBLOCK *x, int plane,
const uint8_t *src = &p->src.buf[src_idx];
const uint8_t *dst = &pd->dst.buf[dst_idx];
const tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
- const uint16_t *eob = &p->eobs[block];
unsigned int tmp;
tmp = pixel_sse(cpi, xd, pd, src, src_stride, dst, dst_stride, blk_row,
blk_col, plane_bsize, tx_bsize);
*out_sse = (int64_t)tmp * 16;
- if (*eob) {
+ if (eob) {
#if CONFIG_VP9_HIGHBITDEPTH
DECLARE_ALIGNED(16, uint16_t, recon16[1024]);
uint8_t *recon = (uint8_t *)recon16;
@@ -602,22 +604,22 @@ static void dist_block(const VP9_COMP *cpi, MACROBLOCK *x, int plane,
vpx_highbd_convolve_copy(CONVERT_TO_SHORTPTR(dst), dst_stride, recon16,
32, NULL, 0, 0, 0, 0, bs, bs, xd->bd);
if (xd->lossless) {
- vp9_highbd_iwht4x4_add(dqcoeff, recon16, 32, *eob, xd->bd);
+ vp9_highbd_iwht4x4_add(dqcoeff, recon16, 32, eob, xd->bd);
} else {
switch (tx_size) {
case TX_4X4:
- vp9_highbd_idct4x4_add(dqcoeff, recon16, 32, *eob, xd->bd);
+ vp9_highbd_idct4x4_add(dqcoeff, recon16, 32, eob, xd->bd);
break;
case TX_8X8:
- vp9_highbd_idct8x8_add(dqcoeff, recon16, 32, *eob, xd->bd);
+ vp9_highbd_idct8x8_add(dqcoeff, recon16, 32, eob, xd->bd);
break;
case TX_16X16:
- vp9_highbd_idct16x16_add(dqcoeff, recon16, 32, *eob, xd->bd);
+ vp9_highbd_idct16x16_add(dqcoeff, recon16, 32, eob, xd->bd);
break;
- case TX_32X32:
- vp9_highbd_idct32x32_add(dqcoeff, recon16, 32, *eob, xd->bd);
+ default:
+ assert(tx_size == TX_32X32);
+ vp9_highbd_idct32x32_add(dqcoeff, recon16, 32, eob, xd->bd);
break;
- default: assert(0 && "Invalid transform size");
}
}
recon = CONVERT_TO_BYTEPTR(recon16);
@@ -625,16 +627,16 @@ static void dist_block(const VP9_COMP *cpi, MACROBLOCK *x, int plane,
#endif // CONFIG_VP9_HIGHBITDEPTH
vpx_convolve_copy(dst, dst_stride, recon, 32, NULL, 0, 0, 0, 0, bs, bs);
switch (tx_size) {
- case TX_32X32: vp9_idct32x32_add(dqcoeff, recon, 32, *eob); break;
- case TX_16X16: vp9_idct16x16_add(dqcoeff, recon, 32, *eob); break;
- case TX_8X8: vp9_idct8x8_add(dqcoeff, recon, 32, *eob); break;
- case TX_4X4:
+ case TX_32X32: vp9_idct32x32_add(dqcoeff, recon, 32, eob); break;
+ case TX_16X16: vp9_idct16x16_add(dqcoeff, recon, 32, eob); break;
+ case TX_8X8: vp9_idct8x8_add(dqcoeff, recon, 32, eob); break;
+ default:
+ assert(tx_size == TX_4X4);
// this is like vp9_short_idct4x4 but has a special case around
// eob<=1, which is significant (not just an optimization) for
// the lossless case.
- x->inv_txfm_add(dqcoeff, recon, 32, *eob);
+ x->inv_txfm_add(dqcoeff, recon, 32, eob);
break;
- default: assert(0 && "Invalid transform size"); break;
}
#if CONFIG_VP9_HIGHBITDEPTH
}
@@ -699,17 +701,18 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
blk_row, blk_col, plane_bsize, tx_bsize);
dist = (int64_t)tmp * 16;
}
- } else if (max_txsize_lookup[plane_bsize] == tx_size) {
- if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] ==
- SKIP_TXFM_NONE) {
+ } else {
+ int skip_txfm_flag = SKIP_TXFM_NONE;
+ if (max_txsize_lookup[plane_bsize] == tx_size)
+ skip_txfm_flag = x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))];
+ if (skip_txfm_flag == SKIP_TXFM_NONE) {
// full forward transform and quantization
vp9_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size);
if (x->block_qcoeff_opt)
vp9_optimize_b(x, plane, block, tx_size, coeff_ctx);
dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col,
tx_size, &dist, &sse);
- } else if (x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))] ==
- SKIP_TXFM_AC_ONLY) {
+ } else if (skip_txfm_flag == SKIP_TXFM_AC_ONLY) {
// compute DC coefficient
tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block);
tran_low_t *const dqcoeff = BLOCK_OFFSET(xd->plane[plane].dqcoeff, block);
@@ -736,13 +739,6 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
dist = sse;
}
- } else {
- // full forward transform and quantization
- vp9_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size);
- if (x->block_qcoeff_opt)
- vp9_optimize_b(x, plane, block, tx_size, coeff_ctx);
- dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col,
- tx_size, &dist, &sse);
}
rd = RDCOST(x->rdmult, x->rddiv, 0, dist);
@@ -761,7 +757,8 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
rd = VPXMIN(rd1, rd2);
if (plane == 0) {
x->zcoeff_blk[tx_size][block] =
- !x->plane[plane].eobs[block] || (rd1 > rd2 && !xd->lossless);
+ !x->plane[plane].eobs[block] ||
+ (x->sharpness == 0 && rd1 > rd2 && !xd->lossless);
x->sum_y_eobs[tx_size] += x->plane[plane].eobs[block];
}
@@ -781,7 +778,7 @@ static void block_rd_txfm(int plane, int block, int blk_row, int blk_col,
static void txfm_rd_in_plane(const VP9_COMP *cpi, MACROBLOCK *x, int *rate,
int64_t *distortion, int *skippable, int64_t *sse,
int64_t ref_best_rd, int plane, BLOCK_SIZE bsize,
- TX_SIZE tx_size, int use_fast_coef_casting) {
+ TX_SIZE tx_size, int use_fast_coef_costing) {
MACROBLOCKD *const xd = &x->e_mbd;
const struct macroblockd_plane *const pd = &xd->plane[plane];
struct rdcost_block_args args;
@@ -789,7 +786,7 @@ static void txfm_rd_in_plane(const VP9_COMP *cpi, MACROBLOCK *x, int *rate,
args.cpi = cpi;
args.x = x;
args.best_rd = ref_best_rd;
- args.use_fast_coef_costing = use_fast_coef_casting;
+ args.use_fast_coef_costing = use_fast_coef_costing;
args.skippable = 1;
if (plane == 0) xd->mi[0]->tx_size = tx_size;
@@ -843,20 +840,20 @@ static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
{ INT64_MAX, INT64_MAX },
{ INT64_MAX, INT64_MAX },
{ INT64_MAX, INT64_MAX } };
- int n, m;
+ int n;
int s0, s1;
- int64_t best_rd = INT64_MAX;
+ int64_t best_rd = ref_best_rd;
TX_SIZE best_tx = max_tx_size;
int start_tx, end_tx;
-
- const vpx_prob *tx_probs = get_tx_probs2(max_tx_size, xd, &cm->fc->tx_probs);
+ const int tx_size_ctx = get_tx_size_context(xd);
assert(skip_prob > 0);
s0 = vp9_cost_bit(skip_prob, 0);
s1 = vp9_cost_bit(skip_prob, 1);
if (cm->tx_mode == TX_MODE_SELECT) {
start_tx = max_tx_size;
- end_tx = 0;
+ end_tx = VPXMAX(start_tx - cpi->sf.tx_size_search_depth, 0);
+ if (bs > BLOCK_32X32) end_tx = VPXMIN(end_tx + 1, start_tx);
} else {
TX_SIZE chosen_tx_size =
VPXMIN(max_tx_size, tx_mode_to_biggest_tx_size[cm->tx_mode]);
@@ -865,15 +862,9 @@ static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, int *rate,
}
for (n = start_tx; n >= end_tx; n--) {
- int r_tx_size = 0;
- for (m = 0; m <= n - (n == (int)max_tx_size); m++) {
- if (m == n)
- r_tx_size += vp9_cost_zero(tx_probs[m]);
- else
- r_tx_size += vp9_cost_one(tx_probs[m]);
- }
- txfm_rd_in_plane(cpi, x, &r[n][0], &d[n], &s[n], &sse[n], ref_best_rd, 0,
- bs, n, cpi->sf.use_fast_coef_costing);
+ const int r_tx_size = cpi->tx_size_cost[max_tx_size - 1][tx_size_ctx][n];
+ txfm_rd_in_plane(cpi, x, &r[n][0], &d[n], &s[n], &sse[n], best_rd, 0, bs, n,
+ cpi->sf.use_fast_coef_costing);
r[n][1] = r[n][0];
if (r[n][0] < INT_MAX) {
r[n][1] += r_tx_size;
@@ -1466,11 +1457,11 @@ static int set_and_cost_bmi_mvs(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
if (is_compound)
this_mv[1].as_int = frame_mv[mode][mi->ref_frame[1]].as_int;
break;
- case ZEROMV:
+ default:
+ assert(mode == ZEROMV);
this_mv[0].as_int = 0;
if (is_compound) this_mv[1].as_int = 0;
break;
- default: break;
}
mi->bmi[i].as_mv[0].as_int = this_mv[0].as_int;
@@ -1829,8 +1820,8 @@ static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
bestsme = cpi->find_fractional_mv_step(
x, &tmp_mv, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv,
x->errorperbit, &cpi->fn_ptr[bsize], 0,
- cpi->sf.mv.subpel_iters_per_step, NULL, x->nmvjointcost, x->mvcost,
- &dis, &sse, second_pred, pw, ph);
+ cpi->sf.mv.subpel_search_level, NULL, x->nmvjointcost, x->mvcost,
+ &dis, &sse, second_pred, pw, ph, cpi->sf.use_accurate_subpel_search);
}
// Restore the pointer to the first (possibly scaled) prediction buffer.
@@ -1884,6 +1875,8 @@ static int64_t rd_pick_best_sub8x8_mode(
const BLOCK_SIZE bsize = mi->sb_type;
const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize];
const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize];
+ const int pw = num_4x4_blocks_wide << 2;
+ const int ph = num_4x4_blocks_high << 2;
ENTROPY_CONTEXT t_above[2], t_left[2];
int subpelmv = 1, have_ref = 0;
SPEED_FEATURES *const sf = &cpi->sf;
@@ -1992,8 +1985,11 @@ static int64_t rd_pick_best_sub8x8_mode(
mvp_full.col = bsi->mvp.as_mv.col >> 3;
if (sf->adaptive_motion_search) {
- mvp_full.row = x->pred_mv[mi->ref_frame[0]].row >> 3;
- mvp_full.col = x->pred_mv[mi->ref_frame[0]].col >> 3;
+ if (x->pred_mv[mi->ref_frame[0]].row != INT16_MAX &&
+ x->pred_mv[mi->ref_frame[0]].col != INT16_MAX) {
+ mvp_full.row = x->pred_mv[mi->ref_frame[0]].row >> 3;
+ mvp_full.col = x->pred_mv[mi->ref_frame[0]].col >> 3;
+ }
step_param = VPXMAX(step_param, 8);
}
@@ -2015,16 +2011,16 @@ static int64_t rd_pick_best_sub8x8_mode(
cpi->find_fractional_mv_step(
x, new_mv, &bsi->ref_mv[0]->as_mv, cm->allow_high_precision_mv,
x->errorperbit, &cpi->fn_ptr[bsize], sf->mv.subpel_force_stop,
- sf->mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
+ sf->mv.subpel_search_level, cond_cost_list(cpi, cost_list),
x->nmvjointcost, x->mvcost, &distortion,
- &x->pred_sse[mi->ref_frame[0]], NULL, 0, 0);
+ &x->pred_sse[mi->ref_frame[0]], NULL, pw, ph,
+ cpi->sf.use_accurate_subpel_search);
// save motion search result for use in compound prediction
seg_mvs[i][mi->ref_frame[0]].as_mv = *new_mv;
}
- if (sf->adaptive_motion_search)
- x->pred_mv[mi->ref_frame[0]] = *new_mv;
+ x->pred_mv[mi->ref_frame[0]] = *new_mv;
// restore src pointers
mi_buf_restore(x, orig_src, orig_pre);
@@ -2319,6 +2315,61 @@ static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x,
block_size);
}
+#if CONFIG_NON_GREEDY_MV
+#define MAX_PREV_NB_FULL_MV_NUM 8
+static int find_prev_nb_full_mvs(const VP9_COMMON *cm, const MACROBLOCKD *xd,
+ int ref_frame, BLOCK_SIZE bsize, int mi_row,
+ int mi_col, int_mv *nb_full_mvs) {
+ int i;
+ const TileInfo *tile = &xd->tile;
+ int full_mv_num = 0;
+ assert(bsize >= BLOCK_8X8);
+ for (i = 0; i < MVREF_NEIGHBOURS; ++i) {
+ const POSITION *mv_ref = &mv_ref_blocks[bsize][i];
+ if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
+ const MODE_INFO *nb_mi =
+ xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride];
+ if (nb_mi->sb_type >= BLOCK_8X8) {
+ if (nb_mi->ref_frame[0] == ref_frame) {
+ nb_full_mvs[full_mv_num].as_mv = get_full_mv(&nb_mi->mv[0].as_mv);
+ ++full_mv_num;
+ if (full_mv_num >= MAX_PREV_NB_FULL_MV_NUM) {
+ return full_mv_num;
+ }
+ } else if (nb_mi->ref_frame[1] == ref_frame) {
+ nb_full_mvs[full_mv_num].as_mv = get_full_mv(&nb_mi->mv[1].as_mv);
+ ++full_mv_num;
+ if (full_mv_num >= MAX_PREV_NB_FULL_MV_NUM) {
+ return full_mv_num;
+ }
+ }
+ } else {
+ int j;
+ for (j = 0; j < 4; ++j) {
+ // TODO(angiebird): avoid using duplicated mvs
+ if (nb_mi->ref_frame[0] == ref_frame) {
+ nb_full_mvs[full_mv_num].as_mv =
+ get_full_mv(&nb_mi->bmi[j].as_mv[0].as_mv);
+ ++full_mv_num;
+ if (full_mv_num >= MAX_PREV_NB_FULL_MV_NUM) {
+ return full_mv_num;
+ }
+ } else if (nb_mi->ref_frame[1] == ref_frame) {
+ nb_full_mvs[full_mv_num].as_mv =
+ get_full_mv(&nb_mi->bmi[j].as_mv[1].as_mv);
+ ++full_mv_num;
+ if (full_mv_num >= MAX_PREV_NB_FULL_MV_NUM) {
+ return full_mv_num;
+ }
+ }
+ }
+ }
+ }
+ }
+ return full_mv_num;
+}
+#endif // CONFIG_NON_GREEDY_MV
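// Note for the helper above: get_full_mv() (assumed to come from vp9_mcomp.h
// under CONFIG_NON_GREEDY_MV) converts the stored 1/8-pel MVs to full-pel
// units, e.g. {row: -24, col: 16} -> {row: -3, col: 2}, so the collected
// neighbour MVs can seed the full-pixel diamond search directly.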
+
static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
int mi_row, int mi_col, int_mv *tmp_mv,
int *rate_mv) {
@@ -2326,19 +2377,33 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
const VP9_COMMON *cm = &cpi->common;
MODE_INFO *mi = xd->mi[0];
struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0 } };
- int bestsme = INT_MAX;
int step_param;
- int sadpb = x->sadperbit16;
MV mvp_full;
int ref = mi->ref_frame[0];
MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv;
const MvLimits tmp_mv_limits = x->mv_limits;
int cost_list[5];
-
+ const int best_predmv_idx = x->mv_best_ref_index[ref];
const YV12_BUFFER_CONFIG *scaled_ref_frame =
vp9_get_scaled_ref_frame(cpi, ref);
-
+ const int pw = num_4x4_blocks_wide_lookup[bsize] << 2;
+ const int ph = num_4x4_blocks_high_lookup[bsize] << 2;
MV pred_mv[3];
+
+#if CONFIG_NON_GREEDY_MV
+ double mv_dist = 0;
+ double mv_cost = 0;
+ double lambda = (pw * ph) / 4;
+ double bestsme;
+ int_mv nb_full_mvs[MAX_PREV_NB_FULL_MV_NUM];
+
+ const int nb_full_mv_num =
+ find_prev_nb_full_mvs(cm, xd, ref, bsize, mi_row, mi_col, nb_full_mvs);
+#else // CONFIG_NON_GREEDY_MV
+ int bestsme = INT_MAX;
+ int sadpb = x->sadperbit16;
+#endif // CONFIG_NON_GREEDY_MV
+
pred_mv[0] = x->mbmi_ext->ref_mvs[ref][0].as_mv;
pred_mv[1] = x->mbmi_ext->ref_mvs[ref][1].as_mv;
pred_mv[2] = x->pred_mv[ref];
@@ -2367,7 +2432,7 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
}
if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64) {
- int boffset =
+ const int boffset =
2 * (b_width_log2_lookup[BLOCK_64X64] -
VPXMIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize]));
step_param = VPXMAX(step_param, boffset);
@@ -2385,8 +2450,8 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
int i;
for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) {
if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) {
- x->pred_mv[ref].row = 0;
- x->pred_mv[ref].col = 0;
+ x->pred_mv[ref].row = INT16_MAX;
+ x->pred_mv[ref].col = INT16_MAX;
tmp_mv->as_int = INVALID_MV;
if (scaled_ref_frame) {
@@ -2404,14 +2469,69 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
// after full-pixel motion search.
vp9_set_mv_search_range(&x->mv_limits, &ref_mv);
- mvp_full = pred_mv[x->mv_best_ref_index[ref]];
-
+ mvp_full = pred_mv[best_predmv_idx];
mvp_full.col >>= 3;
mvp_full.row >>= 3;
+#if CONFIG_NON_GREEDY_MV
+ bestsme = vp9_full_pixel_diamond_new(
+ cpi, x, &mvp_full, step_param, lambda, 1, &cpi->fn_ptr[bsize],
+ nb_full_mvs, nb_full_mv_num, &tmp_mv->as_mv, &mv_dist, &mv_cost);
+#else // CONFIG_NON_GREEDY_MV
bestsme = vp9_full_pixel_search(
cpi, x, bsize, &mvp_full, step_param, cpi->sf.mv.search_method, sadpb,
cond_cost_list(cpi, cost_list), &ref_mv, &tmp_mv->as_mv, INT_MAX, 1);
+#endif // CONFIG_NON_GREEDY_MV
+
+ if (cpi->sf.enhanced_full_pixel_motion_search) {
+ int i;
+ for (i = 0; i < 3; ++i) {
+#if CONFIG_NON_GREEDY_MV
+ double this_me;
+#else // CONFIG_NON_GREEDY_MV
+ int this_me;
+#endif // CONFIG_NON_GREEDY_MV
+ MV this_mv;
+ int diff_row;
+ int diff_col;
+ int step;
+
+ if (pred_mv[i].row == INT16_MAX || pred_mv[i].col == INT16_MAX) continue;
+ if (i == best_predmv_idx) continue;
+
+ diff_row = ((int)pred_mv[i].row -
+ pred_mv[i > 0 ? (i - 1) : best_predmv_idx].row) >>
+ 3;
+ diff_col = ((int)pred_mv[i].col -
+ pred_mv[i > 0 ? (i - 1) : best_predmv_idx].col) >>
+ 3;
+ if (diff_row == 0 && diff_col == 0) continue;
+ if (diff_row < 0) diff_row = -diff_row;
+ if (diff_col < 0) diff_col = -diff_col;
+ step = get_msb((diff_row + diff_col + 1) >> 1);
+ if (step <= 0) continue;
+
+ mvp_full = pred_mv[i];
+ mvp_full.col >>= 3;
+ mvp_full.row >>= 3;
+#if CONFIG_NON_GREEDY_MV
+ this_me = vp9_full_pixel_diamond_new(
+ cpi, x, &mvp_full, VPXMAX(step_param, MAX_MVSEARCH_STEPS - step),
+ lambda, 1, &cpi->fn_ptr[bsize], nb_full_mvs, nb_full_mv_num, &this_mv,
+ &mv_dist, &mv_cost);
+#else // CONFIG_NON_GREEDY_MV
+ this_me = vp9_full_pixel_search(
+ cpi, x, bsize, &mvp_full,
+ VPXMAX(step_param, MAX_MVSEARCH_STEPS - step),
+ cpi->sf.mv.search_method, sadpb, cond_cost_list(cpi, cost_list),
+ &ref_mv, &this_mv, INT_MAX, 1);
+#endif // CONFIG_NON_GREEDY_MV
+ if (this_me < bestsme) {
+ tmp_mv->as_mv = this_mv;
+ bestsme = this_me;
+ }
+ }
+ }
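// Worked example for the restart heuristic above: if a candidate predictor
// differs from its reference by (diff_row, diff_col) = (12, 4) full pels,
// step = get_msb((12 + 4 + 1) >> 1) = get_msb(8) = 3, and the extra search
// starts at step_param = VPXMAX(step_param, MAX_MVSEARCH_STEPS - 3). Nearer
// predictors thus get a larger step_param and a smaller search radius.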
x->mv_limits = tmp_mv_limits;
@@ -2420,13 +2540,14 @@ static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
cpi->find_fractional_mv_step(
x, &tmp_mv->as_mv, &ref_mv, cm->allow_high_precision_mv, x->errorperbit,
&cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop,
- cpi->sf.mv.subpel_iters_per_step, cond_cost_list(cpi, cost_list),
- x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, 0, 0);
+ cpi->sf.mv.subpel_search_level, cond_cost_list(cpi, cost_list),
+ x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, pw, ph,
+ cpi->sf.use_accurate_subpel_search);
}
*rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv, x->nmvjointcost,
x->mvcost, MV_COST_WEIGHT);
- if (cpi->sf.adaptive_motion_search) x->pred_mv[ref] = tmp_mv->as_mv;
+ x->pred_mv[ref] = tmp_mv->as_mv;
if (scaled_ref_frame) {
int i;
@@ -2771,7 +2892,7 @@ static int64_t handle_inter_mode(
memcpy(x->skip_txfm, skip_txfm, sizeof(skip_txfm));
memcpy(x->bsse, bsse, sizeof(bsse));
- if (!skip_txfm_sb) {
+ if (!skip_txfm_sb || xd->lossless) {
int skippable_y, skippable_uv;
int64_t sseuv = INT64_MAX;
int64_t rdcosty = INT64_MAX;
@@ -2898,7 +3019,7 @@ static void rd_variance_adjustment(VP9_COMP *cpi, MACROBLOCK *x,
#if CONFIG_VP9_HIGHBITDEPTH
if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- if (source_variance > 0) {
+ if (source_variance > 100) {
rec_variance = vp9_high_get_sby_perpixel_variance(cpi, &xd->plane[0].dst,
bsize, xd->bd);
src_variance = source_variance;
@@ -2909,7 +3030,7 @@ static void rd_variance_adjustment(VP9_COMP *cpi, MACROBLOCK *x,
vp9_high_get_sby_variance(cpi, &x->plane[0].src, bsize, xd->bd);
}
} else {
- if (source_variance > 0) {
+ if (source_variance > 100) {
rec_variance =
vp9_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize);
src_variance = source_variance;
@@ -2919,7 +3040,7 @@ static void rd_variance_adjustment(VP9_COMP *cpi, MACROBLOCK *x,
}
}
#else
- if (source_variance > 0) {
+ if (source_variance > 100) {
rec_variance = vp9_get_sby_perpixel_variance(cpi, &xd->plane[0].dst, bsize);
src_variance = source_variance;
} else {
@@ -3066,17 +3187,19 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data,
const int intra_cost_penalty =
vp9_get_intra_cost_penalty(cpi, bsize, cm->base_qindex, cm->y_dc_delta_q);
int best_skip2 = 0;
- uint8_t ref_frame_skip_mask[2] = { 0 };
+ uint8_t ref_frame_skip_mask[2] = { 0, 1 };
uint16_t mode_skip_mask[MAX_REF_FRAMES] = { 0 };
int mode_skip_start = sf->mode_skip_start + 1;
const int *const rd_threshes = rd_opt->threshes[segment_id][bsize];
const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize];
int64_t mode_threshold[MAX_MODES];
- int *tile_mode_map = tile_data->mode_map[bsize];
- int mode_map[MAX_MODES]; // Maintain mode_map information locally to avoid
- // lock mechanism involved with reads from
- // tile_mode_map
+ int8_t *tile_mode_map = tile_data->mode_map[bsize];
+  int8_t mode_map[MAX_MODES];  // Maintain mode_map information locally to
+                               // avoid the lock mechanism involved with reads
+                               // from tile_mode_map.
const int mode_search_skip_flags = sf->mode_search_skip_flags;
+ const int is_rect_partition =
+ num_4x4_blocks_wide_lookup[bsize] != num_4x4_blocks_high_lookup[bsize];
int64_t mask_filter = 0;
int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS];
@@ -3105,7 +3228,8 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data,
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
x->pred_mv_sad[ref_frame] = INT_MAX;
- if (cpi->ref_frame_flags & flag_list[ref_frame]) {
+ if ((cpi->ref_frame_flags & flag_list[ref_frame]) &&
+ !(is_rect_partition && (ctx->skip_ref_frame_mask & (1 << ref_frame)))) {
assert(get_ref_frame_buffer(cpi, ref_frame) != NULL);
setup_buffer_inter(cpi, x, ref_frame, bsize, mi_row, mi_col,
frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb);
@@ -3228,18 +3352,21 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data,
vp9_zero(x->sum_y_eobs);
+ if (is_rect_partition) {
+ if (ctx->skip_ref_frame_mask & (1 << ref_frame)) continue;
+ if (second_ref_frame > 0 &&
+ (ctx->skip_ref_frame_mask & (1 << second_ref_frame)))
+ continue;
+ }
+
// Look at the reference frame of the best mode so far and set the
// skip mask to look at a subset of the remaining modes.
if (midx == mode_skip_start && best_mode_index >= 0) {
switch (best_mbmode.ref_frame[0]) {
case INTRA_FRAME: break;
- case LAST_FRAME:
- ref_frame_skip_mask[0] |= LAST_FRAME_MODE_MASK;
- ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
- break;
+ case LAST_FRAME: ref_frame_skip_mask[0] |= LAST_FRAME_MODE_MASK; break;
case GOLDEN_FRAME:
ref_frame_skip_mask[0] |= GOLDEN_FRAME_MODE_MASK;
- ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK;
break;
case ALTREF_FRAME: ref_frame_skip_mask[0] |= ALT_REF_MODE_MASK; break;
case NONE:
@@ -3313,6 +3440,10 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data,
if (comp_pred) {
if (!cpi->allow_comp_inter_inter) continue;
+ if (cm->ref_frame_sign_bias[ref_frame] ==
+ cm->ref_frame_sign_bias[second_ref_frame])
+ continue;
+
// Skip compound inter modes if ARF is not available.
if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) continue;
@@ -3616,9 +3747,13 @@ void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data,
}
if (best_mode_index < 0 || best_rd >= best_rd_so_far) {
- // If adaptive interp filter is enabled, then the current leaf node of 8x8
- // data is needed for sub8x8. Hence preserve the context.
+// If adaptive interp filter is enabled, then the current leaf node of 8x8
+// data is needed for sub8x8. Hence preserve the context.
+#if CONFIG_CONSISTENT_RECODE
+ if (bsize == BLOCK_8X8) ctx->mic = *xd->mi[0];
+#else
if (cpi->row_mt && bsize == BLOCK_8X8) ctx->mic = *xd->mi[0];
+#endif
rd_cost->rate = INT_MAX;
rd_cost->rdcost = INT64_MAX;
return;
@@ -3894,7 +4029,8 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, TileDataEnc *tile_data,
#if CONFIG_BETTER_HW_COMPATIBILITY
// forbid 8X4 and 4X8 partitions if any reference frame is scaled.
if (bsize == BLOCK_8X4 || bsize == BLOCK_4X8) {
- int ref_scaled = vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf);
+ int ref_scaled = ref_frame > INTRA_FRAME &&
+ vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf);
if (second_ref_frame > INTRA_FRAME)
ref_scaled += vp9_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf);
if (ref_scaled) continue;
@@ -3940,6 +4076,11 @@ void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, TileDataEnc *tile_data,
comp_pred = second_ref_frame > INTRA_FRAME;
if (comp_pred) {
if (!cpi->allow_comp_inter_inter) continue;
+
+ if (cm->ref_frame_sign_bias[ref_frame] ==
+ cm->ref_frame_sign_bias[second_ref_frame])
+ continue;
+
if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) continue;
// Do not allow compound prediction if the segment level reference frame
// feature is in use as in this case there can only be one reference.
diff --git a/libvpx/vp9/encoder/vp9_rdopt.h b/libvpx/vp9/encoder/vp9_rdopt.h
index 795c91aef..8b810bc47 100644
--- a/libvpx/vp9/encoder/vp9_rdopt.h
+++ b/libvpx/vp9/encoder/vp9_rdopt.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_ENCODER_VP9_RDOPT_H_
-#define VP9_ENCODER_VP9_RDOPT_H_
+#ifndef VPX_VP9_ENCODER_VP9_RDOPT_H_
+#define VPX_VP9_ENCODER_VP9_RDOPT_H_
#include "vp9/common/vp9_blockd.h"
@@ -56,4 +56,4 @@ void vp9_rd_pick_inter_mode_sub8x8(struct VP9_COMP *cpi,
} // extern "C"
#endif
-#endif // VP9_ENCODER_VP9_RDOPT_H_
+#endif // VPX_VP9_ENCODER_VP9_RDOPT_H_
diff --git a/libvpx/vp9/encoder/vp9_resize.c b/libvpx/vp9/encoder/vp9_resize.c
index f6c4aad4d..23a320ae5 100644
--- a/libvpx/vp9/encoder/vp9_resize.c
+++ b/libvpx/vp9/encoder/vp9_resize.c
@@ -424,11 +424,11 @@ void vp9_resize_plane(const uint8_t *const input, int height, int width,
int in_stride, uint8_t *output, int height2, int width2,
int out_stride) {
int i;
- uint8_t *intbuf = (uint8_t *)malloc(sizeof(uint8_t) * width2 * height);
+ uint8_t *intbuf = (uint8_t *)calloc(width2 * height, sizeof(*intbuf));
uint8_t *tmpbuf =
- (uint8_t *)malloc(sizeof(uint8_t) * (width < height ? height : width));
- uint8_t *arrbuf = (uint8_t *)malloc(sizeof(uint8_t) * height);
- uint8_t *arrbuf2 = (uint8_t *)malloc(sizeof(uint8_t) * height2);
+ (uint8_t *)calloc(width < height ? height : width, sizeof(*tmpbuf));
+ uint8_t *arrbuf = (uint8_t *)calloc(height, sizeof(*arrbuf));
+ uint8_t *arrbuf2 = (uint8_t *)calloc(height2, sizeof(*arrbuf2));
if (intbuf == NULL || tmpbuf == NULL || arrbuf == NULL || arrbuf2 == NULL)
goto Error;
assert(width > 0);
@@ -720,6 +720,10 @@ void vp9_highbd_resize_plane(const uint8_t *const input, int height, int width,
uint16_t *arrbuf2 = (uint16_t *)malloc(sizeof(uint16_t) * height2);
if (intbuf == NULL || tmpbuf == NULL || arrbuf == NULL || arrbuf2 == NULL)
goto Error;
+ assert(width > 0);
+ assert(height > 0);
+ assert(width2 > 0);
+ assert(height2 > 0);
for (i = 0; i < height; ++i) {
highbd_resize_multistep(CONVERT_TO_SHORTPTR(input + in_stride * i), width,
intbuf + width2 * i, width2, tmpbuf, bd);
diff --git a/libvpx/vp9/encoder/vp9_resize.h b/libvpx/vp9/encoder/vp9_resize.h
index d3282ee19..5d4ce97eb 100644
--- a/libvpx/vp9/encoder/vp9_resize.h
+++ b/libvpx/vp9/encoder/vp9_resize.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_ENCODER_VP9_RESIZE_H_
-#define VP9_ENCODER_VP9_RESIZE_H_
+#ifndef VPX_VP9_ENCODER_VP9_RESIZE_H_
+#define VPX_VP9_ENCODER_VP9_RESIZE_H_
#include <stdio.h>
#include "vpx/vpx_integer.h"
@@ -65,4 +65,4 @@ void vp9_highbd_resize_frame444(const uint8_t *const y, int y_stride,
} // extern "C"
#endif
-#endif // VP9_ENCODER_VP9_RESIZE_H_
+#endif // VPX_VP9_ENCODER_VP9_RESIZE_H_
diff --git a/libvpx/vp9/encoder/vp9_segmentation.c b/libvpx/vp9/encoder/vp9_segmentation.c
index 4a5a68e07..812d3fccd 100644
--- a/libvpx/vp9/encoder/vp9_segmentation.c
+++ b/libvpx/vp9/encoder/vp9_segmentation.c
@@ -46,6 +46,19 @@ void vp9_clear_segdata(struct segmentation *seg, int segment_id,
seg->feature_data[segment_id][feature_id] = 0;
}
+void vp9_psnr_aq_mode_setup(struct segmentation *seg) {
+ int i;
+
+ vp9_enable_segmentation(seg);
+ vp9_clearall_segfeatures(seg);
+ seg->abs_delta = SEGMENT_DELTADATA;
+
+ for (i = 0; i < MAX_SEGMENTS; ++i) {
+ vp9_set_segdata(seg, i, SEG_LVL_ALT_Q, 2 * (i - (MAX_SEGMENTS / 2)));
+ vp9_enable_segfeature(seg, i, SEG_LVL_ALT_Q);
+ }
+}
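// With MAX_SEGMENTS == 8, the loop above assigns Q deltas of
// 2 * (i - 4) = {-8, -6, -4, -2, 0, 2, 4, 6} to segments 0..7, i.e. a ladder
// of quantizer offsets centered on the frame's base Q (SEGMENT_DELTADATA
// marks the values as deltas rather than absolute Q).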
+
// Based on set of segment counts calculate a probability tree
static void calc_segtree_probs(int *segcounts, vpx_prob *segment_tree_probs) {
// Work out probabilities of each segment
diff --git a/libvpx/vp9/encoder/vp9_segmentation.h b/libvpx/vp9/encoder/vp9_segmentation.h
index 562805543..aa34dc88b 100644
--- a/libvpx/vp9/encoder/vp9_segmentation.h
+++ b/libvpx/vp9/encoder/vp9_segmentation.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_ENCODER_VP9_SEGMENTATION_H_
-#define VP9_ENCODER_VP9_SEGMENTATION_H_
+#ifndef VPX_VP9_ENCODER_VP9_SEGMENTATION_H_
+#define VPX_VP9_ENCODER_VP9_SEGMENTATION_H_
#include "vp9/common/vp9_blockd.h"
#include "vp9/encoder/vp9_encoder.h"
@@ -26,6 +26,8 @@ void vp9_disable_segfeature(struct segmentation *seg, int segment_id,
void vp9_clear_segdata(struct segmentation *seg, int segment_id,
SEG_LVL_FEATURES feature_id);
+void vp9_psnr_aq_mode_setup(struct segmentation *seg);
+
// The values given for each segment can be either deltas (from the default
// value chosen for the frame) or absolute values.
//
@@ -47,4 +49,4 @@ void vp9_reset_segment_features(struct segmentation *seg);
} // extern "C"
#endif
-#endif // VP9_ENCODER_VP9_SEGMENTATION_H_
+#endif // VPX_VP9_ENCODER_VP9_SEGMENTATION_H_
diff --git a/libvpx/vp9/encoder/vp9_skin_detection.h b/libvpx/vp9/encoder/vp9_skin_detection.h
index 8880bff46..46a722af9 100644
--- a/libvpx/vp9/encoder/vp9_skin_detection.h
+++ b/libvpx/vp9/encoder/vp9_skin_detection.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_ENCODER_VP9_SKIN_MAP_H_
-#define VP9_ENCODER_VP9_SKIN_MAP_H_
+#ifndef VPX_VP9_ENCODER_VP9_SKIN_DETECTION_H_
+#define VPX_VP9_ENCODER_VP9_SKIN_DETECTION_H_
#include "vp9/common/vp9_blockd.h"
#include "vpx_dsp/skin_detection.h"
@@ -37,4 +37,4 @@ void vp9_output_skin_map(struct VP9_COMP *const cpi, FILE *yuv_skinmap_file);
} // extern "C"
#endif
-#endif // VP9_ENCODER_VP9_SKIN_MAP_H_
+#endif // VPX_VP9_ENCODER_VP9_SKIN_DETECTION_H_
diff --git a/libvpx/vp9/encoder/vp9_speed_features.c b/libvpx/vp9/encoder/vp9_speed_features.c
index a05db60c6..5aede927b 100644
--- a/libvpx/vp9/encoder/vp9_speed_features.c
+++ b/libvpx/vp9/encoder/vp9_speed_features.c
@@ -32,7 +32,7 @@ static MESH_PATTERN
// Intra only frames, golden frames (except alt ref overlays) and
// alt ref frames tend to be coded at a higher than ambient quality
static int frame_is_boosted(const VP9_COMP *cpi) {
- return frame_is_kf_gf_arf(cpi) || vp9_is_upper_layer_key_frame(cpi);
+ return frame_is_kf_gf_arf(cpi);
}
// Sets a partition size down to which the auto partition code will always
@@ -61,46 +61,92 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi,
SPEED_FEATURES *sf,
int speed) {
VP9_COMMON *const cm = &cpi->common;
+ const int min_frame_size = VPXMIN(cm->width, cm->height);
+ const int is_480p_or_larger = min_frame_size >= 480;
+ const int is_720p_or_larger = min_frame_size >= 720;
+ const int is_1080p_or_larger = min_frame_size >= 1080;
+ const int is_2160p_or_larger = min_frame_size >= 2160;
// speed 0 features
sf->partition_search_breakout_thr.dist = (1 << 20);
sf->partition_search_breakout_thr.rate = 80;
+ sf->use_square_only_thresh_high = BLOCK_SIZES;
+ sf->use_square_only_thresh_low = BLOCK_4X4;
- // Currently, the machine-learning based partition search early termination
- // is only used while VPXMIN(cm->width, cm->height) >= 480 and speed = 0.
- if (VPXMIN(cm->width, cm->height) >= 480) {
+ if (is_480p_or_larger) {
+ // Currently, the machine-learning based partition search early termination
+ // is only used when VPXMIN(cm->width, cm->height) >= 480 and speed == 0.
sf->ml_partition_search_early_termination = 1;
+ } else {
+ sf->use_square_only_thresh_high = BLOCK_32X32;
+ }
+
+ if (!is_1080p_or_larger) {
+ sf->use_ml_partition_search_breakout = 1;
+ if (is_720p_or_larger) {
+ sf->ml_partition_search_breakout_thresh[0] = 0.0f;
+ sf->ml_partition_search_breakout_thresh[1] = 0.0f;
+ sf->ml_partition_search_breakout_thresh[2] = 0.0f;
+ } else {
+ sf->ml_partition_search_breakout_thresh[0] = 2.5f;
+ sf->ml_partition_search_breakout_thresh[1] = 1.5f;
+ sf->ml_partition_search_breakout_thresh[2] = 1.5f;
+ }
}
if (speed >= 1) {
sf->ml_partition_search_early_termination = 0;
-
- if (VPXMIN(cm->width, cm->height) >= 720) {
+ sf->use_ml_partition_search_breakout = 1;
+ if (is_480p_or_larger)
+ sf->use_square_only_thresh_high = BLOCK_64X64;
+ else
+ sf->use_square_only_thresh_high = BLOCK_32X32;
+ sf->use_square_only_thresh_low = BLOCK_16X16;
+ if (is_720p_or_larger) {
sf->disable_split_mask =
cm->show_frame ? DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT;
- sf->partition_search_breakout_thr.dist = (1 << 23);
+ sf->partition_search_breakout_thr.dist = (1 << 22);
+ sf->ml_partition_search_breakout_thresh[0] = -5.0f;
+ sf->ml_partition_search_breakout_thresh[1] = -5.0f;
+ sf->ml_partition_search_breakout_thresh[2] = -9.0f;
} else {
sf->disable_split_mask = DISABLE_COMPOUND_SPLIT;
sf->partition_search_breakout_thr.dist = (1 << 21);
+ sf->ml_partition_search_breakout_thresh[0] = -1.0f;
+ sf->ml_partition_search_breakout_thresh[1] = -1.0f;
+ sf->ml_partition_search_breakout_thresh[2] = -1.0f;
+ }
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cpi->Source->flags & YV12_FLAG_HIGHBITDEPTH) {
+ sf->ml_partition_search_breakout_thresh[0] -= 1.0f;
+ sf->ml_partition_search_breakout_thresh[1] -= 1.0f;
+ sf->ml_partition_search_breakout_thresh[2] -= 1.0f;
}
+#endif // CONFIG_VP9_HIGHBITDEPTH
}
if (speed >= 2) {
- if (VPXMIN(cm->width, cm->height) >= 720) {
+ sf->use_square_only_thresh_high = BLOCK_4X4;
+ sf->use_square_only_thresh_low = BLOCK_SIZES;
+ if (is_720p_or_larger) {
sf->disable_split_mask =
cm->show_frame ? DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT;
sf->adaptive_pred_interp_filter = 0;
sf->partition_search_breakout_thr.dist = (1 << 24);
sf->partition_search_breakout_thr.rate = 120;
+ sf->use_ml_partition_search_breakout = 0;
} else {
sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY;
sf->partition_search_breakout_thr.dist = (1 << 22);
sf->partition_search_breakout_thr.rate = 100;
+ sf->ml_partition_search_breakout_thresh[0] = 0.0f;
+ sf->ml_partition_search_breakout_thresh[1] = -1.0f;
+ sf->ml_partition_search_breakout_thresh[2] = -4.0f;
}
sf->rd_auto_partition_min_limit = set_partition_min_limit(cm);
// Use a set of speed features for 4k videos.
- if (VPXMIN(cm->width, cm->height) >= 2160) {
+ if (is_2160p_or_larger) {
sf->use_square_partition_only = 1;
sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;
sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC;
@@ -112,7 +158,8 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi,
}
if (speed >= 3) {
- if (VPXMIN(cm->width, cm->height) >= 720) {
+ sf->use_ml_partition_search_breakout = 0;
+ if (is_720p_or_larger) {
sf->disable_split_mask = DISABLE_ALL_SPLIT;
sf->schedule_mode_search = cm->base_qindex < 220 ? 1 : 0;
sf->partition_search_breakout_thr.dist = (1 << 25);
@@ -137,7 +184,7 @@ static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi,
if (speed >= 4) {
sf->partition_search_breakout_thr.rate = 300;
- if (VPXMIN(cm->width, cm->height) >= 720) {
+ if (is_720p_or_larger) {
sf->partition_search_breakout_thr.dist = (1 << 26);
} else {
sf->partition_search_breakout_thr.dist = (1 << 24);
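
The refactor above hoists the repeated VPXMIN(cm->width, cm->height) comparisons into named resolution-class flags, so each speed tier reads as a plain branch. A minimal sketch of the idiom (VPXMIN written out; names as in the hunk):

    /* Resolution classes keyed off the smaller frame dimension, as above. */
    static void classify_frame_size(int width, int height) {
      const int min_frame_size = width < height ? width : height; /* VPXMIN */
      const int is_480p_or_larger = min_frame_size >= 480;
      const int is_720p_or_larger = min_frame_size >= 720;
      /* 1280x720 -> min_frame_size == 720, so both flags are set and the
         720p-or-larger branches of the speed-feature setup are taken. */
      (void)is_480p_or_larger;
      (void)is_720p_or_larger;
    }
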
@@ -166,28 +213,40 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi,
sf->adaptive_rd_thresh_row_mt = 0;
sf->allow_skip_recode = 1;
sf->less_rectangular_check = 1;
- sf->use_square_partition_only = !frame_is_boosted(cpi);
- sf->use_square_only_threshold = BLOCK_16X16;
+ sf->use_square_partition_only = !boosted;
+ sf->prune_ref_frame_for_rect_partitions = 1;
+ sf->ml_var_partition_pruning = 1;
+
+ sf->ml_prune_rect_partition_threhold[0] = -1;
+ sf->ml_prune_rect_partition_threhold[1] = 350;
+ sf->ml_prune_rect_partition_threhold[2] = 325;
+ sf->ml_prune_rect_partition_threhold[3] = 250;
if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) {
sf->exhaustive_searches_thresh = (1 << 22);
- for (i = 0; i < MAX_MESH_STEP; ++i) {
- int mesh_density_level = 0;
- sf->mesh_patterns[i].range =
- good_quality_mesh_patterns[mesh_density_level][i].range;
- sf->mesh_patterns[i].interval =
- good_quality_mesh_patterns[mesh_density_level][i].interval;
- }
} else {
sf->exhaustive_searches_thresh = INT_MAX;
}
+ for (i = 0; i < MAX_MESH_STEP; ++i) {
+ const int mesh_density_level = 0;
+ sf->mesh_patterns[i].range =
+ good_quality_mesh_patterns[mesh_density_level][i].range;
+ sf->mesh_patterns[i].interval =
+ good_quality_mesh_patterns[mesh_density_level][i].interval;
+ }
+
if (speed >= 1) {
+ sf->ml_var_partition_pruning = !boosted;
+ sf->ml_prune_rect_partition_threhold[1] = 200;
+ sf->ml_prune_rect_partition_threhold[2] = 200;
+ sf->ml_prune_rect_partition_threhold[3] = 200;
+
if (oxcf->pass == 2) {
TWO_PASS *const twopass = &cpi->twopass;
if ((twopass->fr_content_type == FC_GRAPHICS_ANIMATION) ||
vp9_internal_image_edge(cpi)) {
- sf->use_square_partition_only = !frame_is_boosted(cpi);
+ sf->use_square_partition_only = !boosted;
} else {
sf->use_square_partition_only = !frame_is_intra_only(cm);
}
@@ -199,15 +258,12 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi,
sf->tx_domain_thresh = tx_dom_thresholds[(speed < 6) ? speed : 5];
sf->allow_quant_coeff_opt = sf->optimize_coefficients;
sf->quant_opt_thresh = qopt_thresholds[(speed < 6) ? speed : 5];
-
- sf->use_square_only_threshold = BLOCK_4X4;
sf->less_rectangular_check = 1;
-
sf->use_rd_breakout = 1;
sf->adaptive_motion_search = 1;
sf->mv.auto_mv_step_size = 1;
sf->adaptive_rd_thresh = 2;
- sf->mv.subpel_iters_per_step = 1;
+ sf->mv.subpel_search_level = 1;
sf->mode_skip_start = 10;
sf->adaptive_pred_interp_filter = 1;
sf->allow_acl = 0;
@@ -223,9 +279,11 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi,
sf->exhaustive_searches_thresh =
(cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) ? (1 << 23)
: INT_MAX;
+ sf->use_accurate_subpel_search = USE_4_TAPS;
}
if (speed >= 2) {
+ sf->ml_var_partition_pruning = 0;
if (oxcf->vbr_corpus_complexity)
sf->recode_loop = ALLOW_RECODE_FIRST;
else
@@ -247,6 +305,12 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi,
sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX;
sf->recode_tolerance_low = 15;
sf->recode_tolerance_high = 45;
+ sf->enhanced_full_pixel_motion_search = 0;
+ sf->prune_ref_frame_for_rect_partitions = 0;
+ sf->ml_prune_rect_partition_threhold[1] = -1;
+ sf->ml_prune_rect_partition_threhold[2] = -1;
+ sf->ml_prune_rect_partition_threhold[3] = -1;
+ sf->mv.subpel_search_level = 0;
if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) {
for (i = 0; i < MAX_MESH_STEP; ++i) {
@@ -257,6 +321,8 @@ static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi,
good_quality_mesh_patterns[mesh_density_level][i].interval;
}
}
+
+ sf->use_accurate_subpel_search = USE_2_TAPS;
}
if (speed >= 3) {
@@ -358,6 +424,7 @@ static void set_rt_speed_feature_framesize_dependent(VP9_COMP *cpi,
static void set_rt_speed_feature_framesize_independent(
VP9_COMP *cpi, SPEED_FEATURES *sf, int speed, vp9e_tune_content content) {
VP9_COMMON *const cm = &cpi->common;
+ SVC *const svc = &cpi->svc;
const int is_keyframe = cm->frame_type == KEY_FRAME;
const int frames_since_key = is_keyframe ? 0 : cpi->rc.frames_since_key;
sf->static_segmentation = 0;
@@ -374,6 +441,12 @@ static void set_rt_speed_feature_framesize_independent(
sf->use_compound_nonrd_pickmode = 0;
sf->nonrd_keyframe = 0;
sf->svc_use_lowres_part = 0;
+ sf->overshoot_detection_cbr_rt = NO_DETECTION;
+ sf->disable_16x16part_nonkey = 0;
+ sf->disable_golden_ref = 0;
+ sf->enable_tpl_model = 0;
+ sf->enhanced_full_pixel_motion_search = 0;
+ sf->use_accurate_subpel_search = USE_2_TAPS;
if (speed >= 1) {
sf->allow_txfm_domain_distortion = 1;
@@ -407,7 +480,7 @@ static void set_rt_speed_feature_framesize_independent(
// Reference masking only enabled for 1 spatial layer, and if none of the
// references have been scaled. The latter condition needs to be checked
// for external or internal dynamic resize.
- sf->reference_masking = (cpi->svc.number_spatial_layers == 1);
+ sf->reference_masking = (svc->number_spatial_layers == 1);
if (sf->reference_masking == 1 &&
(cpi->external_resize == 1 ||
cpi->oxcf.resize_mode == RESIZE_DYNAMIC)) {
@@ -440,7 +513,7 @@ static void set_rt_speed_feature_framesize_independent(
sf->disable_filter_search_var_thresh = 100;
sf->use_uv_intra_rd_estimate = 1;
sf->skip_encode_sb = 1;
- sf->mv.subpel_iters_per_step = 1;
+ sf->mv.subpel_search_level = 0;
sf->adaptive_rd_thresh = 4;
sf->mode_skip_start = 6;
sf->allow_skip_recode = 0;
@@ -460,7 +533,7 @@ static void set_rt_speed_feature_framesize_independent(
sf->adjust_partitioning_from_last_frame =
cm->last_frame_type != cm->frame_type ||
(0 == (frames_since_key + 1) % sf->last_partitioning_redo_frequency);
- sf->mv.subpel_force_stop = 1;
+ sf->mv.subpel_force_stop = QUARTER_PEL;
for (i = 0; i < TX_SIZES; i++) {
sf->intra_y_mode_mask[i] = INTRA_DC_H_V;
sf->intra_uv_mode_mask[i] = INTRA_DC;
@@ -489,6 +562,16 @@ static void set_rt_speed_feature_framesize_independent(
(frames_since_key % (sf->last_partitioning_redo_frequency << 1) == 1);
sf->max_delta_qindex = is_keyframe ? 20 : 15;
sf->partition_search_type = REFERENCE_PARTITION;
+#if CONFIG_ML_VAR_PARTITION
+ if (!frame_is_intra_only(cm) && cm->width >= 360 && cm->height >= 360)
+ sf->partition_search_type = ML_BASED_PARTITION;
+ else
+ sf->partition_search_type = REFERENCE_PARTITION;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cpi->Source->flags & YV12_FLAG_HIGHBITDEPTH)
+ sf->partition_search_type = REFERENCE_PARTITION;
+#endif // CONFIG_VP9_HIGHBITDEPTH
+#endif // CONFIG_ML_VAR_PARTITION
if (cpi->oxcf.rc_mode == VPX_VBR && cpi->oxcf.lag_in_frames > 0 &&
cpi->rc.is_src_frame_alt_ref) {
sf->partition_search_type = VAR_BASED_PARTITION;
@@ -531,6 +614,17 @@ static void set_rt_speed_feature_framesize_independent(
sf->limit_newmv_early_exit = 1;
if (!cpi->use_svc) sf->bias_golden = 1;
}
+ // Keep nonrd_keyframe = 1 for non-base spatial layers to prevent an
+ // increase in encoding time.
+ if (cpi->use_svc && svc->spatial_layer_id > 0) sf->nonrd_keyframe = 1;
+ if (cm->frame_type != KEY_FRAME && cpi->resize_state == ORIG &&
+ cpi->oxcf.rc_mode == VPX_CBR)
+ sf->overshoot_detection_cbr_rt = FAST_DETECTION_MAXQ;
+ if (cpi->oxcf.rc_mode == VPX_VBR && cpi->oxcf.lag_in_frames > 0 &&
+ cm->width <= 1280 && cm->height <= 720) {
+ sf->use_altref_onepass = 1;
+ sf->use_compound_nonrd_pickmode = 1;
+ }
}
if (speed >= 6) {
@@ -538,9 +632,16 @@ static void set_rt_speed_feature_framesize_independent(
sf->use_altref_onepass = 1;
sf->use_compound_nonrd_pickmode = 1;
}
+#if CONFIG_ML_VAR_PARTITION
+ if (frame_is_intra_only(cm) || cm->width < 360 || cm->height < 360)
+ sf->partition_search_type = VAR_BASED_PARTITION;
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (cpi->Source->flags & YV12_FLAG_HIGHBITDEPTH)
+ sf->partition_search_type = VAR_BASED_PARTITION;
+#endif // CONFIG_VP9_HIGHBITDEPTH
+#else
sf->partition_search_type = VAR_BASED_PARTITION;
- // Turn on this to use non-RD key frame coding mode.
- sf->use_nonrd_pick_mode = 1;
+#endif // CONFIG_ML_VAR_PARTITION
sf->mv.search_method = NSTEP;
sf->mv.reduce_first_step_size = 1;
sf->skip_encode_sb = 0;
@@ -553,7 +654,7 @@ static void set_rt_speed_feature_framesize_independent(
(cm->width * cm->height <= 640 * 360) ? 40000 : 60000;
if (cpi->content_state_sb_fd == NULL &&
(!cpi->use_svc ||
- cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)) {
+ svc->spatial_layer_id == svc->number_spatial_layers - 1)) {
cpi->content_state_sb_fd = (uint8_t *)vpx_calloc(
(cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1), sizeof(uint8_t));
}
@@ -562,7 +663,7 @@ static void set_rt_speed_feature_framesize_independent(
// Enable short circuit for low temporal variance.
sf->short_circuit_low_temp_var = 1;
}
- if (cpi->svc.temporal_layer_id > 0) {
+ if (svc->temporal_layer_id > 0) {
sf->adaptive_rd_thresh = 4;
sf->limit_newmv_early_exit = 0;
sf->base_mv_aggressive = 1;
@@ -576,16 +677,15 @@ static void set_rt_speed_feature_framesize_independent(
sf->mv.fullpel_search_step_param = 10;
// For SVC: use better mv search on base temporal layer, and only
// on base spatial layer if highest resolution is above 640x360.
- if (cpi->svc.number_temporal_layers > 2 &&
- cpi->svc.temporal_layer_id == 0 &&
- (cpi->svc.spatial_layer_id == 0 ||
+ if (svc->number_temporal_layers > 2 && svc->temporal_layer_id == 0 &&
+ (svc->spatial_layer_id == 0 ||
cpi->oxcf.width * cpi->oxcf.height <= 640 * 360)) {
sf->mv.search_method = NSTEP;
sf->mv.fullpel_search_step_param = 6;
}
- if (cpi->svc.temporal_layer_id > 0 || cpi->svc.spatial_layer_id > 1) {
+ if (svc->temporal_layer_id > 0 || svc->spatial_layer_id > 1) {
sf->use_simple_block_yrd = 1;
- if (cpi->svc.non_reference_frame)
+ if (svc->non_reference_frame)
sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED_EVENMORE;
}
if (cpi->use_svc && cpi->row_mt && cpi->oxcf.max_threads > 1)
@@ -596,22 +696,29 @@ static void set_rt_speed_feature_framesize_independent(
if (!cpi->last_frame_dropped && cpi->resize_state == ORIG &&
!cpi->external_resize &&
(!cpi->use_svc ||
- cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)) {
+ (svc->spatial_layer_id == svc->number_spatial_layers - 1 &&
+ !svc->last_layer_dropped[svc->number_spatial_layers - 1]))) {
sf->copy_partition_flag = 1;
cpi->max_copied_frame = 2;
// Frames in the top temporal enhancement layer (when the number of temporal
// layers is > 1) are non-reference frames, so use a large/max value for
// max_copied_frame.
- if (cpi->svc.number_temporal_layers > 1 &&
- cpi->svc.temporal_layer_id == cpi->svc.number_temporal_layers - 1)
+ if (svc->number_temporal_layers > 1 &&
+ svc->temporal_layer_id == svc->number_temporal_layers - 1)
cpi->max_copied_frame = 255;
}
// For SVC: enable use of lower resolution partition for higher resolution,
// only for 3 spatial layers and when config/top resolution is above VGA.
// Enable only for non-base temporal layer frames.
- if (cpi->use_svc && cpi->svc.number_spatial_layers == 3 &&
- cpi->svc.temporal_layer_id > 0 &&
+ if (cpi->use_svc && svc->use_partition_reuse &&
+ svc->number_spatial_layers == 3 && svc->temporal_layer_id > 0 &&
cpi->oxcf.width * cpi->oxcf.height > 640 * 480)
sf->svc_use_lowres_part = 1;
+ // For SVC, when golden is used as a second temporal reference: to avoid an
+ // encode-time increase, only use this feature on the base temporal layer
+ // (i.e., remove the golden flag from frame_flags when temporal_layer_id > 0).
+ if (cpi->use_svc && svc->use_gf_temporal_ref_current_layer &&
+ svc->temporal_layer_id > 0)
+ cpi->ref_frame_flags &= (~VP9_GOLD_FLAG);
}
if (speed >= 8) {
@@ -622,7 +729,7 @@ static void set_rt_speed_feature_framesize_independent(
if (cpi->row_mt && cpi->oxcf.max_threads > 1)
sf->adaptive_rd_thresh_row_mt = 1;
- if (content == VP9E_CONTENT_SCREEN) sf->mv.subpel_force_stop = 3;
+ if (content == VP9E_CONTENT_SCREEN) sf->mv.subpel_force_stop = FULL_PEL;
if (content == VP9E_CONTENT_SCREEN) sf->lpf_pick = LPF_PICK_MINIMAL_LPF;
// Only keep INTRA_DC mode for speed 8.
if (!is_keyframe) {
@@ -652,6 +759,22 @@ static void set_rt_speed_feature_framesize_independent(
sf->limit_newmv_early_exit = 0;
sf->use_simple_block_yrd = 1;
}
+
+ if (speed >= 9) {
+ sf->mv.enable_adaptive_subpel_force_stop = 1;
+ sf->mv.adapt_subpel_force_stop.mv_thresh = 2;
+ if (cpi->rc.avg_frame_low_motion < 40)
+ sf->mv.adapt_subpel_force_stop.mv_thresh = 1;
+ sf->mv.adapt_subpel_force_stop.force_stop_below = QUARTER_PEL;
+ sf->mv.adapt_subpel_force_stop.force_stop_above = HALF_PEL;
+ // Disable partition blocks below 16x16, except for low resolutions.
+ if (cm->frame_type != KEY_FRAME && cm->width >= 320 && cm->height >= 240)
+ sf->disable_16x16part_nonkey = 1;
+ // Allow for disabling GOLDEN reference, for CBR mode.
+ if (cpi->oxcf.rc_mode == VPX_CBR) sf->disable_golden_ref = 1;
+ if (cpi->rc.avg_frame_low_motion < 65) sf->default_interp_filter = BILINEAR;
+ }
+
if (sf->use_altref_onepass) {
if (cpi->rc.is_src_frame_alt_ref && cm->frame_type != KEY_FRAME) {
sf->partition_search_type = FIXED_PARTITION;
@@ -666,6 +789,19 @@ static void set_rt_speed_feature_framesize_independent(
(uint8_t *)vpx_calloc((cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1),
sizeof(*cpi->count_lastgolden_frame_usage));
}
+ if (svc->previous_frame_is_intra_only) {
+ sf->partition_search_type = FIXED_PARTITION;
+ sf->always_this_block_size = BLOCK_64X64;
+ }
+ // Special case for screen content: increase motion search on base spatial
+ // layer when high motion is detected or previous SL0 frame was dropped.
+ if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && cpi->oxcf.speed >= 5 &&
+ (svc->high_num_blocks_with_motion || svc->last_layer_dropped[0])) {
+ sf->mv.search_method = NSTEP;
+ // TODO(marpan/jianj): Tune this setting for screensharing. For now use
+ // small step_param for all spatial layers.
+ sf->mv.fullpel_search_step_param = 2;
+ }
}
void vp9_set_speed_features_framesize_dependent(VP9_COMP *cpi) {
@@ -679,6 +815,7 @@ void vp9_set_speed_features_framesize_dependent(VP9_COMP *cpi) {
sf->partition_search_breakout_thr.dist = (1 << 19);
sf->partition_search_breakout_thr.rate = 80;
sf->ml_partition_search_early_termination = 0;
+ sf->use_ml_partition_search_breakout = 0;
if (oxcf->mode == REALTIME) {
set_rt_speed_feature_framesize_dependent(cpi, sf, oxcf->speed);
@@ -710,12 +847,6 @@ void vp9_set_speed_features_framesize_dependent(VP9_COMP *cpi) {
if (!sf->adaptive_rd_thresh_row_mt && cpi->row_mt_bit_exact &&
oxcf->max_threads > 1)
sf->adaptive_rd_thresh = 0;
-
- // This is only used in motion vector unit test.
- if (cpi->oxcf.motion_vector_unit_test == 1)
- cpi->find_fractional_mv_step = vp9_return_max_sub_pixel_mv;
- else if (cpi->oxcf.motion_vector_unit_test == 2)
- cpi->find_fractional_mv_step = vp9_return_min_sub_pixel_mv;
}
void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) {
@@ -730,8 +861,8 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) {
sf->mv.search_method = NSTEP;
sf->recode_loop = ALLOW_RECODE_FIRST;
sf->mv.subpel_search_method = SUBPEL_TREE;
- sf->mv.subpel_iters_per_step = 2;
- sf->mv.subpel_force_stop = 0;
+ sf->mv.subpel_search_level = 2;
+ sf->mv.subpel_force_stop = EIGHTH_PEL;
sf->optimize_coefficients = !is_lossless_requested(&cpi->oxcf);
sf->mv.reduce_first_step_size = 0;
sf->coeff_prob_appx_step = 1;
@@ -741,6 +872,7 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) {
sf->tx_size_search_method = USE_FULL_RD;
sf->use_lp32x32fdct = 0;
sf->adaptive_motion_search = 0;
+ sf->enhanced_full_pixel_motion_search = 1;
sf->adaptive_pred_interp_filter = 0;
sf->adaptive_mode_search = 0;
sf->cb_pred_filter_search = 0;
@@ -752,7 +884,8 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) {
sf->partition_search_type = SEARCH_PARTITION;
sf->less_rectangular_check = 0;
sf->use_square_partition_only = 0;
- sf->use_square_only_threshold = BLOCK_SIZES;
+ sf->use_square_only_thresh_high = BLOCK_SIZES;
+ sf->use_square_only_thresh_low = BLOCK_4X4;
sf->auto_min_max_partition_size = NOT_IN_USE;
sf->rd_auto_partition_min_limit = BLOCK_4X4;
sf->default_max_partition_size = BLOCK_64X64;
@@ -771,6 +904,8 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) {
sf->allow_quant_coeff_opt = sf->optimize_coefficients;
sf->quant_opt_thresh = 99.0;
sf->allow_acl = 1;
+ sf->enable_tpl_model = oxcf->enable_tpl_model;
+ sf->prune_ref_frame_for_rect_partitions = 0;
for (i = 0; i < TX_SIZES; i++) {
sf->intra_y_mode_mask[i] = INTRA_ALL;
@@ -804,10 +939,17 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) {
sf->limit_newmv_early_exit = 0;
sf->bias_golden = 0;
sf->base_mv_aggressive = 0;
+ sf->ml_prune_rect_partition_threhold[0] = -1;
+ sf->ml_prune_rect_partition_threhold[1] = -1;
+ sf->ml_prune_rect_partition_threhold[2] = -1;
+ sf->ml_prune_rect_partition_threhold[3] = -1;
+ sf->ml_var_partition_pruning = 0;
+ sf->use_accurate_subpel_search = USE_8_TAPS;
// Some speed-up features even for best quality as minimal impact on quality.
sf->adaptive_rd_thresh = 1;
sf->tx_size_search_breakout = 1;
+ sf->tx_size_search_depth = 2;
sf->exhaustive_searches_thresh =
(cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) ? (1 << 20)
@@ -837,7 +979,7 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) {
sf->optimize_coefficients = 0;
}
- if (sf->mv.subpel_force_stop == 3) {
+ if (sf->mv.subpel_force_stop == FULL_PEL) {
// Whole pel only
cpi->find_fractional_mv_step = vp9_skip_sub_pixel_tree;
} else if (sf->mv.subpel_search_method == SUBPEL_TREE) {
@@ -850,6 +992,12 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) {
cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree_pruned_evenmore;
}
+ // This is only used in motion vector unit test.
+ if (cpi->oxcf.motion_vector_unit_test == 1)
+ cpi->find_fractional_mv_step = vp9_return_max_sub_pixel_mv;
+ else if (cpi->oxcf.motion_vector_unit_test == 2)
+ cpi->find_fractional_mv_step = vp9_return_min_sub_pixel_mv;
+
x->optimize = sf->optimize_coefficients == 1 && oxcf->pass != 1;
x->min_partition_size = sf->default_min_partition_size;
@@ -867,10 +1015,4 @@ void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi) {
if (!sf->adaptive_rd_thresh_row_mt && cpi->row_mt_bit_exact &&
oxcf->max_threads > 1)
sf->adaptive_rd_thresh = 0;
-
- // This is only used in motion vector unit test.
- if (cpi->oxcf.motion_vector_unit_test == 1)
- cpi->find_fractional_mv_step = vp9_return_max_sub_pixel_mv;
- else if (cpi->oxcf.motion_vector_unit_test == 2)
- cpi->find_fractional_mv_step = vp9_return_min_sub_pixel_mv;
}
diff --git a/libvpx/vp9/encoder/vp9_speed_features.h b/libvpx/vp9/encoder/vp9_speed_features.h
index 50d52bc23..9b09ec474 100644
--- a/libvpx/vp9/encoder/vp9_speed_features.h
+++ b/libvpx/vp9/encoder/vp9_speed_features.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_ENCODER_VP9_SPEED_FEATURES_H_
-#define VP9_ENCODER_VP9_SPEED_FEATURES_H_
+#ifndef VPX_VP9_ENCODER_VP9_SPEED_FEATURES_H_
+#define VPX_VP9_ENCODER_VP9_SPEED_FEATURES_H_
#include "vp9/common/vp9_enums.h"
@@ -57,7 +57,8 @@ typedef enum {
BIGDIA = 3,
SQUARE = 4,
FAST_HEX = 5,
- FAST_DIAMOND = 6
+ FAST_DIAMOND = 6,
+ MESH = 7
} SEARCH_METHODS;
typedef enum {
@@ -135,20 +136,25 @@ typedef enum {
} INTERP_FILTER_MASK;
typedef enum {
- // Search partitions using RD/NONRD criterion
+ // Search partitions using RD/NONRD criterion.
SEARCH_PARTITION,
- // Always use a fixed size partition
+ // Always use a fixed size partition.
FIXED_PARTITION,
REFERENCE_PARTITION,
// Use an arbitrary partitioning scheme based on source variance within
- // a 64X64 SB
+ // a 64X64 SB.
VAR_BASED_PARTITION,
- // Use non-fixed partitions based on source variance
- SOURCE_VAR_BASED_PARTITION
+ // Use non-fixed partitions based on source variance.
+ SOURCE_VAR_BASED_PARTITION,
+
+#if CONFIG_ML_VAR_PARTITION
+ // Make partition decisions with machine learning models.
+ ML_BASED_PARTITION
+#endif // CONFIG_ML_VAR_PARTITION
} PARTITION_SEARCH_TYPE;
typedef enum {
@@ -161,6 +167,19 @@ typedef enum {
ONE_LOOP_REDUCED = 1
} FAST_COEFF_UPDATE;
+typedef enum { EIGHTH_PEL, QUARTER_PEL, HALF_PEL, FULL_PEL } SUBPEL_FORCE_STOP;
+
+typedef struct ADAPT_SUBPEL_FORCE_STOP {
+ // Threshold for the full-pixel motion vector.
+ int mv_thresh;
+
+ // subpel_force_stop if full pixel MV is below the threshold.
+ SUBPEL_FORCE_STOP force_stop_below;
+
+ // subpel_force_stop if full pixel MV is equal to or above the threshold.
+ SUBPEL_FORCE_STOP force_stop_above;
+} ADAPT_SUBPEL_FORCE_STOP;
+
typedef struct MV_SPEED_FEATURES {
// Motion search method (Diamond, NSTEP, Hex, Big Diamond, Square, etc).
SEARCH_METHODS search_method;
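
ADAPT_SUBPEL_FORCE_STOP, added above, pairs a full-pel MV threshold with two stop precisions. A hedged sketch of the selection this implies (the helper and the magnitude test are illustrative; the actual decision sits inside the encoder's subpel motion search):

    /* Illustrative: choose a stop precision from the adaptive config. */
    typedef enum { EIGHTH_PEL, QUARTER_PEL, HALF_PEL, FULL_PEL } SUBPEL_FORCE_STOP;

    typedef struct {
      int mv_thresh;
      SUBPEL_FORCE_STOP force_stop_below; /* |mv| below the threshold */
      SUBPEL_FORCE_STOP force_stop_above; /* |mv| at or above the threshold */
    } ADAPT_SUBPEL_FORCE_STOP;

    static SUBPEL_FORCE_STOP pick_force_stop(const ADAPT_SUBPEL_FORCE_STOP *cfg,
                                             int abs_mv_row, int abs_mv_col) {
      const int mag = abs_mv_row > abs_mv_col ? abs_mv_row : abs_mv_col;
      return mag < cfg->mv_thresh ? cfg->force_stop_below
                                  : cfg->force_stop_above;
    }

With the speed >= 9 settings in vp9_speed_features.c above (mv_thresh 2, QUARTER_PEL below, HALF_PEL above), small motion keeps quarter-pel refinement while larger motion stops at half-pel.
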
@@ -179,15 +198,17 @@ typedef struct MV_SPEED_FEATURES {
// the same process. Along the way it skips many diagonals.
SUBPEL_SEARCH_METHODS subpel_search_method;
- // Maximum number of steps in logarithmic subpel search before giving up.
- int subpel_iters_per_step;
+ // Subpel MV search level. Can take values 0 - 2. Higher values mean more
+ // extensive subpel search.
+ int subpel_search_level;
+
+ // When to stop subpel motion search.
+ SUBPEL_FORCE_STOP subpel_force_stop;
+
+ // If enabled, a different subpel_force_stop is used depending on the MV.
+ int enable_adaptive_subpel_force_stop;
- // Control when to stop subpel search:
- // 0: Full subpel search.
- // 1: Stop at quarter pixel.
- // 2: Stop at half pixel.
- // 3: Stop at full pixel.
- int subpel_force_stop;
+ ADAPT_SUBPEL_FORCE_STOP adapt_subpel_force_stop;
// This variable sets the step_param used in full pel motion search.
int fullpel_search_step_param;
@@ -205,6 +226,28 @@ typedef struct MESH_PATTERN {
int interval;
} MESH_PATTERN;
+typedef enum {
+ // No reaction to rate control on a detected slide/scene change.
+ NO_DETECTION = 0,
+
+ // Set to larger Q (max_q set by user) based only on the
+ // detected slide/scene change and current/past Q.
+ FAST_DETECTION_MAXQ = 1,
+
+ // Based on the (first-pass) encoded frame: if a large frame size is
+ // detected, then set a higher Q for the second re-encode. This involves
+ // two-pass encoding on a slide change, so it is slower than
+ // FAST_DETECTION_MAXQ, but more accurate at detecting overshoot.
+ RE_ENCODE_MAXQ = 2
+} OVERSHOOT_DETECTION_CBR_RT;
+
+typedef enum {
+ USE_2_TAPS = 0,
+ USE_4_TAPS,
+ USE_8_TAPS,
+ USE_8_TAPS_SHARP,
+} SUBPEL_SEARCH_TYPE;
+
typedef struct SPEED_FEATURES {
MV_SPEED_FEATURES mv;
@@ -258,6 +301,9 @@ typedef struct SPEED_FEATURES {
// alternate reference frames.
int allow_acl;
+ // Temporal dependency model based encoding mode optimization
+ int enable_tpl_model;
+
// Use transform domain distortion. Use pixel domain distortion in speed 0
// and certain situations in higher speed to improve the RD model precision.
int allow_txfm_domain_distortion;
@@ -272,6 +318,9 @@ typedef struct SPEED_FEATURES {
// for intra and model coefs for the rest.
TX_SIZE_SEARCH_METHOD tx_size_search_method;
+ // How many levels of tx size to search, starting from the largest.
+ int tx_size_search_depth;
+
// Low precision 32x32 fdct keeps everything in 16 bits and thus is less
// precise but significantly faster than the non lp version.
int use_lp32x32fdct;
@@ -293,9 +342,20 @@ typedef struct SPEED_FEATURES {
// rd than partition type split.
int less_rectangular_check;
- // Disable testing non square partitions. (eg 16x32)
+ // Disable testing non-square partitions (e.g. 16x32) for block sizes larger
+ // than use_square_only_thresh_high or smaller than use_square_only_thresh_low.
int use_square_partition_only;
- BLOCK_SIZE use_square_only_threshold;
+ BLOCK_SIZE use_square_only_thresh_high;
+ BLOCK_SIZE use_square_only_thresh_low;
+
+ // Prune reference frames for rectangular partitions.
+ int prune_ref_frame_for_rect_partitions;
+
+ // Threshold values used for ML based rectangular partition search pruning.
+ // If < 0, the feature is turned off.
+ // Higher values mean more aggressive skipping of rectangular partition
+ // search, which gives better encoding speed but worse coding performance.
+ int ml_prune_rect_partition_threhold[4];
// Sets min and max partition sizes for this 64x64 region based on the
// same 64x64 in last encoded frame, and the left and above neighbor.
@@ -327,6 +387,9 @@ typedef struct SPEED_FEATURES {
// point for this motion search and limits the search range around it.
int adaptive_motion_search;
+ // Do extra full pixel motion search to obtain better motion vector.
+ int enhanced_full_pixel_motion_search;
+
// Threshold for allowing exhaustive motion search.
int exhaustive_searches_thresh;
@@ -448,9 +511,19 @@ typedef struct SPEED_FEATURES {
// Partition search early breakout thresholds.
PARTITION_SEARCH_BREAKOUT_THR partition_search_breakout_thr;
+ // Use ML-based partition search early breakout.
+ int use_ml_partition_search_breakout;
+ // Higher values mean more aggressive partition search breakout, which gives
+ // better encoding speed but worse compression performance.
+ float ml_partition_search_breakout_thresh[3];
+
// Machine-learning based partition search early termination
int ml_partition_search_early_termination;
+ // Machine-learning based partition search pruning using prediction residue
+ // variance.
+ int ml_var_partition_pruning;
+
// Allow skipping partition search for still image frame
int allow_partition_search_skip;
@@ -508,6 +581,20 @@ typedef struct SPEED_FEATURES {
// For SVC: enables use of partition from lower spatial resolution.
int svc_use_lowres_part;
+
+ // Flag to indicate the process for handling overshoot on a slide/scene
+ // change, for real-time CBR mode.
+ OVERSHOOT_DETECTION_CBR_RT overshoot_detection_cbr_rt;
+
+ // Disable partitioning of 16x16 blocks.
+ int disable_16x16part_nonkey;
+
+ // Allow for disabling golden reference.
+ int disable_golden_ref;
+
+ // Allow sub-pixel search to use interpolation filters with different numbers
+ // of taps in order to achieve more accurate motion search results.
+ SUBPEL_SEARCH_TYPE use_accurate_subpel_search;
} SPEED_FEATURES;
struct VP9_COMP;
@@ -519,4 +606,4 @@ void vp9_set_speed_features_framesize_dependent(struct VP9_COMP *cpi);
} // extern "C"
#endif
-#endif // VP9_ENCODER_VP9_SPEED_FEATURES_H_
+#endif // VPX_VP9_ENCODER_VP9_SPEED_FEATURES_H_
diff --git a/libvpx/vp9/encoder/vp9_subexp.h b/libvpx/vp9/encoder/vp9_subexp.h
index 26c89e2ea..f0d544b52 100644
--- a/libvpx/vp9/encoder/vp9_subexp.h
+++ b/libvpx/vp9/encoder/vp9_subexp.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_ENCODER_VP9_SUBEXP_H_
-#define VP9_ENCODER_VP9_SUBEXP_H_
+#ifndef VPX_VP9_ENCODER_VP9_SUBEXP_H_
+#define VPX_VP9_ENCODER_VP9_SUBEXP_H_
#ifdef __cplusplus
extern "C" {
@@ -37,4 +37,4 @@ int vp9_prob_diff_update_savings_search_model(const unsigned int *ct,
} // extern "C"
#endif
-#endif // VP9_ENCODER_VP9_SUBEXP_H_
+#endif // VPX_VP9_ENCODER_VP9_SUBEXP_H_
diff --git a/libvpx/vp9/encoder/vp9_svc_layercontext.c b/libvpx/vp9/encoder/vp9_svc_layercontext.c
index 2636bd9a5..3223f714b 100644
--- a/libvpx/vp9/encoder/vp9_svc_layercontext.c
+++ b/libvpx/vp9/encoder/vp9_svc_layercontext.c
@@ -19,6 +19,14 @@
#define SMALL_FRAME_WIDTH 32
#define SMALL_FRAME_HEIGHT 16
+static void swap_ptr(void *a, void *b) {
+ void **a_p = (void **)a;
+ void **b_p = (void **)b;
+ void *c = *a_p;
+ *a_p = *b_p;
+ *b_p = c;
+}
+
void vp9_init_layer_context(VP9_COMP *const cpi) {
SVC *const svc = &cpi->svc;
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
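
The new swap_ptr helper replaces the three temp-variable pointer swaps in vp9_restore_layer_context below. Note that callers pass the addresses of the pointers (e.g. swap_ptr(&cr->map, &lc->map)); a minimal standalone usage sketch:

    #include <stdio.h>

    static void swap_ptr(void *a, void *b) {
      void **a_p = (void **)a;
      void **b_p = (void **)b;
      void *c = *a_p;
      *a_p = *b_p;
      *b_p = c;
    }

    int main(void) {
      signed char x = 1, y = 2;
      signed char *map = &x, *other_map = &y;
      swap_ptr(&map, &other_map); /* addresses of the pointers themselves */
      printf("%d %d\n", *map, *other_map); /* prints: 2 1 */
      return 0;
    }
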
@@ -29,24 +37,49 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
svc->spatial_layer_id = 0;
svc->temporal_layer_id = 0;
- svc->first_spatial_layer_to_encode = 0;
- svc->rc_drop_superframe = 0;
svc->force_zero_mode_spatial_ref = 0;
svc->use_base_mv = 0;
+ svc->use_partition_reuse = 0;
+ svc->use_gf_temporal_ref = 1;
+ svc->use_gf_temporal_ref_current_layer = 0;
svc->scaled_temp_is_alloc = 0;
svc->scaled_one_half = 0;
svc->current_superframe = 0;
svc->non_reference_frame = 0;
-
- for (i = 0; i < REF_FRAMES; ++i) svc->ref_frame_index[i] = -1;
+ svc->skip_enhancement_layer = 0;
+ svc->disable_inter_layer_pred = INTER_LAYER_PRED_ON;
+ svc->framedrop_mode = CONSTRAINED_LAYER_DROP;
+ svc->set_intra_only_frame = 0;
+ svc->previous_frame_is_intra_only = 0;
+ svc->superframe_has_layer_sync = 0;
+ svc->use_set_ref_frame_config = 0;
+ svc->num_encoded_top_layer = 0;
+
+ for (i = 0; i < REF_FRAMES; ++i) {
+ svc->fb_idx_spatial_layer_id[i] = -1;
+ svc->fb_idx_temporal_layer_id[i] = -1;
+ svc->fb_idx_base[i] = 0;
+ }
for (sl = 0; sl < oxcf->ss_number_layers; ++sl) {
+ svc->last_layer_dropped[sl] = 0;
+ svc->drop_spatial_layer[sl] = 0;
svc->ext_frame_flags[sl] = 0;
- svc->ext_lst_fb_idx[sl] = 0;
- svc->ext_gld_fb_idx[sl] = 1;
- svc->ext_alt_fb_idx[sl] = 2;
- svc->downsample_filter_type[sl] = EIGHTTAP;
- svc->downsample_filter_phase[sl] = 0; // Set to 8 for averaging filter.
+ svc->lst_fb_idx[sl] = 0;
+ svc->gld_fb_idx[sl] = 1;
+ svc->alt_fb_idx[sl] = 2;
+ svc->downsample_filter_type[sl] = BILINEAR;
+ svc->downsample_filter_phase[sl] = 8; // Set to 8 for averaging filter.
+ svc->framedrop_thresh[sl] = oxcf->drop_frames_water_mark;
+ svc->fb_idx_upd_tl0[sl] = -1;
+ svc->drop_count[sl] = 0;
+ svc->spatial_layer_sync[sl] = 0;
}
+ svc->max_consec_drop = INT_MAX;
+
+ svc->buffer_gf_temporal_ref[1].idx = 7;
+ svc->buffer_gf_temporal_ref[0].idx = 6;
+ svc->buffer_gf_temporal_ref[1].is_used = 0;
+ svc->buffer_gf_temporal_ref[0].is_used = 0;
if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.pass == 2) {
if (vpx_realloc_frame_buffer(&cpi->svc.empty_frame.img, SMALL_FRAME_WIDTH,
@@ -84,6 +117,8 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
lrc->ni_frames = 0;
lrc->decimation_count = 0;
lrc->decimation_factor = 0;
+ lrc->worst_quality = oxcf->worst_allowed_q;
+ lrc->best_quality = oxcf->best_allowed_q;
for (i = 0; i < RATE_FACTOR_LEVELS; ++i) {
lrc->rate_correction_factors[i] = 1.0;
@@ -122,6 +157,9 @@ void vp9_init_layer_context(VP9_COMP *const cpi) {
size_t consec_zero_mv_size;
VP9_COMMON *const cm = &cpi->common;
lc->sb_index = 0;
+ lc->actual_num_seg1_blocks = 0;
+ lc->actual_num_seg2_blocks = 0;
+ lc->counter_encode_maxq_scene_change = 0;
CHECK_MEM_ERROR(cm, lc->map,
vpx_malloc(mi_rows * mi_cols * sizeof(*lc->map)));
memset(lc->map, 0, mi_rows * mi_cols);
@@ -154,6 +192,8 @@ void vp9_update_layer_context_change_config(VP9_COMP *const cpi,
int sl, tl, layer = 0, spatial_layer_target;
float bitrate_alloc = 1.0;
+ cpi->svc.temporal_layering_mode = oxcf->temporal_layering_mode;
+
if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING) {
for (sl = 0; sl < oxcf->ss_number_layers; ++sl) {
for (tl = 0; tl < oxcf->ts_number_layers; ++tl) {
@@ -290,6 +330,7 @@ void vp9_restore_layer_context(VP9_COMP *const cpi) {
LAYER_CONTEXT *const lc = get_layer_context(cpi);
const int old_frame_since_key = cpi->rc.frames_since_key;
const int old_frame_to_key = cpi->rc.frames_to_key;
+ const int old_ext_use_post_encode_drop = cpi->rc.ext_use_post_encode_drop;
cpi->rc = lc->rc;
cpi->twopass = lc->twopass;
@@ -303,26 +344,23 @@ void vp9_restore_layer_context(VP9_COMP *const cpi) {
// Reset the frames_since_key and frames_to_key counters to their values
// before the layer restore. Keep these defined for the stream (not layer).
if (cpi->svc.number_temporal_layers > 1 ||
- (cpi->svc.number_spatial_layers > 1 && !is_two_pass_svc(cpi))) {
+ cpi->svc.number_spatial_layers > 1) {
cpi->rc.frames_since_key = old_frame_since_key;
cpi->rc.frames_to_key = old_frame_to_key;
}
-
+ cpi->rc.ext_use_post_encode_drop = old_ext_use_post_encode_drop;
// For spatial-svc, allow cyclic-refresh to be applied on the spatial layers,
// for the base temporal layer.
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
cpi->svc.number_spatial_layers > 1 && cpi->svc.temporal_layer_id == 0) {
CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
- signed char *temp = cr->map;
- uint8_t *temp2 = cr->last_coded_q_map;
- uint8_t *temp3 = cpi->consec_zero_mv;
- cr->map = lc->map;
- lc->map = temp;
- cr->last_coded_q_map = lc->last_coded_q_map;
- lc->last_coded_q_map = temp2;
- cpi->consec_zero_mv = lc->consec_zero_mv;
- lc->consec_zero_mv = temp3;
+ swap_ptr(&cr->map, &lc->map);
+ swap_ptr(&cr->last_coded_q_map, &lc->last_coded_q_map);
+ swap_ptr(&cpi->consec_zero_mv, &lc->consec_zero_mv);
cr->sb_index = lc->sb_index;
+ cr->actual_num_seg1_blocks = lc->actual_num_seg1_blocks;
+ cr->actual_num_seg2_blocks = lc->actual_num_seg2_blocks;
+ cr->counter_encode_maxq_scene_change = lc->counter_encode_maxq_scene_change;
}
}
@@ -350,6 +388,9 @@ void vp9_save_layer_context(VP9_COMP *const cpi) {
lc->consec_zero_mv = cpi->consec_zero_mv;
cpi->consec_zero_mv = temp3;
lc->sb_index = cr->sb_index;
+ lc->actual_num_seg1_blocks = cr->actual_num_seg1_blocks;
+ lc->actual_num_seg2_blocks = cr->actual_num_seg2_blocks;
+ lc->counter_encode_maxq_scene_change = cr->counter_encode_maxq_scene_change;
}
}
@@ -381,15 +422,6 @@ void vp9_inc_frame_in_layer(VP9_COMP *const cpi) {
++cpi->svc.current_superframe;
}
-int vp9_is_upper_layer_key_frame(const VP9_COMP *const cpi) {
- return is_two_pass_svc(cpi) && cpi->svc.spatial_layer_id > 0 &&
- cpi->svc
- .layer_context[cpi->svc.spatial_layer_id *
- cpi->svc.number_temporal_layers +
- cpi->svc.temporal_layer_id]
- .is_key_frame;
-}
-
void get_layer_resolution(const int width_org, const int height_org,
const int num, const int den, int *width_out,
int *height_out) {
@@ -408,6 +440,40 @@ void get_layer_resolution(const int width_org, const int height_org,
*height_out = h;
}
+static void reset_fb_idx_unused(VP9_COMP *const cpi) {
+ // If a reference frame is not referenced or refreshed, then set the
+ // fb_idx for that reference to the first one used/referenced.
+ // This is to avoid setting fb_idx for a reference to a slot that is not
+ // used/needed (i.e., since that reference is not referenced or refreshed).
+ static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
+ VP9_ALT_FLAG };
+ MV_REFERENCE_FRAME ref_frame;
+ MV_REFERENCE_FRAME first_ref = 0;
+ int first_fb_idx = 0;
+ int fb_idx[3] = { cpi->lst_fb_idx, cpi->gld_fb_idx, cpi->alt_fb_idx };
+ for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
+ if (cpi->ref_frame_flags & flag_list[ref_frame]) {
+ first_ref = ref_frame;
+ first_fb_idx = fb_idx[ref_frame - 1];
+ break;
+ }
+ }
+ if (first_ref > 0) {
+ if (first_ref != LAST_FRAME &&
+ !(cpi->ref_frame_flags & flag_list[LAST_FRAME]) &&
+ !cpi->ext_refresh_last_frame)
+ cpi->lst_fb_idx = first_fb_idx;
+ else if (first_ref != GOLDEN_FRAME &&
+ !(cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) &&
+ !cpi->ext_refresh_golden_frame)
+ cpi->gld_fb_idx = first_fb_idx;
+ else if (first_ref != ALTREF_FRAME &&
+ !(cpi->ref_frame_flags & flag_list[ALTREF_FRAME]) &&
+ !cpi->ext_refresh_alt_ref_frame)
+ cpi->alt_fb_idx = first_fb_idx;
+ }
+}
+
// The function sets proper ref_frame_flags, buffer indices, and buffer update
// variables for temporal layering mode 3, which uses the 0-2-1-2 temporal
// layering scheme.
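
A worked trace of reset_fb_idx_unused from the hunk above, with hypothetical values:

    /* Hypothetical input: ref_frame_flags references only GOLDEN;
       fb_idx = { lst = 0, gld = 1, alt = 2 }; no external refresh flags.
       Then first_ref = GOLDEN_FRAME and first_fb_idx = 1, so the else-if
       chain remaps lst_fb_idx to 1. alt_fb_idx stays 2: the chain remaps
       at most one unused slot per call. */
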
@@ -511,6 +577,8 @@ static void set_flags_and_fb_idx_for_temporal_mode3(VP9_COMP *const cpi) {
cpi->gld_fb_idx = cpi->svc.number_spatial_layers + spatial_id - 1;
cpi->alt_fb_idx = cpi->svc.number_spatial_layers + spatial_id;
}
+
+ reset_fb_idx_unused(cpi);
}
// The function sets proper ref_frame_flags, buffer indices, and buffer update
@@ -546,6 +614,8 @@ static void set_flags_and_fb_idx_for_temporal_mode2(VP9_COMP *const cpi) {
if (!spatial_id) {
cpi->ref_frame_flags = VP9_LAST_FLAG;
} else {
+ if (spatial_id == cpi->svc.number_spatial_layers - 1)
+ cpi->ext_refresh_alt_ref_frame = 0;
cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
}
}
@@ -568,6 +638,8 @@ static void set_flags_and_fb_idx_for_temporal_mode2(VP9_COMP *const cpi) {
cpi->gld_fb_idx = cpi->svc.number_spatial_layers + spatial_id - 1;
cpi->alt_fb_idx = cpi->svc.number_spatial_layers + spatial_id;
}
+
+ reset_fb_idx_unused(cpi);
}
// The function sets proper ref_frame_flags, buffer indices, and buffer update
@@ -600,54 +672,161 @@ static void set_flags_and_fb_idx_for_temporal_mode_noLayering(
} else {
cpi->gld_fb_idx = 0;
}
+
+ reset_fb_idx_unused(cpi);
+}
+
+static void set_flags_and_fb_idx_bypass_via_set_ref_frame_config(
+ VP9_COMP *const cpi) {
+ SVC *const svc = &cpi->svc;
+ int sl = svc->spatial_layer_id = svc->spatial_layer_to_encode;
+ cpi->svc.temporal_layer_id = cpi->svc.temporal_layer_id_per_spatial[sl];
+ cpi->ext_refresh_frame_flags_pending = 1;
+ cpi->lst_fb_idx = svc->lst_fb_idx[sl];
+ cpi->gld_fb_idx = svc->gld_fb_idx[sl];
+ cpi->alt_fb_idx = svc->alt_fb_idx[sl];
+ cpi->ext_refresh_last_frame = 0;
+ cpi->ext_refresh_golden_frame = 0;
+ cpi->ext_refresh_alt_ref_frame = 0;
+ cpi->ref_frame_flags = 0;
+ if (svc->reference_last[sl]) cpi->ref_frame_flags |= VP9_LAST_FLAG;
+ if (svc->reference_golden[sl]) cpi->ref_frame_flags |= VP9_GOLD_FLAG;
+ if (svc->reference_altref[sl]) cpi->ref_frame_flags |= VP9_ALT_FLAG;
+}
+
+void vp9_copy_flags_ref_update_idx(VP9_COMP *const cpi) {
+ SVC *const svc = &cpi->svc;
+ static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
+ VP9_ALT_FLAG };
+ int sl = svc->spatial_layer_id;
+ svc->lst_fb_idx[sl] = cpi->lst_fb_idx;
+ svc->gld_fb_idx[sl] = cpi->gld_fb_idx;
+ svc->alt_fb_idx[sl] = cpi->alt_fb_idx;
+ // For the fixed SVC mode: pass the refresh_lst/gld/alt_frame flags to
+ // update_buffer_slot; this is needed for the GET_SVC_REF_FRAME_CONFIG api.
+ if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
+ int ref;
+ for (ref = 0; ref < REF_FRAMES; ++ref) {
+ svc->update_buffer_slot[sl] &= ~(1 << ref);
+ if ((ref == svc->lst_fb_idx[sl] && cpi->refresh_last_frame) ||
+ (ref == svc->gld_fb_idx[sl] && cpi->refresh_golden_frame) ||
+ (ref == svc->alt_fb_idx[sl] && cpi->refresh_alt_ref_frame))
+ svc->update_buffer_slot[sl] |= (1 << ref);
+ }
+ }
+ // TODO(jianj): Remove these 3, deprecated.
+ svc->update_last[sl] = (uint8_t)cpi->refresh_last_frame;
+ svc->update_golden[sl] = (uint8_t)cpi->refresh_golden_frame;
+ svc->update_altref[sl] = (uint8_t)cpi->refresh_alt_ref_frame;
+
+ svc->reference_last[sl] =
+ (uint8_t)(cpi->ref_frame_flags & flag_list[LAST_FRAME]);
+ svc->reference_golden[sl] =
+ (uint8_t)(cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]);
+ svc->reference_altref[sl] =
+ (uint8_t)(cpi->ref_frame_flags & flag_list[ALTREF_FRAME]);
}
int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) {
int width = 0, height = 0;
+ SVC *const svc = &cpi->svc;
LAYER_CONTEXT *lc = NULL;
- if (cpi->svc.number_spatial_layers > 1) cpi->svc.use_base_mv = 1;
- cpi->svc.force_zero_mode_spatial_ref = 1;
- cpi->svc.mi_stride[cpi->svc.spatial_layer_id] = cpi->common.mi_stride;
+ svc->skip_enhancement_layer = 0;
+ if (svc->number_spatial_layers > 1) {
+ svc->use_base_mv = 1;
+ svc->use_partition_reuse = 1;
+ }
+ svc->force_zero_mode_spatial_ref = 1;
+ svc->mi_stride[svc->spatial_layer_id] = cpi->common.mi_stride;
+ svc->mi_rows[svc->spatial_layer_id] = cpi->common.mi_rows;
+ svc->mi_cols[svc->spatial_layer_id] = cpi->common.mi_cols;
- if (cpi->svc.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_0212) {
+ if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_0212) {
set_flags_and_fb_idx_for_temporal_mode3(cpi);
- } else if (cpi->svc.temporal_layering_mode ==
+ } else if (svc->temporal_layering_mode ==
VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING) {
set_flags_and_fb_idx_for_temporal_mode_noLayering(cpi);
- } else if (cpi->svc.temporal_layering_mode ==
- VP9E_TEMPORAL_LAYERING_MODE_0101) {
+ } else if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_0101) {
set_flags_and_fb_idx_for_temporal_mode2(cpi);
- } else if (cpi->svc.temporal_layering_mode ==
- VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
- // In the BYPASS/flexible mode, the encoder is relying on the application
- // to specify, for each spatial layer, the flags and buffer indices for the
- // layering.
- // Note that the check (cpi->ext_refresh_frame_flags_pending == 0) is
- // needed to support the case where the frame flags may be passed in via
- // vpx_codec_encode(), which can be used for the temporal-only svc case.
- // TODO(marpan): Consider adding an enc_config parameter to better handle
- // this case.
- if (cpi->ext_refresh_frame_flags_pending == 0) {
- int sl;
- cpi->svc.spatial_layer_id = cpi->svc.spatial_layer_to_encode;
- sl = cpi->svc.spatial_layer_id;
- vp9_apply_encoding_flags(cpi, cpi->svc.ext_frame_flags[sl]);
- cpi->lst_fb_idx = cpi->svc.ext_lst_fb_idx[sl];
- cpi->gld_fb_idx = cpi->svc.ext_gld_fb_idx[sl];
- cpi->alt_fb_idx = cpi->svc.ext_alt_fb_idx[sl];
- }
- }
-
- if (cpi->svc.spatial_layer_id == cpi->svc.first_spatial_layer_to_encode)
- cpi->svc.rc_drop_superframe = 0;
-
- lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id *
- cpi->svc.number_temporal_layers +
- cpi->svc.temporal_layer_id];
+ } else if (svc->temporal_layering_mode ==
+ VP9E_TEMPORAL_LAYERING_MODE_BYPASS &&
+ svc->use_set_ref_frame_config) {
+ set_flags_and_fb_idx_bypass_via_set_ref_frame_config(cpi);
+ }
+
+ if (cpi->lst_fb_idx == svc->buffer_gf_temporal_ref[0].idx ||
+ cpi->gld_fb_idx == svc->buffer_gf_temporal_ref[0].idx ||
+ cpi->alt_fb_idx == svc->buffer_gf_temporal_ref[0].idx)
+ svc->buffer_gf_temporal_ref[0].is_used = 1;
+ if (cpi->lst_fb_idx == svc->buffer_gf_temporal_ref[1].idx ||
+ cpi->gld_fb_idx == svc->buffer_gf_temporal_ref[1].idx ||
+ cpi->alt_fb_idx == svc->buffer_gf_temporal_ref[1].idx)
+ svc->buffer_gf_temporal_ref[1].is_used = 1;
+
+ // For the fixed (non-flexible, non-bypass) SVC mode:
+ // If long term temporal reference is enabled at the sequence level
+ // (use_gf_temporal_ref == 1), and inter_layer is disabled (on inter-frames),
+ // we can use golden as a second temporal reference
+ // (since the spatial/inter-layer reference is disabled).
+ // We check that the fb_idx for this reference (buffer_gf_temporal_ref.idx) is
+ // unused (slot 7 and 6 should be available for 3-3 layer system).
+ // For now this second temporal reference will only be used for the highest
+ // and next-to-highest spatial layer (i.e., the top and middle layer for
+ // 3 spatial layers).
+ svc->use_gf_temporal_ref_current_layer = 0;
+ if (svc->use_gf_temporal_ref && !svc->buffer_gf_temporal_ref[0].is_used &&
+ !svc->buffer_gf_temporal_ref[1].is_used &&
+ svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS &&
+ svc->disable_inter_layer_pred != INTER_LAYER_PRED_ON &&
+ svc->number_spatial_layers <= 3 && svc->number_temporal_layers <= 3 &&
+ svc->spatial_layer_id >= svc->number_spatial_layers - 2) {
+ // Enable the second (long-term) temporal reference at the frame-level.
+ svc->use_gf_temporal_ref_current_layer = 1;
+ }
+
+ // Check if current superframe has any layer sync, only check once on
+ // base layer.
+ if (svc->spatial_layer_id == 0) {
+ int sl = 0;
+ // Default is no sync.
+ svc->superframe_has_layer_sync = 0;
+ for (sl = 0; sl < svc->number_spatial_layers; ++sl) {
+ if (cpi->svc.spatial_layer_sync[sl]) svc->superframe_has_layer_sync = 1;
+ }
+ }
+
+ // Reset the drop flags for all spatial layers, on the base layer.
+ if (svc->spatial_layer_id == 0) {
+ vp9_zero(svc->drop_spatial_layer);
+ // TODO(jianj/marpan): Investigate why setting svc->lst/gld/alt_fb_idx
+ // causes an issue with frame dropping and temporal layers, when the frame
+ // flags are passed via the encode call (bypass mode). Issue is that we're
+ // resetting ext_refresh_frame_flags_pending to 0 on frame drops.
+ if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
+ memset(&svc->lst_fb_idx, -1, sizeof(svc->lst_fb_idx));
+ memset(&svc->gld_fb_idx, -1, sizeof(svc->gld_fb_idx));
+ memset(&svc->alt_fb_idx, -1, sizeof(svc->alt_fb_idx));
+ // In bypass mode these are set by the API before the superframe is
+ // encoded and passed to the encoder layer by layer, so don't reset them
+ // on layer 0 there.
+ vp9_zero(svc->update_buffer_slot);
+ vp9_zero(svc->reference_last);
+ vp9_zero(svc->reference_golden);
+ vp9_zero(svc->reference_altref);
+ // TODO(jianj): Remove these 3, deprecated.
+ vp9_zero(svc->update_last);
+ vp9_zero(svc->update_golden);
+ vp9_zero(svc->update_altref);
+ }
+ }
+
+ lc = &svc->layer_context[svc->spatial_layer_id * svc->number_temporal_layers +
+ svc->temporal_layer_id];
// Setting the worst/best_quality via the encoder control: SET_SVC_PARAMETERS,
// only for non-BYPASS mode for now.
- if (cpi->svc.temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
+ if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS ||
+ svc->use_set_ref_frame_config) {
RATE_CONTROL *const lrc = &lc->rc;
lrc->worst_quality = vp9_quantizer_to_qindex(lc->max_q);
lrc->best_quality = vp9_quantizer_to_qindex(lc->min_q);
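
vp9_copy_flags_ref_update_idx above rebuilds update_buffer_slot[sl] as a per-buffer-slot refresh bitmask. The same computation in isolation (a hedged sketch; the standalone signature is illustrative):

    /* Bit i is set iff buffer slot i is refreshed by this frame. */
    static int build_update_buffer_slot(int lst_fb_idx, int gld_fb_idx,
                                        int alt_fb_idx, int refresh_last,
                                        int refresh_golden, int refresh_alt) {
      int mask = 0, ref;
      for (ref = 0; ref < 8; ++ref) { /* REF_FRAMES == 8 in VP9 */
        if ((ref == lst_fb_idx && refresh_last) ||
            (ref == gld_fb_idx && refresh_golden) ||
            (ref == alt_fb_idx && refresh_alt))
          mask |= 1 << ref;
      }
      return mask;
    }
    /* e.g. lst = 0, gld = 1, alt = 2 with only refresh_last -> mask == 0x01. */
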
@@ -657,157 +836,76 @@ int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) {
lc->scaling_factor_num, lc->scaling_factor_den, &width,
&height);
- // For resolutions <= VGA: set phase of the filter = 8 (for symmetric
- // averaging filter), use bilinear for now.
- if (width * height <= 640 * 480) {
- cpi->svc.downsample_filter_type[cpi->svc.spatial_layer_id] = BILINEAR;
- cpi->svc.downsample_filter_phase[cpi->svc.spatial_layer_id] = 8;
- }
-
- // The usage of use_base_mv assumes down-scale of 2x2. For now, turn off use
- // of base motion vectors if spatial scale factors for any layers are not 2,
+ // Use EIGHTTAP_SMOOTH for low resolutions.
+ if (width * height <= 320 * 240)
+ svc->downsample_filter_type[svc->spatial_layer_id] = EIGHTTAP_SMOOTH;
+ // For scale factors > 0.75, set the phase to 0 (aligns decimated pixel
+ // to source pixel).
+ lc = &svc->layer_context[svc->spatial_layer_id * svc->number_temporal_layers +
+ svc->temporal_layer_id];
+ if (lc->scaling_factor_num > (3 * lc->scaling_factor_den) >> 2)
+ svc->downsample_filter_phase[svc->spatial_layer_id] = 0;
+
+ // The usage of use_base_mv or partition_reuse assumes down-scale of 2x2.
+ // For now, turn off use of base motion vectors and partition reuse if the
+ // spatial scale factors for any layers are not 2,
// keep the case of 3 spatial layers with scale factor of 4x4 for base layer.
// TODO(marpan): Fix this to allow for use_base_mv for scale factors != 2.
- if (cpi->svc.number_spatial_layers > 1) {
+ if (svc->number_spatial_layers > 1) {
int sl;
- for (sl = 0; sl < cpi->svc.number_spatial_layers - 1; ++sl) {
- lc = &cpi->svc.layer_context[sl * cpi->svc.number_temporal_layers +
- cpi->svc.temporal_layer_id];
+ for (sl = 0; sl < svc->number_spatial_layers - 1; ++sl) {
+ lc = &svc->layer_context[sl * svc->number_temporal_layers +
+ svc->temporal_layer_id];
if ((lc->scaling_factor_num != lc->scaling_factor_den >> 1) &&
!(lc->scaling_factor_num == lc->scaling_factor_den >> 2 && sl == 0 &&
- cpi->svc.number_spatial_layers == 3)) {
- cpi->svc.use_base_mv = 0;
+ svc->number_spatial_layers == 3)) {
+ svc->use_base_mv = 0;
+ svc->use_partition_reuse = 0;
break;
}
}
+ // For non-zero spatial layers: if the previous spatial layer was dropped,
+ // disable the base_mv and partition_reuse features.
+ if (svc->spatial_layer_id > 0 &&
+ svc->drop_spatial_layer[svc->spatial_layer_id - 1]) {
+ svc->use_base_mv = 0;
+ svc->use_partition_reuse = 0;
+ }
}
- cpi->svc.non_reference_frame = 0;
+ svc->non_reference_frame = 0;
if (cpi->common.frame_type != KEY_FRAME && !cpi->ext_refresh_last_frame &&
- !cpi->ext_refresh_golden_frame && !cpi->ext_refresh_alt_ref_frame) {
- cpi->svc.non_reference_frame = 1;
+ !cpi->ext_refresh_golden_frame && !cpi->ext_refresh_alt_ref_frame)
+ svc->non_reference_frame = 1;
+ // For the flexible (bypass) mode, where update_buffer_slot is used, need to
+ // check whether all buffer slots are left unrefreshed.
+ if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
+ if (svc->update_buffer_slot[svc->spatial_layer_id] != 0)
+ svc->non_reference_frame = 0;
}
- if (vp9_set_size_literal(cpi, width, height) != 0)
- return VPX_CODEC_INVALID_PARAM;
-
- return 0;
-}
-
-#if CONFIG_SPATIAL_SVC
-#define SMALL_FRAME_FB_IDX 7
-
-int vp9_svc_start_frame(VP9_COMP *const cpi) {
- int width = 0, height = 0;
- LAYER_CONTEXT *lc;
- struct lookahead_entry *buf;
- int count = 1 << (cpi->svc.number_temporal_layers - 1);
-
- cpi->svc.spatial_layer_id = cpi->svc.spatial_layer_to_encode;
- lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id];
-
- cpi->svc.temporal_layer_id = 0;
- while ((lc->current_video_frame_in_layer % count) != 0) {
- ++cpi->svc.temporal_layer_id;
- count >>= 1;
- }
-
- cpi->ref_frame_flags = VP9_ALT_FLAG | VP9_GOLD_FLAG | VP9_LAST_FLAG;
-
- cpi->lst_fb_idx = cpi->svc.spatial_layer_id;
-
- if (cpi->svc.spatial_layer_id == 0)
- cpi->gld_fb_idx =
- (lc->gold_ref_idx >= 0) ? lc->gold_ref_idx : cpi->lst_fb_idx;
- else
- cpi->gld_fb_idx = cpi->svc.spatial_layer_id - 1;
-
- if (lc->current_video_frame_in_layer == 0) {
- if (cpi->svc.spatial_layer_id >= 2) {
- cpi->alt_fb_idx = cpi->svc.spatial_layer_id - 2;
- } else {
- cpi->alt_fb_idx = cpi->lst_fb_idx;
- cpi->ref_frame_flags &= (~VP9_LAST_FLAG & ~VP9_ALT_FLAG);
- }
- } else {
- if (cpi->oxcf.ss_enable_auto_arf[cpi->svc.spatial_layer_id]) {
- cpi->alt_fb_idx = lc->alt_ref_idx;
- if (!lc->has_alt_frame) cpi->ref_frame_flags &= (~VP9_ALT_FLAG);
- } else {
- // Find a proper alt_fb_idx for layers that don't have alt ref frame
- if (cpi->svc.spatial_layer_id == 0) {
- cpi->alt_fb_idx = cpi->lst_fb_idx;
- } else {
- LAYER_CONTEXT *lc_lower =
- &cpi->svc.layer_context[cpi->svc.spatial_layer_id - 1];
-
- if (cpi->oxcf.ss_enable_auto_arf[cpi->svc.spatial_layer_id - 1] &&
- lc_lower->alt_ref_source != NULL)
- cpi->alt_fb_idx = lc_lower->alt_ref_idx;
- else if (cpi->svc.spatial_layer_id >= 2)
- cpi->alt_fb_idx = cpi->svc.spatial_layer_id - 2;
- else
- cpi->alt_fb_idx = cpi->lst_fb_idx;
- }
- }
+ if (svc->spatial_layer_id == 0) {
+ svc->high_source_sad_superframe = 0;
+ svc->high_num_blocks_with_motion = 0;
}
- get_layer_resolution(cpi->oxcf.width, cpi->oxcf.height,
- lc->scaling_factor_num, lc->scaling_factor_den, &width,
- &height);
-
- // Workaround for multiple frame contexts. In some frames we can't use prev_mi
- // since its previous frame could be changed during decoding time. The idea is
- // we put a empty invisible frame in front of them, then we will not use
- // prev_mi when encoding these frames.
-
- buf = vp9_lookahead_peek(cpi->lookahead, 0);
- if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.pass == 2 &&
- cpi->svc.encode_empty_frame_state == NEED_TO_ENCODE &&
- lc->rc.frames_to_key != 0 &&
- !(buf != NULL && (buf->flags & VPX_EFLAG_FORCE_KF))) {
- if ((cpi->svc.number_temporal_layers > 1 &&
- cpi->svc.temporal_layer_id < cpi->svc.number_temporal_layers - 1) ||
- (cpi->svc.number_spatial_layers > 1 &&
- cpi->svc.spatial_layer_id == 0)) {
- struct lookahead_entry *buf = vp9_lookahead_peek(cpi->lookahead, 0);
-
- if (buf != NULL) {
- cpi->svc.empty_frame.ts_start = buf->ts_start;
- cpi->svc.empty_frame.ts_end = buf->ts_end;
- cpi->svc.encode_empty_frame_state = ENCODING;
- cpi->common.show_frame = 0;
- cpi->ref_frame_flags = 0;
- cpi->common.frame_type = INTER_FRAME;
- cpi->lst_fb_idx = cpi->gld_fb_idx = cpi->alt_fb_idx =
- SMALL_FRAME_FB_IDX;
-
- if (cpi->svc.encode_intra_empty_frame != 0) cpi->common.intra_only = 1;
-
- width = SMALL_FRAME_WIDTH;
- height = SMALL_FRAME_HEIGHT;
- }
- }
+ if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS &&
+ svc->last_layer_dropped[svc->spatial_layer_id] &&
+ svc->fb_idx_upd_tl0[svc->spatial_layer_id] != -1 &&
+ !svc->layer_context[svc->temporal_layer_id].is_key_frame) {
+ // For fixed/non-flexible mode, if the previous frame (same spatial layer,
+ // previous superframe) was dropped, make sure the lst_fb_idx for this
+ // frame corresponds to the buffer index updated on the last encoded
+ // TL0 frame (with the same spatial layer).
+ cpi->lst_fb_idx = svc->fb_idx_upd_tl0[svc->spatial_layer_id];
}
- cpi->oxcf.worst_allowed_q = vp9_quantizer_to_qindex(lc->max_q);
- cpi->oxcf.best_allowed_q = vp9_quantizer_to_qindex(lc->min_q);
-
- vp9_change_config(cpi, &cpi->oxcf);
-
if (vp9_set_size_literal(cpi, width, height) != 0)
return VPX_CODEC_INVALID_PARAM;
- vp9_set_high_precision_mv(cpi, 1);
-
- cpi->alt_ref_source = get_layer_context(cpi)->alt_ref_source;
-
return 0;
}
-#undef SMALL_FRAME_FB_IDX
-#endif // CONFIG_SPATIAL_SVC
-
struct lookahead_entry *vp9_svc_lookahead_pop(VP9_COMP *const cpi,
struct lookahead_ctx *ctx,
int drain) {
@@ -840,7 +938,7 @@ void vp9_free_svc_cyclic_refresh(VP9_COMP *const cpi) {
}
// Reset on key frame: reset counters, references and buffer updates.
-void vp9_svc_reset_key_frame(VP9_COMP *const cpi) {
+void vp9_svc_reset_temporal_layers(VP9_COMP *const cpi, int is_key) {
int sl, tl;
SVC *const svc = &cpi->svc;
LAYER_CONTEXT *lc = NULL;
@@ -848,7 +946,7 @@ void vp9_svc_reset_key_frame(VP9_COMP *const cpi) {
for (tl = 0; tl < svc->number_temporal_layers; ++tl) {
lc = &cpi->svc.layer_context[sl * svc->number_temporal_layers + tl];
lc->current_video_frame_in_layer = 0;
- lc->frames_from_key_frame = 0;
+ if (is_key) lc->frames_from_key_frame = 0;
}
}
if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_0212) {
@@ -887,3 +985,245 @@ void vp9_svc_check_reset_layer_rc_flag(VP9_COMP *const cpi) {
}
}
}
+
+void vp9_svc_constrain_inter_layer_pred(VP9_COMP *const cpi) {
+ VP9_COMMON *const cm = &cpi->common;
+ SVC *const svc = &cpi->svc;
+ // Check for disabling inter-layer (spatial) prediction, if
+ // svc.disable_inter_layer_pred is set. If the previous spatial layer was
+ // dropped then disable the prediction from this (scaled) reference.
+ // For INTER_LAYER_PRED_OFF_NONKEY: inter-layer prediction is disabled,
+ // except on key frames and on superframes with a spatial-layer sync.
+ if ((svc->disable_inter_layer_pred == INTER_LAYER_PRED_OFF_NONKEY &&
+ !svc->layer_context[svc->temporal_layer_id].is_key_frame &&
+ !svc->superframe_has_layer_sync) ||
+ svc->disable_inter_layer_pred == INTER_LAYER_PRED_OFF ||
+ svc->drop_spatial_layer[svc->spatial_layer_id - 1]) {
+ MV_REFERENCE_FRAME ref_frame;
+ static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG,
+ VP9_ALT_FLAG };
+ for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
+ const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame);
+ if (yv12 != NULL && (cpi->ref_frame_flags & flag_list[ref_frame])) {
+ const struct scale_factors *const scale_fac =
+ &cm->frame_refs[ref_frame - 1].sf;
+ if (vp9_is_scaled(scale_fac))
+ cpi->ref_frame_flags &= (~flag_list[ref_frame]);
+ }
+ }
+ }
+ // For fixed/non-flexible SVC: check for disabling inter-layer prediction.
+ // If the reference for inter-layer prediction (the reference that is scaled)
+ // is not the previous spatial layer from the same superframe, then we disable
+ // inter-layer prediction. This check is only needed when inter-layer
+ // prediction is not set to OFF mode.
+ if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS &&
+ svc->disable_inter_layer_pred != INTER_LAYER_PRED_OFF) {
+ // We only use LAST and GOLDEN for prediction in real-time mode, so we
+ // check both here.
+ MV_REFERENCE_FRAME ref_frame;
+ for (ref_frame = LAST_FRAME; ref_frame <= GOLDEN_FRAME; ref_frame++) {
+ struct scale_factors *scale_fac = &cm->frame_refs[ref_frame - 1].sf;
+ if (vp9_is_scaled(scale_fac)) {
+ // If this reference was updated on the previous spatial layer of the
+ // current superframe, then we keep this reference (don't disable).
+ // Otherwise we disable the inter-layer prediction.
+ // This condition is verified by checking if the current frame buffer
+ // index is equal to any of the slots for the previous spatial layer,
+ // and if so, check if that slot was updated/refreshed. If that is the
+ // case, then this reference is valid for inter-layer prediction under
+ // the mode INTER_LAYER_PRED_ON_CONSTRAINED.
+ int fb_idx =
+ ref_frame == LAST_FRAME ? cpi->lst_fb_idx : cpi->gld_fb_idx;
+ int ref_flag = ref_frame == LAST_FRAME ? VP9_LAST_FLAG : VP9_GOLD_FLAG;
+ int sl = svc->spatial_layer_id;
+ int disable = 1;
+ if (fb_idx < 0) continue;
+ if ((fb_idx == svc->lst_fb_idx[sl - 1] &&
+ (svc->update_buffer_slot[sl - 1] & (1 << fb_idx))) ||
+ (fb_idx == svc->gld_fb_idx[sl - 1] &&
+ (svc->update_buffer_slot[sl - 1] & (1 << fb_idx))) ||
+ (fb_idx == svc->alt_fb_idx[sl - 1] &&
+ (svc->update_buffer_slot[sl - 1] & (1 << fb_idx))))
+ disable = 0;
+ if (disable) cpi->ref_frame_flags &= (~ref_flag);
+ }
+ }
+ }
+}
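The buffer-slot test in the constrained-prediction branch above can be condensed into a small standalone predicate; the following is an illustrative sketch (the helper name is hypothetical, not libvpx API):

static int ref_valid_for_inter_layer_pred(int fb_idx, int lst_prev,
                                          int gld_prev, int alt_prev,
                                          int update_mask_prev) {
  /* A scaled reference is kept only if its buffer index matches one of the
   * previous spatial layer's slots AND that slot was actually refreshed,
   * i.e. the corresponding bit of update_buffer_slot[sl - 1] is set. */
  const int matches_prev_layer =
      fb_idx == lst_prev || fb_idx == gld_prev || fb_idx == alt_prev;
  return matches_prev_layer && ((update_mask_prev >> fb_idx) & 1);
}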
+
+void vp9_svc_assert_constraints_pattern(VP9_COMP *const cpi) {
+ SVC *const svc = &cpi->svc;
+ // For fixed/non-flexible mode, the following constraints are expected
+ // when inter-layer prediction is on (default).
+ if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS &&
+ svc->disable_inter_layer_pred == INTER_LAYER_PRED_ON &&
+ svc->framedrop_mode != LAYER_DROP) {
+ if (!svc->layer_context[svc->temporal_layer_id].is_key_frame) {
+ // On non-key frames: LAST is always temporal reference, GOLDEN is
+ // spatial reference.
+ if (svc->temporal_layer_id == 0)
+ // Base temporal only predicts from base temporal.
+ assert(svc->fb_idx_temporal_layer_id[cpi->lst_fb_idx] == 0);
+ else
+ // Non-base temporal only predicts from lower temporal layer.
+ assert(svc->fb_idx_temporal_layer_id[cpi->lst_fb_idx] <
+ svc->temporal_layer_id);
+ if (svc->spatial_layer_id > 0 && cpi->ref_frame_flags & VP9_GOLD_FLAG &&
+ svc->spatial_layer_id > svc->first_spatial_layer_to_encode) {
+ // Non-base spatial only predicts from lower spatial layer with same
+ // temporal_id.
+ assert(svc->fb_idx_spatial_layer_id[cpi->gld_fb_idx] ==
+ svc->spatial_layer_id - 1);
+ assert(svc->fb_idx_temporal_layer_id[cpi->gld_fb_idx] ==
+ svc->temporal_layer_id);
+ }
+ } else if (svc->spatial_layer_id > 0 &&
+ svc->spatial_layer_id > svc->first_spatial_layer_to_encode) {
+ // Only 1 reference for frame whose base is key; reference may be LAST
+ // or GOLDEN, so we check both.
+ if (cpi->ref_frame_flags & VP9_LAST_FLAG) {
+ assert(svc->fb_idx_spatial_layer_id[cpi->lst_fb_idx] ==
+ svc->spatial_layer_id - 1);
+ assert(svc->fb_idx_temporal_layer_id[cpi->lst_fb_idx] ==
+ svc->temporal_layer_id);
+ } else if (cpi->ref_frame_flags & VP9_GOLD_FLAG) {
+ assert(svc->fb_idx_spatial_layer_id[cpi->gld_fb_idx] ==
+ svc->spatial_layer_id - 1);
+ assert(svc->fb_idx_temporal_layer_id[cpi->gld_fb_idx] ==
+ svc->temporal_layer_id);
+ }
+ }
+ } else if (svc->use_gf_temporal_ref_current_layer &&
+ !svc->layer_context[svc->temporal_layer_id].is_key_frame) {
+ // For the usage of golden as second long term reference: the
+ // temporal_layer_id of that reference must be base temporal layer 0, and
+ // spatial_layer_id of that reference must be same as current
+ // spatial_layer_id. If not, disable feature.
+ // TODO(marpan): Investigate when this can happen, and maybe put this check
+ // and reset in a different place.
+ if (svc->fb_idx_spatial_layer_id[cpi->gld_fb_idx] !=
+ svc->spatial_layer_id ||
+ svc->fb_idx_temporal_layer_id[cpi->gld_fb_idx] != 0)
+ svc->use_gf_temporal_ref_current_layer = 0;
+ }
+}
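To make the asserted pattern concrete, here is a toy, self-contained illustration for one non-key superframe in a 2-spatial / 2-temporal fixed-pattern stream; all index values are hypothetical:

#include <assert.h>
int main(void) {
  /* Layer ids recorded for each of the 8 frame buffer slots. */
  int fb_idx_spatial_layer_id[8] = { 0, 1, 0, 1, 0, 0, 0, 0 };
  int fb_idx_temporal_layer_id[8] = { 0, 0, 1, 1, 0, 0, 0, 0 };
  int spatial_layer_id = 1, temporal_layer_id = 1;
  int gld_fb_idx = 2; /* GOLDEN points at the lower spatial layer's buffer */
  /* GOLDEN (the spatial reference) must come from spatial layer S-1 with
   * the same temporal id, which is what the asserts above encode. */
  assert(fb_idx_spatial_layer_id[gld_fb_idx] == spatial_layer_id - 1);
  assert(fb_idx_temporal_layer_id[gld_fb_idx] == temporal_layer_id);
  return 0;
}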
+
+#if CONFIG_VP9_TEMPORAL_DENOISING
+int vp9_denoise_svc_non_key(VP9_COMP *const cpi) {
+ int layer =
+ LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id, cpi->svc.temporal_layer_id,
+ cpi->svc.number_temporal_layers);
+ LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer];
+ return denoise_svc(cpi) && !lc->is_key_frame;
+}
+#endif
+
+void vp9_svc_check_spatial_layer_sync(VP9_COMP *const cpi) {
+ SVC *const svc = &cpi->svc;
+ // Only for superframes whose base is not key, as those are
+ // already sync frames.
+ if (!svc->layer_context[svc->temporal_layer_id].is_key_frame) {
+ if (svc->spatial_layer_id == 0) {
+ // On base spatial layer: if the current superframe has a layer sync then
+ // reset the pattern counters and reset to base temporal layer.
+ if (svc->superframe_has_layer_sync)
+ vp9_svc_reset_temporal_layers(cpi, cpi->common.frame_type == KEY_FRAME);
+ }
+ // If the layer sync is set for this current spatial layer then
+ // disable the temporal reference.
+ if (svc->spatial_layer_id > 0 &&
+ svc->spatial_layer_sync[svc->spatial_layer_id]) {
+ cpi->ref_frame_flags &= (~VP9_LAST_FLAG);
+ if (svc->use_gf_temporal_ref_current_layer) {
+ int index = svc->spatial_layer_id;
+ // If golden is used as second reference: need to remove it from
+ // prediction, reset refresh period to 0, and update the reference.
+ svc->use_gf_temporal_ref_current_layer = 0;
+ cpi->rc.baseline_gf_interval = 0;
+ cpi->rc.frames_till_gf_update_due = 0;
+ // On layer sync frame we must update the buffer index used for long
+ // term reference. Use the alt_ref since it is not used or updated on
+ // sync frames.
+ if (svc->number_spatial_layers == 3) index = svc->spatial_layer_id - 1;
+ assert(index >= 0);
+ cpi->alt_fb_idx = svc->buffer_gf_temporal_ref[index].idx;
+ cpi->ext_refresh_alt_ref_frame = 1;
+ }
+ }
+ }
+}
+
+void vp9_svc_update_ref_frame_buffer_idx(VP9_COMP *const cpi) {
+ SVC *const svc = &cpi->svc;
+ // Update the usage of the frame buffer index for the base spatial layer.
+ if (svc->spatial_layer_id == 0) {
+ if ((cpi->ref_frame_flags & VP9_LAST_FLAG) || cpi->refresh_last_frame)
+ svc->fb_idx_base[cpi->lst_fb_idx] = 1;
+ if ((cpi->ref_frame_flags & VP9_GOLD_FLAG) || cpi->refresh_golden_frame)
+ svc->fb_idx_base[cpi->gld_fb_idx] = 1;
+ if ((cpi->ref_frame_flags & VP9_ALT_FLAG) || cpi->refresh_alt_ref_frame)
+ svc->fb_idx_base[cpi->alt_fb_idx] = 1;
+ }
+}
+
+static void vp9_svc_update_ref_frame_bypass_mode(VP9_COMP *const cpi) {
+ // For non-flexible/bypass SVC mode: check for refreshing other buffer
+ // slots.
+ SVC *const svc = &cpi->svc;
+ VP9_COMMON *const cm = &cpi->common;
+ BufferPool *const pool = cm->buffer_pool;
+ int i;
+ for (i = 0; i < REF_FRAMES; i++) {
+ if (cm->frame_type == KEY_FRAME ||
+ svc->update_buffer_slot[svc->spatial_layer_id] & (1 << i)) {
+ ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[i], cm->new_fb_idx);
+ svc->fb_idx_spatial_layer_id[i] = svc->spatial_layer_id;
+ svc->fb_idx_temporal_layer_id[i] = svc->temporal_layer_id;
+ }
+ }
+}
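The refresh rule in vp9_svc_update_ref_frame_bypass_mode() reduces to a per-slot bit test. A minimal model with simplified types and a hypothetical helper name:

#define NUM_REF_SLOTS 8
static void refresh_slots(int ref_map[NUM_REF_SLOTS], int new_fb_idx,
                          int update_mask, int is_key_frame) {
  /* Slot i is repointed at the newly coded frame when bit i of the layer's
   * update_buffer_slot mask is set, or unconditionally on a key frame. */
  int i;
  for (i = 0; i < NUM_REF_SLOTS; i++)
    if (is_key_frame || (update_mask & (1 << i))) ref_map[i] = new_fb_idx;
}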
+
+void vp9_svc_update_ref_frame(VP9_COMP *const cpi) {
+ VP9_COMMON *const cm = &cpi->common;
+ SVC *const svc = &cpi->svc;
+ BufferPool *const pool = cm->buffer_pool;
+
+ if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS &&
+ svc->use_set_ref_frame_config) {
+ vp9_svc_update_ref_frame_bypass_mode(cpi);
+ } else if (cm->frame_type == KEY_FRAME) {
+ // Keep track of frame index for each reference frame.
+ int i;
+ // On key frame update all reference frame slots.
+ for (i = 0; i < REF_FRAMES; i++) {
+ svc->fb_idx_spatial_layer_id[i] = svc->spatial_layer_id;
+ svc->fb_idx_temporal_layer_id[i] = svc->temporal_layer_id;
+ // LAST/GOLDEN/ALTREF are already updated above.
+ if (i != cpi->lst_fb_idx && i != cpi->gld_fb_idx && i != cpi->alt_fb_idx)
+ ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[i], cm->new_fb_idx);
+ }
+ } else {
+ if (cpi->refresh_last_frame) {
+ svc->fb_idx_spatial_layer_id[cpi->lst_fb_idx] = svc->spatial_layer_id;
+ svc->fb_idx_temporal_layer_id[cpi->lst_fb_idx] = svc->temporal_layer_id;
+ }
+ if (cpi->refresh_golden_frame) {
+ svc->fb_idx_spatial_layer_id[cpi->gld_fb_idx] = svc->spatial_layer_id;
+ svc->fb_idx_temporal_layer_id[cpi->gld_fb_idx] = svc->temporal_layer_id;
+ }
+ if (cpi->refresh_alt_ref_frame) {
+ svc->fb_idx_spatial_layer_id[cpi->alt_fb_idx] = svc->spatial_layer_id;
+ svc->fb_idx_temporal_layer_id[cpi->alt_fb_idx] = svc->temporal_layer_id;
+ }
+ }
+ // Copy flags from encoder to SVC struct.
+ vp9_copy_flags_ref_update_idx(cpi);
+ vp9_svc_update_ref_frame_buffer_idx(cpi);
+}
+
+void vp9_svc_adjust_frame_rate(VP9_COMP *const cpi) {
+ int64_t this_duration =
+ cpi->svc.timebase_fac * cpi->svc.duration[cpi->svc.spatial_layer_id];
+ vp9_new_framerate(cpi, 10000000.0 / this_duration);
+}
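A quick numeric check of the frame-rate expression above, assuming timebase_fac * duration yields the superframe duration in units of 1/10,000,000 second (which the 10000000.0 constant implies):

#include <stdio.h>
int main(void) {
  long long this_duration = 333333; /* ~1/30 s in 0.1 microsecond ticks */
  printf("%.5f fps\n", 10000000.0 / this_duration); /* ~30.00003 */
  this_duration = 166667; /* ~1/60 s */
  printf("%.5f fps\n", 10000000.0 / this_duration); /* ~59.99988 */
  return 0;
}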
diff --git a/libvpx/vp9/encoder/vp9_svc_layercontext.h b/libvpx/vp9/encoder/vp9_svc_layercontext.h
index b7cdfd962..c25644617 100644
--- a/libvpx/vp9/encoder/vp9_svc_layercontext.h
+++ b/libvpx/vp9/encoder/vp9_svc_layercontext.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_ENCODER_VP9_SVC_LAYERCONTEXT_H_
-#define VP9_ENCODER_VP9_SVC_LAYERCONTEXT_H_
+#ifndef VPX_VP9_ENCODER_VP9_SVC_LAYERCONTEXT_H_
+#define VPX_VP9_ENCODER_VP9_SVC_LAYERCONTEXT_H_
#include "vpx/vpx_encoder.h"
@@ -19,6 +19,24 @@
extern "C" {
#endif
+typedef enum {
+ // Inter-layer prediction is enabled on all frames.
+ INTER_LAYER_PRED_ON,
+ // Inter-layer prediction is off on all frames.
+ INTER_LAYER_PRED_OFF,
+ // Inter-layer prediction is off on non-key frames and non-sync frames.
+ INTER_LAYER_PRED_OFF_NONKEY,
+ // Inter-layer prediction is enabled on all frames, but constrained such
+ // that any layer S (> 0) can only predict from previous spatial
+ // layer S-1, from the same superframe.
+ INTER_LAYER_PRED_ON_CONSTRAINED
+} INTER_LAYER_PRED;
+
+typedef struct BUFFER_LONGTERM_REF {
+ int idx;
+ int is_used;
+} BUFFER_LONGTERM_REF;
+
typedef struct {
RATE_CONTROL rc;
int target_bandwidth;
@@ -42,10 +60,14 @@ typedef struct {
size_t layer_size;
struct vpx_psnr_pkt psnr_pkt;
// Cyclic refresh parameters (aq-mode=3), that need to be updated per-frame.
+ // TODO(jianj/marpan): Is it better to use the full cyclic refresh struct?
int sb_index;
signed char *map;
uint8_t *last_coded_q_map;
uint8_t *consec_zero_mv;
+ int actual_num_seg1_blocks;
+ int actual_num_seg2_blocks;
+ int counter_encode_maxq_scene_change;
uint8_t speed;
} LAYER_CONTEXT;
@@ -56,8 +78,6 @@ typedef struct SVC {
int number_temporal_layers;
int spatial_layer_to_encode;
- int first_spatial_layer_to_encode;
- int rc_drop_superframe;
// Workaround for multiple frame contexts
enum { ENCODED = 0, ENCODING, NEED_TO_ENCODE } encode_empty_frame_state;
@@ -81,14 +101,20 @@ typedef struct SVC {
// Frame flags and buffer indexes for each spatial layer, set by the
// application (external settings).
int ext_frame_flags[VPX_MAX_LAYERS];
- int ext_lst_fb_idx[VPX_MAX_LAYERS];
- int ext_gld_fb_idx[VPX_MAX_LAYERS];
- int ext_alt_fb_idx[VPX_MAX_LAYERS];
- int ref_frame_index[REF_FRAMES];
+ int lst_fb_idx[VPX_MAX_LAYERS];
+ int gld_fb_idx[VPX_MAX_LAYERS];
+ int alt_fb_idx[VPX_MAX_LAYERS];
int force_zero_mode_spatial_ref;
+ // Sequence level flag to enable second (long term) temporal reference.
+ int use_gf_temporal_ref;
+ // Frame level flag to enable second (long term) temporal reference.
+ int use_gf_temporal_ref_current_layer;
+ // Allow a second reference for at most the top 2 highest-resolution layers.
+ BUFFER_LONGTERM_REF buffer_gf_temporal_ref[2];
int current_superframe;
int non_reference_frame;
int use_base_mv;
+ int use_partition_reuse;
// Used to control the downscaling filter for source scaling, for 1 pass CBR.
// downsample_filter_phase: = 0 will do sub-sampling (no weighted average),
// = 8 will center the target pixel and get a symmetric averaging filter.
@@ -99,8 +125,70 @@ typedef struct SVC {
BLOCK_SIZE *prev_partition_svc;
int mi_stride[VPX_MAX_LAYERS];
+ int mi_rows[VPX_MAX_LAYERS];
+ int mi_cols[VPX_MAX_LAYERS];
int first_layer_denoise;
+
+ int skip_enhancement_layer;
+
+ int lower_layer_qindex;
+
+ int last_layer_dropped[VPX_MAX_LAYERS];
+ int drop_spatial_layer[VPX_MAX_LAYERS];
+ int framedrop_thresh[VPX_MAX_LAYERS];
+ int drop_count[VPX_MAX_LAYERS];
+ int max_consec_drop;
+ SVC_LAYER_DROP_MODE framedrop_mode;
+
+ INTER_LAYER_PRED disable_inter_layer_pred;
+
+ // Flags to indicate a scene change and a high number of motion blocks in
+ // the current superframe. Scene detection is currently run for each
+ // superframe, prior to encoding, on the full resolution source.
+ int high_source_sad_superframe;
+ int high_num_blocks_with_motion;
+
+ // Flags used to get SVC pattern info.
+ int update_buffer_slot[VPX_SS_MAX_LAYERS];
+ uint8_t reference_last[VPX_SS_MAX_LAYERS];
+ uint8_t reference_golden[VPX_SS_MAX_LAYERS];
+ uint8_t reference_altref[VPX_SS_MAX_LAYERS];
+ // TODO(jianj): Remove these last 3, deprecated.
+ uint8_t update_last[VPX_SS_MAX_LAYERS];
+ uint8_t update_golden[VPX_SS_MAX_LAYERS];
+ uint8_t update_altref[VPX_SS_MAX_LAYERS];
+
+ // Keep track of the frame buffer index updated/refreshed on the base
+ // temporal layer (TL0) frame of each superframe.
+ int fb_idx_upd_tl0[VPX_SS_MAX_LAYERS];
+
+ // Keep track of the spatial and temporal layer id of the frame that last
+ // updated the frame buffer index.
+ uint8_t fb_idx_spatial_layer_id[REF_FRAMES];
+ uint8_t fb_idx_temporal_layer_id[REF_FRAMES];
+
+ int spatial_layer_sync[VPX_SS_MAX_LAYERS];
+ uint8_t set_intra_only_frame;
+ uint8_t previous_frame_is_intra_only;
+ uint8_t superframe_has_layer_sync;
+
+ uint8_t fb_idx_base[REF_FRAMES];
+
+ int use_set_ref_frame_config;
+
+ int temporal_layer_id_per_spatial[VPX_SS_MAX_LAYERS];
+
+ int first_spatial_layer_to_encode;
+
+ // Parameters that allow a per-spatial-layer frame rate, and buffer
+ // updates based on timestamps.
+ int64_t duration[VPX_SS_MAX_LAYERS];
+ int64_t timebase_fac;
+ int64_t time_stamp_superframe;
+ int64_t time_stamp_prev[VPX_SS_MAX_LAYERS];
+
+ int num_encoded_top_layer;
} SVC;
struct VP9_COMP;
@@ -148,16 +236,34 @@ struct lookahead_entry *vp9_svc_lookahead_pop(struct VP9_COMP *const cpi,
// Start a frame and initialize svc parameters
int vp9_svc_start_frame(struct VP9_COMP *const cpi);
+#if CONFIG_VP9_TEMPORAL_DENOISING
+int vp9_denoise_svc_non_key(struct VP9_COMP *const cpi);
+#endif
+
+void vp9_copy_flags_ref_update_idx(struct VP9_COMP *const cpi);
+
int vp9_one_pass_cbr_svc_start_layer(struct VP9_COMP *const cpi);
void vp9_free_svc_cyclic_refresh(struct VP9_COMP *const cpi);
-void vp9_svc_reset_key_frame(struct VP9_COMP *const cpi);
+void vp9_svc_reset_temporal_layers(struct VP9_COMP *const cpi, int is_key);
void vp9_svc_check_reset_layer_rc_flag(struct VP9_COMP *const cpi);
+void vp9_svc_constrain_inter_layer_pred(struct VP9_COMP *const cpi);
+
+void vp9_svc_assert_constraints_pattern(struct VP9_COMP *const cpi);
+
+void vp9_svc_check_spatial_layer_sync(struct VP9_COMP *const cpi);
+
+void vp9_svc_update_ref_frame_buffer_idx(struct VP9_COMP *const cpi);
+
+void vp9_svc_update_ref_frame(struct VP9_COMP *const cpi);
+
+void vp9_svc_adjust_frame_rate(struct VP9_COMP *const cpi);
+
#ifdef __cplusplus
} // extern "C"
#endif
-#endif // VP9_ENCODER_VP9_SVC_LAYERCONTEXT_
+#endif // VPX_VP9_ENCODER_VP9_SVC_LAYERCONTEXT_H_
diff --git a/libvpx/vp9/encoder/vp9_temporal_filter.c b/libvpx/vp9/encoder/vp9_temporal_filter.c
index 2758c42ae..cd340c394 100644
--- a/libvpx/vp9/encoder/vp9_temporal_filter.c
+++ b/libvpx/vp9/encoder/vp9_temporal_filter.c
@@ -38,53 +38,141 @@ static int fixed_divide[512];
static void temporal_filter_predictors_mb_c(
MACROBLOCKD *xd, uint8_t *y_mb_ptr, uint8_t *u_mb_ptr, uint8_t *v_mb_ptr,
int stride, int uv_block_width, int uv_block_height, int mv_row, int mv_col,
- uint8_t *pred, struct scale_factors *scale, int x, int y) {
+ uint8_t *pred, struct scale_factors *scale, int x, int y, MV *blk_mvs,
+ int use_32x32) {
const int which_mv = 0;
- const MV mv = { mv_row, mv_col };
const InterpKernel *const kernel = vp9_filter_kernels[EIGHTTAP_SHARP];
+ int i, j, k = 0, ys = (BH >> 1), xs = (BW >> 1);
enum mv_precision mv_precision_uv;
int uv_stride;
- if (uv_block_width == 8) {
+ if (uv_block_width == (BW >> 1)) {
uv_stride = (stride + 1) >> 1;
mv_precision_uv = MV_PRECISION_Q4;
} else {
uv_stride = stride;
mv_precision_uv = MV_PRECISION_Q3;
}
+#if !CONFIG_VP9_HIGHBITDEPTH
+ (void)xd;
+#endif
+ if (use_32x32) {
+ const MV mv = { mv_row, mv_col };
#if CONFIG_VP9_HIGHBITDEPTH
- if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
- vp9_highbd_build_inter_predictor(CONVERT_TO_SHORTPTR(y_mb_ptr), stride,
- CONVERT_TO_SHORTPTR(&pred[0]), 16, &mv,
- scale, 16, 16, which_mv, kernel,
- MV_PRECISION_Q3, x, y, xd->bd);
-
- vp9_highbd_build_inter_predictor(CONVERT_TO_SHORTPTR(u_mb_ptr), uv_stride,
- CONVERT_TO_SHORTPTR(&pred[256]),
- uv_block_width, &mv, scale, uv_block_width,
- uv_block_height, which_mv, kernel,
- mv_precision_uv, x, y, xd->bd);
-
- vp9_highbd_build_inter_predictor(CONVERT_TO_SHORTPTR(v_mb_ptr), uv_stride,
- CONVERT_TO_SHORTPTR(&pred[512]),
- uv_block_width, &mv, scale, uv_block_width,
- uv_block_height, which_mv, kernel,
- mv_precision_uv, x, y, xd->bd);
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ vp9_highbd_build_inter_predictor(CONVERT_TO_SHORTPTR(y_mb_ptr), stride,
+ CONVERT_TO_SHORTPTR(&pred[0]), BW, &mv,
+ scale, BW, BH, which_mv, kernel,
+ MV_PRECISION_Q3, x, y, xd->bd);
+
+ vp9_highbd_build_inter_predictor(
+ CONVERT_TO_SHORTPTR(u_mb_ptr), uv_stride,
+ CONVERT_TO_SHORTPTR(&pred[BLK_PELS]), uv_block_width, &mv, scale,
+ uv_block_width, uv_block_height, which_mv, kernel, mv_precision_uv, x,
+ y, xd->bd);
+
+ vp9_highbd_build_inter_predictor(
+ CONVERT_TO_SHORTPTR(v_mb_ptr), uv_stride,
+ CONVERT_TO_SHORTPTR(&pred[(BLK_PELS << 1)]), uv_block_width, &mv,
+ scale, uv_block_width, uv_block_height, which_mv, kernel,
+ mv_precision_uv, x, y, xd->bd);
+ return;
+ }
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ vp9_build_inter_predictor(y_mb_ptr, stride, &pred[0], BW, &mv, scale, BW,
+ BH, which_mv, kernel, MV_PRECISION_Q3, x, y);
+
+ vp9_build_inter_predictor(u_mb_ptr, uv_stride, &pred[BLK_PELS],
+ uv_block_width, &mv, scale, uv_block_width,
+ uv_block_height, which_mv, kernel,
+ mv_precision_uv, x, y);
+
+ vp9_build_inter_predictor(v_mb_ptr, uv_stride, &pred[(BLK_PELS << 1)],
+ uv_block_width, &mv, scale, uv_block_width,
+ uv_block_height, which_mv, kernel,
+ mv_precision_uv, x, y);
return;
}
+
+ // When use_32x32 == 0, construct the 32x32 predictor from four 16x16
+ // predictors.
+ // Y predictor
+ for (i = 0; i < BH; i += ys) {
+ for (j = 0; j < BW; j += xs) {
+ const MV mv = blk_mvs[k];
+ const int y_offset = i * stride + j;
+ const int p_offset = i * BW + j;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ vp9_highbd_build_inter_predictor(
+ CONVERT_TO_SHORTPTR(y_mb_ptr + y_offset), stride,
+ CONVERT_TO_SHORTPTR(&pred[p_offset]), BW, &mv, scale, xs, ys,
+ which_mv, kernel, MV_PRECISION_Q3, x, y, xd->bd);
+ } else {
+ vp9_build_inter_predictor(y_mb_ptr + y_offset, stride, &pred[p_offset],
+ BW, &mv, scale, xs, ys, which_mv, kernel,
+ MV_PRECISION_Q3, x, y);
+ }
+#else
+ vp9_build_inter_predictor(y_mb_ptr + y_offset, stride, &pred[p_offset],
+ BW, &mv, scale, xs, ys, which_mv, kernel,
+ MV_PRECISION_Q3, x, y);
#endif // CONFIG_VP9_HIGHBITDEPTH
- (void)xd;
- vp9_build_inter_predictor(y_mb_ptr, stride, &pred[0], 16, &mv, scale, 16, 16,
- which_mv, kernel, MV_PRECISION_Q3, x, y);
+ k++;
+ }
+ }
+
+ // U and V predictors
+ ys = (uv_block_height >> 1);
+ xs = (uv_block_width >> 1);
+ k = 0;
- vp9_build_inter_predictor(u_mb_ptr, uv_stride, &pred[256], uv_block_width,
- &mv, scale, uv_block_width, uv_block_height,
- which_mv, kernel, mv_precision_uv, x, y);
+ for (i = 0; i < uv_block_height; i += ys) {
+ for (j = 0; j < uv_block_width; j += xs) {
+ const MV mv = blk_mvs[k];
+ const int uv_offset = i * uv_stride + j;
+ const int p_offset = i * uv_block_width + j;
- vp9_build_inter_predictor(v_mb_ptr, uv_stride, &pred[512], uv_block_width,
- &mv, scale, uv_block_width, uv_block_height,
- which_mv, kernel, mv_precision_uv, x, y);
+#if CONFIG_VP9_HIGHBITDEPTH
+ if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
+ vp9_highbd_build_inter_predictor(
+ CONVERT_TO_SHORTPTR(u_mb_ptr + uv_offset), uv_stride,
+ CONVERT_TO_SHORTPTR(&pred[BLK_PELS + p_offset]), uv_block_width,
+ &mv, scale, xs, ys, which_mv, kernel, mv_precision_uv, x, y,
+ xd->bd);
+
+ vp9_highbd_build_inter_predictor(
+ CONVERT_TO_SHORTPTR(v_mb_ptr + uv_offset), uv_stride,
+ CONVERT_TO_SHORTPTR(&pred[(BLK_PELS << 1) + p_offset]),
+ uv_block_width, &mv, scale, xs, ys, which_mv, kernel,
+ mv_precision_uv, x, y, xd->bd);
+ } else {
+ vp9_build_inter_predictor(u_mb_ptr + uv_offset, uv_stride,
+ &pred[BLK_PELS + p_offset], uv_block_width,
+ &mv, scale, xs, ys, which_mv, kernel,
+ mv_precision_uv, x, y);
+
+ vp9_build_inter_predictor(v_mb_ptr + uv_offset, uv_stride,
+ &pred[(BLK_PELS << 1) + p_offset],
+ uv_block_width, &mv, scale, xs, ys, which_mv,
+ kernel, mv_precision_uv, x, y);
+ }
+#else
+ vp9_build_inter_predictor(u_mb_ptr + uv_offset, uv_stride,
+ &pred[BLK_PELS + p_offset], uv_block_width, &mv,
+ scale, xs, ys, which_mv, kernel,
+ mv_precision_uv, x, y);
+
+ vp9_build_inter_predictor(v_mb_ptr + uv_offset, uv_stride,
+ &pred[(BLK_PELS << 1) + p_offset],
+ uv_block_width, &mv, scale, xs, ys, which_mv,
+ kernel, mv_precision_uv, x, y);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ k++;
+ }
+ }
}
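For reference, the sub-block geometry walked by the loops above, for the luma case BW = BH = 32 and ys = xs = 16:

/* k = 0: (i, j) = ( 0,  0)    k = 1: (i, j) = ( 0, 16)
 * k = 2: (i, j) = (16,  0)    k = 3: (i, j) = (16, 16)
 * y_offset = i * stride + j indexes the source frame;
 * p_offset = i * BW + j indexes the 32x32 predictor block. */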
void vp9_temporal_filter_init(void) {
@@ -94,6 +182,186 @@ void vp9_temporal_filter_init(void) {
for (i = 1; i < 512; ++i) fixed_divide[i] = 0x80000 / i;
}
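fixed_divide[] trades the per-pixel division in the output normalization for a reciprocal multiply. A worked instance, using the rounded form applied later (accumulator + count/2 before the multiply):

/* fixed_divide[i] = 0x80000 / i = 2^19 / i, so x / i ~= (x * fixed_divide[i]) >> 19.
 * e.g. accumulator = 300, count = 6:
 *   fixed_divide[6] = 524288 / 6 = 87381
 *   (300 + 6/2) * 87381 = 26476443;  26476443 >> 19 = 50 = 300 / 6. */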
+static INLINE int mod_index(int sum_dist, int index, int rounding, int strength,
+ int filter_weight) {
+ int mod = (sum_dist * 3) / index;
+ mod += rounding;
+ mod >>= strength;
+
+ mod = VPXMIN(16, mod);
+
+ mod = 16 - mod;
+ mod *= filter_weight;
+
+ return mod;
+}
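A worked pass through mod_index() with hypothetical inputs, to make the scaling concrete (strength = 6 gives rounding = (1 << 6) >> 1 = 32):

/* sum_dist = 300, index = 11 (9 luma taps + 2 for chroma), filter_weight = 2:
 *   mod = 300 * 3 / 11 = 81
 *   81 + 32 = 113;  113 >> 6 = 1
 *   VPXMIN(16, 1) = 1;  16 - 1 = 15;  15 * 2 = 30
 * Lower distortion yields a larger blending weight (maximum 16 * 2 = 32). */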
+
+static INLINE int get_filter_weight(unsigned int i, unsigned int j,
+ unsigned int block_height,
+ unsigned int block_width, int *blk_fw,
+ int use_32x32) {
+ int filter_weight = 0;
+
+ if (use_32x32)
+ // blk_fw[0] ~ blk_fw[3] are the same.
+ return blk_fw[0];
+
+ if (i < block_height / 2) {
+ if (j < block_width / 2)
+ filter_weight = blk_fw[0];
+ else
+ filter_weight = blk_fw[1];
+ } else {
+ if (j < block_width / 2)
+ filter_weight = blk_fw[2];
+ else
+ filter_weight = blk_fw[3];
+ }
+ return filter_weight;
+}
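The quadrant selection in get_filter_weight() maps pixel (i, j) of a 32x32 block to one of the four 16x16 sub-block weights:

/* For block_height = block_width = 32:
 *   i in [0, 16),  j in [0, 16)  -> blk_fw[0]    i in [0, 16),  j in [16, 32) -> blk_fw[1]
 *   i in [16, 32), j in [0, 16)  -> blk_fw[2]    i in [16, 32), j in [16, 32) -> blk_fw[3]
 * With use_32x32 set, all four entries hold the same weight and blk_fw[0]
 * is returned directly. */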
+
+static void apply_temporal_filter(
+ const uint8_t *y_frame1, int y_stride, const uint8_t *y_pred,
+ int y_buf_stride, const uint8_t *u_frame1, const uint8_t *v_frame1,
+ int uv_stride, const uint8_t *u_pred, const uint8_t *v_pred,
+ int uv_buf_stride, unsigned int block_width, unsigned int block_height,
+ int ss_x, int ss_y, int strength, int *blk_fw, int use_32x32,
+ uint32_t *y_accumulator, uint16_t *y_count, uint32_t *u_accumulator,
+ uint16_t *u_count, uint32_t *v_accumulator, uint16_t *v_count) {
+ unsigned int i, j, k, m;
+ int modifier;
+ const int rounding = (1 << strength) >> 1;
+ const unsigned int uv_block_width = block_width >> ss_x;
+ const unsigned int uv_block_height = block_height >> ss_y;
+ DECLARE_ALIGNED(16, uint16_t, y_diff_sse[BLK_PELS]);
+ DECLARE_ALIGNED(16, uint16_t, u_diff_sse[BLK_PELS]);
+ DECLARE_ALIGNED(16, uint16_t, v_diff_sse[BLK_PELS]);
+
+ int idx = 0, idy;
+
+ assert(strength >= 0);
+ assert(strength <= 6);
+
+ memset(y_diff_sse, 0, BLK_PELS * sizeof(uint16_t));
+ memset(u_diff_sse, 0, BLK_PELS * sizeof(uint16_t));
+ memset(v_diff_sse, 0, BLK_PELS * sizeof(uint16_t));
+
+ // Calculate diff^2 for each pixel of the block.
+ // TODO(yunqing): the following code needs to be optimized.
+ for (i = 0; i < block_height; i++) {
+ for (j = 0; j < block_width; j++) {
+ const int16_t diff =
+ y_frame1[i * (int)y_stride + j] - y_pred[i * (int)block_width + j];
+ y_diff_sse[idx++] = diff * diff;
+ }
+ }
+ idx = 0;
+ for (i = 0; i < uv_block_height; i++) {
+ for (j = 0; j < uv_block_width; j++) {
+ const int16_t diffu =
+ u_frame1[i * uv_stride + j] - u_pred[i * uv_buf_stride + j];
+ const int16_t diffv =
+ v_frame1[i * uv_stride + j] - v_pred[i * uv_buf_stride + j];
+ u_diff_sse[idx] = diffu * diffu;
+ v_diff_sse[idx] = diffv * diffv;
+ idx++;
+ }
+ }
+
+ for (i = 0, k = 0, m = 0; i < block_height; i++) {
+ for (j = 0; j < block_width; j++) {
+ const int pixel_value = y_pred[i * y_buf_stride + j];
+ int filter_weight =
+ get_filter_weight(i, j, block_height, block_width, blk_fw, use_32x32);
+
+ // non-local mean approach
+ int y_index = 0;
+
+ const int uv_r = i >> ss_y;
+ const int uv_c = j >> ss_x;
+ modifier = 0;
+
+ for (idy = -1; idy <= 1; ++idy) {
+ for (idx = -1; idx <= 1; ++idx) {
+ const int row = (int)i + idy;
+ const int col = (int)j + idx;
+
+ if (row >= 0 && row < (int)block_height && col >= 0 &&
+ col < (int)block_width) {
+ modifier += y_diff_sse[row * (int)block_width + col];
+ ++y_index;
+ }
+ }
+ }
+
+ assert(y_index > 0);
+
+ modifier += u_diff_sse[uv_r * uv_block_width + uv_c];
+ modifier += v_diff_sse[uv_r * uv_block_width + uv_c];
+
+ y_index += 2;
+
+ modifier =
+ mod_index(modifier, y_index, rounding, strength, filter_weight);
+
+ y_count[k] += modifier;
+ y_accumulator[k] += modifier * pixel_value;
+
+ ++k;
+
+ // Process chroma component
+ if (!(i & ss_y) && !(j & ss_x)) {
+ const int u_pixel_value = u_pred[uv_r * uv_buf_stride + uv_c];
+ const int v_pixel_value = v_pred[uv_r * uv_buf_stride + uv_c];
+
+ // non-local mean approach
+ int cr_index = 0;
+ int u_mod = 0, v_mod = 0;
+ int y_diff = 0;
+
+ for (idy = -1; idy <= 1; ++idy) {
+ for (idx = -1; idx <= 1; ++idx) {
+ const int row = uv_r + idy;
+ const int col = uv_c + idx;
+
+ if (row >= 0 && row < (int)uv_block_height && col >= 0 &&
+ col < (int)uv_block_width) {
+ u_mod += u_diff_sse[row * uv_block_width + col];
+ v_mod += v_diff_sse[row * uv_block_width + col];
+ ++cr_index;
+ }
+ }
+ }
+
+ assert(cr_index > 0);
+
+ for (idy = 0; idy < 1 + ss_y; ++idy) {
+ for (idx = 0; idx < 1 + ss_x; ++idx) {
+ const int row = (uv_r << ss_y) + idy;
+ const int col = (uv_c << ss_x) + idx;
+ y_diff += y_diff_sse[row * (int)block_width + col];
+ ++cr_index;
+ }
+ }
+
+ u_mod += y_diff;
+ v_mod += y_diff;
+
+ u_mod = mod_index(u_mod, cr_index, rounding, strength, filter_weight);
+ v_mod = mod_index(v_mod, cr_index, rounding, strength, filter_weight);
+
+ u_count[m] += u_mod;
+ u_accumulator[m] += u_mod * u_pixel_value;
+ v_count[m] += v_mod;
+ v_accumulator[m] += v_mod * v_pixel_value;
+
+ ++m;
+ } // Complete YUV pixel
+ }
+ }
+}
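In summary, per luma pixel p the loop above accumulates the following (a sketch of the update, not additional API):

/*   modifier = mod_index(3x3 neighborhood sum of y_diff_sse
 *                        + co-located u/v diff terms, taps + 2, ...);
 *   y_count[p]       += modifier;
 *   y_accumulator[p] += modifier * predicted_pixel;
 * The filtered output formed later is effectively
 *   out[p] = (y_accumulator[p] + y_count[p] / 2) / y_count[p]
 * evaluated with the fixed_divide reciprocal table. */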
+
+// TODO(any): This function is not used anymore. Should be removed.
void vp9_temporal_filter_apply_c(const uint8_t *frame1, unsigned int stride,
const uint8_t *frame2,
unsigned int block_width,
@@ -103,7 +371,7 @@ void vp9_temporal_filter_apply_c(const uint8_t *frame1, unsigned int stride,
unsigned int i, j, k;
int modifier;
int byte = 0;
- const int rounding = strength > 0 ? 1 << (strength - 1) : 0;
+ const int rounding = (1 << strength) >> 1;
assert(strength >= 0);
assert(strength <= 6);
@@ -166,18 +434,31 @@ void vp9_temporal_filter_apply_c(const uint8_t *frame1, unsigned int stride,
void vp9_highbd_temporal_filter_apply_c(
const uint8_t *frame1_8, unsigned int stride, const uint8_t *frame2_8,
unsigned int block_width, unsigned int block_height, int strength,
- int filter_weight, uint32_t *accumulator, uint16_t *count) {
+ int *blk_fw, int use_32x32, uint32_t *accumulator, uint16_t *count) {
const uint16_t *frame1 = CONVERT_TO_SHORTPTR(frame1_8);
const uint16_t *frame2 = CONVERT_TO_SHORTPTR(frame2_8);
unsigned int i, j, k;
int modifier;
- int byte = 0;
const int rounding = strength > 0 ? 1 << (strength - 1) : 0;
+ int diff_sse[BLK_PELS] = { 0 };
+ int this_idx = 0;
+
+ for (i = 0; i < block_height; i++) {
+ for (j = 0; j < block_width; j++) {
+ const int diff =
+ frame1[i * (int)stride + j] - frame2[i * (int)block_width + j];
+ diff_sse[this_idx++] = diff * diff;
+ }
+ }
+
+ modifier = 0;
for (i = 0, k = 0; i < block_height; i++) {
for (j = 0; j < block_width; j++, k++) {
- int pixel_value = *frame2;
- int diff_sse[9] = { 0 };
+ int pixel_value = frame2[i * (int)block_width + j];
+ int filter_weight =
+ get_filter_weight(i, j, block_height, block_width, blk_fw, use_32x32);
+
int idx, idy, index = 0;
for (idy = -1; idy <= 1; ++idy) {
@@ -187,22 +468,16 @@ void vp9_highbd_temporal_filter_apply_c(
if (row >= 0 && row < (int)block_height && col >= 0 &&
col < (int)block_width) {
- int diff = frame1[byte + idy * (int)stride + idx] -
- frame2[idy * (int)block_width + idx];
- diff_sse[index] = diff * diff;
+ modifier += diff_sse[row * (int)block_width + col];
++index;
}
}
}
assert(index > 0);
- modifier = 0;
- for (idx = 0; idx < 9; ++idx) modifier += diff_sse[idx];
-
modifier *= 3;
modifier /= index;
- ++frame2;
modifier += rounding;
modifier >>= strength;
@@ -213,24 +488,19 @@ void vp9_highbd_temporal_filter_apply_c(
count[k] += modifier;
accumulator[k] += modifier * pixel_value;
-
- byte++;
}
-
- byte += stride - block_width;
}
}
#endif // CONFIG_VP9_HIGHBITDEPTH
-static uint32_t temporal_filter_find_matching_mb_c(VP9_COMP *cpi,
- ThreadData *td,
- uint8_t *arf_frame_buf,
- uint8_t *frame_ptr_buf,
- int stride, MV *ref_mv) {
+static uint32_t temporal_filter_find_matching_mb_c(
+ VP9_COMP *cpi, ThreadData *td, uint8_t *arf_frame_buf,
+ uint8_t *frame_ptr_buf, int stride, MV *ref_mv, MV *blk_mvs,
+ int *blk_bestsme) {
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
- const SEARCH_METHODS search_method = HEX;
+ const SEARCH_METHODS search_method = MESH;
int step_param;
int sadpb = x->sadperbit16;
uint32_t bestsme = UINT_MAX;
@@ -245,6 +515,7 @@ static uint32_t temporal_filter_find_matching_mb_c(VP9_COMP *cpi,
// Save input state
struct buf_2d src = x->plane[0].src;
struct buf_2d pre = xd->plane[0].pre[0];
+ int i, j, k = 0;
best_ref_mv1_full.col = best_ref_mv1.col >> 3;
best_ref_mv1_full.row = best_ref_mv1.row >> 3;
@@ -260,19 +531,52 @@ static uint32_t temporal_filter_find_matching_mb_c(VP9_COMP *cpi,
vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1);
- vp9_full_pixel_search(cpi, x, BLOCK_16X16, &best_ref_mv1_full, step_param,
+ vp9_full_pixel_search(cpi, x, TF_BLOCK, &best_ref_mv1_full, step_param,
search_method, sadpb, cond_cost_list(cpi, cost_list),
&best_ref_mv1, ref_mv, 0, 0);
/* restore UMV window */
x->mv_limits = tmp_mv_limits;
- // Ignore mv costing by sending NULL pointer instead of cost array
+ // find_fractional_mv_step parameters: best_ref_mv1 is for mv rate cost
+ // calculation. The start full mv and the search result are stored in
+ // ref_mv.
bestsme = cpi->find_fractional_mv_step(
x, ref_mv, &best_ref_mv1, cpi->common.allow_high_precision_mv,
- x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], 0,
- mv_sf->subpel_iters_per_step, cond_cost_list(cpi, cost_list), NULL, NULL,
- &distortion, &sse, NULL, 0, 0);
+ x->errorperbit, &cpi->fn_ptr[TF_BLOCK], 0, mv_sf->subpel_search_level,
+ cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, BW,
+ BH, USE_8_TAPS_SHARP);
+
+ // Do motion search on the four 16x16 sub-blocks.
+ best_ref_mv1.row = ref_mv->row;
+ best_ref_mv1.col = ref_mv->col;
+ best_ref_mv1_full.col = best_ref_mv1.col >> 3;
+ best_ref_mv1_full.row = best_ref_mv1.row >> 3;
+
+ for (i = 0; i < BH; i += SUB_BH) {
+ for (j = 0; j < BW; j += SUB_BW) {
+ // Setup frame pointers
+ x->plane[0].src.buf = arf_frame_buf + i * stride + j;
+ x->plane[0].src.stride = stride;
+ xd->plane[0].pre[0].buf = frame_ptr_buf + i * stride + j;
+ xd->plane[0].pre[0].stride = stride;
+
+ vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1);
+ vp9_full_pixel_search(cpi, x, TF_SUB_BLOCK, &best_ref_mv1_full,
+ step_param, search_method, sadpb,
+ cond_cost_list(cpi, cost_list), &best_ref_mv1,
+ &blk_mvs[k], 0, 0);
+ /* restore UMV window */
+ x->mv_limits = tmp_mv_limits;
+
+ blk_bestsme[k] = cpi->find_fractional_mv_step(
+ x, &blk_mvs[k], &best_ref_mv1, cpi->common.allow_high_precision_mv,
+ x->errorperbit, &cpi->fn_ptr[TF_SUB_BLOCK], 0,
+ mv_sf->subpel_search_level, cond_cost_list(cpi, cost_list), NULL,
+ NULL, &distortion, &sse, NULL, SUB_BW, SUB_BH, USE_8_TAPS_SHARP);
+ k++;
+ }
+ }
// Restore input state
x->plane[0].src = src;
@@ -293,25 +597,24 @@ void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td,
int byte;
int frame;
int mb_col;
- unsigned int filter_weight;
- int mb_cols = (frames[alt_ref_index]->y_crop_width + 15) >> 4;
- int mb_rows = (frames[alt_ref_index]->y_crop_height + 15) >> 4;
- DECLARE_ALIGNED(16, uint32_t, accumulator[16 * 16 * 3]);
- DECLARE_ALIGNED(16, uint16_t, count[16 * 16 * 3]);
+ int mb_cols = (frames[alt_ref_index]->y_crop_width + BW - 1) >> BW_LOG2;
+ int mb_rows = (frames[alt_ref_index]->y_crop_height + BH - 1) >> BH_LOG2;
+ DECLARE_ALIGNED(16, uint32_t, accumulator[BLK_PELS * 3]);
+ DECLARE_ALIGNED(16, uint16_t, count[BLK_PELS * 3]);
MACROBLOCKD *mbd = &td->mb.e_mbd;
YV12_BUFFER_CONFIG *f = frames[alt_ref_index];
uint8_t *dst1, *dst2;
#if CONFIG_VP9_HIGHBITDEPTH
- DECLARE_ALIGNED(16, uint16_t, predictor16[16 * 16 * 3]);
- DECLARE_ALIGNED(16, uint8_t, predictor8[16 * 16 * 3]);
+ DECLARE_ALIGNED(16, uint16_t, predictor16[BLK_PELS * 3]);
+ DECLARE_ALIGNED(16, uint8_t, predictor8[BLK_PELS * 3]);
uint8_t *predictor;
#else
- DECLARE_ALIGNED(16, uint8_t, predictor[16 * 16 * 3]);
+ DECLARE_ALIGNED(16, uint8_t, predictor[BLK_PELS * 3]);
#endif
- const int mb_uv_height = 16 >> mbd->plane[1].subsampling_y;
- const int mb_uv_width = 16 >> mbd->plane[1].subsampling_x;
+ const int mb_uv_height = BH >> mbd->plane[1].subsampling_y;
+ const int mb_uv_width = BW >> mbd->plane[1].subsampling_x;
// Addition of the tile col level offsets
- int mb_y_offset = mb_row * 16 * (f->y_stride) + 16 * mb_col_start;
+ int mb_y_offset = mb_row * BH * (f->y_stride) + BW * mb_col_start;
int mb_uv_offset =
mb_row * mb_uv_height * f->uv_stride + mb_uv_width * mb_col_start;
@@ -334,21 +637,21 @@ void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td,
// 8 - VP9_INTERP_EXTEND.
// To keep the mv in play for both Y and UV planes the max that it
// can be on a border is therefore 16 - (2*VP9_INTERP_EXTEND+1).
- td->mb.mv_limits.row_min = -((mb_row * 16) + (17 - 2 * VP9_INTERP_EXTEND));
+ td->mb.mv_limits.row_min = -((mb_row * BH) + (17 - 2 * VP9_INTERP_EXTEND));
td->mb.mv_limits.row_max =
- ((mb_rows - 1 - mb_row) * 16) + (17 - 2 * VP9_INTERP_EXTEND);
+ ((mb_rows - 1 - mb_row) * BH) + (17 - 2 * VP9_INTERP_EXTEND);
for (mb_col = mb_col_start; mb_col < mb_col_end; mb_col++) {
int i, j, k;
int stride;
MV ref_mv;
- vp9_zero_array(accumulator, 16 * 16 * 3);
- vp9_zero_array(count, 16 * 16 * 3);
+ vp9_zero_array(accumulator, BLK_PELS * 3);
+ vp9_zero_array(count, BLK_PELS * 3);
- td->mb.mv_limits.col_min = -((mb_col * 16) + (17 - 2 * VP9_INTERP_EXTEND));
+ td->mb.mv_limits.col_min = -((mb_col * BW) + (17 - 2 * VP9_INTERP_EXTEND));
td->mb.mv_limits.col_max =
- ((mb_cols - 1 - mb_col) * 16) + (17 - 2 * VP9_INTERP_EXTEND);
+ ((mb_cols - 1 - mb_col) * BW) + (17 - 2 * VP9_INTERP_EXTEND);
if (cpi->oxcf.content == VP9E_CONTENT_FILM) {
unsigned int src_variance;
@@ -360,92 +663,129 @@ void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td,
#if CONFIG_VP9_HIGHBITDEPTH
if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
src_variance =
- vp9_high_get_sby_perpixel_variance(cpi, &src, BLOCK_16X16, mbd->bd);
+ vp9_high_get_sby_perpixel_variance(cpi, &src, TF_BLOCK, mbd->bd);
} else {
- src_variance = vp9_get_sby_perpixel_variance(cpi, &src, BLOCK_16X16);
+ src_variance = vp9_get_sby_perpixel_variance(cpi, &src, TF_BLOCK);
}
#else
- src_variance = vp9_get_sby_perpixel_variance(cpi, &src, BLOCK_16X16);
+ src_variance = vp9_get_sby_perpixel_variance(cpi, &src, TF_BLOCK);
#endif // CONFIG_VP9_HIGHBITDEPTH
if (src_variance <= 2) strength = VPXMAX(0, (int)strength - 2);
}
for (frame = 0; frame < frame_count; frame++) {
- const uint32_t thresh_low = 10000;
- const uint32_t thresh_high = 20000;
+ // MVs for the four 16x16 sub-blocks.
+ MV blk_mvs[4];
+ // Filter weights for the four 16x16 sub-blocks.
+ int blk_fw[4] = { 0, 0, 0, 0 };
+ int use_32x32 = 0;
if (frames[frame] == NULL) continue;
ref_mv.row = 0;
ref_mv.col = 0;
+ blk_mvs[0] = kZeroMv;
+ blk_mvs[1] = kZeroMv;
+ blk_mvs[2] = kZeroMv;
+ blk_mvs[3] = kZeroMv;
if (frame == alt_ref_index) {
- filter_weight = 2;
+ blk_fw[0] = blk_fw[1] = blk_fw[2] = blk_fw[3] = 2;
+ use_32x32 = 1;
} else {
+ const int thresh_low = 10000;
+ const int thresh_high = 20000;
+ int blk_bestsme[4] = { INT_MAX, INT_MAX, INT_MAX, INT_MAX };
+
// Find best match in this frame by MC
- uint32_t err = temporal_filter_find_matching_mb_c(
+ int err = temporal_filter_find_matching_mb_c(
cpi, td, frames[alt_ref_index]->y_buffer + mb_y_offset,
frames[frame]->y_buffer + mb_y_offset, frames[frame]->y_stride,
- &ref_mv);
+ &ref_mv, blk_mvs, blk_bestsme);
+
+ int err16 =
+ blk_bestsme[0] + blk_bestsme[1] + blk_bestsme[2] + blk_bestsme[3];
+ int max_err = INT_MIN, min_err = INT_MAX;
+ for (k = 0; k < 4; k++) {
+ if (min_err > blk_bestsme[k]) min_err = blk_bestsme[k];
+ if (max_err < blk_bestsme[k]) max_err = blk_bestsme[k];
+ }
+
+ if (((err * 15 < (err16 << 4)) && max_err - min_err < 10000) ||
+ ((err * 14 < (err16 << 4)) && max_err - min_err < 5000)) {
+ use_32x32 = 1;
+ // Assign a higher weight to the matching MB if its error
+ // score is lower. If not applying MC, the default behavior
+ // is to weight all MBs equally.
+ blk_fw[0] = err < (thresh_low << THR_SHIFT)
+ ? 2
+ : err < (thresh_high << THR_SHIFT) ? 1 : 0;
+ blk_fw[1] = blk_fw[2] = blk_fw[3] = blk_fw[0];
+ } else {
+ use_32x32 = 0;
+ for (k = 0; k < 4; k++)
+ blk_fw[k] = blk_bestsme[k] < thresh_low
+ ? 2
+ : blk_bestsme[k] < thresh_high ? 1 : 0;
+ }
- // Assign higher weight to matching MB if its error
- // score is lower. If not applying MC default behavior
- // is to weight all MBs equal.
- filter_weight = err < thresh_low ? 2 : err < thresh_high ? 1 : 0;
+ for (k = 0; k < 4; k++) {
+ switch (abs(frame - alt_ref_index)) {
+ case 1: blk_fw[k] = VPXMIN(blk_fw[k], 2); break;
+ case 2:
+ case 3: blk_fw[k] = VPXMIN(blk_fw[k], 1); break;
+ default: break;
+ }
+ }
}
- if (filter_weight != 0) {
+ if (blk_fw[0] || blk_fw[1] || blk_fw[2] || blk_fw[3]) {
// Construct the predictors
temporal_filter_predictors_mb_c(
mbd, frames[frame]->y_buffer + mb_y_offset,
frames[frame]->u_buffer + mb_uv_offset,
frames[frame]->v_buffer + mb_uv_offset, frames[frame]->y_stride,
mb_uv_width, mb_uv_height, ref_mv.row, ref_mv.col, predictor, scale,
- mb_col * 16, mb_row * 16);
+ mb_col * BW, mb_row * BH, blk_mvs, use_32x32);
#if CONFIG_VP9_HIGHBITDEPTH
if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
int adj_strength = strength + 2 * (mbd->bd - 8);
// Apply the filter (YUV)
vp9_highbd_temporal_filter_apply(
- f->y_buffer + mb_y_offset, f->y_stride, predictor, 16, 16,
- adj_strength, filter_weight, accumulator, count);
+ f->y_buffer + mb_y_offset, f->y_stride, predictor, BW, BH,
+ adj_strength, blk_fw, use_32x32, accumulator, count);
vp9_highbd_temporal_filter_apply(
- f->u_buffer + mb_uv_offset, f->uv_stride, predictor + 256,
- mb_uv_width, mb_uv_height, adj_strength, filter_weight,
- accumulator + 256, count + 256);
+ f->u_buffer + mb_uv_offset, f->uv_stride, predictor + BLK_PELS,
+ mb_uv_width, mb_uv_height, adj_strength, blk_fw, use_32x32,
+ accumulator + BLK_PELS, count + BLK_PELS);
vp9_highbd_temporal_filter_apply(
- f->v_buffer + mb_uv_offset, f->uv_stride, predictor + 512,
- mb_uv_width, mb_uv_height, adj_strength, filter_weight,
- accumulator + 512, count + 512);
+ f->v_buffer + mb_uv_offset, f->uv_stride,
+ predictor + (BLK_PELS << 1), mb_uv_width, mb_uv_height,
+ adj_strength, blk_fw, use_32x32, accumulator + (BLK_PELS << 1),
+ count + (BLK_PELS << 1));
} else {
// Apply the filter (YUV)
- vp9_temporal_filter_apply(f->y_buffer + mb_y_offset, f->y_stride,
- predictor, 16, 16, strength, filter_weight,
- accumulator, count);
- vp9_temporal_filter_apply(f->u_buffer + mb_uv_offset, f->uv_stride,
- predictor + 256, mb_uv_width, mb_uv_height,
- strength, filter_weight, accumulator + 256,
- count + 256);
- vp9_temporal_filter_apply(f->v_buffer + mb_uv_offset, f->uv_stride,
- predictor + 512, mb_uv_width, mb_uv_height,
- strength, filter_weight, accumulator + 512,
- count + 512);
+ apply_temporal_filter(
+ f->y_buffer + mb_y_offset, f->y_stride, predictor, BW,
+ f->u_buffer + mb_uv_offset, f->v_buffer + mb_uv_offset,
+ f->uv_stride, predictor + BLK_PELS, predictor + (BLK_PELS << 1),
+ mb_uv_width, BW, BH, mbd->plane[1].subsampling_x,
+ mbd->plane[1].subsampling_y, strength, blk_fw, use_32x32,
+ accumulator, count, accumulator + BLK_PELS, count + BLK_PELS,
+ accumulator + (BLK_PELS << 1), count + (BLK_PELS << 1));
}
#else
// Apply the filter (YUV)
- vp9_temporal_filter_apply(f->y_buffer + mb_y_offset, f->y_stride,
- predictor, 16, 16, strength, filter_weight,
- accumulator, count);
- vp9_temporal_filter_apply(f->u_buffer + mb_uv_offset, f->uv_stride,
- predictor + 256, mb_uv_width, mb_uv_height,
- strength, filter_weight, accumulator + 256,
- count + 256);
- vp9_temporal_filter_apply(f->v_buffer + mb_uv_offset, f->uv_stride,
- predictor + 512, mb_uv_width, mb_uv_height,
- strength, filter_weight, accumulator + 512,
- count + 512);
+ apply_temporal_filter(
+ f->y_buffer + mb_y_offset, f->y_stride, predictor, BW,
+ f->u_buffer + mb_uv_offset, f->v_buffer + mb_uv_offset,
+ f->uv_stride, predictor + BLK_PELS, predictor + (BLK_PELS << 1),
+ mb_uv_width, BW, BH, mbd->plane[1].subsampling_x,
+ mbd->plane[1].subsampling_y, strength, blk_fw, use_32x32,
+ accumulator, count, accumulator + BLK_PELS, count + BLK_PELS,
+ accumulator + (BLK_PELS << 1), count + (BLK_PELS << 1));
#endif // CONFIG_VP9_HIGHBITDEPTH
}
}
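Read numerically, the 32x32-versus-split decision earlier in this hunk behaves as follows (an illustrative reading, with err16 the sum of the four 16x16 errors): filter at 32x32 when err * 15 < err16 * 16, i.e. the 32x32 error is below roughly 1.067x the split error and the four sub-block errors differ by less than 10000; or under the looser ratio err * 14 < err16 * 16 (about 1.143x) when the spread is below 5000. Otherwise each 16x16 sub-block keeps its own motion vector and weight.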
@@ -459,8 +799,8 @@ void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td,
dst1_16 = CONVERT_TO_SHORTPTR(dst1);
stride = cpi->alt_ref_buffer.y_stride;
byte = mb_y_offset;
- for (i = 0, k = 0; i < 16; i++) {
- for (j = 0; j < 16; j++, k++) {
+ for (i = 0, k = 0; i < BH; i++) {
+ for (j = 0; j < BW; j++, k++) {
unsigned int pval = accumulator[k] + (count[k] >> 1);
pval *= fixed_divide[count[k]];
pval >>= 19;
@@ -471,7 +811,7 @@ void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td,
byte++;
}
- byte += stride - 16;
+ byte += stride - BW;
}
dst1 = cpi->alt_ref_buffer.u_buffer;
@@ -480,9 +820,9 @@ void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td,
dst2_16 = CONVERT_TO_SHORTPTR(dst2);
stride = cpi->alt_ref_buffer.uv_stride;
byte = mb_uv_offset;
- for (i = 0, k = 256; i < mb_uv_height; i++) {
+ for (i = 0, k = BLK_PELS; i < mb_uv_height; i++) {
for (j = 0; j < mb_uv_width; j++, k++) {
- int m = k + 256;
+ int m = k + BLK_PELS;
// U
unsigned int pval = accumulator[k] + (count[k] >> 1);
@@ -507,8 +847,8 @@ void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td,
dst1 = cpi->alt_ref_buffer.y_buffer;
stride = cpi->alt_ref_buffer.y_stride;
byte = mb_y_offset;
- for (i = 0, k = 0; i < 16; i++) {
- for (j = 0; j < 16; j++, k++) {
+ for (i = 0, k = 0; i < BH; i++) {
+ for (j = 0; j < BW; j++, k++) {
unsigned int pval = accumulator[k] + (count[k] >> 1);
pval *= fixed_divide[count[k]];
pval >>= 19;
@@ -518,16 +858,16 @@ void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td,
// move to next pixel
byte++;
}
- byte += stride - 16;
+ byte += stride - BW;
}
dst1 = cpi->alt_ref_buffer.u_buffer;
dst2 = cpi->alt_ref_buffer.v_buffer;
stride = cpi->alt_ref_buffer.uv_stride;
byte = mb_uv_offset;
- for (i = 0, k = 256; i < mb_uv_height; i++) {
+ for (i = 0, k = BLK_PELS; i < mb_uv_height; i++) {
for (j = 0; j < mb_uv_width; j++, k++) {
- int m = k + 256;
+ int m = k + BLK_PELS;
// U
unsigned int pval = accumulator[k] + (count[k] >> 1);
@@ -552,8 +892,8 @@ void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td,
dst1 = cpi->alt_ref_buffer.y_buffer;
stride = cpi->alt_ref_buffer.y_stride;
byte = mb_y_offset;
- for (i = 0, k = 0; i < 16; i++) {
- for (j = 0; j < 16; j++, k++) {
+ for (i = 0, k = 0; i < BH; i++) {
+ for (j = 0; j < BW; j++, k++) {
unsigned int pval = accumulator[k] + (count[k] >> 1);
pval *= fixed_divide[count[k]];
pval >>= 19;
@@ -563,16 +903,16 @@ void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td,
// move to next pixel
byte++;
}
- byte += stride - 16;
+ byte += stride - BW;
}
dst1 = cpi->alt_ref_buffer.u_buffer;
dst2 = cpi->alt_ref_buffer.v_buffer;
stride = cpi->alt_ref_buffer.uv_stride;
byte = mb_uv_offset;
- for (i = 0, k = 256; i < mb_uv_height; i++) {
+ for (i = 0, k = BLK_PELS; i < mb_uv_height; i++) {
for (j = 0; j < mb_uv_width; j++, k++) {
- int m = k + 256;
+ int m = k + BLK_PELS;
// U
unsigned int pval = accumulator[k] + (count[k] >> 1);
@@ -592,7 +932,7 @@ void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td,
byte += stride - mb_uv_width;
}
#endif // CONFIG_VP9_HIGHBITDEPTH
- mb_y_offset += 16;
+ mb_y_offset += BW;
mb_uv_offset += mb_uv_width;
}
}
@@ -603,10 +943,10 @@ static void temporal_filter_iterate_tile_c(VP9_COMP *cpi, int tile_row,
const int tile_cols = 1 << cm->log2_tile_cols;
TileInfo *tile_info =
&cpi->tile_data[tile_row * tile_cols + tile_col].tile_info;
- const int mb_row_start = (tile_info->mi_row_start) >> 1;
- const int mb_row_end = (tile_info->mi_row_end + 1) >> 1;
- const int mb_col_start = (tile_info->mi_col_start) >> 1;
- const int mb_col_end = (tile_info->mi_col_end + 1) >> 1;
+ const int mb_row_start = (tile_info->mi_row_start) >> TF_SHIFT;
+ const int mb_row_end = (tile_info->mi_row_end + TF_ROUND) >> TF_SHIFT;
+ const int mb_col_start = (tile_info->mi_col_start) >> TF_SHIFT;
+ const int mb_col_end = (tile_info->mi_col_end + TF_ROUND) >> TF_SHIFT;
int mb_row;
for (mb_row = mb_row_start; mb_row < mb_row_end; mb_row++) {
@@ -620,13 +960,6 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi) {
const int tile_cols = 1 << cm->log2_tile_cols;
const int tile_rows = 1 << cm->log2_tile_rows;
int tile_row, tile_col;
- MACROBLOCKD *mbd = &cpi->td.mb.e_mbd;
- // Save input state
- uint8_t *input_buffer[MAX_MB_PLANE];
- int i;
-
- for (i = 0; i < MAX_MB_PLANE; i++) input_buffer[i] = mbd->plane[i].pre[0].buf;
-
vp9_init_tile_data(cpi);
for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
@@ -634,15 +967,13 @@ static void temporal_filter_iterate_c(VP9_COMP *cpi) {
temporal_filter_iterate_tile_c(cpi, tile_row, tile_col);
}
}
-
- // Restore input state
- for (i = 0; i < MAX_MB_PLANE; i++) mbd->plane[i].pre[0].buf = input_buffer[i];
}
// Apply buffer limits and context specific adjustments to arnr filter.
static void adjust_arnr_filter(VP9_COMP *cpi, int distance, int group_boost,
int *arnr_frames, int *arnr_strength) {
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
+ const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
const int frames_after_arf =
vp9_lookahead_depth(cpi->lookahead) - distance - 1;
int frames_fwd = (cpi->oxcf.arnr_max_frames - 1) >> 1;
@@ -696,12 +1027,17 @@ static void adjust_arnr_filter(VP9_COMP *cpi, int distance, int group_boost,
}
// Adjustments for second level arf in multi arf case.
- if (cpi->oxcf.pass == 2 && cpi->multi_arf_allowed) {
- const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
- if (gf_group->rf_level[gf_group->index] != GF_ARF_STD) {
- strength >>= 1;
- }
- }
+ // Leave a commented-out placeholder for a possible filtering adjustment
+ // with the new multi-layer arf code.
+ // if (cpi->oxcf.pass == 2 && cpi->multi_arf_allowed)
+ // if (gf_group->rf_level[gf_group->index] != GF_ARF_STD) strength >>= 1;
+
+ // TODO(jingning): Skip temporal filtering for intermediate frames that will
+ // be used as show_existing_frame. Need to further explore the possibility
+ // of applying a certain filter.
+ if (gf_group->arf_src_offset[gf_group->index] <
+ cpi->rc.baseline_gf_interval - 1)
+ frames = 1;
*arnr_frames = frames;
*arnr_strength = strength;
@@ -800,8 +1136,7 @@ void vp9_temporal_filter(VP9_COMP *cpi, int distance) {
}
// Initialize errorperbit and sabperbit.
- rdmult = (int)vp9_compute_rd_mult_based_on_qindex(cpi, ARNR_FILT_QINDEX);
- if (rdmult < 1) rdmult = 1;
+ rdmult = vp9_compute_rd_mult_based_on_qindex(cpi, ARNR_FILT_QINDEX);
set_error_per_bit(&cpi->td.mb, rdmult);
vp9_initialize_me_consts(cpi, &cpi->td.mb, ARNR_FILT_QINDEX);
diff --git a/libvpx/vp9/encoder/vp9_temporal_filter.h b/libvpx/vp9/encoder/vp9_temporal_filter.h
index 775e49cc5..f5fa194d1 100644
--- a/libvpx/vp9/encoder/vp9_temporal_filter.h
+++ b/libvpx/vp9/encoder/vp9_temporal_filter.h
@@ -8,14 +8,29 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_ENCODER_VP9_TEMPORAL_FILTER_H_
-#define VP9_ENCODER_VP9_TEMPORAL_FILTER_H_
+#ifndef VPX_VP9_ENCODER_VP9_TEMPORAL_FILTER_H_
+#define VPX_VP9_ENCODER_VP9_TEMPORAL_FILTER_H_
#ifdef __cplusplus
extern "C" {
#endif
#define ARNR_FILT_QINDEX 128
+static const MV kZeroMv = { 0, 0 };
+
+// Block size used in temporal filtering
+#define TF_BLOCK BLOCK_32X32
+#define BH 32
+#define BH_LOG2 5
+#define BW 32
+#define BW_LOG2 5
+#define BLK_PELS 1024 // Pixels in the block
+#define TF_SHIFT 2
+#define TF_ROUND 3
+#define THR_SHIFT 2
+#define TF_SUB_BLOCK BLOCK_16X16
+#define SUB_BH 16
+#define SUB_BW 16
void vp9_temporal_filter_init(void);
void vp9_temporal_filter(VP9_COMP *cpi, int distance);
@@ -28,4 +43,4 @@ void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td,
} // extern "C"
#endif
-#endif // VP9_ENCODER_VP9_TEMPORAL_FILTER_H_
+#endif // VPX_VP9_ENCODER_VP9_TEMPORAL_FILTER_H_
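A quick consistency check of the new block-size macros (arithmetic only, no extra API):

/* BLK_PELS = BW * BH = 32 * 32 = 1024 pixels per plane block.
 * BW == 1 << BW_LOG2 and BH == 1 << BH_LOG2 (32 == 1 << 5), matching the
 * mb_cols/mb_rows computations (width + BW - 1) >> BW_LOG2.
 * TF_SHIFT = 2: one 32x32 filter block spans four 8x8 mi units, so
 * mb_col = mi_col >> TF_SHIFT, with TF_ROUND = 3 rounding the end up. */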
diff --git a/libvpx/vp9/encoder/vp9_tokenize.h b/libvpx/vp9/encoder/vp9_tokenize.h
index b2f63ffef..6407ff923 100644
--- a/libvpx/vp9/encoder/vp9_tokenize.h
+++ b/libvpx/vp9/encoder/vp9_tokenize.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_ENCODER_VP9_TOKENIZE_H_
-#define VP9_ENCODER_VP9_TOKENIZE_H_
+#ifndef VPX_VP9_ENCODER_VP9_TOKENIZE_H_
+#define VPX_VP9_ENCODER_VP9_TOKENIZE_H_
#include "vp9/common/vp9_entropy.h"
@@ -127,4 +127,4 @@ static INLINE int vp9_get_token_cost(int v, int16_t *token,
} // extern "C"
#endif
-#endif // VP9_ENCODER_VP9_TOKENIZE_H_
+#endif // VPX_VP9_ENCODER_VP9_TOKENIZE_H_
diff --git a/libvpx/vp9/encoder/vp9_treewriter.h b/libvpx/vp9/encoder/vp9_treewriter.h
index a8b9c2cd3..86c5fa224 100644
--- a/libvpx/vp9/encoder/vp9_treewriter.h
+++ b/libvpx/vp9/encoder/vp9_treewriter.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_ENCODER_VP9_TREEWRITER_H_
-#define VP9_ENCODER_VP9_TREEWRITER_H_
+#ifndef VPX_VP9_ENCODER_VP9_TREEWRITER_H_
+#define VPX_VP9_ENCODER_VP9_TREEWRITER_H_
#include "vpx_dsp/bitwriter.h"
@@ -48,4 +48,4 @@ static INLINE void vp9_write_token(vpx_writer *w, const vpx_tree_index *tree,
} // extern "C"
#endif
-#endif // VP9_ENCODER_VP9_TREEWRITER_H_
+#endif // VPX_VP9_ENCODER_VP9_TREEWRITER_H_
diff --git a/libvpx/vp9/encoder/x86/temporal_filter_sse4.c b/libvpx/vp9/encoder/x86/temporal_filter_sse4.c
index 460dab659..e5860d39c 100644
--- a/libvpx/vp9/encoder/x86/temporal_filter_sse4.c
+++ b/libvpx/vp9/encoder/x86/temporal_filter_sse4.c
@@ -241,7 +241,7 @@ void vp9_temporal_filter_apply_sse4_1(const uint8_t *a, unsigned int stride,
int weight, uint32_t *accumulator,
uint16_t *count) {
unsigned int h;
- const int rounding = strength > 0 ? 1 << (strength - 1) : 0;
+ const int rounding = (1 << strength) >> 1;
assert(strength >= 0);
assert(strength <= 6);
diff --git a/libvpx/vp9/encoder/x86/vp9_dct_intrin_sse2.c b/libvpx/vp9/encoder/x86/vp9_dct_intrin_sse2.c
index dbd243ac1..8426b9475 100644
--- a/libvpx/vp9/encoder/x86/vp9_dct_intrin_sse2.c
+++ b/libvpx/vp9/encoder/x86/vp9_dct_intrin_sse2.c
@@ -14,6 +14,7 @@
#include "./vp9_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/txfm_common.h"
+#include "vpx_dsp/x86/bitdepth_conversion_sse2.h"
#include "vpx_dsp/x86/fwd_txfm_sse2.h"
#include "vpx_dsp/x86/transpose_sse2.h"
#include "vpx_dsp/x86/txfm_common_sse2.h"
@@ -170,23 +171,23 @@ void vp9_fht4x4_sse2(const int16_t *input, tran_low_t *output, int stride,
fadst4_sse2(in);
write_buffer_4x4(output, in);
break;
- case ADST_ADST:
+ default:
+ assert(tx_type == ADST_ADST);
load_buffer_4x4(input, in, stride);
fadst4_sse2(in);
fadst4_sse2(in);
write_buffer_4x4(output, in);
break;
- default: assert(0); break;
}
}
void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride,
- int16_t *coeff_ptr, intptr_t n_coeffs,
+ tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t *round_ptr,
- const int16_t *quant_ptr, int16_t *qcoeff_ptr,
- int16_t *dqcoeff_ptr, const int16_t *dequant_ptr,
- uint16_t *eob_ptr, const int16_t *scan_ptr,
- const int16_t *iscan_ptr) {
+ const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
+ uint16_t *eob_ptr, const int16_t *scan,
+ const int16_t *iscan) {
__m128i zero;
int pass;
@@ -215,7 +216,7 @@ void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride,
__m128i *in[8];
int index = 0;
- (void)scan_ptr;
+ (void)scan;
(void)coeff_ptr;
// Pre-condition input (shift by two)
@@ -449,7 +450,7 @@ void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride,
in7 = _mm_srai_epi16(in7, 1);
}
- iscan_ptr += n_coeffs;
+ iscan += n_coeffs;
qcoeff_ptr += n_coeffs;
dqcoeff_ptr += n_coeffs;
n_coeffs = -n_coeffs;
@@ -497,15 +498,15 @@ void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride,
qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
- _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), qcoeff0);
- _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
+ store_tran_low(qcoeff0, qcoeff_ptr + n_coeffs);
+ store_tran_low(qcoeff1, qcoeff_ptr + n_coeffs + 8);
coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
dequant = _mm_unpackhi_epi64(dequant, dequant);
coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
- _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), coeff0);
- _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
+ store_tran_low(coeff0, dqcoeff_ptr + n_coeffs);
+ store_tran_low(coeff1, dqcoeff_ptr + n_coeffs + 8);
}
{
@@ -518,8 +519,8 @@ void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride,
zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
- iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
- iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
+ iscan0 = _mm_load_si128((const __m128i *)(iscan + n_coeffs));
+ iscan1 = _mm_load_si128((const __m128i *)(iscan + n_coeffs) + 1);
// Add one to convert from indices to counts
iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
@@ -562,14 +563,14 @@ void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride,
qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign);
qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign);
- _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), qcoeff0);
- _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, qcoeff1);
+ store_tran_low(qcoeff0, qcoeff_ptr + n_coeffs);
+ store_tran_low(qcoeff1, qcoeff_ptr + n_coeffs + 8);
coeff0 = _mm_mullo_epi16(qcoeff0, dequant);
coeff1 = _mm_mullo_epi16(qcoeff1, dequant);
- _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), coeff0);
- _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, coeff1);
+ store_tran_low(coeff0, dqcoeff_ptr + n_coeffs);
+ store_tran_low(coeff1, dqcoeff_ptr + n_coeffs + 8);
}
{
@@ -582,8 +583,8 @@ void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride,
zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
- iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
- iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
+ iscan0 = _mm_load_si128((const __m128i *)(iscan + n_coeffs));
+ iscan1 = _mm_load_si128((const __m128i *)(iscan + n_coeffs) + 1);
// Add one to convert from indices to counts
iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
@@ -609,10 +610,10 @@ void vp9_fdct8x8_quant_sse2(const int16_t *input, int stride,
}
} else {
do {
- _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs), zero);
- _mm_store_si128((__m128i *)(dqcoeff_ptr + n_coeffs) + 1, zero);
- _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs), zero);
- _mm_store_si128((__m128i *)(qcoeff_ptr + n_coeffs) + 1, zero);
+ store_tran_low(zero, qcoeff_ptr + n_coeffs);
+ store_tran_low(zero, qcoeff_ptr + n_coeffs + 8);
+ store_tran_low(zero, dqcoeff_ptr + n_coeffs);
+ store_tran_low(zero, dqcoeff_ptr + n_coeffs + 8);
n_coeffs += 8 * 2;
} while (n_coeffs < 0);
*eob_ptr = 0;
@@ -1097,14 +1098,14 @@ void vp9_fht8x8_sse2(const int16_t *input, tran_low_t *output, int stride,
right_shift_8x8(in, 1);
write_buffer_8x8(output, in, 8);
break;
- case ADST_ADST:
+ default:
+ assert(tx_type == ADST_ADST);
load_buffer_8x8(input, in, stride);
fadst8_sse2(in);
fadst8_sse2(in);
right_shift_8x8(in, 1);
write_buffer_8x8(output, in, 8);
break;
- default: assert(0); break;
}
}
@@ -1963,13 +1964,13 @@ void vp9_fht16x16_sse2(const int16_t *input, tran_low_t *output, int stride,
fadst16_sse2(in0, in1);
write_buffer_16x16(output, in0, in1, 16);
break;
- case ADST_ADST:
+ default:
+ assert(tx_type == ADST_ADST);
load_buffer_16x16(input, in0, in1, stride);
fadst16_sse2(in0, in1);
right_shift_16x16(in0, in1);
fadst16_sse2(in0, in1);
write_buffer_16x16(output, in0, in1, 16);
break;
- default: assert(0); break;
}
}
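
Two patterns recur throughout this file. First, coefficient buffers change from int16_t to tran_low_t (32-bit in CONFIG_VP9_HIGHBITDEPTH builds, 16-bit otherwise), so the raw _mm_store_si128() calls become store_tran_low() from the newly included bitdepth_conversion_sse2.h, letting one intrinsic path serve both coefficient widths. Second, the switch (tx_type) blocks fold the final case into default: plus an assert(), which keeps release builds well-defined for every enum value while still trapping invalid types in debug builds. A rough, standalone sketch of what a widening store has to do (an assumption about the helper's shape; the real one lives in vpx_dsp/x86/bitdepth_conversion_sse2.h and may differ in detail):

```c
#include <emmintrin.h>
#include <stdint.h>

typedef int32_t tran_low_t; /* sketch assumption: highbitdepth build */

/* Store 8 packed int16 coefficients into a 32-bit tran_low_t buffer by
 * sign-extending each lane into two 128-bit stores. */
static void store_tran_low_sketch(__m128i a, tran_low_t *b) {
  const __m128i sign = _mm_srai_epi16(a, 15);      /* per-lane sign mask  */
  const __m128i lo = _mm_unpacklo_epi16(a, sign);  /* lanes 0..3 -> int32 */
  const __m128i hi = _mm_unpackhi_epi16(a, sign);  /* lanes 4..7 -> int32 */
  _mm_store_si128((__m128i *)b, lo);
  _mm_store_si128((__m128i *)(b + 4), hi);
}
```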
diff --git a/libvpx/vp9/encoder/x86/vp9_dct_ssse3.c b/libvpx/vp9/encoder/x86/vp9_dct_ssse3.c
index bf874a09e..99c193894 100644
--- a/libvpx/vp9/encoder/x86/vp9_dct_ssse3.c
+++ b/libvpx/vp9/encoder/x86/vp9_dct_ssse3.c
@@ -18,11 +18,13 @@
#include "vpx_dsp/x86/inv_txfm_sse2.h"
#include "vpx_dsp/x86/txfm_common_sse2.h"
-void vp9_fdct8x8_quant_ssse3(
- const int16_t *input, int stride, tran_low_t *coeff_ptr, intptr_t n_coeffs,
- int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr,
- tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
- uint16_t *eob_ptr, const int16_t *scan_ptr, const int16_t *iscan_ptr) {
+void vp9_fdct8x8_quant_ssse3(const int16_t *input, int stride,
+ tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ int skip_block, const int16_t *round_ptr,
+ const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr,
+ const int16_t *dequant_ptr, uint16_t *eob_ptr,
+ const int16_t *scan, const int16_t *iscan) {
__m128i zero;
int pass;
@@ -52,7 +54,7 @@ void vp9_fdct8x8_quant_ssse3(
__m128i *in[8];
int index = 0;
- (void)scan_ptr;
+ (void)scan;
(void)coeff_ptr;
// Pre-condition input (shift by two)
@@ -280,7 +282,7 @@ void vp9_fdct8x8_quant_ssse3(
in7 = _mm_srai_epi16(in7, 1);
}
- iscan_ptr += n_coeffs;
+ iscan += n_coeffs;
qcoeff_ptr += n_coeffs;
dqcoeff_ptr += n_coeffs;
n_coeffs = -n_coeffs;
@@ -350,8 +352,8 @@ void vp9_fdct8x8_quant_ssse3(
zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
- iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
- iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
+ iscan0 = _mm_load_si128((const __m128i *)(iscan + n_coeffs));
+ iscan1 = _mm_load_si128((const __m128i *)(iscan + n_coeffs) + 1);
// Add one to convert from indices to counts
iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
@@ -427,8 +429,8 @@ void vp9_fdct8x8_quant_ssse3(
zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
- iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
- iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
+ iscan0 = _mm_load_si128((const __m128i *)(iscan + n_coeffs));
+ iscan1 = _mm_load_si128((const __m128i *)(iscan + n_coeffs) + 1);
// Add one to convert from indices to counts
iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
diff --git a/libvpx/vp9/encoder/x86/vp9_highbd_block_error_intrin_sse2.c b/libvpx/vp9/encoder/x86/vp9_highbd_block_error_intrin_sse2.c
index 91f627c34..af04b6c52 100644
--- a/libvpx/vp9/encoder/x86/vp9_highbd_block_error_intrin_sse2.c
+++ b/libvpx/vp9/encoder/x86/vp9_highbd_block_error_intrin_sse2.c
@@ -11,24 +11,25 @@
#include <emmintrin.h>
#include <stdio.h>
+#include "./vp9_rtcd.h"
#include "vp9/common/vp9_common.h"
-int64_t vp9_highbd_block_error_sse2(tran_low_t *coeff, tran_low_t *dqcoeff,
- intptr_t block_size, int64_t *ssz,
- int bps) {
+int64_t vp9_highbd_block_error_sse2(const tran_low_t *coeff,
+ const tran_low_t *dqcoeff,
+ intptr_t block_size, int64_t *ssz, int bd) {
int i, j, test;
uint32_t temp[4];
__m128i max, min, cmp0, cmp1, cmp2, cmp3;
int64_t error = 0, sqcoeff = 0;
- const int shift = 2 * (bps - 8);
+ const int shift = 2 * (bd - 8);
const int rounding = shift > 0 ? 1 << (shift - 1) : 0;
for (i = 0; i < block_size; i += 8) {
// Load the data into xmm registers
- __m128i mm_coeff = _mm_load_si128((__m128i *)(coeff + i));
- __m128i mm_coeff2 = _mm_load_si128((__m128i *)(coeff + i + 4));
- __m128i mm_dqcoeff = _mm_load_si128((__m128i *)(dqcoeff + i));
- __m128i mm_dqcoeff2 = _mm_load_si128((__m128i *)(dqcoeff + i + 4));
+ __m128i mm_coeff = _mm_load_si128((const __m128i *)(coeff + i));
+ __m128i mm_coeff2 = _mm_load_si128((const __m128i *)(coeff + i + 4));
+ __m128i mm_dqcoeff = _mm_load_si128((const __m128i *)(dqcoeff + i));
+ __m128i mm_dqcoeff2 = _mm_load_si128((const __m128i *)(dqcoeff + i + 4));
// Check if any values require more than 15 bit
max = _mm_set1_epi32(0x3fff);
min = _mm_set1_epi32(0xffffc000);
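
Beyond the const-qualified pointers and the bps -> bd rename, the arithmetic here is worth spelling out: squared errors are accumulated at full precision and then scaled back by 2 * (bd - 8) bits with round-to-nearest, so 10- and 12-bit inputs report errors on the 8-bit scale. A scalar reference of that computation (a sketch; the SSE2 loop above processes 8 coefficients per iteration, but the shift and rounding terms are exactly the ones shown in the diff):

```c
#include <stdint.h>

typedef int32_t tran_low_t; /* this file is only built for highbitdepth */

static int64_t highbd_block_error_ref(const tran_low_t *coeff,
                                      const tran_low_t *dqcoeff,
                                      intptr_t block_size, int64_t *ssz,
                                      int bd) {
  const int shift = 2 * (bd - 8);
  const int rounding = shift > 0 ? 1 << (shift - 1) : 0;
  int64_t error = 0, sqcoeff = 0;
  intptr_t i;
  for (i = 0; i < block_size; ++i) {
    const int64_t diff = coeff[i] - dqcoeff[i];
    error += diff * diff;                       /* sum of squared errors   */
    sqcoeff += (int64_t)coeff[i] * coeff[i];    /* sum of squared coeffs   */
  }
  *ssz = (sqcoeff + rounding) >> shift;         /* round-to-nearest rescale */
  return (error + rounding) >> shift;
}
```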
diff --git a/libvpx/vp9/encoder/x86/vp9_quantize_avx2.c b/libvpx/vp9/encoder/x86/vp9_quantize_avx2.c
new file mode 100644
index 000000000..8dfdbd50f
--- /dev/null
+++ b/libvpx/vp9/encoder/x86/vp9_quantize_avx2.c
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2017 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+#include <immintrin.h> // AVX2
+
+#include "./vp9_rtcd.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_dsp/vpx_dsp_common.h"
+#include "vpx_dsp/x86/bitdepth_conversion_avx2.h"
+#include "vpx_dsp/x86/quantize_sse2.h"
+
+// Zero fill 8 positions in the output buffer.
+static INLINE void store_zero_tran_low(tran_low_t *a) {
+ const __m256i zero = _mm256_setzero_si256();
+#if CONFIG_VP9_HIGHBITDEPTH
+ _mm256_storeu_si256((__m256i *)(a), zero);
+ _mm256_storeu_si256((__m256i *)(a + 8), zero);
+#else
+ _mm256_storeu_si256((__m256i *)(a), zero);
+#endif
+}
+
+static INLINE __m256i scan_eob_256(const __m256i *iscan_ptr,
+ __m256i *coeff256) {
+ const __m256i iscan = _mm256_loadu_si256(iscan_ptr);
+ const __m256i zero256 = _mm256_setzero_si256();
+#if CONFIG_VP9_HIGHBITDEPTH
+ // The _mm256_packs_epi32() in load_tran_low() packs the 64 bit coeff as
+ // B1 A1 B0 A0. Shuffle to B1 B0 A1 A0 in order to scan eob correctly.
+ const __m256i _coeff256 = _mm256_permute4x64_epi64(*coeff256, 0xd8);
+ const __m256i zero_coeff0 = _mm256_cmpeq_epi16(_coeff256, zero256);
+#else
+ const __m256i zero_coeff0 = _mm256_cmpeq_epi16(*coeff256, zero256);
+#endif
+ const __m256i nzero_coeff0 = _mm256_cmpeq_epi16(zero_coeff0, zero256);
+ // Add one to convert from indices to counts
+ const __m256i iscan_plus_one = _mm256_sub_epi16(iscan, nzero_coeff0);
+ return _mm256_and_si256(iscan_plus_one, nzero_coeff0);
+}
+
+void vp9_quantize_fp_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
+ int skip_block, const int16_t *round_ptr,
+ const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
+ uint16_t *eob_ptr, const int16_t *scan,
+ const int16_t *iscan) {
+ __m128i eob;
+ __m256i round256, quant256, dequant256;
+ __m256i eob256, thr256;
+
+ (void)scan;
+ (void)skip_block;
+ assert(!skip_block);
+
+ coeff_ptr += n_coeffs;
+ iscan += n_coeffs;
+ qcoeff_ptr += n_coeffs;
+ dqcoeff_ptr += n_coeffs;
+ n_coeffs = -n_coeffs;
+
+ {
+ __m256i coeff256;
+
+ // Setup global values
+ {
+ const __m128i round = _mm_load_si128((const __m128i *)round_ptr);
+ const __m128i quant = _mm_load_si128((const __m128i *)quant_ptr);
+ const __m128i dequant = _mm_load_si128((const __m128i *)dequant_ptr);
+ round256 = _mm256_castsi128_si256(round);
+ round256 = _mm256_permute4x64_epi64(round256, 0x54);
+
+ quant256 = _mm256_castsi128_si256(quant);
+ quant256 = _mm256_permute4x64_epi64(quant256, 0x54);
+
+ dequant256 = _mm256_castsi128_si256(dequant);
+ dequant256 = _mm256_permute4x64_epi64(dequant256, 0x54);
+ }
+
+ {
+ __m256i qcoeff256;
+ __m256i qtmp256;
+ coeff256 = load_tran_low(coeff_ptr + n_coeffs);
+ qcoeff256 = _mm256_abs_epi16(coeff256);
+ qcoeff256 = _mm256_adds_epi16(qcoeff256, round256);
+ qtmp256 = _mm256_mulhi_epi16(qcoeff256, quant256);
+ qcoeff256 = _mm256_sign_epi16(qtmp256, coeff256);
+ store_tran_low(qcoeff256, qcoeff_ptr + n_coeffs);
+ coeff256 = _mm256_mullo_epi16(qcoeff256, dequant256);
+ store_tran_low(coeff256, dqcoeff_ptr + n_coeffs);
+ }
+
+ eob256 = scan_eob_256((const __m256i *)(iscan + n_coeffs), &coeff256);
+ n_coeffs += 8 * 2;
+ }
+
+ // remove dc constants
+ dequant256 = _mm256_permute2x128_si256(dequant256, dequant256, 0x31);
+ quant256 = _mm256_permute2x128_si256(quant256, quant256, 0x31);
+ round256 = _mm256_permute2x128_si256(round256, round256, 0x31);
+
+ thr256 = _mm256_srai_epi16(dequant256, 1);
+
+ // AC only loop
+ while (n_coeffs < 0) {
+ __m256i coeff256 = load_tran_low(coeff_ptr + n_coeffs);
+ __m256i qcoeff256 = _mm256_abs_epi16(coeff256);
+ int32_t nzflag =
+ _mm256_movemask_epi8(_mm256_cmpgt_epi16(qcoeff256, thr256));
+
+ if (nzflag) {
+ __m256i qtmp256;
+ qcoeff256 = _mm256_adds_epi16(qcoeff256, round256);
+ qtmp256 = _mm256_mulhi_epi16(qcoeff256, quant256);
+ qcoeff256 = _mm256_sign_epi16(qtmp256, coeff256);
+ store_tran_low(qcoeff256, qcoeff_ptr + n_coeffs);
+ coeff256 = _mm256_mullo_epi16(qcoeff256, dequant256);
+ store_tran_low(coeff256, dqcoeff_ptr + n_coeffs);
+ eob256 = _mm256_max_epi16(
+ eob256, scan_eob_256((const __m256i *)(iscan + n_coeffs), &coeff256));
+ } else {
+ store_zero_tran_low(qcoeff_ptr + n_coeffs);
+ store_zero_tran_low(dqcoeff_ptr + n_coeffs);
+ }
+ n_coeffs += 8 * 2;
+ }
+
+ eob = _mm_max_epi16(_mm256_castsi256_si128(eob256),
+ _mm256_extracti128_si256(eob256, 1));
+
+ *eob_ptr = accumulate_eob(eob);
+}
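
The AVX2 quantizer mirrors the SSE2 structure: the first group is processed with the unpermuted round/quant/dequant vectors (whose leading lane holds the DC entry), the vectors are then rotated to their AC values ("remove dc constants"), and every subsequent 16-coefficient group is first tested against thr256 = dequant >> 1 so groups that would quantize entirely to zero take the store-only path. scan_eob_256() turns iscan indices into counts by subtracting the nonzero-lane mask (i.e. adding one) and masking out zero lanes; the final eob is a max-reduction across lanes. Per coefficient, the "fp" quantizer computes roughly the following (scalar sketch only; the vector code applies the threshold per group of 16 and uses saturating adds):

```c
#include <stdint.h>
#include <stdlib.h>

typedef int32_t tran_low_t; /* sketch assumption: highbitdepth build */

static void quantize_fp_coeff(int16_t coeff, int16_t round, int16_t quant,
                              int16_t dequant, tran_low_t *qcoeff,
                              tran_low_t *dqcoeff) {
  const int abs_coeff = abs(coeff);
  if (abs_coeff <= (dequant >> 1)) {  /* the thr256 early-out, per lane   */
    *qcoeff = 0;
    *dqcoeff = 0;
  } else {
    /* _mm256_adds_epi16 saturates; a plain add suffices for this sketch. */
    const int tmp = ((abs_coeff + round) * quant) >> 16; /* mulhi_epi16   */
    *qcoeff = coeff < 0 ? -tmp : tmp;                    /* sign_epi16    */
    *dqcoeff = *qcoeff * dequant;                        /* mullo_epi16   */
  }
}
```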
diff --git a/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c b/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c
index ca0ad4407..885220a71 100644
--- a/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c
+++ b/libvpx/vp9/encoder/x86/vp9_quantize_sse2.c
@@ -21,20 +21,20 @@ void vp9_quantize_fp_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
int skip_block, const int16_t *round_ptr,
const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
- uint16_t *eob_ptr, const int16_t *scan_ptr,
- const int16_t *iscan_ptr) {
+ uint16_t *eob_ptr, const int16_t *scan,
+ const int16_t *iscan) {
__m128i zero;
__m128i thr;
int16_t nzflag;
__m128i eob;
__m128i round, quant, dequant;
- (void)scan_ptr;
+ (void)scan;
(void)skip_block;
assert(!skip_block);
coeff_ptr += n_coeffs;
- iscan_ptr += n_coeffs;
+ iscan += n_coeffs;
qcoeff_ptr += n_coeffs;
dqcoeff_ptr += n_coeffs;
n_coeffs = -n_coeffs;
@@ -100,8 +100,8 @@ void vp9_quantize_fp_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
- iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
- iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
+ iscan0 = _mm_load_si128((const __m128i *)(iscan + n_coeffs));
+ iscan1 = _mm_load_si128((const __m128i *)(iscan + n_coeffs) + 1);
// Add one to convert from indices to counts
iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
@@ -175,8 +175,8 @@ void vp9_quantize_fp_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero);
nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero);
nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero);
- iscan0 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs));
- iscan1 = _mm_load_si128((const __m128i *)(iscan_ptr + n_coeffs) + 1);
+ iscan0 = _mm_load_si128((const __m128i *)(iscan + n_coeffs));
+ iscan1 = _mm_load_si128((const __m128i *)(iscan + n_coeffs) + 1);
// Add one to convert from indices to counts
iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0);
iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1);
diff --git a/libvpx/vp9/vp9_common.mk b/libvpx/vp9/vp9_common.mk
index 5bfc0d359..7ca4004b0 100644
--- a/libvpx/vp9/vp9_common.mk
+++ b/libvpx/vp9/vp9_common.mk
@@ -63,30 +63,33 @@ VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_postproc.h
VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_postproc.c
VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_mfqe.h
VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_mfqe.c
-ifeq ($(CONFIG_VP9_POSTPROC),yes)
-VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_mfqe_sse2.asm
-endif
-
-ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
-VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_itrans4_dspr2.c
-VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_itrans8_dspr2.c
-VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_itrans16_dspr2.c
-endif
-# common (msa)
-VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct4x4_msa.c
-VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct8x8_msa.c
-VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct16x16_msa.c
+VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct4x4_msa.c
+VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct8x8_msa.c
+VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct16x16_msa.c
+VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.c
+VP9_COMMON_SRCS-$(HAVE_VSX) += common/ppc/vp9_idct_vsx.c
+VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht4x4_add_neon.c
+VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht8x8_add_neon.c
+VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht16x16_add_neon.c
+VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht_neon.h
ifeq ($(CONFIG_VP9_POSTPROC),yes)
-VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_mfqe_msa.c
+VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_mfqe_msa.c
+VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_mfqe_sse2.asm
endif
-VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.c
-
ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht4x4_add_neon.c
-VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht8x8_add_neon.c
+VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_itrans4_dspr2.c
+VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_itrans8_dspr2.c
+VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_itrans16_dspr2.c
+else
+VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_highbd_iht4x4_add_neon.c
+VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_highbd_iht8x8_add_neon.c
+VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_highbd_iht16x16_add_neon.c
+VP9_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/vp9_highbd_iht4x4_add_sse4.c
+VP9_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/vp9_highbd_iht8x8_add_sse4.c
+VP9_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/vp9_highbd_iht16x16_add_sse4.c
endif
$(eval $(call rtcd_h_template,vp9_rtcd,vp9/common/vp9_rtcd_defs.pl))
diff --git a/libvpx/vp9/vp9_cx_iface.c b/libvpx/vp9/vp9_cx_iface.c
index 881caae78..85f83a662 100644
--- a/libvpx/vp9/vp9_cx_iface.c
+++ b/libvpx/vp9/vp9_cx_iface.c
@@ -30,6 +30,7 @@ struct vp9_extracfg {
unsigned int static_thresh;
unsigned int tile_columns;
unsigned int tile_rows;
+ unsigned int enable_tpl_model;
unsigned int arnr_max_frames;
unsigned int arnr_strength;
unsigned int min_gf_interval;
@@ -63,6 +64,7 @@ static struct vp9_extracfg default_extra_cfg = {
0, // static_thresh
6, // tile_columns
0, // tile_rows
+ 1, // enable_tpl_model
7, // arnr_max_frames
5, // arnr_strength
0, // min_gf_interval; 0 -> default decision
@@ -237,22 +239,6 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
ERROR("ts_rate_decimator factors are not powers of 2");
}
-#if CONFIG_SPATIAL_SVC
-
- if ((cfg->ss_number_layers > 1 || cfg->ts_number_layers > 1) &&
- cfg->g_pass == VPX_RC_LAST_PASS) {
- unsigned int i, alt_ref_sum = 0;
- for (i = 0; i < cfg->ss_number_layers; ++i) {
- if (cfg->ss_enable_auto_alt_ref[i]) ++alt_ref_sum;
- }
- if (alt_ref_sum > REF_FRAMES - cfg->ss_number_layers)
- ERROR("Not enough ref buffers for svc alt ref frames");
- if (cfg->ss_number_layers * cfg->ts_number_layers > 3 &&
- cfg->g_error_resilient == 0)
- ERROR("Multiple frame context are not supported for more than 3 layers");
- }
-#endif
-
// VP9 does not support a lower bound on the keyframe interval in
// automatic keyframe placement mode.
if (cfg->kf_mode != VPX_KF_DISABLED && cfg->kf_min_dist != cfg->kf_max_dist &&
@@ -263,8 +249,8 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx,
RANGE_CHECK(extra_cfg, row_mt, 0, 1);
RANGE_CHECK(extra_cfg, motion_vector_unit_test, 0, 2);
- RANGE_CHECK(extra_cfg, enable_auto_alt_ref, 0, 2);
- RANGE_CHECK(extra_cfg, cpu_used, -8, 8);
+ RANGE_CHECK(extra_cfg, enable_auto_alt_ref, 0, MAX_ARF_LAYERS);
+ RANGE_CHECK(extra_cfg, cpu_used, -9, 9);
RANGE_CHECK_HI(extra_cfg, noise_sensitivity, 6);
RANGE_CHECK(extra_cfg, tile_columns, 0, 6);
RANGE_CHECK(extra_cfg, tile_rows, 0, 2);
@@ -560,6 +546,8 @@ static vpx_codec_err_t set_encoder_config(
oxcf->tile_columns = extra_cfg->tile_columns;
+ oxcf->enable_tpl_model = extra_cfg->enable_tpl_model;
+
// TODO(yunqing): The dependencies between row tiles cause error in multi-
// threaded encoding. For now, tile_rows is forced to be 0 in this case.
// The further fix can be done by adding synchronizations after a tile row
@@ -589,9 +577,6 @@ static vpx_codec_err_t set_encoder_config(
oxcf->motion_vector_unit_test = extra_cfg->motion_vector_unit_test;
for (sl = 0; sl < oxcf->ss_number_layers; ++sl) {
-#if CONFIG_SPATIAL_SVC
- oxcf->ss_enable_auto_arf[sl] = cfg->ss_enable_auto_alt_ref[sl];
-#endif
for (tl = 0; tl < oxcf->ts_number_layers; ++tl) {
oxcf->layer_target_bitrate[sl * oxcf->ts_number_layers + tl] =
1000 * cfg->layer_target_bitrate[sl * oxcf->ts_number_layers + tl];
@@ -599,9 +584,6 @@ static vpx_codec_err_t set_encoder_config(
}
if (oxcf->ss_number_layers == 1 && oxcf->pass != 0) {
oxcf->ss_target_bitrate[0] = (int)oxcf->target_bandwidth;
-#if CONFIG_SPATIAL_SVC
- oxcf->ss_enable_auto_arf[0] = extra_cfg->enable_auto_alt_ref;
-#endif
}
if (oxcf->ts_number_layers > 1) {
for (tl = 0; tl < VPX_TS_MAX_LAYERS; ++tl) {
@@ -762,6 +744,13 @@ static vpx_codec_err_t ctrl_set_tile_rows(vpx_codec_alg_priv_t *ctx,
return update_extra_cfg(ctx, &extra_cfg);
}
+static vpx_codec_err_t ctrl_set_tpl_model(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ struct vp9_extracfg extra_cfg = ctx->extra_cfg;
+ extra_cfg.enable_tpl_model = CAST(VP9E_SET_TPL, args);
+ return update_extra_cfg(ctx, &extra_cfg);
+}
+
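The new TPL control follows the standard extra-config pattern: copy ctx->extra_cfg, set the field, and let update_extra_cfg() validate and apply it; the default above leaves the model enabled. Assuming the usual vpx_codec_control() entry point, toggling it from application code would look roughly like this (a usage sketch, not part of this change):

```c
#include "vpx/vp8cx.h"
#include "vpx/vpx_encoder.h"

/* Hedged usage sketch: disable the temporal-dependency model on an
 * initialized encoder instance. VP9E_SET_TPL is the control added here. */
static int disable_tpl(vpx_codec_ctx_t *codec) {
  return vpx_codec_control(codec, VP9E_SET_TPL, 0) == VPX_CODEC_OK;
}
```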
static vpx_codec_err_t ctrl_set_arnr_max_frames(vpx_codec_alg_priv_t *ctx,
va_list args) {
struct vp9_extracfg extra_cfg = ctx->extra_cfg;
@@ -1067,12 +1056,11 @@ static vpx_codec_frame_flags_t get_frame_pkt_flags(const VP9_COMP *cpi,
vpx_codec_frame_flags_t flags = lib_flags << 16;
if (lib_flags & FRAMEFLAGS_KEY ||
- (cpi->use_svc &&
- cpi->svc
- .layer_context[cpi->svc.spatial_layer_id *
- cpi->svc.number_temporal_layers +
- cpi->svc.temporal_layer_id]
- .is_key_frame))
+ (cpi->use_svc && cpi->svc
+ .layer_context[cpi->svc.spatial_layer_id *
+ cpi->svc.number_temporal_layers +
+ cpi->svc.temporal_layer_id]
+ .is_key_frame))
flags |= VPX_FRAME_IS_KEY;
if (cpi->droppable) flags |= VPX_FRAME_IS_DROPPABLE;
@@ -1097,23 +1085,11 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
if (cpi->oxcf.pass == 2 && cpi->level_constraint.level_index >= 0 &&
!cpi->level_constraint.rc_config_updated) {
- SVC *const svc = &cpi->svc;
- const int is_two_pass_svc =
- (svc->number_spatial_layers > 1) || (svc->number_temporal_layers > 1);
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
TWO_PASS *const twopass = &cpi->twopass;
FIRSTPASS_STATS *stats = &twopass->total_stats;
- if (is_two_pass_svc) {
- const double frame_rate = 10000000.0 * stats->count / stats->duration;
- vp9_update_spatial_layer_framerate(cpi, frame_rate);
- twopass->bits_left =
- (int64_t)(stats->duration *
- svc->layer_context[svc->spatial_layer_id].target_bandwidth /
- 10000000.0);
- } else {
- twopass->bits_left =
- (int64_t)(stats->duration * oxcf->target_bandwidth / 10000000.0);
- }
+ twopass->bits_left =
+ (int64_t)(stats->duration * oxcf->target_bandwidth / 10000000.0);
cpi->level_constraint.rc_config_updated = 1;
}
@@ -1123,7 +1099,7 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
// There's no codec control for multiple alt-refs so check the encoder
// instance for its status to determine the compressed data size.
data_sz = ctx->cfg.g_w * ctx->cfg.g_h * get_image_bps(img) / 8 *
- (cpi->multi_arf_allowed ? 8 : 2);
+ (cpi->multi_layer_arf ? 8 : 2);
if (data_sz < kMinCompressedSize) data_sz = kMinCompressedSize;
if (ctx->cx_data == NULL || ctx->cx_data_sz < data_sz) {
ctx->cx_data_sz = data_sz;
@@ -1174,6 +1150,9 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
size_t size, cx_data_sz;
unsigned char *cx_data;
+ cpi->svc.timebase_fac = timebase_units_to_ticks(timebase, 1);
+ cpi->svc.time_stamp_superframe = dst_time_stamp;
+
// Set up internal flags
if (ctx->base.init_flags & VPX_CODEC_USE_PSNR) cpi->b_calculate_psnr = 1;
@@ -1213,27 +1192,23 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
-1 != vp9_get_compressed_data(cpi, &lib_flags, &size, cx_data,
&dst_time_stamp, &dst_end_time_stamp,
!img)) {
- if (size) {
+ if (size || (cpi->use_svc && cpi->svc.skip_enhancement_layer)) {
vpx_codec_cx_pkt_t pkt;
-#if CONFIG_SPATIAL_SVC
- if (cpi->use_svc)
- cpi->svc
- .layer_context[cpi->svc.spatial_layer_id *
- cpi->svc.number_temporal_layers]
- .layer_size += size;
-#endif
-
// Pack invisible frames with the next visible frame
if (!cpi->common.show_frame ||
(cpi->use_svc &&
cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1)) {
if (ctx->pending_cx_data == 0) ctx->pending_cx_data = cx_data;
ctx->pending_cx_data_sz += size;
- ctx->pending_frame_sizes[ctx->pending_frame_count++] = size;
+ if (size) ctx->pending_frame_sizes[ctx->pending_frame_count++] = size;
ctx->pending_frame_magnitude |= size;
cx_data += size;
cx_data_sz -= size;
+ pkt.data.frame.width[cpi->svc.spatial_layer_id] = cpi->common.width;
+ pkt.data.frame.height[cpi->svc.spatial_layer_id] = cpi->common.height;
+ pkt.data.frame.spatial_layer_encoded[cpi->svc.spatial_layer_id] =
+ 1 - cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id];
if (ctx->output_cx_pkt_cb.output_cx_pkt) {
pkt.kind = VPX_CODEC_CX_FRAME_PKT;
@@ -1260,9 +1235,13 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
pkt.data.frame.duration = (unsigned long)ticks_to_timebase_units(
timebase, dst_end_time_stamp - dst_time_stamp);
pkt.data.frame.flags = get_frame_pkt_flags(cpi, lib_flags);
+ pkt.data.frame.width[cpi->svc.spatial_layer_id] = cpi->common.width;
+ pkt.data.frame.height[cpi->svc.spatial_layer_id] = cpi->common.height;
+ pkt.data.frame.spatial_layer_encoded[cpi->svc.spatial_layer_id] =
+ 1 - cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id];
if (ctx->pending_cx_data) {
- ctx->pending_frame_sizes[ctx->pending_frame_count++] = size;
+ if (size) ctx->pending_frame_sizes[ctx->pending_frame_count++] = size;
ctx->pending_frame_magnitude |= size;
ctx->pending_cx_data_sz += size;
// write the superframe only for the case when
@@ -1288,27 +1267,6 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx,
cx_data += size;
cx_data_sz -= size;
-#if CONFIG_SPATIAL_SVC && defined(VPX_TEST_SPATIAL_SVC)
- if (cpi->use_svc && !ctx->output_cx_pkt_cb.output_cx_pkt) {
- vpx_codec_cx_pkt_t pkt_sizes, pkt_psnr;
- int sl;
- vp9_zero(pkt_sizes);
- vp9_zero(pkt_psnr);
- pkt_sizes.kind = VPX_CODEC_SPATIAL_SVC_LAYER_SIZES;
- pkt_psnr.kind = VPX_CODEC_SPATIAL_SVC_LAYER_PSNR;
- for (sl = 0; sl < cpi->svc.number_spatial_layers; ++sl) {
- LAYER_CONTEXT *lc =
- &cpi->svc.layer_context[sl * cpi->svc.number_temporal_layers];
- pkt_sizes.data.layer_sizes[sl] = lc->layer_size;
- pkt_psnr.data.layer_psnr[sl] = lc->psnr_pkt;
- lc->layer_size = 0;
- }
-
- vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt_sizes);
-
- vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt_psnr);
- }
-#endif
if (is_one_pass_cbr_svc(cpi) &&
(cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)) {
// Encoded all spatial layers; exit loop.
@@ -1338,9 +1296,8 @@ static vpx_codec_err_t ctrl_set_reference(vpx_codec_alg_priv_t *ctx,
vp9_set_reference_enc(ctx->cpi, ref_frame_to_vp9_reframe(frame->frame_type),
&sd);
return VPX_CODEC_OK;
- } else {
- return VPX_CODEC_INVALID_PARAM;
}
+ return VPX_CODEC_INVALID_PARAM;
}
static vpx_codec_err_t ctrl_copy_reference(vpx_codec_alg_priv_t *ctx,
@@ -1354,9 +1311,8 @@ static vpx_codec_err_t ctrl_copy_reference(vpx_codec_alg_priv_t *ctx,
vp9_copy_reference_enc(ctx->cpi,
ref_frame_to_vp9_reframe(frame->frame_type), &sd);
return VPX_CODEC_OK;
- } else {
- return VPX_CODEC_INVALID_PARAM;
}
+ return VPX_CODEC_INVALID_PARAM;
}
static vpx_codec_err_t ctrl_get_reference(vpx_codec_alg_priv_t *ctx,
@@ -1364,14 +1320,13 @@ static vpx_codec_err_t ctrl_get_reference(vpx_codec_alg_priv_t *ctx,
vp9_ref_frame_t *const frame = va_arg(args, vp9_ref_frame_t *);
if (frame != NULL) {
- YV12_BUFFER_CONFIG *fb = get_ref_frame(&ctx->cpi->common, frame->idx);
+ const int fb_idx = ctx->cpi->common.cur_show_frame_fb_idx;
+ YV12_BUFFER_CONFIG *fb = get_buf_frame(&ctx->cpi->common, fb_idx);
if (fb == NULL) return VPX_CODEC_ERROR;
-
yuvconfig2image(&frame->img, fb, NULL);
return VPX_CODEC_OK;
- } else {
- return VPX_CODEC_INVALID_PARAM;
}
+ return VPX_CODEC_INVALID_PARAM;
}
static vpx_codec_err_t ctrl_set_previewpp(vpx_codec_alg_priv_t *ctx,
@@ -1381,9 +1336,8 @@ static vpx_codec_err_t ctrl_set_previewpp(vpx_codec_alg_priv_t *ctx,
if (config != NULL) {
ctx->preview_ppcfg = *config;
return VPX_CODEC_OK;
- } else {
- return VPX_CODEC_INVALID_PARAM;
}
+ return VPX_CODEC_INVALID_PARAM;
#else
(void)ctx;
(void)args;
@@ -1405,17 +1359,24 @@ static vpx_image_t *encoder_get_preview(vpx_codec_alg_priv_t *ctx) {
if (vp9_get_preview_raw_frame(ctx->cpi, &sd, &flags) == 0) {
yuvconfig2image(&ctx->preview_img, &sd, NULL);
return &ctx->preview_img;
- } else {
- return NULL;
}
+ return NULL;
}
static vpx_codec_err_t ctrl_set_roi_map(vpx_codec_alg_priv_t *ctx,
va_list args) {
- (void)ctx;
- (void)args;
+ vpx_roi_map_t *data = va_arg(args, vpx_roi_map_t *);
+
+ if (data) {
+ vpx_roi_map_t *roi = (vpx_roi_map_t *)data;
- // TODO(yaowu): Need to re-implement and test for VP9.
+ if (!vp9_set_roi_map(ctx->cpi, roi->roi_map, roi->rows, roi->cols,
+ roi->delta_q, roi->delta_lf, roi->skip,
+ roi->ref_frame)) {
+ return VPX_CODEC_OK;
+ }
+ return VPX_CODEC_INVALID_PARAM;
+ }
return VPX_CODEC_INVALID_PARAM;
}
@@ -1427,11 +1388,10 @@ static vpx_codec_err_t ctrl_set_active_map(vpx_codec_alg_priv_t *ctx,
if (!vp9_set_active_map(ctx->cpi, map->active_map, (int)map->rows,
(int)map->cols))
return VPX_CODEC_OK;
- else
- return VPX_CODEC_INVALID_PARAM;
- } else {
+
return VPX_CODEC_INVALID_PARAM;
}
+ return VPX_CODEC_INVALID_PARAM;
}
static vpx_codec_err_t ctrl_get_active_map(vpx_codec_alg_priv_t *ctx,
@@ -1442,11 +1402,10 @@ static vpx_codec_err_t ctrl_get_active_map(vpx_codec_alg_priv_t *ctx,
if (!vp9_get_active_map(ctx->cpi, map->active_map, (int)map->rows,
(int)map->cols))
return VPX_CODEC_OK;
- else
- return VPX_CODEC_INVALID_PARAM;
- } else {
+
return VPX_CODEC_INVALID_PARAM;
}
+ return VPX_CODEC_INVALID_PARAM;
}
static vpx_codec_err_t ctrl_set_scale_mode(vpx_codec_alg_priv_t *ctx,
@@ -1458,9 +1417,8 @@ static vpx_codec_err_t ctrl_set_scale_mode(vpx_codec_alg_priv_t *ctx,
vp9_set_internal_size(ctx->cpi, (VPX_SCALING)mode->h_scaling_mode,
(VPX_SCALING)mode->v_scaling_mode);
return (res == 0) ? VPX_CODEC_OK : VPX_CODEC_INVALID_PARAM;
- } else {
- return VPX_CODEC_INVALID_PARAM;
}
+ return VPX_CODEC_INVALID_PARAM;
}
static vpx_codec_err_t ctrl_set_svc(vpx_codec_alg_priv_t *ctx, va_list args) {
@@ -1491,22 +1449,23 @@ static vpx_codec_err_t ctrl_set_svc_layer_id(vpx_codec_alg_priv_t *ctx,
vpx_svc_layer_id_t *const data = va_arg(args, vpx_svc_layer_id_t *);
VP9_COMP *const cpi = (VP9_COMP *)ctx->cpi;
SVC *const svc = &cpi->svc;
+ int sl;
- svc->first_spatial_layer_to_encode = data->spatial_layer_id;
svc->spatial_layer_to_encode = data->spatial_layer_id;
+ svc->first_spatial_layer_to_encode = data->spatial_layer_id;
+ // TODO(jianj): Deprecated to be removed.
svc->temporal_layer_id = data->temporal_layer_id;
+ // Allow for setting temporal layer per spatial layer for superframe.
+ for (sl = 0; sl < cpi->svc.number_spatial_layers; ++sl) {
+ svc->temporal_layer_id_per_spatial[sl] =
+ data->temporal_layer_id_per_spatial[sl];
+ }
// Checks on valid layer_id input.
if (svc->temporal_layer_id < 0 ||
svc->temporal_layer_id >= (int)ctx->cfg.ts_number_layers) {
return VPX_CODEC_INVALID_PARAM;
}
- if (svc->first_spatial_layer_to_encode < 0 ||
- svc->first_spatial_layer_to_encode >= (int)ctx->cfg.ss_number_layers) {
- return VPX_CODEC_INVALID_PARAM;
- }
- // First spatial layer to encode not implemented for two-pass.
- if (is_two_pass_svc(cpi) && svc->first_spatial_layer_to_encode > 0)
- return VPX_CODEC_INVALID_PARAM;
+
return VPX_CODEC_OK;
}
@@ -1546,20 +1505,87 @@ static vpx_codec_err_t ctrl_set_svc_parameters(vpx_codec_alg_priv_t *ctx,
return VPX_CODEC_OK;
}
+static vpx_codec_err_t ctrl_get_svc_ref_frame_config(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ VP9_COMP *const cpi = ctx->cpi;
+ vpx_svc_ref_frame_config_t *data = va_arg(args, vpx_svc_ref_frame_config_t *);
+ int sl;
+ for (sl = 0; sl <= cpi->svc.spatial_layer_id; sl++) {
+ data->update_buffer_slot[sl] = cpi->svc.update_buffer_slot[sl];
+ data->reference_last[sl] = cpi->svc.reference_last[sl];
+ data->reference_golden[sl] = cpi->svc.reference_golden[sl];
+ data->reference_alt_ref[sl] = cpi->svc.reference_altref[sl];
+ data->lst_fb_idx[sl] = cpi->svc.lst_fb_idx[sl];
+ data->gld_fb_idx[sl] = cpi->svc.gld_fb_idx[sl];
+ data->alt_fb_idx[sl] = cpi->svc.alt_fb_idx[sl];
+ // TODO(jianj): Remove these 3, deprecated.
+ data->update_last[sl] = cpi->svc.update_last[sl];
+ data->update_golden[sl] = cpi->svc.update_golden[sl];
+ data->update_alt_ref[sl] = cpi->svc.update_altref[sl];
+ }
+ return VPX_CODEC_OK;
+}
+
static vpx_codec_err_t ctrl_set_svc_ref_frame_config(vpx_codec_alg_priv_t *ctx,
va_list args) {
VP9_COMP *const cpi = ctx->cpi;
vpx_svc_ref_frame_config_t *data = va_arg(args, vpx_svc_ref_frame_config_t *);
int sl;
+ cpi->svc.use_set_ref_frame_config = 1;
for (sl = 0; sl < cpi->svc.number_spatial_layers; ++sl) {
- cpi->svc.ext_frame_flags[sl] = data->frame_flags[sl];
- cpi->svc.ext_lst_fb_idx[sl] = data->lst_fb_idx[sl];
- cpi->svc.ext_gld_fb_idx[sl] = data->gld_fb_idx[sl];
- cpi->svc.ext_alt_fb_idx[sl] = data->alt_fb_idx[sl];
+ cpi->svc.update_buffer_slot[sl] = data->update_buffer_slot[sl];
+ cpi->svc.reference_last[sl] = data->reference_last[sl];
+ cpi->svc.reference_golden[sl] = data->reference_golden[sl];
+ cpi->svc.reference_altref[sl] = data->reference_alt_ref[sl];
+ cpi->svc.lst_fb_idx[sl] = data->lst_fb_idx[sl];
+ cpi->svc.gld_fb_idx[sl] = data->gld_fb_idx[sl];
+ cpi->svc.alt_fb_idx[sl] = data->alt_fb_idx[sl];
+ cpi->svc.duration[sl] = data->duration[sl];
}
return VPX_CODEC_OK;
}
+static vpx_codec_err_t ctrl_set_svc_inter_layer_pred(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ const int data = va_arg(args, int);
+ VP9_COMP *const cpi = ctx->cpi;
+ cpi->svc.disable_inter_layer_pred = data;
+ return VPX_CODEC_OK;
+}
+
+static vpx_codec_err_t ctrl_set_svc_frame_drop_layer(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ VP9_COMP *const cpi = ctx->cpi;
+ vpx_svc_frame_drop_t *data = va_arg(args, vpx_svc_frame_drop_t *);
+ int sl;
+ cpi->svc.framedrop_mode = data->framedrop_mode;
+ for (sl = 0; sl < cpi->svc.number_spatial_layers; ++sl)
+ cpi->svc.framedrop_thresh[sl] = data->framedrop_thresh[sl];
+ // Don't allow max_consec_drop values below 1.
+ cpi->svc.max_consec_drop = VPXMAX(1, data->max_consec_drop);
+ return VPX_CODEC_OK;
+}
+
+static vpx_codec_err_t ctrl_set_svc_gf_temporal_ref(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ VP9_COMP *const cpi = ctx->cpi;
+ const unsigned int data = va_arg(args, unsigned int);
+ cpi->svc.use_gf_temporal_ref = data;
+ return VPX_CODEC_OK;
+}
+
+static vpx_codec_err_t ctrl_set_svc_spatial_layer_sync(
+ vpx_codec_alg_priv_t *ctx, va_list args) {
+ VP9_COMP *const cpi = ctx->cpi;
+ vpx_svc_spatial_layer_sync_t *data =
+ va_arg(args, vpx_svc_spatial_layer_sync_t *);
+ int sl;
+ for (sl = 0; sl < cpi->svc.number_spatial_layers; ++sl)
+ cpi->svc.spatial_layer_sync[sl] = data->spatial_layer_sync[sl];
+ cpi->svc.set_intra_only_frame = data->base_layer_intra_only;
+ return VPX_CODEC_OK;
+}
+
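Taken together, these controls replace the old ext_frame_flags/ext_*_fb_idx scheme with explicit per-layer reference and refresh state (update_buffer_slot as a slot bitmask, reference_last/golden/alt_ref flags, per-layer duration), plus a getter so callers can read back what the encoder settled on. A hedged usage sketch using only the fields visible in the setter above (field semantics as read from this diff):

```c
#include <string.h>
#include "vpx/vp8cx.h"
#include "vpx/vpx_encoder.h"

/* Sketch: pin spatial layer 0 to buffer slot 0, predicting from LAST only
 * and refreshing only that slot. Assumes a single-spatial-layer setup. */
static int set_layer0_refs(vpx_codec_ctx_t *codec) {
  vpx_svc_ref_frame_config_t ref_cfg;
  memset(&ref_cfg, 0, sizeof(ref_cfg));
  ref_cfg.lst_fb_idx[0] = 0;               /* LAST for layer 0 in slot 0 */
  ref_cfg.reference_last[0] = 1;           /* predict from LAST          */
  ref_cfg.update_buffer_slot[0] = 1 << 0;  /* refresh slot 0 afterwards  */
  return vpx_codec_control(codec, VP9E_SET_SVC_REF_FRAME_CONFIG, &ref_cfg) ==
         VPX_CODEC_OK;
}
```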
static vpx_codec_err_t ctrl_register_cx_callback(vpx_codec_alg_priv_t *ctx,
va_list args) {
vpx_codec_priv_output_cx_pkt_cb_pair_t *cbp =
@@ -1600,13 +1626,21 @@ static vpx_codec_err_t ctrl_set_render_size(vpx_codec_alg_priv_t *ctx,
return update_extra_cfg(ctx, &extra_cfg);
}
+static vpx_codec_err_t ctrl_set_postencode_drop(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ VP9_COMP *const cpi = ctx->cpi;
+ const unsigned int data = va_arg(args, unsigned int);
+ cpi->rc.ext_use_post_encode_drop = data;
+ return VPX_CODEC_OK;
+}
+
static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
{ VP8_COPY_REFERENCE, ctrl_copy_reference },
// Setters
{ VP8_SET_REFERENCE, ctrl_set_reference },
{ VP8_SET_POSTPROC, ctrl_set_previewpp },
- { VP8E_SET_ROI_MAP, ctrl_set_roi_map },
+ { VP9E_SET_ROI_MAP, ctrl_set_roi_map },
{ VP8E_SET_ACTIVEMAP, ctrl_set_active_map },
{ VP8E_SET_SCALEMODE, ctrl_set_scale_mode },
{ VP8E_SET_CPUUSED, ctrl_set_cpuused },
@@ -1615,6 +1649,7 @@ static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
{ VP8E_SET_STATIC_THRESHOLD, ctrl_set_static_thresh },
{ VP9E_SET_TILE_COLUMNS, ctrl_set_tile_columns },
{ VP9E_SET_TILE_ROWS, ctrl_set_tile_rows },
+ { VP9E_SET_TPL, ctrl_set_tpl_model },
{ VP8E_SET_ARNR_MAXFRAMES, ctrl_set_arnr_max_frames },
{ VP8E_SET_ARNR_STRENGTH, ctrl_set_arnr_strength },
{ VP8E_SET_ARNR_TYPE, ctrl_set_arnr_type },
@@ -1642,7 +1677,12 @@ static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
{ VP9E_SET_RENDER_SIZE, ctrl_set_render_size },
{ VP9E_SET_TARGET_LEVEL, ctrl_set_target_level },
{ VP9E_SET_ROW_MT, ctrl_set_row_mt },
+ { VP9E_SET_POSTENCODE_DROP, ctrl_set_postencode_drop },
{ VP9E_ENABLE_MOTION_VECTOR_UNIT_TEST, ctrl_enable_motion_vector_unit_test },
+ { VP9E_SET_SVC_INTER_LAYER_PRED, ctrl_set_svc_inter_layer_pred },
+ { VP9E_SET_SVC_FRAME_DROP_LAYER, ctrl_set_svc_frame_drop_layer },
+ { VP9E_SET_SVC_GF_TEMPORAL_REF, ctrl_set_svc_gf_temporal_ref },
+ { VP9E_SET_SVC_SPATIAL_LAYER_SYNC, ctrl_set_svc_spatial_layer_sync },
// Getters
{ VP8E_GET_LAST_QUANTIZER, ctrl_get_quantizer },
@@ -1651,6 +1691,7 @@ static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
{ VP9E_GET_SVC_LAYER_ID, ctrl_get_svc_layer_id },
{ VP9E_GET_ACTIVEMAP, ctrl_get_active_map },
{ VP9E_GET_LEVEL, ctrl_get_level },
+ { VP9E_GET_SVC_REF_FRAME_CONFIG, ctrl_get_svc_ref_frame_config },
{ -1, NULL },
};
@@ -1659,7 +1700,7 @@ static vpx_codec_enc_cfg_map_t encoder_usage_cfg_map[] = {
{ 0,
{
// NOLINT
- 0, // g_usage
+ 0, // g_usage (unused)
8, // g_threads
0, // g_profile
diff --git a/libvpx/vp9/vp9_dx_iface.c b/libvpx/vp9/vp9_dx_iface.c
index 2a5578674..6a4cb9acf 100644
--- a/libvpx/vp9/vp9_dx_iface.c
+++ b/libvpx/vp9/vp9_dx_iface.c
@@ -238,6 +238,19 @@ static void set_ppflags(const vpx_codec_alg_priv_t *ctx, vp9_ppflags_t *flags) {
flags->noise_level = ctx->postproc_cfg.noise_level;
}
+#undef ERROR
+#define ERROR(str) \
+ do { \
+ ctx->base.err_detail = str; \
+ return VPX_CODEC_INVALID_PARAM; \
+ } while (0)
+
+#define RANGE_CHECK(p, memb, lo, hi) \
+ do { \
+ if (!(((p)->memb == lo || (p)->memb > (lo)) && (p)->memb <= hi)) \
+ ERROR(#memb " out of range [" #lo ".." #hi "]"); \
+ } while (0)
+
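The "(memb == lo || memb > (lo)) && memb <= hi" form mirrors the encoder-side macro; writing it this way rather than a plain >= is presumably to sidestep "comparison always true" warnings when the member is unsigned and lo is 0. For example, RANGE_CHECK(ctx, row_mt, 0, 1) expands (after string pasting) to roughly:

```c
/* Illustrative expansion only; ERROR() is the macro defined just above. */
do {
  if (!(((ctx)->row_mt == 0 || (ctx)->row_mt > (0)) && (ctx)->row_mt <= 1))
    ERROR("row_mt out of range [0..1]");
} while (0);
```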
static vpx_codec_err_t init_decoder(vpx_codec_alg_priv_t *ctx) {
ctx->last_show_frame = -1;
ctx->need_resync = 1;
@@ -254,6 +267,12 @@ static vpx_codec_err_t init_decoder(vpx_codec_alg_priv_t *ctx) {
ctx->pbi->max_threads = ctx->cfg.threads;
ctx->pbi->inv_tile_order = ctx->invert_tile_order;
+ RANGE_CHECK(ctx, row_mt, 0, 1);
+ ctx->pbi->row_mt = ctx->row_mt;
+
+ RANGE_CHECK(ctx, lpf_opt, 0, 1);
+ ctx->pbi->lpf_mt_opt = ctx->lpf_opt;
+
// If postprocessing was enabled by the application and a
// configuration has not been provided, default it.
if (!ctx->postproc_cfg_set && (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC))
@@ -455,8 +474,8 @@ static vpx_codec_err_t ctrl_get_reference(vpx_codec_alg_priv_t *ctx,
vp9_ref_frame_t *data = va_arg(args, vp9_ref_frame_t *);
if (data) {
- YV12_BUFFER_CONFIG *fb;
- fb = get_ref_frame(&ctx->pbi->common, data->idx);
+ const int fb_idx = ctx->pbi->common.cur_show_frame_fb_idx;
+ YV12_BUFFER_CONFIG *fb = get_buf_frame(&ctx->pbi->common, fb_idx);
if (fb == NULL) return VPX_CODEC_ERROR;
yuvconfig2image(&data->img, fb, NULL);
return VPX_CODEC_OK;
@@ -635,6 +654,20 @@ static vpx_codec_err_t ctrl_set_spatial_layer_svc(vpx_codec_alg_priv_t *ctx,
return VPX_CODEC_OK;
}
+static vpx_codec_err_t ctrl_set_row_mt(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ ctx->row_mt = va_arg(args, int);
+
+ return VPX_CODEC_OK;
+}
+
+static vpx_codec_err_t ctrl_enable_lpf_opt(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ ctx->lpf_opt = va_arg(args, int);
+
+ return VPX_CODEC_OK;
+}
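Both controls only cache the flag on the algorithm context; init_decoder() range-checks them and copies them onto the VP9Decoder (pbi->row_mt, pbi->lpf_mt_opt). Since init_decoder() runs when the first frame arrives, the controls have to be issued before the first vpx_codec_decode() call. A hedged usage sketch:

```c
#include "vpx/vp8dx.h"
#include "vpx/vpx_decoder.h"

/* Sketch: enable row-based multithreading and the loopfilter optimization
 * on a freshly initialized decoder, before any frame is decoded. */
static int enable_decoder_row_mt(vpx_codec_ctx_t *codec) {
  if (vpx_codec_control(codec, VP9D_SET_ROW_MT, 1) != VPX_CODEC_OK) return 0;
  return vpx_codec_control(codec, VP9D_SET_LOOP_FILTER_OPT, 1) == VPX_CODEC_OK;
}
```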
+
static vpx_codec_ctrl_fn_map_t decoder_ctrl_maps[] = {
{ VP8_COPY_REFERENCE, ctrl_copy_reference },
@@ -646,6 +679,8 @@ static vpx_codec_ctrl_fn_map_t decoder_ctrl_maps[] = {
{ VP9_SET_BYTE_ALIGNMENT, ctrl_set_byte_alignment },
{ VP9_SET_SKIP_LOOP_FILTER, ctrl_set_skip_loop_filter },
{ VP9_DECODE_SVC_SPATIAL_LAYER, ctrl_set_spatial_layer_svc },
+ { VP9D_SET_ROW_MT, ctrl_set_row_mt },
+ { VP9D_SET_LOOP_FILTER_OPT, ctrl_enable_lpf_opt },
// Getters
{ VPXD_GET_LAST_QUANTIZER, ctrl_get_quantizer },
diff --git a/libvpx/vp9/vp9_dx_iface.h b/libvpx/vp9/vp9_dx_iface.h
index 18bc7ab0d..f60688c4d 100644
--- a/libvpx/vp9/vp9_dx_iface.h
+++ b/libvpx/vp9/vp9_dx_iface.h
@@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_VP9_DX_IFACE_H_
-#define VP9_VP9_DX_IFACE_H_
+#ifndef VPX_VP9_VP9_DX_IFACE_H_
+#define VPX_VP9_VP9_DX_IFACE_H_
#include "vp9/decoder/vp9_decoder.h"
@@ -45,6 +45,8 @@ struct vpx_codec_alg_priv {
// Allow for decoding up to a given spatial layer for SVC stream.
int svc_decoding;
int svc_spatial_layer;
+ int row_mt;
+ int lpf_opt;
};
-#endif // VP9_VP9_DX_IFACE_H_
+#endif // VPX_VP9_VP9_DX_IFACE_H_
diff --git a/libvpx/vp9/vp9_iface_common.h b/libvpx/vp9/vp9_iface_common.h
index d68872750..a1921db63 100644
--- a/libvpx/vp9/vp9_iface_common.h
+++ b/libvpx/vp9/vp9_iface_common.h
@@ -7,17 +7,17 @@
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
-#ifndef VP9_VP9_IFACE_COMMON_H_
-#define VP9_VP9_IFACE_COMMON_H_
+#ifndef VPX_VP9_VP9_IFACE_COMMON_H_
+#define VPX_VP9_VP9_IFACE_COMMON_H_
#include "vpx_ports/mem.h"
static void yuvconfig2image(vpx_image_t *img, const YV12_BUFFER_CONFIG *yv12,
void *user_priv) {
/** vpx_img_wrap() doesn't allow specifying independent strides for
- * the Y, U, and V planes, nor other alignment adjustments that
- * might be representable by a YV12_BUFFER_CONFIG, so we just
- * initialize all the fields.*/
+ * the Y, U, and V planes, nor other alignment adjustments that
+ * might be representable by a YV12_BUFFER_CONFIG, so we just
+ * initialize all the fields.*/
int bps;
if (!yv12->subsampling_y) {
if (!yv12->subsampling_x) {
@@ -142,4 +142,4 @@ static VP9_REFFRAME ref_frame_to_vp9_reframe(vpx_ref_frame_type_t frame) {
assert(0 && "Invalid Reference Frame");
return VP9_LAST_FLAG;
}
-#endif // VP9_VP9_IFACE_COMMON_H_
+#endif // VPX_VP9_VP9_IFACE_COMMON_H_
diff --git a/libvpx/vp9/vp9cx.mk b/libvpx/vp9/vp9cx.mk
index d633ed142..05981d689 100644
--- a/libvpx/vp9/vp9cx.mk
+++ b/libvpx/vp9/vp9cx.mk
@@ -64,6 +64,7 @@ VP9_CX_SRCS-yes += encoder/vp9_ratectrl.c
VP9_CX_SRCS-yes += encoder/vp9_rd.c
VP9_CX_SRCS-yes += encoder/vp9_rdopt.c
VP9_CX_SRCS-yes += encoder/vp9_pickmode.c
+VP9_CX_SRCS-yes += encoder/vp9_partition_models.h
VP9_CX_SRCS-yes += encoder/vp9_segmentation.c
VP9_CX_SRCS-yes += encoder/vp9_segmentation.h
VP9_CX_SRCS-yes += encoder/vp9_speed_features.c
@@ -74,6 +75,7 @@ VP9_CX_SRCS-yes += encoder/vp9_svc_layercontext.c
VP9_CX_SRCS-yes += encoder/vp9_resize.c
VP9_CX_SRCS-yes += encoder/vp9_resize.h
VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_blockiness.c
+VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_blockiness.h
VP9_CX_SRCS-yes += encoder/vp9_tokenize.c
VP9_CX_SRCS-yes += encoder/vp9_treewriter.c
@@ -103,6 +105,7 @@ VP9_CX_SRCS-yes += encoder/vp9_mbgraph.h
VP9_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/temporal_filter_sse4.c
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_quantize_sse2.c
+VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_quantize_avx2.c
VP9_CX_SRCS-$(HAVE_AVX) += encoder/x86/vp9_diamond_search_sad_avx.c
ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_highbd_block_error_intrin_sse2.c
@@ -139,6 +142,8 @@ VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct8x8_msa.c
VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct16x16_msa.c
VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct_msa.h
+VP9_CX_SRCS-$(HAVE_VSX) += encoder/ppc/vp9_quantize_vsx.c
+
# Strip unnecessary files with CONFIG_REALTIME_ONLY
VP9_CX_SRCS_REMOVE-$(CONFIG_REALTIME_ONLY) += encoder/vp9_firstpass.c
VP9_CX_SRCS_REMOVE-$(CONFIG_REALTIME_ONLY) += encoder/vp9_mbgraph.c