aboutsummaryrefslogtreecommitdiff
path: root/third_party/libaom/source/libaom/av1
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/libaom/source/libaom/av1')
-rw-r--r--third_party/libaom/source/libaom/av1/arg_defs.c21
-rw-r--r--third_party/libaom/source/libaom/av1/arg_defs.h2
-rw-r--r--third_party/libaom/source/libaom/av1/av1_cx_iface.c723
-rw-r--r--third_party/libaom/source/libaom/av1/av1_dx_iface.c35
-rw-r--r--third_party/libaom/source/libaom/av1/common/alloccommon.c227
-rw-r--r--third_party/libaom/source/libaom/av1/common/alloccommon.h8
-rw-r--r--third_party/libaom/source/libaom/av1/common/av1_common_int.h77
-rw-r--r--third_party/libaom/source/libaom/av1/common/av1_loopfilter.c268
-rw-r--r--third_party/libaom/source/libaom/av1/common/av1_loopfilter.h16
-rw-r--r--third_party/libaom/source/libaom/av1/common/blockd.h45
-rw-r--r--third_party/libaom/source/libaom/av1/common/cdef.c333
-rw-r--r--third_party/libaom/source/libaom/av1/common/cdef.h59
-rw-r--r--third_party/libaom/source/libaom/av1/common/cdef_block.h4
-rw-r--r--third_party/libaom/source/libaom/av1/common/cfl.h2
-rw-r--r--third_party/libaom/source/libaom/av1/common/common.h2
-rw-r--r--third_party/libaom/source/libaom/av1/common/common_data.h9
-rw-r--r--third_party/libaom/source/libaom/av1/common/enums.h9
-rw-r--r--third_party/libaom/source/libaom/av1/common/loopfiltermask.c41
-rw-r--r--third_party/libaom/source/libaom/av1/common/mv.h2
-rw-r--r--third_party/libaom/source/libaom/av1/common/mvref_common.c34
-rw-r--r--third_party/libaom/source/libaom/av1/common/pred_common.h4
-rw-r--r--third_party/libaom/source/libaom/av1/common/reconinter.c30
-rw-r--r--third_party/libaom/source/libaom/av1/common/reconinter.h4
-rw-r--r--third_party/libaom/source/libaom/av1/common/reconintra.c139
-rw-r--r--third_party/libaom/source/libaom/av1/common/reconintra.h15
-rw-r--r--third_party/libaom/source/libaom/av1/common/resize.c22
-rw-r--r--third_party/libaom/source/libaom/av1/common/restoration.c24
-rw-r--r--third_party/libaom/source/libaom/av1/common/thread_common.c296
-rw-r--r--third_party/libaom/source/libaom/av1/common/thread_common.h53
-rw-r--r--third_party/libaom/source/libaom/av1/common/tile_common.c26
-rw-r--r--third_party/libaom/source/libaom/av1/decoder/decodeframe.c274
-rw-r--r--third_party/libaom/source/libaom/av1/decoder/decodemv.c40
-rw-r--r--third_party/libaom/source/libaom/av1/decoder/decoder.c57
-rw-r--r--third_party/libaom/source/libaom/av1/decoder/decoder.h30
-rw-r--r--third_party/libaom/source/libaom/av1/decoder/obu.c160
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/aq_complexity.c10
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/aq_cyclicrefresh.c114
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/aq_cyclicrefresh.h24
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/aq_variance.c23
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/av1_noise_estimate.c10
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/av1_quantize.c61
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/av1_quantize.h26
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/av1_temporal_denoiser.c14
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/bitstream.c726
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/bitstream.h87
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/block.h59
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/compound_type.c125
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/context_tree.c2
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/dwt.c16
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/dwt.h5
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/enc_enums.h2
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/encode_strategy.c570
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/encode_strategy.h24
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/encodeframe.c243
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/encodeframe_utils.c222
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/encodeframe_utils.h147
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/encodemb.c58
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/encodemb.h15
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/encodemv.c7
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/encodemv.h4
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/encoder.c1480
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/encoder.h643
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/encoder_alloc.h68
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/encoder_utils.c125
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/encoder_utils.h76
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/encodetxb.c11
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/ethread.c528
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/ethread.h11
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/external_partition.c93
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/external_partition.h55
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/firstpass.c271
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/firstpass.h38
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/global_motion_facade.c12
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/gop_structure.c165
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/gop_structure.h5
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/hybrid_fwd_txfm.c24
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/hybrid_fwd_txfm.h9
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/interp_search.c27
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/interp_search.h12
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/intra_mode_search.c648
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/intra_mode_search.h30
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/intra_mode_search_utils.h259
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/level.c14
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/level.h2
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/mcomp.c29
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/mcomp.h2
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/motion_search_facade.c100
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/motion_search_facade.h11
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/mv_prec.c12
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/mv_prec.h4
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/nonrd_pickmode.c119
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/optical_flow.c11
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/palette.c101
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/palette.h7
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/partition_search.c654
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/partition_search.h15
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/partition_strategy.c671
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/partition_strategy.h134
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/pass2_strategy.c1342
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/pickcdef.c38
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/pickcdef.h19
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/picklpf.c49
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/pickrst.c38
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/ratectrl.c571
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/ratectrl.h169
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/rc_utils.h55
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/rd.c104
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/rd.h49
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/rdopt.c678
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/rdopt.h6
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/rdopt_utils.h8
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/segmentation.c18
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/sparse_linear_solver.c3
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/sparse_linear_solver.h6
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/speed_features.c274
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/speed_features.h225
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/superres_scale.c39
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/svc_layercontext.c199
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/svc_layercontext.h10
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/temporal_filter.c94
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/temporal_filter.h13
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/tokenize.c12
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/tokenize.h4
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/tpl_model.c607
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/tpl_model.h151
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/tune_butteraugli.c125
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/tune_butteraugli.h6
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/tune_vmaf.c473
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/tune_vmaf.h4
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/tx_search.c129
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/txb_rdopt.c12
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/txb_rdopt.h8
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/var_based_part.c98
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/x86/highbd_temporal_filter_avx2.c11
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/x86/highbd_temporal_filter_sse2.c11
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/x86/temporal_filter_avx2.c11
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/x86/temporal_filter_sse2.c11
137 files changed, 12178 insertions, 5663 deletions
diff --git a/third_party/libaom/source/libaom/av1/arg_defs.c b/third_party/libaom/source/libaom/av1/arg_defs.c
index e79f9b2934..8646b09c9d 100644
--- a/third_party/libaom/source/libaom/av1/arg_defs.c
+++ b/third_party/libaom/source/libaom/av1/arg_defs.c
@@ -271,7 +271,9 @@ const av1_codec_arg_definitions_t g_av1_codec_arg_defs = {
.noise_sens = ARG_DEF(NULL, "noise-sensitivity", 1,
"Noise sensitivity (frames to blur)"),
.sharpness = ARG_DEF(NULL, "sharpness", 1,
- "Loop filter sharpness (0..7), default is 0"),
+ "Bias towards block sharpness in rate-distortion "
+ "optimization of transform coefficients "
+ "(0..7), default is 0"),
.static_thresh =
ARG_DEF(NULL, "static-thresh", 1, "Motion detection threshold"),
.auto_altref =
@@ -448,13 +450,16 @@ const av1_codec_arg_definitions_t g_av1_codec_arg_defs = {
"Use Default-transform only for INTRA modes"),
.quant_b_adapt = ARG_DEF(NULL, "quant-b-adapt", 1, "Use adaptive quantize_b"),
.coeff_cost_upd_freq = ARG_DEF(NULL, "coeff-cost-upd-freq", 1,
- "Update freq for coeff costs"
+ "Update freq for coeff costs. "
"0: SB, 1: SB Row per Tile, 2: Tile, 3: Off"),
.mode_cost_upd_freq = ARG_DEF(NULL, "mode-cost-upd-freq", 1,
- "Update freq for mode costs"
+ "Update freq for mode costs. "
"0: SB, 1: SB Row per Tile, 2: Tile, 3: Off"),
.mv_cost_upd_freq = ARG_DEF(NULL, "mv-cost-upd-freq", 1,
- "Update freq for mv costs"
+ "Update freq for mv costs. "
+ "0: SB, 1: SB Row per Tile, 2: Tile, 3: Off"),
+ .dv_cost_upd_freq = ARG_DEF(NULL, "dv-cost-upd-freq", 1,
+ "Update freq for dv costs. "
"0: SB, 1: SB Row per Tile, 2: Tile, 3: Off"),
.num_tg = ARG_DEF(NULL, "num-tile-groups", 1,
"Maximum number of tile groups, default is 1"),
@@ -471,6 +476,8 @@ const av1_codec_arg_definitions_t g_av1_codec_arg_defs = {
.vmaf_model_path =
ARG_DEF(NULL, "vmaf-model-path", 1, "Path to the VMAF model file"),
#endif
+ .partition_info_path = ARG_DEF(NULL, "partition-info-path", 1,
+ "Partition information read and write path"),
.film_grain_test = ARG_DEF(
NULL, "film-grain-test", 1,
"Film grain test vectors (0: none (default), 1: test-1 2: test-2, "
@@ -592,7 +599,9 @@ const av1_codec_arg_definitions_t g_av1_codec_arg_defs = {
"pyramid. Selected automatically from --cq-level if "
"--fixed-qp-offsets is not provided. If this option is not "
"specified (default), offsets are adaptively chosen by the "
- "encoder."),
+ "encoder. Further, if this option is specified, at least two "
+ "comma-separated values corresponding to kf and arf offsets "
+ "must be provided, while the rest are chosen by the encoder"),
.fixed_qp_offsets = ARG_DEF(
NULL, "fixed-qp-offsets", 1,
@@ -605,6 +614,6 @@ const av1_codec_arg_definitions_t g_av1_codec_arg_defs = {
.vbr_corpus_complexity_lap = ARG_DEF(
NULL, "vbr-corpus-complexity-lap", 1,
"Set average corpus complexity per mb for single pass VBR using lap. "
- "(0..10000), default is 0")
+ "(0..10000), default is 0"),
#endif // CONFIG_AV1_ENCODER
};
diff --git a/third_party/libaom/source/libaom/av1/arg_defs.h b/third_party/libaom/source/libaom/av1/arg_defs.h
index f86e91551c..6a8d0d47cf 100644
--- a/third_party/libaom/source/libaom/av1/arg_defs.h
+++ b/third_party/libaom/source/libaom/av1/arg_defs.h
@@ -173,12 +173,14 @@ typedef struct av1_codec_arg_definitions {
arg_def_t coeff_cost_upd_freq;
arg_def_t mode_cost_upd_freq;
arg_def_t mv_cost_upd_freq;
+ arg_def_t dv_cost_upd_freq;
arg_def_t num_tg;
arg_def_t mtu_size;
arg_def_t timing_info;
#if CONFIG_TUNE_VMAF
arg_def_t vmaf_model_path;
#endif
+ arg_def_t partition_info_path;
arg_def_t film_grain_test;
arg_def_t film_grain_table;
#if CONFIG_DENOISE
diff --git a/third_party/libaom/source/libaom/av1/av1_cx_iface.c b/third_party/libaom/source/libaom/av1/av1_cx_iface.c
index 123bb1dc41..11c47bca24 100644
--- a/third_party/libaom/source/libaom/av1/av1_cx_iface.c
+++ b/third_party/libaom/source/libaom/av1/av1_cx_iface.c
@@ -26,6 +26,7 @@
#include "av1/encoder/bitstream.h"
#include "av1/encoder/encoder.h"
#include "av1/encoder/ethread.h"
+#include "av1/encoder/external_partition.h"
#include "av1/encoder/firstpass.h"
#include "av1/arg_defs.h"
@@ -51,6 +52,7 @@ struct av1_extracfg {
unsigned int gf_max_pyr_height;
aom_tune_metric tuning;
const char *vmaf_model_path;
+ const char *partition_info_path;
unsigned int cq_level; // constrained quality level
unsigned int rc_max_intra_bitrate_pct;
unsigned int rc_max_inter_bitrate_pct;
@@ -154,12 +156,26 @@ struct av1_extracfg {
COST_UPDATE_TYPE coeff_cost_upd_freq;
COST_UPDATE_TYPE mode_cost_upd_freq;
COST_UPDATE_TYPE mv_cost_upd_freq;
+ COST_UPDATE_TYPE dv_cost_upd_freq;
unsigned int ext_tile_debug;
unsigned int sb_multipass_unit_test;
};
+#if CONFIG_REALTIME_ONLY
+// Settings changed for realtime only build:
+// cpu_used: 7
+// enable_tpl_model: 0
+// enable_restoration: 0
+// enable_obmc: 0
+// deltaq_mode: NO_DELTA_Q
+// enable_global_motion usage: 0
+// enable_warped_motion at sequence level: 0
+// allow_warped_motion at frame level: 0
+// coeff_cost_upd_freq: COST_UPD_OFF
+// mode_cost_upd_freq: COST_UPD_OFF
+// mv_cost_upd_freq: COST_UPD_OFF
static struct av1_extracfg default_extra_cfg = {
- 0, // cpu_used
+ 7, // cpu_used
1, // enable_auto_alt_ref
0, // enable_auto_bwd_ref
0, // noise_sensitivity
@@ -168,7 +184,7 @@ static struct av1_extracfg default_extra_cfg = {
1, // row_mt
0, // tile_columns
0, // tile_rows
- 1, // enable_tpl_model
+ 0, // enable_tpl_model
1, // enable_keyframe_filtering
7, // arnr_max_frames
5, // arnr_strength
@@ -177,31 +193,32 @@ static struct av1_extracfg default_extra_cfg = {
0, // gf_min_pyr_height
5, // gf_max_pyr_height
AOM_TUNE_PSNR, // tuning
- "/usr/local/share/model/vmaf_v0.6.1.pkl", // VMAF model path
- 10, // cq_level
- 0, // rc_max_intra_bitrate_pct
- 0, // rc_max_inter_bitrate_pct
- 0, // gf_cbr_boost_pct
- 0, // lossless
- 1, // enable_cdef
- 1, // enable_restoration
- 0, // force_video_mode
- 1, // enable_obmc
- 3, // disable_trellis_quant
- 0, // enable_qm
- DEFAULT_QM_Y, // qm_y
- DEFAULT_QM_U, // qm_u
- DEFAULT_QM_V, // qm_v
- DEFAULT_QM_FIRST, // qm_min
- DEFAULT_QM_LAST, // qm_max
- 1, // max number of tile groups
- 0, // mtu_size
+ "/usr/local/share/model/vmaf_v0.6.1.json", // VMAF model path
+ ".", // partition info path
+ 10, // cq_level
+ 0, // rc_max_intra_bitrate_pct
+ 0, // rc_max_inter_bitrate_pct
+ 0, // gf_cbr_boost_pct
+ 0, // lossless
+ 1, // enable_cdef
+ 0, // enable_restoration
+ 0, // force_video_mode
+ 0, // enable_obmc
+ 3, // disable_trellis_quant
+ 0, // enable_qm
+ DEFAULT_QM_Y, // qm_y
+ DEFAULT_QM_U, // qm_u
+ DEFAULT_QM_V, // qm_v
+ DEFAULT_QM_FIRST, // qm_min
+ DEFAULT_QM_LAST, // qm_max
+ 1, // max number of tile groups
+ 0, // mtu_size
AOM_TIMING_UNSPECIFIED, // No picture timing signaling in bitstream
0, // frame_parallel_decoding_mode
1, // enable dual filter
0, // enable delta quant in chroma planes
NO_AQ, // aq_mode
- DELTA_Q_OBJECTIVE, // deltaq_mode
+ NO_DELTA_Q, // deltaq_mode
0, // delta lf mode
0, // frame_periodic_boost
AOM_BITS_8, // Bit depth
@@ -243,9 +260,9 @@ static struct av1_extracfg default_extra_cfg = {
1, // enable difference-weighted compound
1, // enable interinter wedge compound
1, // enable interintra wedge compound
- 1, // enable_global_motion usage
- 1, // enable_warped_motion at sequence level
- 1, // allow_warped_motion at frame level
+ 0, // enable_global_motion usage
+ 0, // enable_warped_motion at sequence level
+ 0, // allow_warped_motion at frame level
1, // enable filter intra at sequence level
1, // enable smooth intra modes usage for sequence
1, // enable Paeth intra mode usage for sequence
@@ -277,15 +294,148 @@ static struct av1_extracfg default_extra_cfg = {
SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX,
SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX,
SEQ_LEVEL_MAX, SEQ_LEVEL_MAX,
+ }, // target_seq_level_idx
+ 0, // tier_mask
+ 0, // min_cr
+ COST_UPD_OFF, // coeff_cost_upd_freq
+ COST_UPD_OFF, // mode_cost_upd_freq
+ COST_UPD_OFF, // mv_cost_upd_freq
+ COST_UPD_OFF, // dv_cost_upd_freq
+ 0, // ext_tile_debug
+ 0, // sb_multipass_unit_test
+};
+#else
+static struct av1_extracfg default_extra_cfg = {
+ 0, // cpu_used
+ 1, // enable_auto_alt_ref
+ 0, // enable_auto_bwd_ref
+ 0, // noise_sensitivity
+ 0, // sharpness
+ 0, // static_thresh
+ 1, // row_mt
+ 0, // tile_columns
+ 0, // tile_rows
+ 1, // enable_tpl_model
+ 1, // enable_keyframe_filtering
+ 7, // arnr_max_frames
+ 5, // arnr_strength
+ 0, // min_gf_interval; 0 -> default decision
+ 0, // max_gf_interval; 0 -> default decision
+ 0, // gf_min_pyr_height
+ 5, // gf_max_pyr_height
+ AOM_TUNE_PSNR, // tuning
+ "/usr/local/share/model/vmaf_v0.6.1.json", // VMAF model path
+ ".", // partition info path
+ 10, // cq_level
+ 0, // rc_max_intra_bitrate_pct
+ 0, // rc_max_inter_bitrate_pct
+ 0, // gf_cbr_boost_pct
+ 0, // lossless
+ 1, // enable_cdef
+ 1, // enable_restoration
+ 0, // force_video_mode
+ 1, // enable_obmc
+ 3, // disable_trellis_quant
+ 0, // enable_qm
+ DEFAULT_QM_Y, // qm_y
+ DEFAULT_QM_U, // qm_u
+ DEFAULT_QM_V, // qm_v
+ DEFAULT_QM_FIRST, // qm_min
+ DEFAULT_QM_LAST, // qm_max
+ 1, // max number of tile groups
+ 0, // mtu_size
+ AOM_TIMING_UNSPECIFIED, // No picture timing signaling in bitstream
+ 0, // frame_parallel_decoding_mode
+ 1, // enable dual filter
+ 0, // enable delta quant in chroma planes
+ NO_AQ, // aq_mode
+ DELTA_Q_OBJECTIVE, // deltaq_mode
+ 0, // delta lf mode
+ 0, // frame_periodic_boost
+ AOM_BITS_8, // Bit depth
+ AOM_CONTENT_DEFAULT, // content
+ AOM_CICP_CP_UNSPECIFIED, // CICP color primaries
+ AOM_CICP_TC_UNSPECIFIED, // CICP transfer characteristics
+ AOM_CICP_MC_UNSPECIFIED, // CICP matrix coefficients
+ AOM_CSP_UNKNOWN, // chroma sample position
+ 0, // color range
+ 0, // render width
+ 0, // render height
+ AOM_SUPERBLOCK_SIZE_DYNAMIC, // superblock_size
+ 1, // this depends on large_scale_tile.
+ 0, // error_resilient_mode off by default.
+ 0, // s_frame_mode off by default.
+ 0, // film_grain_test_vector
+ 0, // film_grain_table_filename
+ 0, // motion_vector_unit_test
+ 1, // CDF update mode
+ 1, // enable rectangular partitions
+ 1, // enable ab shape partitions
+ 1, // enable 1:4 and 4:1 partitions
+ 4, // min_partition_size
+ 128, // max_partition_size
+ 1, // enable intra edge filter
+ 1, // frame order hint
+ 1, // enable 64-pt transform usage
+ 1, // enable flip and identity transform
+ 1, // enable rectangular transform usage
+ 1, // dist-wtd compound
+ 7, // max_reference_frames
+ 0, // enable_reduced_reference_set
+ 1, // enable_ref_frame_mvs sequence level
+ 1, // allow ref_frame_mvs frame level
+ 1, // enable masked compound at sequence level
+ 1, // enable one sided compound at sequence level
+ 1, // enable interintra compound at sequence level
+ 1, // enable smooth interintra mode
+ 1, // enable difference-weighted compound
+ 1, // enable interinter wedge compound
+ 1, // enable interintra wedge compound
+ 1, // enable_global_motion usage
+ 1, // enable_warped_motion at sequence level
+ 1, // allow_warped_motion at frame level
+ 1, // enable filter intra at sequence level
+ 1, // enable smooth intra modes usage for sequence
+ 1, // enable Paeth intra mode usage for sequence
+ 1, // enable CFL uv intra mode usage for sequence
+ 1, // enable D45 to D203 intra mode usage for sequence
+ 1, // superres
+ 1, // enable overlay
+ 1, // enable palette
+ !CONFIG_SHARP_SETTINGS, // enable intrabc
+ 1, // enable angle delta
+#if CONFIG_DENOISE
+ 0, // noise_level
+ 32, // noise_block_size
+ 1, // enable_dnl_denoising
+#endif
+ 0, // chroma_subsampling_x
+ 0, // chroma_subsampling_y
+ 0, // reduced_tx_type_set
+ 0, // use_intra_dct_only
+ 0, // use_inter_dct_only
+ 0, // use_intra_default_tx_only
+ 0, // quant_b_adapt
+ 0, // vbr_corpus_complexity_lap
+ {
+ SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX,
+ SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX,
+ SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX,
+ SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX,
+ SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX,
+ SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX, SEQ_LEVEL_MAX,
+ SEQ_LEVEL_MAX, SEQ_LEVEL_MAX,
}, // target_seq_level_idx
0, // tier_mask
0, // min_cr
COST_UPD_SB, // coeff_cost_upd_freq
COST_UPD_SB, // mode_cost_upd_freq
COST_UPD_SB, // mv_cost_upd_freq
+ COST_UPD_SB, // dv_cost_upd_freq
0, // ext_tile_debug
0, // sb_multipass_unit_test
};
+#endif
struct aom_codec_alg_priv {
aom_codec_priv_t base;
@@ -380,7 +530,11 @@ static aom_codec_err_t validate_config(aom_codec_alg_priv_t *ctx,
RANGE_CHECK_HI(extra_cfg, deltaq_mode, DELTA_Q_MODE_COUNT - 1);
RANGE_CHECK_HI(extra_cfg, deltalf_mode, 1);
RANGE_CHECK_HI(extra_cfg, frame_periodic_boost, 1);
- RANGE_CHECK_HI(cfg, g_usage, 2);
+#if CONFIG_REALTIME_ONLY
+ RANGE_CHECK(cfg, g_usage, AOM_USAGE_REALTIME, AOM_USAGE_REALTIME);
+#else
+ RANGE_CHECK_HI(cfg, g_usage, AOM_USAGE_ALL_INTRA);
+#endif
RANGE_CHECK_HI(cfg, g_threads, MAX_NUM_THREADS);
RANGE_CHECK(cfg, rc_end_usage, AOM_VBR, AOM_Q);
RANGE_CHECK_HI(cfg, rc_undershoot_pct, 100);
@@ -540,15 +694,6 @@ static aom_codec_err_t validate_config(aom_codec_alg_priv_t *ctx,
}
#endif
-#if !CONFIG_USE_VMAF_RC
- if (extra_cfg->tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN) {
- ERROR(
- "This error may be related to the wrong configuration options: try to "
- "set -DCONFIG_TUNE_VMAF=1 and -DCONFIG_USE_VMAF_RC=1 at the time CMake"
- " is run.");
- }
-#endif
-
RANGE_CHECK(extra_cfg, tuning, AOM_TUNE_PSNR, AOM_TUNE_BUTTERAUGLI);
RANGE_CHECK(extra_cfg, timing_info_type, AOM_TIMING_UNSPECIFIED,
@@ -572,6 +717,7 @@ static aom_codec_err_t validate_config(aom_codec_alg_priv_t *ctx,
RANGE_CHECK(extra_cfg, coeff_cost_upd_freq, 0, 3);
RANGE_CHECK(extra_cfg, mode_cost_upd_freq, 0, 3);
RANGE_CHECK(extra_cfg, mv_cost_upd_freq, 0, 3);
+ RANGE_CHECK(extra_cfg, dv_cost_upd_freq, 0, 3);
RANGE_CHECK(extra_cfg, min_partition_size, 4, 128);
RANGE_CHECK(extra_cfg, max_partition_size, 4, 128);
@@ -619,13 +765,14 @@ static aom_codec_err_t validate_img(aom_codec_alg_priv_t *ctx,
#if CONFIG_TUNE_BUTTERAUGLI
if (ctx->extra_cfg.tuning == AOM_TUNE_BUTTERAUGLI) {
- if (img->x_chroma_shift != 1 || img->y_chroma_shift != 1) {
- ERROR("Only YV12/I420 images supported in tune=butteraugli mode.");
+ if (img->bit_depth > 8) {
+ ERROR("Only 8 bit depth images supported in tune=butteraugli mode.");
}
- if ((img->cp != 0 && img->cp != AOM_CICP_CP_BT_709) ||
- (img->tc != 0 && img->tc != AOM_CICP_TC_BT_709) ||
- (img->mc != 0 && img->mc != AOM_CICP_MC_BT_709)) {
- ERROR("Only BT.709 images supported in tune=butteraugli mode.");
+ if (img->mc != 0 && img->mc != AOM_CICP_MC_BT_709 &&
+ img->mc != AOM_CICP_MC_BT_601 && img->mc != AOM_CICP_MC_BT_470_B_G) {
+ ERROR(
+ "Only BT.709 and BT.601 matrix coefficients supported in "
+ "tune=butteraugli mode. Identity matrix is treated as BT.601.");
}
}
#endif
@@ -689,7 +836,6 @@ static void update_default_encoder_config(const cfg_options_t *cfg,
extra_cfg->enable_smooth_intra = (cfg->disable_smooth_intra == 0);
extra_cfg->enable_paeth_intra = (cfg->disable_paeth_intra == 0);
extra_cfg->enable_cfl_intra = (cfg->disable_cfl == 0);
- extra_cfg->enable_diagonal_intra = (cfg->disable_diagonal_intra == 0);
extra_cfg->enable_obmc = (cfg->disable_obmc == 0);
extra_cfg->enable_palette = (cfg->disable_palette == 0);
extra_cfg->enable_intrabc = (cfg->disable_intrabc == 0);
@@ -709,12 +855,12 @@ static double convert_qp_offset(int cq_level, int q_offset, int bit_depth) {
return (base_q_val - new_q_val);
}
-static double get_modeled_qp_offset(int cq_level, int level, int bit_depth) {
- // 80% for keyframe was derived empirically.
- // 40% similar to rc_pick_q_and_bounds_one_pass_vbr() for Q mode ARF.
+static double get_modeled_qp_offset(int qp, int level, int bit_depth) {
+ // 76% for keyframe was derived empirically.
+ // 60% similar to rc_pick_q_and_bounds_one_pass_vbr() for Q mode ARF.
// Rest derived similar to rc_pick_q_and_bounds_two_pass()
- static const int percents[FIXED_QP_OFFSET_COUNT] = { 76, 60, 30, 15, 8 };
- const double q_val = av1_convert_qindex_to_q(cq_level, bit_depth);
+ static const int percents[FIXED_QP_OFFSET_COUNT] = { 76, 60, 30, 15, 8, 4 };
+ const double q_val = av1_convert_qindex_to_q(qp, bit_depth);
return q_val * percents[level] / 100;
}
@@ -916,6 +1062,7 @@ static aom_codec_err_t set_encoder_config(AV1EncoderConfig *oxcf,
oxcf->cost_upd_freq.coeff = (COST_UPDATE_TYPE)extra_cfg->coeff_cost_upd_freq;
oxcf->cost_upd_freq.mode = (COST_UPDATE_TYPE)extra_cfg->mode_cost_upd_freq;
oxcf->cost_upd_freq.mv = (COST_UPDATE_TYPE)extra_cfg->mv_cost_upd_freq;
+ oxcf->cost_upd_freq.dv = (COST_UPDATE_TYPE)extra_cfg->dv_cost_upd_freq;
// Set frame resize mode configuration.
resize_cfg->resize_mode = (RESIZE_MODE)cfg->rc_resize_mode;
@@ -1044,7 +1191,7 @@ static aom_codec_err_t set_encoder_config(AV1EncoderConfig *oxcf,
oxcf->motion_mode_cfg.enable_obmc = extra_cfg->enable_obmc;
oxcf->motion_mode_cfg.enable_warped_motion = extra_cfg->enable_warped_motion;
oxcf->motion_mode_cfg.allow_warped_motion =
- (cfg->g_usage == AOM_USAGE_REALTIME)
+ (cfg->g_usage == AOM_USAGE_REALTIME && oxcf->speed >= 7)
? false
: (extra_cfg->allow_warped_motion & extra_cfg->enable_warped_motion);
@@ -1141,6 +1288,8 @@ static aom_codec_err_t set_encoder_config(AV1EncoderConfig *oxcf,
sizeof(oxcf->target_seq_level_idx));
oxcf->tier_mask = extra_cfg->tier_mask;
+ oxcf->partition_info_path = extra_cfg->partition_info_path;
+
return AOM_CODEC_OK;
}
@@ -1179,10 +1328,20 @@ static aom_codec_err_t encoder_set_config(aom_codec_alg_priv_t *ctx,
ctx->cfg = *cfg;
set_encoder_config(&ctx->oxcf, &ctx->cfg, &ctx->extra_cfg);
// On profile change, request a key frame
- force_key |= ctx->ppi->cpi->common.seq_params.profile != ctx->oxcf.profile;
- av1_change_config(ctx->ppi->cpi, &ctx->oxcf);
+ force_key |= ctx->ppi->seq_params.profile != ctx->oxcf.profile;
+ bool is_sb_size_changed = false;
+ av1_change_config_seq(ctx->ppi, &ctx->oxcf, &is_sb_size_changed);
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ int i;
+ for (i = 0; i < ctx->ppi->num_fp_contexts; i++) {
+ av1_change_config(ctx->ppi->parallel_cpi[i], &ctx->oxcf,
+ is_sb_size_changed);
+ }
+#else
+ av1_change_config(ctx->ppi->cpi, &ctx->oxcf, is_sb_size_changed);
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
if (ctx->ppi->cpi_lap != NULL) {
- av1_change_config(ctx->ppi->cpi_lap, &ctx->oxcf);
+ av1_change_config(ctx->ppi->cpi_lap, &ctx->oxcf, is_sb_size_changed);
}
}
@@ -1192,7 +1351,7 @@ static aom_codec_err_t encoder_set_config(aom_codec_alg_priv_t *ctx,
}
static aom_fixed_buf_t *encoder_get_global_headers(aom_codec_alg_priv_t *ctx) {
- return av1_get_global_headers(ctx->ppi->cpi);
+ return av1_get_global_headers(ctx->ppi);
}
static aom_codec_err_t ctrl_get_quantizer(aom_codec_alg_priv_t *ctx,
@@ -1215,7 +1374,7 @@ static aom_codec_err_t ctrl_get_baseline_gf_interval(aom_codec_alg_priv_t *ctx,
va_list args) {
int *const arg = va_arg(args, int *);
if (arg == NULL) return AOM_CODEC_INVALID_PARAM;
- *arg = ctx->ppi->cpi->rc.baseline_gf_interval;
+ *arg = ctx->ppi->p_rc.baseline_gf_interval;
return AOM_CODEC_OK;
}
@@ -1225,9 +1384,19 @@ static aom_codec_err_t update_extra_cfg(aom_codec_alg_priv_t *ctx,
if (res == AOM_CODEC_OK) {
ctx->extra_cfg = *extra_cfg;
set_encoder_config(&ctx->oxcf, &ctx->cfg, &ctx->extra_cfg);
- av1_change_config(ctx->ppi->cpi, &ctx->oxcf);
+ bool is_sb_size_changed = false;
+ av1_change_config_seq(ctx->ppi, &ctx->oxcf, &is_sb_size_changed);
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ int i;
+ for (i = 0; i < ctx->ppi->num_fp_contexts; i++) {
+ av1_change_config(ctx->ppi->parallel_cpi[i], &ctx->oxcf,
+ is_sb_size_changed);
+ }
+#else
+ av1_change_config(ctx->ppi->cpi, &ctx->oxcf, is_sb_size_changed);
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
if (ctx->ppi->cpi_lap != NULL) {
- av1_change_config(ctx->ppi->cpi_lap, &ctx->oxcf);
+ av1_change_config(ctx->ppi->cpi_lap, &ctx->oxcf, is_sb_size_changed);
}
}
return res;
@@ -1299,7 +1468,13 @@ static aom_codec_err_t ctrl_set_tile_rows(aom_codec_alg_priv_t *ctx,
static aom_codec_err_t ctrl_set_enable_tpl_model(aom_codec_alg_priv_t *ctx,
va_list args) {
struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.enable_tpl_model = CAST(AV1E_SET_ENABLE_TPL_MODEL, args);
+ const unsigned int tpl_model_arg = CAST(AV1E_SET_ENABLE_TPL_MODEL, args);
+#if CONFIG_REALTIME_ONLY
+ if (tpl_model_arg) {
+ ERROR("TPL model can't be turned on in realtime only build.");
+ }
+#endif
+ extra_cfg.enable_tpl_model = tpl_model_arg;
return update_extra_cfg(ctx, &extra_cfg);
}
@@ -1379,7 +1554,13 @@ static aom_codec_err_t ctrl_set_enable_cdef(aom_codec_alg_priv_t *ctx,
static aom_codec_err_t ctrl_set_enable_restoration(aom_codec_alg_priv_t *ctx,
va_list args) {
struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.enable_restoration = CAST(AV1E_SET_ENABLE_RESTORATION, args);
+ const unsigned int restoration_arg = CAST(AV1E_SET_ENABLE_RESTORATION, args);
+#if CONFIG_REALTIME_ONLY
+ if (restoration_arg) {
+ ERROR("Restoration can't be turned on in realtime only build.");
+ }
+#endif
+ extra_cfg.enable_restoration = restoration_arg;
return update_extra_cfg(ctx, &extra_cfg);
}
@@ -1393,7 +1574,13 @@ static aom_codec_err_t ctrl_set_force_video_mode(aom_codec_alg_priv_t *ctx,
static aom_codec_err_t ctrl_set_enable_obmc(aom_codec_alg_priv_t *ctx,
va_list args) {
struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.enable_obmc = CAST(AV1E_SET_ENABLE_OBMC, args);
+ const unsigned int obmc_arg = CAST(AV1E_SET_ENABLE_OBMC, args);
+#if CONFIG_REALTIME_ONLY
+ if (obmc_arg) {
+ ERROR("OBMC can't be enabled in realtime only build.");
+ }
+#endif
+ extra_cfg.enable_obmc = obmc_arg;
return update_extra_cfg(ctx, &extra_cfg);
}
@@ -1637,14 +1824,26 @@ static aom_codec_err_t ctrl_set_enable_interintra_wedge(
static aom_codec_err_t ctrl_set_enable_global_motion(aom_codec_alg_priv_t *ctx,
va_list args) {
struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.enable_global_motion = CAST(AV1E_SET_ENABLE_GLOBAL_MOTION, args);
+ const int global_motion_arg = CAST(AV1E_SET_ENABLE_GLOBAL_MOTION, args);
+#if CONFIG_REALTIME_ONLY
+ if (global_motion_arg) {
+ ERROR("Global motion can't be enabled in realtime only build.");
+ }
+#endif
+ extra_cfg.enable_global_motion = global_motion_arg;
return update_extra_cfg(ctx, &extra_cfg);
}
static aom_codec_err_t ctrl_set_enable_warped_motion(aom_codec_alg_priv_t *ctx,
va_list args) {
struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.enable_warped_motion = CAST(AV1E_SET_ENABLE_WARPED_MOTION, args);
+ const int warped_motion_arg = CAST(AV1E_SET_ENABLE_WARPED_MOTION, args);
+#if CONFIG_REALTIME_ONLY
+ if (warped_motion_arg) {
+ ERROR("Warped motion can't be enabled in realtime only build.");
+ }
+#endif
+ extra_cfg.enable_warped_motion = warped_motion_arg;
return update_extra_cfg(ctx, &extra_cfg);
}
@@ -1825,6 +2024,13 @@ static aom_codec_err_t ctrl_set_mv_cost_upd_freq(aom_codec_alg_priv_t *ctx,
return update_extra_cfg(ctx, &extra_cfg);
}
+static aom_codec_err_t ctrl_set_dv_cost_upd_freq(aom_codec_alg_priv_t *ctx,
+ va_list args) {
+ struct av1_extracfg extra_cfg = ctx->extra_cfg;
+ extra_cfg.dv_cost_upd_freq = CAST(AV1E_SET_DV_COST_UPD_FREQ, args);
+ return update_extra_cfg(ctx, &extra_cfg);
+}
+
static aom_codec_err_t ctrl_set_vmaf_model_path(aom_codec_alg_priv_t *ctx,
va_list args) {
struct av1_extracfg extra_cfg = ctx->extra_cfg;
@@ -1832,6 +2038,13 @@ static aom_codec_err_t ctrl_set_vmaf_model_path(aom_codec_alg_priv_t *ctx,
return update_extra_cfg(ctx, &extra_cfg);
}
+static aom_codec_err_t ctrl_set_partition_info_path(aom_codec_alg_priv_t *ctx,
+ va_list args) {
+ struct av1_extracfg extra_cfg = ctx->extra_cfg;
+ extra_cfg.partition_info_path = CAST(AV1E_SET_PARTITION_INFO_PATH, args);
+ return update_extra_cfg(ctx, &extra_cfg);
+}
+
static aom_codec_err_t ctrl_set_film_grain_test_vector(
aom_codec_alg_priv_t *ctx, va_list args) {
struct av1_extracfg extra_cfg = ctx->extra_cfg;
@@ -1890,7 +2103,13 @@ static aom_codec_err_t ctrl_set_enable_dnl_denoising(aom_codec_alg_priv_t *ctx,
static aom_codec_err_t ctrl_set_deltaq_mode(aom_codec_alg_priv_t *ctx,
va_list args) {
struct av1_extracfg extra_cfg = ctx->extra_cfg;
- extra_cfg.deltaq_mode = CAST(AV1E_SET_DELTAQ_MODE, args);
+ const DELTAQ_MODE deltaq_arg = CAST(AV1E_SET_DELTAQ_MODE, args);
+#if CONFIG_REALTIME_ONLY
+ if (deltaq_arg > NO_DELTA_Q) {
+ ERROR("Delta Q mode can't be enabled in realtime only build.");
+ }
+#endif
+ extra_cfg.deltaq_mode = deltaq_arg;
return update_extra_cfg(ctx, &extra_cfg);
}
@@ -1986,6 +2205,18 @@ static aom_codec_err_t ctrl_enable_sb_multipass_unit_test(
return update_extra_cfg(ctx, &extra_cfg);
}
+static aom_codec_err_t ctrl_set_external_partition(aom_codec_alg_priv_t *ctx,
+ va_list args) {
+ AV1_COMP *const cpi = ctx->ppi->cpi;
+ aom_ext_part_funcs_t funcs = *CAST(AV1E_SET_EXTERNAL_PARTITION, args);
+ aom_ext_part_config_t config;
+ // TODO(chengchen): verify the sb_size has been set at this point.
+ config.superblock_size = cpi->common.seq_params->sb_size;
+ const aom_codec_err_t status =
+ av1_ext_part_create(funcs, config, &cpi->ext_part_controller);
+ return status;
+}
+
#if !CONFIG_REALTIME_ONLY
static aom_codec_err_t create_stats_buffer(FIRSTPASS_STATS **frame_stats_buffer,
STATS_BUFFER_CTX *stats_buf_context,
@@ -2014,27 +2245,22 @@ static aom_codec_err_t create_stats_buffer(FIRSTPASS_STATS **frame_stats_buffer,
static aom_codec_err_t create_context_and_bufferpool(
AV1_PRIMARY *ppi, AV1_COMP **p_cpi, BufferPool **p_buffer_pool,
- AV1EncoderConfig *oxcf, struct aom_codec_pkt_list *pkt_list_head,
- FIRSTPASS_STATS *frame_stats_buf, COMPRESSOR_STAGE stage,
- int num_lap_buffers, int lap_lag_in_frames,
- STATS_BUFFER_CTX *stats_buf_context) {
+ AV1EncoderConfig *oxcf, COMPRESSOR_STAGE stage, int lap_lag_in_frames) {
aom_codec_err_t res = AOM_CODEC_OK;
- *p_buffer_pool = (BufferPool *)aom_calloc(1, sizeof(BufferPool));
- if (*p_buffer_pool == NULL) return AOM_CODEC_MEM_ERROR;
+ if (*p_buffer_pool == NULL) {
+ *p_buffer_pool = (BufferPool *)aom_calloc(1, sizeof(BufferPool));
+ if (*p_buffer_pool == NULL) return AOM_CODEC_MEM_ERROR;
#if CONFIG_MULTITHREAD
- if (pthread_mutex_init(&((*p_buffer_pool)->pool_mutex), NULL)) {
- return AOM_CODEC_MEM_ERROR;
- }
+ if (pthread_mutex_init(&((*p_buffer_pool)->pool_mutex), NULL)) {
+ return AOM_CODEC_MEM_ERROR;
+ }
#endif
- *p_cpi = av1_create_compressor(ppi, oxcf, *p_buffer_pool, frame_stats_buf,
- stage, num_lap_buffers, lap_lag_in_frames,
- stats_buf_context);
- if (*p_cpi == NULL)
- res = AOM_CODEC_MEM_ERROR;
- else
- (*p_cpi)->output_pkt_list = pkt_list_head;
+ }
+ *p_cpi = av1_create_compressor(ppi, oxcf, *p_buffer_pool, stage,
+ lap_lag_in_frames);
+ if (*p_cpi == NULL) res = AOM_CODEC_MEM_ERROR;
return res;
}
@@ -2084,27 +2310,48 @@ static aom_codec_err_t encoder_init(aom_codec_ctx_t *ctx) {
priv->oxcf.use_highbitdepth =
(ctx->init_flags & AOM_CODEC_USE_HIGHBITDEPTH) ? 1 : 0;
- priv->ppi = av1_create_primary_compressor();
+ priv->ppi = av1_create_primary_compressor(&priv->pkt_list.head,
+ *num_lap_buffers, &priv->oxcf);
if (!priv->ppi) return AOM_CODEC_MEM_ERROR;
#if !CONFIG_REALTIME_ONLY
res = create_stats_buffer(&priv->frame_stats_buffer,
&priv->stats_buf_context, *num_lap_buffers);
if (res != AOM_CODEC_OK) return AOM_CODEC_MEM_ERROR;
+
+ assert(MAX_LAP_BUFFERS >= MAX_LAG_BUFFERS);
+ int size = get_stats_buf_size(*num_lap_buffers, MAX_LAG_BUFFERS);
+ for (int i = 0; i < size; i++)
+ priv->ppi->twopass.frame_stats_arr[i] = &priv->frame_stats_buffer[i];
+
+ priv->ppi->twopass.stats_buf_ctx = &priv->stats_buf_context;
+ priv->ppi->twopass.stats_in =
+ priv->ppi->twopass.stats_buf_ctx->stats_in_start;
#endif
- res = create_context_and_bufferpool(
- priv->ppi, &priv->ppi->cpi, &priv->buffer_pool, &priv->oxcf,
- &priv->pkt_list.head, priv->frame_stats_buffer, ENCODE_STAGE,
- *num_lap_buffers, -1, &priv->stats_buf_context);
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ assert(priv->ppi->num_fp_contexts >= 1);
+ int i;
+ for (i = 0; i < priv->ppi->num_fp_contexts; i++) {
+ res = create_context_and_bufferpool(
+ priv->ppi, &priv->ppi->parallel_cpi[i], &priv->buffer_pool,
+ &priv->oxcf, ENCODE_STAGE, -1);
+ if (res != AOM_CODEC_OK) {
+ return res;
+ }
+ }
+ priv->ppi->cpi = priv->ppi->parallel_cpi[0];
+#else
+ res = create_context_and_bufferpool(priv->ppi, &priv->ppi->cpi,
+ &priv->buffer_pool, &priv->oxcf,
+ ENCODE_STAGE, -1);
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
// Create another compressor if look ahead is enabled
if (res == AOM_CODEC_OK && *num_lap_buffers) {
res = create_context_and_bufferpool(
priv->ppi, &priv->ppi->cpi_lap, &priv->buffer_pool_lap, &priv->oxcf,
- NULL, priv->frame_stats_buffer, LAP_STAGE, *num_lap_buffers,
- clamp(lap_lag_in_frames, 0, MAX_LAG_BUFFERS),
- &priv->stats_buf_context);
+ LAP_STAGE, clamp(lap_lag_in_frames, 0, MAX_LAG_BUFFERS));
}
}
}
@@ -2113,12 +2360,16 @@ static aom_codec_err_t encoder_init(aom_codec_ctx_t *ctx) {
}
static void destroy_context_and_bufferpool(AV1_COMP *cpi,
- BufferPool *buffer_pool) {
+ BufferPool **p_buffer_pool) {
av1_remove_compressor(cpi);
+ if (*p_buffer_pool) {
+ av1_free_ref_frame_buffers(*p_buffer_pool);
#if CONFIG_MULTITHREAD
- if (buffer_pool) pthread_mutex_destroy(&buffer_pool->pool_mutex);
+ pthread_mutex_destroy(&(*p_buffer_pool)->pool_mutex);
#endif
- aom_free(buffer_pool);
+ aom_free(*p_buffer_pool);
+ *p_buffer_pool = NULL;
+ }
}
static void destroy_stats_buffer(STATS_BUFFER_CTX *stats_buf_context,
@@ -2133,9 +2384,30 @@ static aom_codec_err_t encoder_destroy(aom_codec_alg_priv_t *ctx) {
if (ctx->ppi) {
AV1_PRIMARY *ppi = ctx->ppi;
- destroy_context_and_bufferpool(ppi->cpi, ctx->buffer_pool);
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ for (int i = 0; i < ppi->num_fp_contexts - 1; i++) {
+ if (ppi->parallel_frames_data[i].cx_data_frame) {
+ free(ppi->parallel_frames_data[i].cx_data_frame);
+ }
+ }
+#endif
+#if CONFIG_ENTROPY_STATS
+ print_entropy_stats(ppi);
+#endif
+#if CONFIG_INTERNAL_STATS
+ print_internal_stats(ppi);
+#endif
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ int i;
+ for (i = 0; i < ppi->num_fp_contexts; i++) {
+ destroy_context_and_bufferpool(ppi->parallel_cpi[i], &ctx->buffer_pool);
+ }
+ ppi->cpi = NULL;
+#else
+ destroy_context_and_bufferpool(ppi->cpi, &ctx->buffer_pool);
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
if (ppi->cpi_lap) {
- destroy_context_and_bufferpool(ppi->cpi_lap, ctx->buffer_pool_lap);
+ destroy_context_and_bufferpool(ppi->cpi_lap, &ctx->buffer_pool_lap);
}
av1_remove_primary_compressor(ppi);
}
@@ -2151,7 +2423,7 @@ static aom_codec_frame_flags_t get_frame_pkt_flags(const AV1_COMP *cpi,
aom_codec_frame_flags_t flags = lib_flags << 16;
if (lib_flags & FRAMEFLAGS_KEY ||
- (cpi->use_svc &&
+ (cpi->ppi->use_svc &&
svc->layer_context[svc->spatial_layer_id * svc->number_temporal_layers +
svc->temporal_layer_id]
.is_key_frame))
@@ -2182,7 +2454,7 @@ static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
AV1_COMP *cpi_lap = ppi->cpi_lap;
if (cpi == NULL) return AOM_CODEC_INVALID_PARAM;
- if (cpi->lap_enabled && cpi_lap == NULL && cpi->oxcf.pass == 0)
+ if (cpi->ppi->lap_enabled && cpi_lap == NULL && cpi->oxcf.pass == 0)
return AOM_CODEC_INVALID_PARAM;
if (img != NULL) {
@@ -2216,6 +2488,22 @@ static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
return AOM_CODEC_MEM_ERROR;
}
}
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ for (int i = 0; i < cpi->ppi->num_fp_contexts - 1; i++) {
+ if (cpi->ppi->parallel_frames_data[i].cx_data_frame == NULL) {
+ cpi->ppi->parallel_frames_data[i].cx_data_sz = uncompressed_frame_sz;
+ cpi->ppi->parallel_frames_data[i].frame_display_order_hint = -1;
+ cpi->ppi->parallel_frames_data[i].frame_size = 0;
+ cpi->ppi->parallel_frames_data[i].cx_data_frame =
+ (unsigned char *)malloc(
+ cpi->ppi->parallel_frames_data[i].cx_data_sz);
+ if (cpi->ppi->parallel_frames_data[i].cx_data_frame == NULL) {
+ cpi->ppi->parallel_frames_data[i].cx_data_sz = 0;
+ return AOM_CODEC_MEM_ERROR;
+ }
+ }
+ }
+#endif
}
}
@@ -2226,22 +2514,16 @@ static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
// The jmp_buf is valid only for the duration of the function that calls
// setjmp(). Therefore, this function must reset the 'setjmp' field to 0
// before it returns.
- if (setjmp(cpi->common.error.jmp)) {
- cpi->common.error.setjmp = 0;
- res = update_error_state(ctx, &cpi->common.error);
+ if (setjmp(ppi->error.jmp)) {
+ ppi->error.setjmp = 0;
+ res = update_error_state(ctx, &ppi->error);
aom_clear_system_state();
return res;
}
- cpi->common.error.setjmp = 1;
- if (cpi_lap != NULL) {
- if (setjmp(cpi_lap->common.error.jmp)) {
- cpi_lap->common.error.setjmp = 0;
- res = update_error_state(ctx, &cpi_lap->common.error);
- aom_clear_system_state();
- return res;
- }
- cpi_lap->common.error.setjmp = 1;
- }
+ ppi->error.setjmp = 1;
+
+ if (cpi->ppi->use_svc && cpi->svc.use_flexible_mode == 0 && flags == 0)
+ av1_set_svc_fixed_mode(cpi);
// Note(yunqing): While applying encoding flags, always start from enabling
// all, and then modifying according to the flags. Previous frame's flags are
@@ -2251,9 +2533,12 @@ static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
av1_apply_encoding_flags(cpi_lap, flags);
}
-#if CONFIG_USE_VMAF_RC
- aom_init_vmaf_model_rc(&cpi->vmaf_info.vmaf_model,
- cpi->oxcf.tune_cfg.vmaf_model_path);
+#if CONFIG_TUNE_VMAF
+ if (ctx->extra_cfg.tuning >= AOM_TUNE_VMAF_WITH_PREPROCESSING &&
+ ctx->extra_cfg.tuning <= AOM_TUNE_VMAF_NEG_MAX_GAIN) {
+ aom_init_vmaf_model(&cpi->vmaf_info.vmaf_model,
+ cpi->oxcf.tune_cfg.vmaf_model_path);
+ }
#endif
// Handle fixed keyframe intervals
@@ -2270,7 +2555,8 @@ static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
if (res == AOM_CODEC_OK) {
// Set up internal flags
- if (ctx->base.init_flags & AOM_CODEC_USE_PSNR) cpi->b_calculate_psnr = 1;
+ if (ctx->base.init_flags & AOM_CODEC_USE_PSNR)
+ cpi->ppi->b_calculate_psnr = 1;
if (img != NULL) {
if (!ctx->pts_offset_initialized) {
@@ -2306,11 +2592,18 @@ static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
cpi->oxcf.tool_cfg.enable_global_motion);
}
if (!ppi->lookahead)
- aom_internal_error(&cpi->common.error, AOM_CODEC_MEM_ERROR,
+ aom_internal_error(&ppi->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate lag buffers");
-
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ int i;
+ for (i = 0; i < ppi->num_fp_contexts; i++) {
+ av1_check_initial_width(ppi->parallel_cpi[i], use_highbitdepth,
+ subsampling_x, subsampling_y);
+ }
+#else
av1_check_initial_width(cpi, use_highbitdepth, subsampling_x,
subsampling_y);
+#endif
if (cpi_lap != NULL) {
av1_check_initial_width(cpi_lap, use_highbitdepth, subsampling_x,
subsampling_y);
@@ -2320,7 +2613,7 @@ static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
// key frame flag when we actually encode this frame.
if (av1_receive_raw_frame(cpi, flags | ctx->next_frame_flags, &sd,
src_time_stamp, src_end_time_stamp)) {
- res = update_error_state(ctx, &cpi->common.error);
+ res = update_error_state(ctx, &ppi->error);
}
ctx->next_frame_flags = 0;
}
@@ -2337,7 +2630,7 @@ static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
* the buffer size anyway.
*/
if (cx_data_sz < ctx->cx_data_sz / 2) {
- aom_internal_error(&cpi->common.error, AOM_CODEC_ERROR,
+ aom_internal_error(&ppi->error, AOM_CODEC_ERROR,
"Compressed data buffer too small");
}
}
@@ -2358,6 +2651,12 @@ static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
}
if ((num_workers > 1) && (cpi->mt_info.num_workers == 0)) {
av1_create_workers(cpi, num_workers);
+#if CONFIG_MULTITHREAD
+ av1_init_mt_sync(cpi, cpi->oxcf.pass == 1);
+ if (cpi_lap != NULL) {
+ av1_init_mt_sync(cpi_lap, 1);
+ }
+#endif // CONFIG_MULTITHREAD
if (cpi->oxcf.pass != 1) {
av1_create_second_pass_workers(cpi, num_workers);
}
@@ -2373,13 +2672,12 @@ static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
}
cpi_lap->mt_info.num_workers = cpi->mt_info.num_workers;
const int status = av1_get_compressed_data(
- cpi_lap, &lib_flags, &frame_size, NULL, &dst_time_stamp_la,
- &dst_end_time_stamp_la, !img, timestamp_ratio);
+ cpi_lap, &lib_flags, &frame_size, cx_data_sz, NULL,
+ &dst_time_stamp_la, &dst_end_time_stamp_la, !img, timestamp_ratio);
if (status != -1) {
if (status != AOM_CODEC_OK) {
- aom_internal_error(&cpi_lap->common.error, AOM_CODEC_ERROR, NULL);
+ aom_internal_error(&ppi->error, AOM_CODEC_ERROR, NULL);
}
- cpi_lap->seq_params_locked = 1;
}
lib_flags = 0;
frame_size = 0;
@@ -2390,15 +2688,39 @@ static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
int64_t dst_time_stamp;
int64_t dst_end_time_stamp;
while (cx_data_sz >= ctx->cx_data_sz / 2 && !is_frame_visible) {
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ cpi->do_frame_data_update = true;
+ if (ppi->num_fp_contexts > 1 && ppi->gf_group.size > 1) {
+ if (cpi->gf_frame_index < ppi->gf_group.size) {
+ calc_frame_data_update_flag(&ppi->gf_group, cpi->gf_frame_index,
+ &cpi->do_frame_data_update);
+ }
+ }
+#endif
const int status = av1_get_compressed_data(
- cpi, &lib_flags, &frame_size, cx_data, &dst_time_stamp,
+ cpi, &lib_flags, &frame_size, cx_data_sz, cx_data, &dst_time_stamp,
&dst_end_time_stamp, !img, timestamp_ratio);
if (status == -1) break;
if (status != AOM_CODEC_OK) {
- aom_internal_error(&cpi->common.error, AOM_CODEC_ERROR, NULL);
+ aom_internal_error(&ppi->error, AOM_CODEC_ERROR, NULL);
}
- cpi->seq_params_locked = 1;
+#if CONFIG_ENTROPY_STATS
+ if (ppi->cpi->oxcf.pass != 1 && !cpi->common.show_existing_frame)
+ av1_accumulate_frame_counts(&ppi->aggregate_fc, &cpi->counts);
+#endif
+#if CONFIG_INTERNAL_STATS
+ if (ppi->cpi->oxcf.pass != 1) {
+ ppi->total_time_compress_data += cpi->time_compress_data;
+ ppi->total_recode_hits += cpi->frame_recode_hits;
+ ppi->total_bytes += cpi->bytes;
+ for (int i = 0; i < MAX_MODES; i++) {
+ ppi->total_mode_chosen_counts[i] += cpi->mode_chosen_counts[i];
+ }
+ }
+#endif // CONFIG_INTERNAL_STATS
+
+ cpi->ppi->seq_params_locked = 1;
if (!frame_size) continue;
assert(cx_data != NULL && cx_data_sz != 0);
const int write_temporal_delimiter =
@@ -2413,12 +2735,13 @@ static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
const size_t move_offset = obu_header_size + length_field_size;
memmove(ctx->cx_data + move_offset, ctx->cx_data, frame_size);
obu_header_size = av1_write_obu_header(
- &cpi->level_params, OBU_TEMPORAL_DELIMITER, 0, ctx->cx_data);
+ &cpi->ppi->level_params, &cpi->frame_header_count,
+ OBU_TEMPORAL_DELIMITER, 0, ctx->cx_data);
// OBUs are preceded/succeeded by an unsigned leb128 coded integer.
if (av1_write_uleb_obu_size(obu_header_size, obu_payload_size,
ctx->cx_data) != AOM_CODEC_OK) {
- aom_internal_error(&cpi->common.error, AOM_CODEC_ERROR, NULL);
+ aom_internal_error(&ppi->error, AOM_CODEC_ERROR, NULL);
}
frame_size += obu_header_size + obu_payload_size + length_field_size;
@@ -2428,7 +2751,7 @@ static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
size_t curr_frame_size = frame_size;
if (av1_convert_sect5obus_to_annexb(cx_data, &curr_frame_size) !=
AOM_CODEC_OK) {
- aom_internal_error(&cpi->common.error, AOM_CODEC_ERROR, NULL);
+ aom_internal_error(&ppi->error, AOM_CODEC_ERROR, NULL);
}
frame_size = curr_frame_size;
@@ -2437,7 +2760,7 @@ static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
memmove(cx_data + length_field_size, cx_data, frame_size);
if (av1_write_uleb_obu_size(0, (uint32_t)frame_size, cx_data) !=
AOM_CODEC_OK) {
- aom_internal_error(&cpi->common.error, AOM_CODEC_ERROR, NULL);
+ aom_internal_error(&ppi->error, AOM_CODEC_ERROR, NULL);
}
frame_size += length_field_size;
}
@@ -2458,7 +2781,7 @@ static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
aom_codec_cx_pkt_t pkt;
// decrement frames_left counter
- cpi->frames_left = AOMMAX(0, cpi->frames_left - 1);
+ cpi->ppi->frames_left = AOMMAX(0, cpi->ppi->frames_left - 1);
if (ctx->oxcf.save_as_annexb) {
// B_PRIME (add TU size)
size_t tu_size = ctx->pending_cx_data_sz;
@@ -2466,7 +2789,7 @@ static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
memmove(ctx->cx_data + length_field_size, ctx->cx_data, tu_size);
if (av1_write_uleb_obu_size(0, (uint32_t)tu_size, ctx->cx_data) !=
AOM_CODEC_OK) {
- aom_internal_error(&cpi->common.error, AOM_CODEC_ERROR, NULL);
+ aom_internal_error(&ppi->error, AOM_CODEC_ERROR, NULL);
}
ctx->pending_cx_data_sz += length_field_size;
}
@@ -2496,7 +2819,7 @@ static aom_codec_err_t encoder_encode(aom_codec_alg_priv_t *ctx,
}
}
- cpi->common.error.setjmp = 0;
+ ppi->error.setjmp = 0;
return res;
}
@@ -2674,7 +2997,7 @@ static aom_codec_err_t ctrl_set_number_spatial_layers(aom_codec_alg_priv_t *ctx,
const int number_spatial_layers = va_arg(args, int);
if (number_spatial_layers > MAX_NUM_SPATIAL_LAYERS)
return AOM_CODEC_INVALID_PARAM;
- ctx->ppi->cpi->common.number_spatial_layers = number_spatial_layers;
+ ctx->ppi->number_spatial_layers = number_spatial_layers;
return AOM_CODEC_OK;
}
@@ -2690,19 +3013,20 @@ static aom_codec_err_t ctrl_set_layer_id(aom_codec_alg_priv_t *ctx,
static aom_codec_err_t ctrl_set_svc_params(aom_codec_alg_priv_t *ctx,
va_list args) {
- AV1_COMP *const cpi = ctx->ppi->cpi;
+ AV1_PRIMARY *const ppi = ctx->ppi;
+ AV1_COMP *const cpi = ppi->cpi;
AV1_COMMON *const cm = &cpi->common;
aom_svc_params_t *const params = va_arg(args, aom_svc_params_t *);
- cm->number_spatial_layers = params->number_spatial_layers;
- cm->number_temporal_layers = params->number_temporal_layers;
+ ppi->number_spatial_layers = params->number_spatial_layers;
+ ppi->number_temporal_layers = params->number_temporal_layers;
cpi->svc.number_spatial_layers = params->number_spatial_layers;
cpi->svc.number_temporal_layers = params->number_temporal_layers;
- if (cm->number_spatial_layers > 1 || cm->number_temporal_layers > 1) {
+ if (ppi->number_spatial_layers > 1 || ppi->number_temporal_layers > 1) {
unsigned int sl, tl;
- cpi->use_svc = 1;
- for (sl = 0; sl < cm->number_spatial_layers; ++sl) {
- for (tl = 0; tl < cm->number_temporal_layers; ++tl) {
- const int layer = LAYER_IDS_TO_IDX(sl, tl, cm->number_temporal_layers);
+ ctx->ppi->use_svc = 1;
+ for (sl = 0; sl < ppi->number_spatial_layers; ++sl) {
+ for (tl = 0; tl < ppi->number_temporal_layers; ++tl) {
+ const int layer = LAYER_IDS_TO_IDX(sl, tl, ppi->number_temporal_layers);
LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer];
lc->max_q = params->max_quantizers[layer];
lc->min_q = params->min_quantizers[layer];
@@ -2713,11 +3037,11 @@ static aom_codec_err_t ctrl_set_svc_params(aom_codec_alg_priv_t *ctx,
}
}
if (cm->current_frame.frame_number == 0) {
- if (!cpi->seq_params_locked) {
- SequenceHeader *const seq_params = &cm->seq_params;
+ if (!cpi->ppi->seq_params_locked) {
+ SequenceHeader *const seq_params = &ppi->seq_params;
seq_params->operating_points_cnt_minus_1 =
- cm->number_spatial_layers * cm->number_temporal_layers - 1;
- av1_init_seq_coding_tools(&cm->seq_params, cm, &cpi->oxcf, 1);
+ ppi->number_spatial_layers * ppi->number_temporal_layers - 1;
+ av1_init_seq_coding_tools(ppi, &cpi->oxcf, 1);
}
av1_init_layer_context(cpi);
}
@@ -2732,13 +3056,15 @@ static aom_codec_err_t ctrl_set_svc_ref_frame_config(aom_codec_alg_priv_t *ctx,
AV1_COMP *const cpi = ctx->ppi->cpi;
aom_svc_ref_frame_config_t *const data =
va_arg(args, aom_svc_ref_frame_config_t *);
- cpi->svc.external_ref_frame_config = 1;
+ cpi->svc.set_ref_frame_config = 1;
for (unsigned int i = 0; i < INTER_REFS_PER_FRAME; ++i) {
cpi->svc.reference[i] = data->reference[i];
cpi->svc.ref_idx[i] = data->ref_idx[i];
}
for (unsigned int i = 0; i < REF_FRAMES; ++i)
cpi->svc.refresh[i] = data->refresh[i];
+ cpi->svc.use_flexible_mode = 1;
+ cpi->svc.ksvc_fixed_mode = 0;
return AOM_CODEC_OK;
}
@@ -2831,18 +3157,17 @@ static aom_codec_err_t encoder_set_option(aom_codec_alg_priv_t *ctx,
// Used to mock the argv with just one string "--{name}={value}"
char *argv[2] = { NULL, "" };
size_t len = strlen(name) + strlen(value) + 4;
- char *err_string = ctx->ppi->cpi->common.error.detail;
+ char *err_string = ctx->ppi->error.detail;
#if __STDC_VERSION__ >= 201112L
// We use the keyword _Static_assert because clang-cl does not allow the
// convenience macro static_assert to be used in function scope. See
// https://bugs.llvm.org/show_bug.cgi?id=48904.
- _Static_assert(
- sizeof(ctx->ppi->cpi->common.error.detail) >= ARG_ERR_MSG_MAX_LEN,
- "The size of the err_msg buffer for arg_match_helper must be "
- "at least ARG_ERR_MSG_MAX_LEN");
+ _Static_assert(sizeof(ctx->ppi->error.detail) >= ARG_ERR_MSG_MAX_LEN,
+ "The size of the err_msg buffer for arg_match_helper must be "
+ "at least ARG_ERR_MSG_MAX_LEN");
#else
- assert(sizeof(ctx->ppi->cpi->common.error.detail) >= ARG_ERR_MSG_MAX_LEN);
+ assert(sizeof(ctx->ppi->error.detail) >= ARG_ERR_MSG_MAX_LEN);
#endif
argv[0] = aom_malloc(len * sizeof(argv[1][0]));
@@ -2909,8 +3234,11 @@ static aom_codec_err_t encoder_set_option(aom_codec_alg_priv_t *ctx,
extra_cfg.vmaf_model_path = value;
}
#endif
- else if (arg_match_helper(&arg, &g_av1_codec_arg_defs.cq_level, argv,
- err_string)) {
+ else if (arg_match_helper(&arg, &g_av1_codec_arg_defs.partition_info_path,
+ argv, err_string)) {
+ extra_cfg.partition_info_path = value;
+ } else if (arg_match_helper(&arg, &g_av1_codec_arg_defs.cq_level, argv,
+ err_string)) {
extra_cfg.cq_level = arg_parse_uint_helper(&arg, err_string);
} else if (arg_match_helper(&arg, &g_av1_codec_arg_defs.max_intra_rate_pct,
argv, err_string)) {
@@ -3161,6 +3489,9 @@ static aom_codec_err_t encoder_set_option(aom_codec_alg_priv_t *ctx,
} else if (arg_match_helper(&arg, &g_av1_codec_arg_defs.mv_cost_upd_freq,
argv, err_string)) {
extra_cfg.mv_cost_upd_freq = arg_parse_uint_helper(&arg, err_string);
+ } else if (arg_match_helper(&arg, &g_av1_codec_arg_defs.dv_cost_upd_freq,
+ argv, err_string)) {
+ extra_cfg.dv_cost_upd_freq = arg_parse_uint_helper(&arg, err_string);
}
#if CONFIG_DENOISE
else if (arg_match_helper(&arg, &g_av1_codec_arg_defs.denoise_noise_level,
@@ -3215,9 +3546,8 @@ static aom_codec_err_t encoder_set_option(aom_codec_alg_priv_t *ctx,
static aom_codec_err_t ctrl_get_seq_level_idx(aom_codec_alg_priv_t *ctx,
va_list args) {
int *const arg = va_arg(args, int *);
- const AV1_COMP *const cpi = ctx->ppi->cpi;
if (arg == NULL) return AOM_CODEC_INVALID_PARAM;
- return av1_get_seq_level_idx(&cpi->common.seq_params, &cpi->level_params,
+ return av1_get_seq_level_idx(&ctx->ppi->seq_params, &ctx->ppi->level_params,
arg);
}
@@ -3332,6 +3662,7 @@ static aom_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
{ AV1E_SET_SUPERBLOCK_SIZE, ctrl_set_superblock_size },
{ AV1E_SET_SINGLE_TILE_DECODING, ctrl_set_single_tile_decoding },
{ AV1E_SET_VMAF_MODEL_PATH, ctrl_set_vmaf_model_path },
+ { AV1E_SET_PARTITION_INFO_PATH, ctrl_set_partition_info_path },
{ AV1E_SET_FILM_GRAIN_TEST_VECTOR, ctrl_set_film_grain_test_vector },
{ AV1E_SET_FILM_GRAIN_TABLE, ctrl_set_film_grain_table },
{ AV1E_SET_DENOISE_NOISE_LEVEL, ctrl_set_denoise_noise_level },
@@ -3347,6 +3678,8 @@ static aom_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
{ AV1E_SET_SVC_REF_FRAME_CONFIG, ctrl_set_svc_ref_frame_config },
{ AV1E_SET_VBR_CORPUS_COMPLEXITY_LAP, ctrl_set_vbr_corpus_complexity_lap },
{ AV1E_ENABLE_SB_MULTIPASS_UNIT_TEST, ctrl_enable_sb_multipass_unit_test },
+ { AV1E_SET_DV_COST_UPD_FREQ, ctrl_set_dv_cost_upd_freq },
+ { AV1E_SET_EXTERNAL_PARTITION, ctrl_set_external_partition },
// Getters
{ AOME_GET_LAST_QUANTIZER, ctrl_get_quantizer },
@@ -3364,6 +3697,7 @@ static aom_codec_ctrl_fn_map_t encoder_ctrl_maps[] = {
};
static const aom_codec_enc_cfg_t encoder_usage_cfg[] = {
+#if !CONFIG_REALTIME_ONLY
{
// NOLINT
AOM_USAGE_GOOD_QUALITY, // g_usage - non-realtime usage
@@ -3415,25 +3749,26 @@ static const aom_codec_enc_cfg_t encoder_usage_cfg[] = {
2000, // rc_two_pass_vbrmax_section
// keyframing settings (kf)
- 0, // fwd_kf_enabled
- AOM_KF_AUTO, // kf_mode
- 0, // kf_min_dist
- 9999, // kf_max_dist
- 0, // sframe_dist
- 1, // sframe_mode
- 0, // large_scale_tile
- 0, // monochrome
- 0, // full_still_picture_hdr
- 0, // save_as_annexb
- 0, // tile_width_count
- 0, // tile_height_count
- { 0 }, // tile_widths
- { 0 }, // tile_heights
- 0, // use_fixed_qp_offsets
- { -1, -1, -1, -1, -1 }, // fixed_qp_offsets
+ 0, // fwd_kf_enabled
+ AOM_KF_AUTO, // kf_mode
+ 0, // kf_min_dist
+ 9999, // kf_max_dist
+ 0, // sframe_dist
+ 1, // sframe_mode
+ 0, // large_scale_tile
+ 0, // monochrome
+ 0, // full_still_picture_hdr
+ 0, // save_as_annexb
+ 0, // tile_width_count
+ 0, // tile_height_count
+ { 0 }, // tile_widths
+ { 0 }, // tile_heights
+ 0, // use_fixed_qp_offsets
+ { -1, -1, -1, -1, -1, -1 }, // fixed_qp_offsets
{ 0, 128, 128, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // cfg
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // cfg
},
+#endif // !CONFIG_REALTIME_ONLY
{
// NOLINT
AOM_USAGE_REALTIME, // g_usage - real-time usage
@@ -3485,25 +3820,26 @@ static const aom_codec_enc_cfg_t encoder_usage_cfg[] = {
2000, // rc_two_pass_vbrmax_section
// keyframing settings (kf)
- 0, // fwd_kf_enabled
- AOM_KF_AUTO, // kf_mode
- 0, // kf_min_dist
- 9999, // kf_max_dist
- 0, // sframe_dist
- 1, // sframe_mode
- 0, // large_scale_tile
- 0, // monochrome
- 0, // full_still_picture_hdr
- 0, // save_as_annexb
- 0, // tile_width_count
- 0, // tile_height_count
- { 0 }, // tile_widths
- { 0 }, // tile_heights
- 0, // use_fixed_qp_offsets
- { -1, -1, -1, -1, -1 }, // fixed_qp_offsets
+ 0, // fwd_kf_enabled
+ AOM_KF_AUTO, // kf_mode
+ 0, // kf_min_dist
+ 9999, // kf_max_dist
+ 0, // sframe_dist
+ 1, // sframe_mode
+ 0, // large_scale_tile
+ 0, // monochrome
+ 0, // full_still_picture_hdr
+ 0, // save_as_annexb
+ 0, // tile_width_count
+ 0, // tile_height_count
+ { 0 }, // tile_widths
+ { 0 }, // tile_heights
+ 0, // use_fixed_qp_offsets
+ { -1, -1, -1, -1, -1, -1 }, // fixed_qp_offsets
{ 0, 128, 128, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // cfg
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // cfg
},
+#if !CONFIG_REALTIME_ONLY
{
// NOLINT
AOM_USAGE_ALL_INTRA, // g_usage - all intra usage
@@ -3572,8 +3908,9 @@ static const aom_codec_enc_cfg_t encoder_usage_cfg[] = {
0, // use_fixed_qp_offsets
{ -1, -1, -1, -1, -1 }, // fixed_qp_offsets
{ 0, 128, 128, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // cfg
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // cfg
},
+#endif // !CONFIG_REALTIME_ONLY
};
// This data structure and function are exported in aom/aomcx.h
@@ -3598,13 +3935,13 @@ aom_codec_iface_t aom_codec_av1_cx_algo = {
},
{
// NOLINT
- 3, // 3 cfg
- encoder_usage_cfg, // aom_codec_enc_cfg_t
- encoder_encode, // aom_codec_encode_fn_t
- encoder_get_cxdata, // aom_codec_get_cx_data_fn_t
- encoder_set_config, // aom_codec_enc_config_set_fn_t
- encoder_get_global_headers, // aom_codec_get_global_headers_fn_t
- encoder_get_preview // aom_codec_get_preview_frame_fn_t
+ NELEMENTS(encoder_usage_cfg), // cfg_count
+ encoder_usage_cfg, // aom_codec_enc_cfg_t
+ encoder_encode, // aom_codec_encode_fn_t
+ encoder_get_cxdata, // aom_codec_get_cx_data_fn_t
+ encoder_set_config, // aom_codec_enc_config_set_fn_t
+ encoder_get_global_headers, // aom_codec_get_global_headers_fn_t
+ encoder_get_preview // aom_codec_get_preview_frame_fn_t
},
encoder_set_option // aom_codec_set_option_fn_t
};
diff --git a/third_party/libaom/source/libaom/av1/av1_dx_iface.c b/third_party/libaom/source/libaom/av1/av1_dx_iface.c
index 1ee8a576d3..02968abd16 100644
--- a/third_party/libaom/source/libaom/av1/av1_dx_iface.c
+++ b/third_party/libaom/source/libaom/av1/av1_dx_iface.c
@@ -115,14 +115,18 @@ static aom_codec_err_t decoder_destroy(aom_codec_alg_priv_t *ctx) {
if (ctx->frame_worker != NULL) {
AVxWorker *const worker = ctx->frame_worker;
FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1;
+ AV1Decoder *const pbi = frame_worker_data->pbi;
aom_get_worker_interface()->end(worker);
- aom_free(frame_worker_data->pbi->common.tpl_mvs);
- frame_worker_data->pbi->common.tpl_mvs = NULL;
+ aom_free(pbi->common.tpl_mvs);
+ pbi->common.tpl_mvs = NULL;
av1_remove_common(&frame_worker_data->pbi->common);
+ av1_free_cdef_buffers(&pbi->common, &pbi->cdef_worker, &pbi->cdef_sync,
+ pbi->num_workers);
+ av1_free_cdef_sync(&pbi->cdef_sync);
#if !CONFIG_REALTIME_ONLY
- av1_free_restoration_buffers(&frame_worker_data->pbi->common);
+ av1_free_restoration_buffers(&pbi->common);
#endif
- av1_decoder_remove(frame_worker_data->pbi);
+ av1_decoder_remove(pbi);
aom_free(frame_worker_data);
#if CONFIG_MULTITHREAD
pthread_mutex_destroy(&ctx->buffer_pool->pool_mutex);
@@ -392,7 +396,7 @@ static void init_buffer_callbacks(aom_codec_alg_priv_t *ctx) {
pool->release_fb_cb = av1_release_frame_buffer;
if (av1_alloc_internal_frame_buffers(&pool->int_frame_buffers))
- aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
+ aom_internal_error(&pbi->error, AOM_CODEC_MEM_ERROR,
"Failed to initialize internal frame buffers");
pool->cb_priv = &pool->int_frame_buffers;
@@ -527,7 +531,7 @@ static aom_codec_err_t decode_one(aom_codec_alg_priv_t *ctx,
*data = frame_worker_data->data_end;
if (worker->had_error)
- return update_error_state(ctx, &frame_worker_data->pbi->common.error);
+ return update_error_state(ctx, &frame_worker_data->pbi->error);
check_resync(ctx, frame_worker_data->pbi);
@@ -558,7 +562,7 @@ static aom_codec_err_t decoder_inspect(aom_codec_alg_priv_t *ctx,
check_resync(ctx, frame_worker_data->pbi);
if (ctx->frame_worker->had_error)
- return update_error_state(ctx, &frame_worker_data->pbi->common.error);
+ return update_error_state(ctx, &frame_worker_data->pbi->error);
// Allow extra zero bytes after the frame end
while (data < data_end) {
@@ -823,7 +827,7 @@ static aom_image_t *decoder_get_frame(aom_codec_alg_priv_t *ctx,
aom_image_t *res =
add_grain_if_needed(ctx, img, &ctx->image_with_grain, grain_params);
if (!res) {
- aom_internal_error(&pbi->common.error, AOM_CODEC_CORRUPT_FRAME,
+ aom_internal_error(&pbi->error, AOM_CODEC_CORRUPT_FRAME,
"Grain systhesis failed\n");
}
*index += 1; // Advance the iterator to point to the next image
@@ -1091,10 +1095,9 @@ static aom_codec_err_t ctrl_get_still_picture(aom_codec_alg_priv_t *ctx,
FrameWorkerData *const frame_worker_data =
(FrameWorkerData *)worker->data1;
const AV1Decoder *pbi = frame_worker_data->pbi;
- still_picture_info->is_still_picture =
- (int)pbi->common.seq_params.still_picture;
+ still_picture_info->is_still_picture = (int)pbi->seq_params.still_picture;
still_picture_info->is_reduced_still_picture_hdr =
- (int)(pbi->common.seq_params.reduced_still_picture_hdr);
+ (int)(pbi->seq_params.reduced_still_picture_hdr);
return AOM_CODEC_OK;
} else {
return AOM_CODEC_ERROR;
@@ -1112,7 +1115,7 @@ static aom_codec_err_t ctrl_get_sb_size(aom_codec_alg_priv_t *ctx,
FrameWorkerData *const frame_worker_data =
(FrameWorkerData *)worker->data1;
const AV1Decoder *pbi = frame_worker_data->pbi;
- if (pbi->common.seq_params.sb_size == BLOCK_128X128) {
+ if (pbi->seq_params.sb_size == BLOCK_128X128) {
*sb_size = AOM_SUPERBLOCK_SIZE_128X128;
} else {
*sb_size = AOM_SUPERBLOCK_SIZE_64X64;
@@ -1291,7 +1294,7 @@ static aom_codec_err_t ctrl_get_bit_depth(aom_codec_alg_priv_t *ctx,
FrameWorkerData *const frame_worker_data =
(FrameWorkerData *)worker->data1;
const AV1_COMMON *const cm = &frame_worker_data->pbi->common;
- *bit_depth = cm->seq_params.bit_depth;
+ *bit_depth = cm->seq_params->bit_depth;
return AOM_CODEC_OK;
} else {
return AOM_CODEC_ERROR;
@@ -1327,9 +1330,9 @@ static aom_codec_err_t ctrl_get_img_format(aom_codec_alg_priv_t *ctx,
(FrameWorkerData *)worker->data1;
const AV1_COMMON *const cm = &frame_worker_data->pbi->common;
- *img_fmt = get_img_format(cm->seq_params.subsampling_x,
- cm->seq_params.subsampling_y,
- cm->seq_params.use_highbitdepth);
+ *img_fmt = get_img_format(cm->seq_params->subsampling_x,
+ cm->seq_params->subsampling_y,
+ cm->seq_params->use_highbitdepth);
return AOM_CODEC_OK;
} else {
return AOM_CODEC_ERROR;
diff --git a/third_party/libaom/source/libaom/av1/common/alloccommon.c b/third_party/libaom/source/libaom/av1/common/alloccommon.c
index cd997cd875..8624255218 100644
--- a/third_party/libaom/source/libaom/av1/common/alloccommon.c
+++ b/third_party/libaom/source/libaom/av1/common/alloccommon.c
@@ -17,8 +17,10 @@
#include "av1/common/alloccommon.h"
#include "av1/common/av1_common_int.h"
#include "av1/common/blockd.h"
+#include "av1/common/cdef_block.h"
#include "av1/common/entropymode.h"
#include "av1/common/entropymv.h"
+#include "av1/common/thread_common.h"
int av1_get_MBs(int width, int height) {
const int aligned_width = ALIGN_POWER_OF_TWO(width, 3);
@@ -51,6 +53,227 @@ void av1_free_ref_frame_buffers(BufferPool *pool) {
}
}
+static INLINE void free_cdef_linebuf_conditional(
+ AV1_COMMON *const cm, const size_t *new_linebuf_size) {
+ CdefInfo *cdef_info = &cm->cdef_info;
+ for (int plane = 0; plane < MAX_MB_PLANE; plane++) {
+ if (new_linebuf_size[plane] != cdef_info->allocated_linebuf_size[plane]) {
+ aom_free(cdef_info->linebuf[plane]);
+ cdef_info->linebuf[plane] = NULL;
+ }
+ }
+}
+
+static INLINE void free_cdef_bufs_conditional(AV1_COMMON *const cm,
+ uint16_t **colbuf,
+ uint16_t **srcbuf,
+ const size_t *new_colbuf_size,
+ const size_t new_srcbuf_size) {
+ CdefInfo *cdef_info = &cm->cdef_info;
+ if (new_srcbuf_size != cdef_info->allocated_srcbuf_size) {
+ aom_free(*srcbuf);
+ *srcbuf = NULL;
+ }
+ for (int plane = 0; plane < MAX_MB_PLANE; plane++) {
+ if (new_colbuf_size[plane] != cdef_info->allocated_colbuf_size[plane]) {
+ aom_free(colbuf[plane]);
+ colbuf[plane] = NULL;
+ }
+ }
+}
+
+static INLINE void free_cdef_bufs(uint16_t **colbuf, uint16_t **srcbuf) {
+ aom_free(*srcbuf);
+ *srcbuf = NULL;
+ for (int plane = 0; plane < MAX_MB_PLANE; plane++) {
+ aom_free(colbuf[plane]);
+ colbuf[plane] = NULL;
+ }
+}
+
+static INLINE void free_cdef_row_sync(AV1CdefRowSync **cdef_row_mt,
+ const int num_mi_rows) {
+ if (*cdef_row_mt == NULL) return;
+#if CONFIG_MULTITHREAD
+ for (int row_idx = 0; row_idx < num_mi_rows; row_idx++) {
+ pthread_mutex_destroy((*cdef_row_mt)[row_idx].row_mutex_);
+ pthread_cond_destroy((*cdef_row_mt)[row_idx].row_cond_);
+ aom_free((*cdef_row_mt)[row_idx].row_mutex_);
+ aom_free((*cdef_row_mt)[row_idx].row_cond_);
+ }
+#else
+ (void)num_mi_rows;
+#endif // CONFIG_MULTITHREAD
+ aom_free(*cdef_row_mt);
+ *cdef_row_mt = NULL;
+}
+
+void av1_free_cdef_buffers(AV1_COMMON *const cm,
+ AV1CdefWorkerData **cdef_worker,
+ AV1CdefSync *cdef_sync, int num_workers) {
+ CdefInfo *cdef_info = &cm->cdef_info;
+ const int num_mi_rows = cdef_info->allocated_mi_rows;
+
+ for (int plane = 0; plane < MAX_MB_PLANE; plane++) {
+ aom_free(cdef_info->linebuf[plane]);
+ cdef_info->linebuf[plane] = NULL;
+ }
+ // De-allocation of column buffer & source buffer (worker_0).
+ free_cdef_bufs(cdef_info->colbuf, &cdef_info->srcbuf);
+
+ if (num_workers < 2) return;
+ if (*cdef_worker != NULL) {
+ for (int idx = num_workers - 1; idx >= 1; idx--) {
+ // De-allocation of column buffer & source buffer for remaining workers.
+ free_cdef_bufs((*cdef_worker)[idx].colbuf, &(*cdef_worker)[idx].srcbuf);
+ }
+ aom_free(*cdef_worker);
+ *cdef_worker = NULL;
+ }
+ free_cdef_row_sync(&cdef_sync->cdef_row_mt, num_mi_rows);
+}
+
+static INLINE void alloc_cdef_linebuf(AV1_COMMON *const cm, uint16_t **linebuf,
+ const int num_planes) {
+ CdefInfo *cdef_info = &cm->cdef_info;
+ for (int plane = 0; plane < num_planes; plane++) {
+ if (linebuf[plane] == NULL)
+ CHECK_MEM_ERROR(cm, linebuf[plane],
+ aom_malloc(cdef_info->allocated_linebuf_size[plane]));
+ }
+}
+
+static INLINE void alloc_cdef_bufs(AV1_COMMON *const cm, uint16_t **colbuf,
+ uint16_t **srcbuf, const int num_planes) {
+ CdefInfo *cdef_info = &cm->cdef_info;
+ if (*srcbuf == NULL)
+ CHECK_MEM_ERROR(cm, *srcbuf,
+ aom_memalign(16, cdef_info->allocated_srcbuf_size));
+
+ for (int plane = 0; plane < num_planes; plane++) {
+ if (colbuf[plane] == NULL)
+ CHECK_MEM_ERROR(cm, colbuf[plane],
+ aom_malloc(cdef_info->allocated_colbuf_size[plane]));
+ }
+}
+
+static INLINE void alloc_cdef_row_sync(AV1_COMMON *const cm,
+ AV1CdefRowSync **cdef_row_mt,
+ const int num_mi_rows) {
+ if (*cdef_row_mt != NULL) return;
+
+ CHECK_MEM_ERROR(cm, *cdef_row_mt,
+ aom_malloc(sizeof(**cdef_row_mt) * num_mi_rows));
+#if CONFIG_MULTITHREAD
+ for (int row_idx = 0; row_idx < num_mi_rows; row_idx++) {
+ CHECK_MEM_ERROR(cm, (*cdef_row_mt)[row_idx].row_mutex_,
+ aom_malloc(sizeof(*(*cdef_row_mt)[row_idx].row_mutex_)));
+ pthread_mutex_init((*cdef_row_mt)[row_idx].row_mutex_, NULL);
+
+ CHECK_MEM_ERROR(cm, (*cdef_row_mt)[row_idx].row_cond_,
+ aom_malloc(sizeof(*(*cdef_row_mt)[row_idx].row_cond_)));
+ pthread_cond_init((*cdef_row_mt)[row_idx].row_cond_, NULL);
+
+ (*cdef_row_mt)[row_idx].is_row_done = 0;
+ }
+#endif // CONFIG_MULTITHREAD
+}
+
+void av1_alloc_cdef_buffers(AV1_COMMON *const cm,
+ AV1CdefWorkerData **cdef_worker,
+ AV1CdefSync *cdef_sync, int num_workers) {
+ const int num_planes = av1_num_planes(cm);
+ size_t new_linebuf_size[MAX_MB_PLANE] = { 0 };
+ size_t new_colbuf_size[MAX_MB_PLANE] = { 0 };
+ size_t new_srcbuf_size = 0;
+ CdefInfo *const cdef_info = &cm->cdef_info;
+ // Check for configuration change
+ const int num_mi_rows =
+ (cm->mi_params.mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
+ const int is_num_workers_changed =
+ cdef_info->allocated_num_workers != num_workers;
+ const int is_cdef_enabled =
+ cm->seq_params->enable_cdef && !cm->tiles.large_scale;
+
+ // num-bufs=3 represents ping-pong buffers for top linebuf,
+ // followed by bottom linebuf.
+ // ping-pong is to avoid top linebuf over-write by consecutive row.
+ int num_bufs = 3;
+ if (num_workers > 1)
+ num_bufs = (cm->mi_params.mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
+
+ if (is_cdef_enabled) {
+ // Calculate src buffer size
+ new_srcbuf_size = sizeof(*cdef_info->srcbuf) * CDEF_INBUF_SIZE;
+ for (int plane = 0; plane < num_planes; plane++) {
+ const int shift =
+ plane == AOM_PLANE_Y ? 0 : cm->seq_params->subsampling_x;
+ // Calculate top and bottom line buffer size
+ const int luma_stride =
+ ALIGN_POWER_OF_TWO(cm->mi_params.mi_cols << MI_SIZE_LOG2, 4);
+ new_linebuf_size[plane] = sizeof(*cdef_info->linebuf) * num_bufs *
+ (CDEF_VBORDER << 1) * (luma_stride >> shift);
+ // Calculate column buffer size
+ const int block_height =
+ (CDEF_BLOCKSIZE << (MI_SIZE_LOG2 - shift)) * 2 * CDEF_VBORDER;
+ new_colbuf_size[plane] =
+ sizeof(*cdef_info->colbuf[plane]) * block_height * CDEF_HBORDER;
+ }
+ }
+
+ // Free src, line and column buffers for worker 0 in case of reallocation
+ free_cdef_linebuf_conditional(cm, new_linebuf_size);
+ free_cdef_bufs_conditional(cm, cdef_info->colbuf, &cdef_info->srcbuf,
+ new_colbuf_size, new_srcbuf_size);
+
+ if (*cdef_worker != NULL) {
+ if (is_num_workers_changed) {
+ // Free src and column buffers for remaining workers in case of change in
+ // num_workers
+ for (int idx = cdef_info->allocated_num_workers - 1; idx >= 1; idx--)
+ free_cdef_bufs((*cdef_worker)[idx].colbuf, &(*cdef_worker)[idx].srcbuf);
+ } else if (num_workers > 1) {
+ // Free src and column buffers for remaining workers in case of
+ // reallocation
+ for (int idx = num_workers - 1; idx >= 1; idx--)
+ free_cdef_bufs_conditional(cm, (*cdef_worker)[idx].colbuf,
+ &(*cdef_worker)[idx].srcbuf, new_colbuf_size,
+ new_srcbuf_size);
+ }
+ }
+
+ if (cdef_info->allocated_mi_rows != num_mi_rows)
+ free_cdef_row_sync(&cdef_sync->cdef_row_mt, cdef_info->allocated_mi_rows);
+
+ // Store allocated sizes for reallocation
+ cdef_info->allocated_srcbuf_size = new_srcbuf_size;
+ av1_copy(cdef_info->allocated_colbuf_size, new_colbuf_size);
+ av1_copy(cdef_info->allocated_linebuf_size, new_linebuf_size);
+ // Store configuration to check change in configuration
+ cdef_info->allocated_mi_rows = num_mi_rows;
+ cdef_info->allocated_num_workers = num_workers;
+
+ if (!is_cdef_enabled) return;
+
+ // Memory allocation of column buffer & source buffer (worker_0).
+ alloc_cdef_bufs(cm, cdef_info->colbuf, &cdef_info->srcbuf, num_planes);
+ alloc_cdef_linebuf(cm, cdef_info->linebuf, num_planes);
+
+ if (num_workers < 2) return;
+
+ if (*cdef_worker == NULL)
+ CHECK_MEM_ERROR(cm, *cdef_worker,
+ aom_calloc(num_workers, sizeof(**cdef_worker)));
+
+ // Memory allocation of column buffer & source buffer for remaining workers.
+ for (int idx = num_workers - 1; idx >= 1; idx--)
+ alloc_cdef_bufs(cm, (*cdef_worker)[idx].colbuf, &(*cdef_worker)[idx].srcbuf,
+ num_planes);
+
+ alloc_cdef_row_sync(cm, &cdef_sync->cdef_row_mt,
+ cdef_info->allocated_mi_rows);
+}
+
#if !CONFIG_REALTIME_ONLY
// Assumes cm->rst_info[p].restoration_unit_size is already initialized
void av1_alloc_restoration_buffers(AV1_COMMON *cm) {
@@ -86,11 +309,11 @@ void av1_alloc_restoration_buffers(AV1_COMMON *cm) {
// Now we need to allocate enough space to store the line buffers for the
// stripes
const int frame_w = cm->superres_upscaled_width;
- const int use_highbd = cm->seq_params.use_highbitdepth;
+ const int use_highbd = cm->seq_params->use_highbitdepth;
for (int p = 0; p < num_planes; ++p) {
const int is_uv = p > 0;
- const int ss_x = is_uv && cm->seq_params.subsampling_x;
+ const int ss_x = is_uv && cm->seq_params->subsampling_x;
const int plane_w = ((frame_w + ss_x) >> ss_x) + 2 * RESTORATION_EXTRA_HORZ;
const int stride = ALIGN_POWER_OF_TWO(plane_w, 5);
const int buf_size = num_stripes * stride * RESTORATION_CTX_VERT
diff --git a/third_party/libaom/source/libaom/av1/common/alloccommon.h b/third_party/libaom/source/libaom/av1/common/alloccommon.h
index e75c226831..0b43889d20 100644
--- a/third_party/libaom/source/libaom/av1/common/alloccommon.h
+++ b/third_party/libaom/source/libaom/av1/common/alloccommon.h
@@ -24,6 +24,8 @@ struct AV1Common;
struct BufferPool;
struct CommonContexts;
struct CommonModeInfoParams;
+struct AV1CdefWorker;
+struct AV1CdefSyncData;
void av1_remove_common(struct AV1Common *cm);
@@ -36,6 +38,12 @@ void av1_init_mi_buffers(struct CommonModeInfoParams *mi_params);
void av1_free_context_buffers(struct AV1Common *cm);
void av1_free_ref_frame_buffers(struct BufferPool *pool);
+void av1_alloc_cdef_buffers(struct AV1Common *const cm,
+ struct AV1CdefWorker **cdef_worker,
+ struct AV1CdefSyncData *cdef_sync, int num_workers);
+void av1_free_cdef_buffers(struct AV1Common *const cm,
+ struct AV1CdefWorker **cdef_worker,
+ struct AV1CdefSyncData *cdef_sync, int num_workers);
#if !CONFIG_REALTIME_ONLY
void av1_alloc_restoration_buffers(struct AV1Common *cm);
void av1_free_restoration_buffers(struct AV1Common *cm);
diff --git a/third_party/libaom/source/libaom/av1/common/av1_common_int.h b/third_party/libaom/source/libaom/av1/common/av1_common_int.h
index 0a68cb5fd5..981a186579 100644
--- a/third_party/libaom/source/libaom/av1/common/av1_common_int.h
+++ b/third_party/libaom/source/libaom/av1/common/av1_common_int.h
@@ -135,7 +135,10 @@ typedef struct RefCntBuffer {
// distance when a very old frame is used as a reference.
unsigned int display_order_hint;
unsigned int ref_display_order_hint[INTER_REFS_PER_FRAME];
-
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ // Frame's level within the hierarchical structure.
+ unsigned int pyramid_level;
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
MV_REF *mvs;
uint8_t *seg_map;
struct segmentation seg;
@@ -192,12 +195,32 @@ typedef struct BufferPool {
/*!\brief Parameters related to CDEF */
typedef struct {
- int cdef_damping; /*!< CDEF damping factor */
- int nb_cdef_strengths; /*!< Number of CDEF strength values */
- int cdef_strengths[CDEF_MAX_STRENGTHS]; /*!< CDEF strength values for luma */
- int cdef_uv_strengths[CDEF_MAX_STRENGTHS]; /*!< CDEF strength values for
- chroma */
- int cdef_bits; /*!< Number of CDEF strength values in bits */
+ //! CDEF column line buffer
+ uint16_t *colbuf[MAX_MB_PLANE];
+ //! CDEF top & bottom line buffer
+ uint16_t *linebuf[MAX_MB_PLANE];
+ //! CDEF intermediate buffer
+ uint16_t *srcbuf;
+ //! CDEF column line buffer sizes
+ size_t allocated_colbuf_size[MAX_MB_PLANE];
+ //! CDEF top and bottom line buffer sizes
+ size_t allocated_linebuf_size[MAX_MB_PLANE];
+ //! CDEF intermediate buffer size
+ size_t allocated_srcbuf_size;
+ //! CDEF damping factor
+ int cdef_damping;
+ //! Number of CDEF strength values
+ int nb_cdef_strengths;
+ //! CDEF strength values for luma
+ int cdef_strengths[CDEF_MAX_STRENGTHS];
+ //! CDEF strength values for chroma
+ int cdef_uv_strengths[CDEF_MAX_STRENGTHS];
+ //! Number of CDEF strength values in bits
+ int cdef_bits;
+ //! Number of rows in the frame in 4 pixel
+ int allocated_mi_rows;
+ //! Number of CDEF workers
+ int allocated_num_workers;
} CdefInfo;
/*!\cond */
@@ -320,6 +343,10 @@ typedef struct {
unsigned int order_hint;
unsigned int display_order_hint;
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ // Frame's level within the hierarchical structure.
+ unsigned int pyramid_level;
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
unsigned int frame_number;
SkipModeInfo skip_mode_info;
int refresh_frame_flags; // Which ref frames are overwritten by this frame
@@ -602,12 +629,12 @@ struct CommonQuantParams {
/*!
* Delta of qindex (from base_qindex) for V plane DC coefficients.
- * Same as those for U plane if cm->seq_params.separate_uv_delta_q == 0.
+ * Same as those for U plane if cm->seq_params->separate_uv_delta_q == 0.
*/
int u_ac_delta_q;
/*!
* Delta of qindex (from base_qindex) for V plane AC coefficients.
- * Same as those for U plane if cm->seq_params.separate_uv_delta_q == 0.
+ * Same as those for U plane if cm->seq_params->separate_uv_delta_q == 0.
*/
int v_ac_delta_q;
@@ -728,7 +755,7 @@ typedef struct AV1Common {
/*!
* Code and details about current error status.
*/
- struct aom_internal_error_info error;
+ struct aom_internal_error_info *error;
/*!
* AV1 allows two types of frame scaling operations:
@@ -780,10 +807,6 @@ typedef struct AV1Common {
uint8_t superres_scale_denominator;
/*!
- * If true, buffer removal times are present.
- */
- bool buffer_removal_time_present;
- /*!
* buffer_removal_times[op_num] specifies the frame removal time in units of
* DecCT clock ticks counted from the removal time of the last random access
* point for operating point op_num.
@@ -950,7 +973,7 @@ typedef struct AV1Common {
* Elements part of the sequence header, that are applicable for all the
* frames in the video.
*/
- SequenceHeader seq_params;
+ SequenceHeader *seq_params;
/*!
* Current CDFs of all the symbols for the current frame.
@@ -982,7 +1005,7 @@ typedef struct AV1Common {
CommonContexts above_contexts;
/**
- * \name Signaled when cm->seq_params.frame_id_numbers_present_flag == 1
+ * \name Signaled when cm->seq_params->frame_id_numbers_present_flag == 1
*/
/**@{*/
int current_frame_id; /*!< frame ID for the current frame. */
@@ -1014,20 +1037,12 @@ typedef struct AV1Common {
int8_t ref_frame_side[REF_FRAMES];
/*!
- * Number of temporal layers: may be > 1 for SVC (scalable vector coding).
- */
- unsigned int number_temporal_layers;
- /*!
* Temporal layer ID of this frame
* (in the range 0 ... (number_temporal_layers - 1)).
*/
int temporal_layer_id;
/*!
- * Number of spatial layers: may be > 1 for SVC (scalable vector coding).
- */
- unsigned int number_spatial_layers;
- /*!
* Spatial layer ID of this frame
* (in the range 0 ... (number_spatial_layers - 1)).
*/
@@ -1192,15 +1207,15 @@ static INLINE RefCntBuffer *get_primary_ref_frame_buf(
// Returns 1 if this frame might allow mvs from some reference frame.
static INLINE int frame_might_allow_ref_frame_mvs(const AV1_COMMON *cm) {
return !cm->features.error_resilient_mode &&
- cm->seq_params.order_hint_info.enable_ref_frame_mvs &&
- cm->seq_params.order_hint_info.enable_order_hint &&
+ cm->seq_params->order_hint_info.enable_ref_frame_mvs &&
+ cm->seq_params->order_hint_info.enable_order_hint &&
!frame_is_intra_only(cm);
}
// Returns 1 if this frame might use warped_motion
static INLINE int frame_might_allow_warped_motion(const AV1_COMMON *cm) {
return !cm->features.error_resilient_mode && !frame_is_intra_only(cm) &&
- cm->seq_params.enable_warped_motion;
+ cm->seq_params->enable_warped_motion;
}
static INLINE void ensure_mv_buffer(RefCntBuffer *buf, AV1_COMMON *cm) {
@@ -1240,7 +1255,7 @@ static INLINE void ensure_mv_buffer(RefCntBuffer *buf, AV1_COMMON *cm) {
void cfl_init(CFL_CTX *cfl, const SequenceHeader *seq_params);
static INLINE int av1_num_planes(const AV1_COMMON *cm) {
- return cm->seq_params.monochrome ? 1 : MAX_MB_PLANE;
+ return cm->seq_params->monochrome ? 1 : MAX_MB_PLANE;
}
static INLINE void av1_init_above_context(CommonContexts *above_contexts,
@@ -1279,8 +1294,8 @@ static INLINE void av1_init_macroblockd(AV1_COMMON *cm, MACROBLOCKD *xd) {
}
}
xd->mi_stride = cm->mi_params.mi_stride;
- xd->error_info = &cm->error;
- cfl_init(&xd->cfl, &cm->seq_params);
+ xd->error_info = cm->error;
+ cfl_init(&xd->cfl, cm->seq_params);
}
static INLINE void set_entropy_context(MACROBLOCKD *xd, int mi_row, int mi_col,
@@ -1562,7 +1577,7 @@ static INLINE void av1_zero_above_context(AV1_COMMON *const cm,
const MACROBLOCKD *xd,
int mi_col_start, int mi_col_end,
const int tile_row) {
- const SequenceHeader *const seq_params = &cm->seq_params;
+ const SequenceHeader *const seq_params = cm->seq_params;
const int num_planes = av1_num_planes(cm);
const int width = mi_col_end - mi_col_start;
const int aligned_width =
diff --git a/third_party/libaom/source/libaom/av1/common/av1_loopfilter.c b/third_party/libaom/source/libaom/av1/common/av1_loopfilter.c
index caa15c21e2..18ae0f28f4 100644
--- a/third_party/libaom/source/libaom/av1/common/av1_loopfilter.c
+++ b/third_party/libaom/source/libaom/av1/common/av1_loopfilter.c
@@ -351,8 +351,14 @@ void av1_filter_block_plane_vert(const AV1_COMMON *const cm,
const uint32_t scale_vert = plane_ptr->subsampling_y;
uint8_t *const dst_ptr = plane_ptr->dst.buf;
const int dst_stride = plane_ptr->dst.stride;
- const int y_range = (MAX_MIB_SIZE >> scale_vert);
- const int x_range = (MAX_MIB_SIZE >> scale_horz);
+ const int plane_mi_rows =
+ ROUND_POWER_OF_TWO(cm->mi_params.mi_rows, scale_vert);
+ const int plane_mi_cols =
+ ROUND_POWER_OF_TWO(cm->mi_params.mi_cols, scale_horz);
+ const int y_range = AOMMIN((int)(plane_mi_rows - (mi_row >> scale_vert)),
+ (MAX_MIB_SIZE >> scale_vert));
+ const int x_range = AOMMIN((int)(plane_mi_cols - (mi_col >> scale_horz)),
+ (MAX_MIB_SIZE >> scale_horz));
for (int y = 0; y < y_range; y++) {
uint8_t *p = dst_ptr + y * MI_SIZE * dst_stride;
for (int x = 0; x < x_range;) {
@@ -376,8 +382,8 @@ void av1_filter_block_plane_vert(const AV1_COMMON *const cm,
}
#if CONFIG_AV1_HIGHBITDEPTH
- const int use_highbitdepth = cm->seq_params.use_highbitdepth;
- const aom_bit_depth_t bit_depth = cm->seq_params.bit_depth;
+ const int use_highbitdepth = cm->seq_params->use_highbitdepth;
+ const aom_bit_depth_t bit_depth = cm->seq_params->bit_depth;
switch (params.filter_length) {
// apply 4-tap filtering
case 4:
@@ -456,6 +462,84 @@ void av1_filter_block_plane_vert(const AV1_COMMON *const cm,
}
}
+void av1_filter_block_plane_vert_rt(const AV1_COMMON *const cm,
+ const MACROBLOCKD *const xd,
+ const int plane,
+ const MACROBLOCKD_PLANE *const plane_ptr,
+ const uint32_t mi_row,
+ const uint32_t mi_col) {
+ const uint32_t scale_horz = plane_ptr->subsampling_x;
+ const uint32_t scale_vert = plane_ptr->subsampling_y;
+ uint8_t *const dst_ptr = plane_ptr->dst.buf;
+ const int dst_stride = plane_ptr->dst.stride;
+ const int plane_mi_rows =
+ ROUND_POWER_OF_TWO(cm->mi_params.mi_rows, scale_vert);
+ const int plane_mi_cols =
+ ROUND_POWER_OF_TWO(cm->mi_params.mi_cols, scale_horz);
+ const int y_range = AOMMIN((int)(plane_mi_rows - (mi_row >> scale_vert)),
+ (MAX_MIB_SIZE >> scale_vert));
+ const int x_range = AOMMIN((int)(plane_mi_cols - (mi_col >> scale_horz)),
+ (MAX_MIB_SIZE >> scale_horz));
+ assert(!plane);
+ assert(!(y_range % 2));
+ for (int y = 0; y < y_range; y += 2) {
+ uint8_t *p = dst_ptr + y * MI_SIZE * dst_stride;
+ for (int x = 0; x < x_range;) {
+ // inner loop always filter vertical edges in a MI block. If MI size
+ // is 8x8, it will filter the vertical edge aligned with a 8x8 block.
+ // If 4x4 transform is used, it will then filter the internal edge
+ // aligned with a 4x4 block
+ const uint32_t curr_x = ((mi_col * MI_SIZE) >> scale_horz) + x * MI_SIZE;
+ const uint32_t curr_y = ((mi_row * MI_SIZE) >> scale_vert) + y * MI_SIZE;
+ uint32_t advance_units;
+ TX_SIZE tx_size;
+ AV1_DEBLOCKING_PARAMETERS params;
+ memset(&params, 0, sizeof(params));
+
+ tx_size =
+ set_lpf_parameters(&params, ((ptrdiff_t)1 << scale_horz), cm, xd,
+ VERT_EDGE, curr_x, curr_y, plane, plane_ptr);
+ if (tx_size == TX_INVALID) {
+ params.filter_length = 0;
+ tx_size = TX_4X4;
+ }
+
+ switch (params.filter_length) {
+ // apply 4-tap filtering
+ case 4:
+ aom_lpf_vertical_4_dual(p, dst_stride, params.mblim, params.lim,
+ params.hev_thr, params.mblim, params.lim,
+ params.hev_thr);
+ break;
+ case 6: // apply 6-tap filter for chroma plane only
+ assert(plane != 0);
+ aom_lpf_vertical_6_dual(p, dst_stride, params.mblim, params.lim,
+ params.hev_thr, params.mblim, params.lim,
+ params.hev_thr);
+ break;
+ // apply 8-tap filtering
+ case 8:
+ aom_lpf_vertical_8_dual(p, dst_stride, params.mblim, params.lim,
+ params.hev_thr, params.mblim, params.lim,
+ params.hev_thr);
+ break;
+ // apply 14-tap filtering
+ case 14:
+ aom_lpf_vertical_14_dual(p, dst_stride, params.mblim, params.lim,
+ params.hev_thr, params.mblim, params.lim,
+ params.hev_thr);
+ break;
+ // no filtering
+ default: break;
+ }
+ // advance the destination pointer
+ advance_units = tx_size_wide_unit[tx_size];
+ x += advance_units;
+ p += advance_units * MI_SIZE;
+ }
+ }
+}
+
void av1_filter_block_plane_horz(const AV1_COMMON *const cm,
const MACROBLOCKD *const xd, const int plane,
const MACROBLOCKD_PLANE *const plane_ptr,
@@ -464,8 +548,14 @@ void av1_filter_block_plane_horz(const AV1_COMMON *const cm,
const uint32_t scale_vert = plane_ptr->subsampling_y;
uint8_t *const dst_ptr = plane_ptr->dst.buf;
const int dst_stride = plane_ptr->dst.stride;
- const int y_range = (MAX_MIB_SIZE >> scale_vert);
- const int x_range = (MAX_MIB_SIZE >> scale_horz);
+ const int plane_mi_rows =
+ ROUND_POWER_OF_TWO(cm->mi_params.mi_rows, scale_vert);
+ const int plane_mi_cols =
+ ROUND_POWER_OF_TWO(cm->mi_params.mi_cols, scale_horz);
+ const int y_range = AOMMIN((int)(plane_mi_rows - (mi_row >> scale_vert)),
+ (MAX_MIB_SIZE >> scale_vert));
+ const int x_range = AOMMIN((int)(plane_mi_cols - (mi_col >> scale_horz)),
+ (MAX_MIB_SIZE >> scale_horz));
for (int x = 0; x < x_range; x++) {
uint8_t *p = dst_ptr + x * MI_SIZE;
for (int y = 0; y < y_range;) {
@@ -489,8 +579,8 @@ void av1_filter_block_plane_horz(const AV1_COMMON *const cm,
}
#if CONFIG_AV1_HIGHBITDEPTH
- const int use_highbitdepth = cm->seq_params.use_highbitdepth;
- const aom_bit_depth_t bit_depth = cm->seq_params.bit_depth;
+ const int use_highbitdepth = cm->seq_params->use_highbitdepth;
+ const aom_bit_depth_t bit_depth = cm->seq_params->bit_depth;
switch (params.filter_length) {
// apply 4-tap filtering
case 4:
@@ -572,6 +662,84 @@ void av1_filter_block_plane_horz(const AV1_COMMON *const cm,
}
}
+void av1_filter_block_plane_horz_rt(const AV1_COMMON *const cm,
+ const MACROBLOCKD *const xd,
+ const int plane,
+ const MACROBLOCKD_PLANE *const plane_ptr,
+ const uint32_t mi_row,
+ const uint32_t mi_col) {
+ const uint32_t scale_horz = plane_ptr->subsampling_x;
+ const uint32_t scale_vert = plane_ptr->subsampling_y;
+ uint8_t *const dst_ptr = plane_ptr->dst.buf;
+ const int dst_stride = plane_ptr->dst.stride;
+ const int plane_mi_rows =
+ ROUND_POWER_OF_TWO(cm->mi_params.mi_rows, scale_vert);
+ const int plane_mi_cols =
+ ROUND_POWER_OF_TWO(cm->mi_params.mi_cols, scale_horz);
+ const int y_range = AOMMIN((int)(plane_mi_rows - (mi_row >> scale_vert)),
+ (MAX_MIB_SIZE >> scale_vert));
+ const int x_range = AOMMIN((int)(plane_mi_cols - (mi_col >> scale_horz)),
+ (MAX_MIB_SIZE >> scale_horz));
+ assert(!plane);
+ for (int x = 0; x < x_range; x += 2) {
+ uint8_t *p = dst_ptr + x * MI_SIZE;
+ for (int y = 0; y < y_range;) {
+ // inner loop always filter vertical edges in a MI block. If MI size
+ // is 8x8, it will first filter the vertical edge aligned with a 8x8
+ // block. If 4x4 transform is used, it will then filter the internal
+ // edge aligned with a 4x4 block
+ const uint32_t curr_x = ((mi_col * MI_SIZE) >> scale_horz) + x * MI_SIZE;
+ const uint32_t curr_y = ((mi_row * MI_SIZE) >> scale_vert) + y * MI_SIZE;
+ uint32_t advance_units;
+ TX_SIZE tx_size;
+ AV1_DEBLOCKING_PARAMETERS params;
+ memset(&params, 0, sizeof(params));
+
+ tx_size = set_lpf_parameters(
+ &params, (cm->mi_params.mi_stride << scale_vert), cm, xd, HORZ_EDGE,
+ curr_x, curr_y, plane, plane_ptr);
+ if (tx_size == TX_INVALID) {
+ params.filter_length = 0;
+ tx_size = TX_4X4;
+ }
+
+ switch (params.filter_length) {
+ // apply 4-tap filtering
+ case 4:
+ aom_lpf_horizontal_4_dual(p, dst_stride, params.mblim, params.lim,
+ params.hev_thr, params.mblim, params.lim,
+ params.hev_thr);
+ break;
+ // apply 6-tap filtering
+ case 6:
+ assert(plane != 0);
+ aom_lpf_horizontal_6_dual(p, dst_stride, params.mblim, params.lim,
+ params.hev_thr, params.mblim, params.lim,
+ params.hev_thr);
+ break;
+ // apply 8-tap filtering
+ case 8:
+ aom_lpf_horizontal_8_dual(p, dst_stride, params.mblim, params.lim,
+ params.hev_thr, params.mblim, params.lim,
+ params.hev_thr);
+ break;
+ // apply 14-tap filtering
+ case 14:
+ aom_lpf_horizontal_14_dual(p, dst_stride, params.mblim, params.lim,
+ params.hev_thr, params.mblim, params.lim,
+ params.hev_thr);
+ break;
+ // no filtering
+ default: break;
+ }
+ // advance the destination pointer
+ advance_units = tx_size_high_unit[tx_size];
+ y += advance_units;
+ p += advance_units * dst_stride * MI_SIZE;
+ }
+ }
+}
+
void av1_filter_block_plane_vert_test(const AV1_COMMON *const cm,
const MACROBLOCKD *const xd,
const int plane,
@@ -661,7 +829,7 @@ static void loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm,
#if CONFIG_LPF_MASK
int is_decoding,
#endif
- int plane_start, int plane_end) {
+ int plane_start, int plane_end, int is_realtime) {
struct macroblockd_plane *pd = xd->plane;
const int col_start = 0;
const int col_end = cm->mi_params.mi_cols;
@@ -679,7 +847,7 @@ static void loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm,
else if (plane == 2 && !(cm->lf.filter_level_v))
continue;
- av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, 0, 0,
+ av1_setup_dst_planes(pd, cm->seq_params->sb_size, frame_buffer, 0, 0,
plane, plane + 1);
av1_build_bitmask_vert_info(cm, &pd[plane], plane);
@@ -716,49 +884,106 @@ static void loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, AV1_COMMON *cm,
continue;
else if (plane == 2 && !(cm->lf.filter_level_v))
continue;
-
if (cm->lf.combine_vert_horz_lf) {
// filter all vertical and horizontal edges in every 128x128 super block
for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
for (mi_col = col_start; mi_col < col_end; mi_col += MAX_MIB_SIZE) {
// filter vertical edges
- av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row,
- mi_col, plane, plane + 1);
+ av1_setup_dst_planes(pd, cm->seq_params->sb_size, frame_buffer,
+ mi_row, mi_col, plane, plane + 1);
+#if CONFIG_AV1_HIGHBITDEPTH
+ (void)is_realtime;
av1_filter_block_plane_vert(cm, xd, plane, &pd[plane], mi_row,
mi_col);
+#else
+ if (is_realtime && !plane) {
+ av1_filter_block_plane_vert_rt(cm, xd, plane, &pd[plane], mi_row,
+ mi_col);
+
+ } else {
+ av1_filter_block_plane_vert(cm, xd, plane, &pd[plane], mi_row,
+ mi_col);
+ }
+#endif
// filter horizontal edges
if (mi_col - MAX_MIB_SIZE >= 0) {
- av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer,
+ av1_setup_dst_planes(pd, cm->seq_params->sb_size, frame_buffer,
mi_row, mi_col - MAX_MIB_SIZE, plane,
plane + 1);
+#if CONFIG_AV1_HIGHBITDEPTH
+ (void)is_realtime;
av1_filter_block_plane_horz(cm, xd, plane, &pd[plane], mi_row,
mi_col - MAX_MIB_SIZE);
+#else
+ if (is_realtime && !plane) {
+ av1_filter_block_plane_horz_rt(cm, xd, plane, &pd[plane], mi_row,
+ mi_col - MAX_MIB_SIZE);
+ } else {
+ av1_filter_block_plane_horz(cm, xd, plane, &pd[plane], mi_row,
+ mi_col - MAX_MIB_SIZE);
+ }
+#endif
}
}
// filter horizontal edges
- av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row,
+ av1_setup_dst_planes(pd, cm->seq_params->sb_size, frame_buffer, mi_row,
mi_col - MAX_MIB_SIZE, plane, plane + 1);
+#if CONFIG_AV1_HIGHBITDEPTH
+ (void)is_realtime;
av1_filter_block_plane_horz(cm, xd, plane, &pd[plane], mi_row,
mi_col - MAX_MIB_SIZE);
+#else
+ if (is_realtime && !plane) {
+ av1_filter_block_plane_horz_rt(cm, xd, plane, &pd[plane], mi_row,
+ mi_col - MAX_MIB_SIZE);
+
+ } else {
+ av1_filter_block_plane_horz(cm, xd, plane, &pd[plane], mi_row,
+ mi_col - MAX_MIB_SIZE);
+ }
+#endif
}
} else {
// filter all vertical edges in every 128x128 super block
for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
for (mi_col = col_start; mi_col < col_end; mi_col += MAX_MIB_SIZE) {
- av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row,
- mi_col, plane, plane + 1);
+ av1_setup_dst_planes(pd, cm->seq_params->sb_size, frame_buffer,
+ mi_row, mi_col, plane, plane + 1);
+#if CONFIG_AV1_HIGHBITDEPTH
+ (void)is_realtime;
av1_filter_block_plane_vert(cm, xd, plane, &pd[plane], mi_row,
mi_col);
+#else
+ if (is_realtime && !plane) {
+ av1_filter_block_plane_vert_rt(cm, xd, plane, &pd[plane], mi_row,
+ mi_col);
+ } else {
+ av1_filter_block_plane_vert(cm, xd, plane, &pd[plane], mi_row,
+ mi_col);
+ }
+#endif
}
}
// filter all horizontal edges in every 128x128 super block
for (mi_row = start; mi_row < stop; mi_row += MAX_MIB_SIZE) {
for (mi_col = col_start; mi_col < col_end; mi_col += MAX_MIB_SIZE) {
- av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame_buffer, mi_row,
- mi_col, plane, plane + 1);
+ av1_setup_dst_planes(pd, cm->seq_params->sb_size, frame_buffer,
+ mi_row, mi_col, plane, plane + 1);
+#if CONFIG_AV1_HIGHBITDEPTH
+ (void)is_realtime;
av1_filter_block_plane_horz(cm, xd, plane, &pd[plane], mi_row,
mi_col);
+#else
+ if (is_realtime && !plane) {
+ av1_filter_block_plane_horz_rt(cm, xd, plane, &pd[plane], mi_row,
+ mi_col);
+
+ } else {
+ av1_filter_block_plane_horz(cm, xd, plane, &pd[plane], mi_row,
+ mi_col);
+ }
+#endif
}
}
}
@@ -770,7 +995,8 @@ void av1_loop_filter_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
#if CONFIG_LPF_MASK
int is_decoding,
#endif
- int plane_start, int plane_end, int partial_frame) {
+ int plane_start, int plane_end, int partial_frame,
+ int is_realtime) {
int start_mi_row, end_mi_row, mi_rows_to_filter;
start_mi_row = 0;
@@ -786,5 +1012,5 @@ void av1_loop_filter_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
#if CONFIG_LPF_MASK
is_decoding,
#endif
- plane_start, plane_end);
+ plane_start, plane_end, is_realtime);
}
diff --git a/third_party/libaom/source/libaom/av1/common/av1_loopfilter.h b/third_party/libaom/source/libaom/av1/common/av1_loopfilter.h
index ca16bbe614..ed4453b2a7 100644
--- a/third_party/libaom/source/libaom/av1/common/av1_loopfilter.h
+++ b/third_party/libaom/source/libaom/av1/common/av1_loopfilter.h
@@ -151,7 +151,7 @@ void av1_loop_filter_frame(YV12_BUFFER_CONFIG *frame, struct AV1Common *cm,
#else
void av1_loop_filter_frame(YV12_BUFFER_CONFIG *frame, struct AV1Common *cm,
struct macroblockd *xd, int plane_start,
- int plane_end, int partial_frame);
+ int plane_end, int partial_frame, int is_realtime);
#endif
void av1_filter_block_plane_vert(const struct AV1Common *const cm,
@@ -164,6 +164,20 @@ void av1_filter_block_plane_horz(const struct AV1Common *const cm,
const MACROBLOCKD_PLANE *const plane_ptr,
const uint32_t mi_row, const uint32_t mi_col);
+void av1_filter_block_plane_vert_rt(const struct AV1Common *const cm,
+ const MACROBLOCKD *const xd,
+ const int plane,
+ const MACROBLOCKD_PLANE *const plane_ptr,
+ const uint32_t mi_row,
+ const uint32_t mi_col);
+
+void av1_filter_block_plane_horz_rt(const struct AV1Common *const cm,
+ const MACROBLOCKD *const xd,
+ const int plane,
+ const MACROBLOCKD_PLANE *const plane_ptr,
+ const uint32_t mi_row,
+ const uint32_t mi_col);
+
uint8_t av1_get_filter_level(const struct AV1Common *cm,
const loop_filter_info_n *lfi_n, const int dir_idx,
int plane, const MB_MODE_INFO *mbmi);
diff --git a/third_party/libaom/source/libaom/av1/common/blockd.h b/third_party/libaom/source/libaom/av1/common/blockd.h
index 1d1c381bca..5e535add2d 100644
--- a/third_party/libaom/source/libaom/av1/common/blockd.h
+++ b/third_party/libaom/source/libaom/av1/common/blockd.h
@@ -194,11 +194,6 @@ typedef struct RD_STATS {
int zero_rate;
#if CONFIG_RD_DEBUG
int txb_coeff_cost[MAX_MB_PLANE];
- // TODO(jingning): Temporary solution to silence stack over-size warning
- // in handle_inter_mode. This should be fixed after rate-distortion
- // optimization refactoring.
- int16_t txb_coeff_cost_map[MAX_MB_PLANE][TXB_COEFF_COST_MAP_SIZE]
- [TXB_COEFF_COST_MAP_SIZE];
#endif // CONFIG_RD_DEBUG
} RD_STATS;
@@ -325,6 +320,9 @@ typedef struct MB_MODE_INFO {
int8_t cdef_strength : 4;
/**@}*/
+ /*! \brief Skip CDEF for this superblock */
+ uint8_t skip_cdef_curr_sb;
+
#if CONFIG_RD_DEBUG
/*! \brief RD info used for debugging */
RD_STATS rd_stats;
@@ -552,10 +550,6 @@ typedef struct cfl_ctx {
// Whether the reconstructed luma pixels need to be stored
int store_y;
-
-#if CONFIG_DEBUG
- int rate;
-#endif // CONFIG_DEBUG
} CFL_CTX;
typedef struct dist_wtd_comp_params {
@@ -810,7 +804,7 @@ typedef struct macroblockd {
FRAME_CONTEXT *tile_ctx;
/*!
- * Bit depth: copied from cm->seq_params.bit_depth for convenience.
+ * Bit depth: copied from cm->seq_params->bit_depth for convenience.
*/
int bd;
@@ -893,7 +887,7 @@ typedef struct macroblockd {
/*!
* Mask for this block used for compound prediction.
*/
- DECLARE_ALIGNED(16, uint8_t, seg_mask[2 * MAX_SB_SQUARE]);
+ uint8_t *seg_mask;
/*!
* CFL (chroma from luma) related parameters.
@@ -937,13 +931,42 @@ typedef struct macroblockd {
/*!\cond */
static INLINE int is_cur_buf_hbd(const MACROBLOCKD *xd) {
+#if CONFIG_AV1_HIGHBITDEPTH
return xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH ? 1 : 0;
+#else
+ (void)xd;
+ return 0;
+#endif
}
static INLINE uint8_t *get_buf_by_bd(const MACROBLOCKD *xd, uint8_t *buf16) {
+#if CONFIG_AV1_HIGHBITDEPTH
return (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
? CONVERT_TO_BYTEPTR(buf16)
: buf16;
+#else
+ (void)xd;
+ return buf16;
+#endif
+}
+
+typedef struct BitDepthInfo {
+ int bit_depth;
+ /*! Is the image buffer high bit depth?
+ * Low bit depth buffer uses uint8_t.
+ * High bit depth buffer uses uint16_t.
+ * Equivalent to cm->seq_params->use_highbitdepth
+ */
+ int use_highbitdepth_buf;
+} BitDepthInfo;
+
+static INLINE BitDepthInfo get_bit_depth_info(const MACROBLOCKD *xd) {
+ BitDepthInfo bit_depth_info;
+ bit_depth_info.bit_depth = xd->bd;
+ bit_depth_info.use_highbitdepth_buf = is_cur_buf_hbd(xd);
+ assert(IMPLIES(!bit_depth_info.use_highbitdepth_buf,
+ bit_depth_info.bit_depth == 8));
+ return bit_depth_info;
}
static INLINE int get_sqr_bsize_idx(BLOCK_SIZE bsize) {
diff --git a/third_party/libaom/source/libaom/av1/common/cdef.c b/third_party/libaom/source/libaom/av1/common/cdef.c
index d9b5a104e4..9ab7d4d235 100644
--- a/third_party/libaom/source/libaom/av1/common/cdef.c
+++ b/third_party/libaom/source/libaom/av1/common/cdef.c
@@ -21,35 +21,6 @@
#include "av1/common/cdef_block.h"
#include "av1/common/reconinter.h"
-enum { TOP, LEFT, BOTTOM, RIGHT, BOUNDARIES } UENUM1BYTE(BOUNDARY);
-
-/*!\brief Parameters related to CDEF Block */
-typedef struct {
- uint16_t *src;
- uint8_t *dst;
- uint16_t *colbuf[MAX_MB_PLANE];
- cdef_list dlist[MI_SIZE_64X64 * MI_SIZE_64X64];
-
- int xdec;
- int ydec;
- int mi_wide_l2;
- int mi_high_l2;
- int frame_boundary[BOUNDARIES];
-
- int damping;
- int coeff_shift;
- int level;
- int sec_strength;
- int cdef_count;
- int is_zero_level;
- int dir[CDEF_NBLOCKS][CDEF_NBLOCKS];
- int var[CDEF_NBLOCKS][CDEF_NBLOCKS];
-
- int dst_stride;
- int coffset;
- int roffset;
-} CdefBlockInfo;
-
static int is_8x8_block_skip(MB_MODE_INFO **grid, int mi_row, int mi_col,
int mi_stride) {
MB_MODE_INFO **mbmi = grid + mi_row * mi_stride + mi_col;
@@ -116,10 +87,10 @@ void cdef_copy_rect8_16bit_to_16bit_c(uint16_t *dst, int dstride,
}
}
-static void copy_sb8_16(AV1_COMMON *cm, uint16_t *dst, int dstride,
- const uint8_t *src, int src_voffset, int src_hoffset,
- int sstride, int vsize, int hsize) {
- if (cm->seq_params.use_highbitdepth) {
+void av1_cdef_copy_sb8_16(const AV1_COMMON *const cm, uint16_t *const dst,
+ int dstride, const uint8_t *src, int src_voffset,
+ int src_hoffset, int sstride, int vsize, int hsize) {
+ if (cm->seq_params->use_highbitdepth) {
const uint16_t *base =
&CONVERT_TO_SHORTPTR(src)[src_voffset * sstride + src_hoffset];
cdef_copy_rect8_16bit_to_16bit(dst, dstride, base, sstride, vsize, hsize);
@@ -151,29 +122,35 @@ static INLINE void copy_rect(uint16_t *dst, int dstride, const uint16_t *src,
// Inputs:
// cm: Pointer to common structure.
// fb_info: Pointer to the CDEF block-level parameter structure.
-// linebuf: Top feedback buffer for CDEF.
+// colbuf: Left column buffer for CDEF.
// cdef_left: Left block is filtered or not.
// fbc, fbr: col and row index of a block.
// plane: plane index Y/CB/CR.
-// prev_row_cdef: Top blocks are filtered or not.
// Returns:
// Nothing will be returned.
-static void cdef_prepare_fb(AV1_COMMON *cm, CdefBlockInfo *fb_info,
- uint16_t **linebuf, const int *cdef_left, int fbc,
- int fbr, uint8_t plane,
- unsigned char *prev_row_cdef) {
+static void cdef_prepare_fb(const AV1_COMMON *const cm, CdefBlockInfo *fb_info,
+ uint16_t **const colbuf, const int *cdef_left,
+ int fbc, int fbr, int plane) {
const CommonModeInfoParams *const mi_params = &cm->mi_params;
uint16_t *src = fb_info->src;
- const int stride = (mi_params->mi_cols << MI_SIZE_LOG2) + 2 * CDEF_HBORDER;
+ const int luma_stride =
+ ALIGN_POWER_OF_TWO(mi_params->mi_cols << MI_SIZE_LOG2, 4);
const int nvfb = (mi_params->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
const int nhfb = (mi_params->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
int cstart = 0;
if (!*cdef_left) cstart = -CDEF_HBORDER;
int rend, cend;
- int nhb = AOMMIN(MI_SIZE_64X64, mi_params->mi_cols - MI_SIZE_64X64 * fbc);
- int nvb = AOMMIN(MI_SIZE_64X64, mi_params->mi_rows - MI_SIZE_64X64 * fbr);
- int hsize = nhb << fb_info->mi_wide_l2;
- int vsize = nvb << fb_info->mi_high_l2;
+ const int nhb =
+ AOMMIN(MI_SIZE_64X64, mi_params->mi_cols - MI_SIZE_64X64 * fbc);
+ const int nvb =
+ AOMMIN(MI_SIZE_64X64, mi_params->mi_rows - MI_SIZE_64X64 * fbr);
+ const int hsize = nhb << fb_info->mi_wide_l2;
+ const int vsize = nvb << fb_info->mi_high_l2;
+ const uint16_t *top_linebuf = fb_info->top_linebuf[plane];
+ const uint16_t *bot_linebuf = fb_info->bot_linebuf[plane];
+ const int bot_offset = (vsize + CDEF_VBORDER) * CDEF_BSTRIDE;
+ const int stride =
+ luma_stride >> (plane == AOM_PLANE_Y ? 0 : cm->seq_params->subsampling_x);
if (fbc == nhfb - 1)
cend = hsize;
@@ -185,54 +162,55 @@ static void cdef_prepare_fb(AV1_COMMON *cm, CdefBlockInfo *fb_info,
else
rend = vsize + CDEF_VBORDER;
- if (fbc == nhfb - 1) {
- /* On the last superblock column, fill in the right border with
- CDEF_VERY_LARGE to avoid filtering with the outside. */
- fill_rect(&src[cend + CDEF_HBORDER], CDEF_BSTRIDE, rend + CDEF_VBORDER,
- hsize + CDEF_HBORDER - cend, CDEF_VERY_LARGE);
- }
- if (fbr == nvfb - 1) {
- /* On the last superblock row, fill in the bottom border with
- CDEF_VERY_LARGE to avoid filtering with the outside. */
- fill_rect(&src[(rend + CDEF_VBORDER) * CDEF_BSTRIDE], CDEF_BSTRIDE,
- CDEF_VBORDER, hsize + 2 * CDEF_HBORDER, CDEF_VERY_LARGE);
- }
/* Copy in the pixels we need from the current superblock for
deringing.*/
- copy_sb8_16(cm, &src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER + cstart],
- CDEF_BSTRIDE, fb_info->dst, fb_info->roffset,
- fb_info->coffset + cstart, fb_info->dst_stride, rend,
- cend - cstart);
- if (!prev_row_cdef[fbc]) {
- copy_sb8_16(cm, &src[CDEF_HBORDER], CDEF_BSTRIDE, fb_info->dst,
- fb_info->roffset - CDEF_VBORDER, fb_info->coffset,
- fb_info->dst_stride, CDEF_VBORDER, hsize);
- } else if (fbr > 0) {
- copy_rect(&src[CDEF_HBORDER], CDEF_BSTRIDE,
- &linebuf[plane][fb_info->coffset], stride, CDEF_VBORDER, hsize);
+ av1_cdef_copy_sb8_16(
+ cm, &src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER + cstart],
+ CDEF_BSTRIDE, fb_info->dst, fb_info->roffset, fb_info->coffset + cstart,
+ fb_info->dst_stride, vsize, cend - cstart);
+
+ /* Copy in the pixels we need for the current superblock from the bottom buffer.*/
+ if (fbr < nvfb - 1) {
+ copy_rect(&src[bot_offset + CDEF_HBORDER], CDEF_BSTRIDE,
+ &bot_linebuf[fb_info->coffset], stride, CDEF_VBORDER, hsize);
+ } else {
+ fill_rect(&src[bot_offset + CDEF_HBORDER], CDEF_BSTRIDE, CDEF_VBORDER,
+ hsize, CDEF_VERY_LARGE);
+ }
+ if (fbr < nvfb - 1 && fbc > 0) {
+ copy_rect(&src[bot_offset], CDEF_BSTRIDE,
+ &bot_linebuf[fb_info->coffset - CDEF_HBORDER], stride,
+ CDEF_VBORDER, CDEF_HBORDER);
+ } else {
+ fill_rect(&src[bot_offset], CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER,
+ CDEF_VERY_LARGE);
+ }
+ if (fbr < nvfb - 1 && fbc < nhfb - 1) {
+ copy_rect(&src[bot_offset + hsize + CDEF_HBORDER], CDEF_BSTRIDE,
+ &bot_linebuf[fb_info->coffset + hsize], stride, CDEF_VBORDER,
+ CDEF_HBORDER);
+ } else {
+ fill_rect(&src[bot_offset + hsize + CDEF_HBORDER], CDEF_BSTRIDE,
+ CDEF_VBORDER, CDEF_HBORDER, CDEF_VERY_LARGE);
+ }
+
+ /* Copy in the pixels we need for the current superblock from the top buffer.*/
+ if (fbr > 0) {
+ copy_rect(&src[CDEF_HBORDER], CDEF_BSTRIDE, &top_linebuf[fb_info->coffset],
+ stride, CDEF_VBORDER, hsize);
} else {
fill_rect(&src[CDEF_HBORDER], CDEF_BSTRIDE, CDEF_VBORDER, hsize,
CDEF_VERY_LARGE);
}
- if (!prev_row_cdef[fbc - 1]) {
- copy_sb8_16(cm, src, CDEF_BSTRIDE, fb_info->dst,
- fb_info->roffset - CDEF_VBORDER,
- fb_info->coffset - CDEF_HBORDER, fb_info->dst_stride,
- CDEF_VBORDER, CDEF_HBORDER);
- } else if (fbr > 0 && fbc > 0) {
- copy_rect(src, CDEF_BSTRIDE,
- &linebuf[plane][fb_info->coffset - CDEF_HBORDER], stride,
- CDEF_VBORDER, CDEF_HBORDER);
+ if (fbr > 0 && fbc > 0) {
+ copy_rect(src, CDEF_BSTRIDE, &top_linebuf[fb_info->coffset - CDEF_HBORDER],
+ stride, CDEF_VBORDER, CDEF_HBORDER);
} else {
fill_rect(src, CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER, CDEF_VERY_LARGE);
}
- if (!prev_row_cdef[fbc + 1]) {
- copy_sb8_16(cm, &src[CDEF_HBORDER + hsize], CDEF_BSTRIDE, fb_info->dst,
- fb_info->roffset - CDEF_VBORDER, fb_info->coffset + hsize,
- fb_info->dst_stride, CDEF_VBORDER, CDEF_HBORDER);
- } else if (fbr > 0 && fbc < nhfb - 1) {
+ if (fbr > 0 && fbc < nhfb - 1) {
copy_rect(&src[hsize + CDEF_HBORDER], CDEF_BSTRIDE,
- &linebuf[plane][fb_info->coffset + hsize], stride, CDEF_VBORDER,
+ &top_linebuf[fb_info->coffset + hsize], stride, CDEF_VBORDER,
CDEF_HBORDER);
} else {
fill_rect(&src[hsize + CDEF_HBORDER], CDEF_BSTRIDE, CDEF_VBORDER,
@@ -241,36 +219,25 @@ static void cdef_prepare_fb(AV1_COMMON *cm, CdefBlockInfo *fb_info,
if (*cdef_left) {
/* If we deringed the superblock on the left then we need to copy in
saved pixels. */
- copy_rect(src, CDEF_BSTRIDE, fb_info->colbuf[plane], CDEF_HBORDER,
+ copy_rect(src, CDEF_BSTRIDE, colbuf[plane], CDEF_HBORDER,
rend + CDEF_VBORDER, CDEF_HBORDER);
}
/* Saving pixels in case we need to dering the superblock on the
right. */
- copy_rect(fb_info->colbuf[plane], CDEF_HBORDER, src + hsize, CDEF_BSTRIDE,
+ copy_rect(colbuf[plane], CDEF_HBORDER, src + hsize, CDEF_BSTRIDE,
rend + CDEF_VBORDER, CDEF_HBORDER);
- copy_sb8_16(cm, &linebuf[plane][fb_info->coffset], stride, fb_info->dst,
- (MI_SIZE_64X64 << fb_info->mi_high_l2) * (fbr + 1) - CDEF_VBORDER,
- fb_info->coffset, fb_info->dst_stride, CDEF_VBORDER, hsize);
- if (fb_info->frame_boundary[TOP]) {
- fill_rect(src, CDEF_BSTRIDE, CDEF_VBORDER, hsize + 2 * CDEF_HBORDER,
- CDEF_VERY_LARGE);
- }
if (fb_info->frame_boundary[LEFT]) {
fill_rect(src, CDEF_BSTRIDE, vsize + 2 * CDEF_VBORDER, CDEF_HBORDER,
CDEF_VERY_LARGE);
}
- if (fb_info->frame_boundary[BOTTOM]) {
- fill_rect(&src[(vsize + CDEF_VBORDER) * CDEF_BSTRIDE], CDEF_BSTRIDE,
- CDEF_VBORDER, hsize + 2 * CDEF_HBORDER, CDEF_VERY_LARGE);
- }
if (fb_info->frame_boundary[RIGHT]) {
fill_rect(&src[hsize + CDEF_HBORDER], CDEF_BSTRIDE,
vsize + 2 * CDEF_VBORDER, CDEF_HBORDER, CDEF_VERY_LARGE);
}
}
-static INLINE void cdef_filter_fb(CdefBlockInfo *fb_info, uint8_t plane,
+static INLINE void cdef_filter_fb(CdefBlockInfo *const fb_info, int plane,
uint8_t use_highbitdepth) {
int offset = fb_info->dst_stride * fb_info->roffset + fb_info->coffset;
if (use_highbitdepth) {
@@ -291,11 +258,11 @@ static INLINE void cdef_filter_fb(CdefBlockInfo *fb_info, uint8_t plane,
}
// Initializes block-level parameters for CDEF.
-static INLINE void cdef_init_fb_col(MACROBLOCKD *xd,
+static INLINE void cdef_init_fb_col(const MACROBLOCKD *const xd,
const CdefInfo *const cdef_info,
- CdefBlockInfo *fb_info,
- const int mbmi_cdef_strength, int fbc,
- int fbr, uint8_t plane) {
+ CdefBlockInfo *const fb_info,
+ int mbmi_cdef_strength, int fbc, int fbr,
+ int plane) {
if (plane == AOM_PLANE_Y) {
fb_info->level =
cdef_info->cdef_strengths[mbmi_cdef_strength] / CDEF_SEC_STRENGTHS;
@@ -328,9 +295,9 @@ static INLINE void cdef_init_fb_col(MACROBLOCKD *xd,
fb_info->coffset = MI_SIZE_64X64 * fbc << fb_info->mi_wide_l2;
}
-static bool cdef_fb_col(AV1_COMMON *cm, MACROBLOCKD *xd, CdefBlockInfo *fb_info,
- int fbc, int fbr, int *cdef_left, uint16_t **linebuf,
- unsigned char *prev_row_cdef) {
+static void cdef_fb_col(const AV1_COMMON *const cm, const MACROBLOCKD *const xd,
+ CdefBlockInfo *const fb_info, uint16_t **const colbuf,
+ int *cdef_left, int fbc, int fbr) {
const CommonModeInfoParams *const mi_params = &cm->mi_params;
const int mbmi_cdef_strength =
mi_params
@@ -343,9 +310,9 @@ static bool cdef_fb_col(AV1_COMMON *cm, MACROBLOCKD *xd, CdefBlockInfo *fb_info,
MI_SIZE_64X64 * fbc] == NULL ||
mbmi_cdef_strength == -1) {
*cdef_left = 0;
- return 0;
+ return;
}
- for (uint8_t plane = 0; plane < num_planes; plane++) {
+ for (int plane = 0; plane < num_planes; plane++) {
cdef_init_fb_col(xd, &cm->cdef_info, fb_info, mbmi_cdef_strength, fbc, fbr,
plane);
if (fb_info->is_zero_level ||
@@ -353,20 +320,26 @@ static bool cdef_fb_col(AV1_COMMON *cm, MACROBLOCKD *xd, CdefBlockInfo *fb_info,
mi_params, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64,
fb_info->dlist, BLOCK_64X64)) == 0) {
*cdef_left = 0;
- return 0;
+ return;
}
- cdef_prepare_fb(cm, fb_info, linebuf, cdef_left, fbc, fbr, plane,
- prev_row_cdef);
- cdef_filter_fb(fb_info, plane, cm->seq_params.use_highbitdepth);
+ cdef_prepare_fb(cm, fb_info, colbuf, cdef_left, fbc, fbr, plane);
+ cdef_filter_fb(fb_info, plane, cm->seq_params->use_highbitdepth);
}
*cdef_left = 1;
- return 1;
}
-static INLINE void cdef_init_fb_row(CdefBlockInfo *fb_info, int mi_rows,
- int fbr) {
- const int nvfb = (mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
-
+// Initializes row-level parameters for CDEF frame.
+void av1_cdef_init_fb_row(const AV1_COMMON *const cm,
+ const MACROBLOCKD *const xd,
+ CdefBlockInfo *const fb_info,
+ uint16_t **const linebuf, uint16_t *const src,
+ struct AV1CdefSyncData *const cdef_sync, int fbr) {
+ (void)cdef_sync;
+ const int num_planes = av1_num_planes(cm);
+ const int nvfb = (cm->mi_params.mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
+ const int luma_stride =
+ ALIGN_POWER_OF_TWO(cm->mi_params.mi_cols << MI_SIZE_LOG2, 4);
+ const bool ping_pong = fbr & 1;
// for the current filter block, it's top left corner mi structure (mi_tl)
// is first accessed to check whether the top and left boundaries are
// frame boundaries. Then bottom-left and top-right mi structures are
@@ -379,78 +352,58 @@ static INLINE void cdef_init_fb_row(CdefBlockInfo *fb_info, int mi_rows,
fb_info->frame_boundary[TOP] = (MI_SIZE_64X64 * fbr == 0) ? 1 : 0;
if (fbr != nvfb - 1)
fb_info->frame_boundary[BOTTOM] =
- (MI_SIZE_64X64 * (fbr + 1) == mi_rows) ? 1 : 0;
+ (MI_SIZE_64X64 * (fbr + 1) == cm->mi_params.mi_rows) ? 1 : 0;
else
fb_info->frame_boundary[BOTTOM] = 1;
+
+ fb_info->src = src;
+ fb_info->damping = cm->cdef_info.cdef_damping;
+ fb_info->coeff_shift = AOMMAX(cm->seq_params->bit_depth - 8, 0);
+ av1_zero(fb_info->dir);
+ av1_zero(fb_info->var);
+
+ for (int plane = 0; plane < num_planes; plane++) {
+ const int mi_high_l2 = MI_SIZE_LOG2 - xd->plane[plane].subsampling_y;
+ const int offset = MI_SIZE_64X64 * (fbr + 1) << mi_high_l2;
+ const int stride = luma_stride >> xd->plane[plane].subsampling_x;
+ // here ping-pong buffers are maintained for top linebuf
+ // to avoid linebuf over-write by consecutive row.
+ uint16_t *const top_linebuf =
+ &linebuf[plane][ping_pong * CDEF_VBORDER * stride];
+ fb_info->bot_linebuf[plane] = &linebuf[plane][(CDEF_VBORDER << 1) * stride];
+
+ if (fbr != nvfb - 1) // top line buffer copy
+ av1_cdef_copy_sb8_16(cm, top_linebuf, stride, xd->plane[plane].dst.buf,
+ offset - CDEF_VBORDER, 0,
+ xd->plane[plane].dst.stride, CDEF_VBORDER, stride);
+ fb_info->top_linebuf[plane] =
+ &linebuf[plane][(!ping_pong) * CDEF_VBORDER * stride];
+
+ if (fbr != nvfb - 1) // bottom line buffer copy
+ av1_cdef_copy_sb8_16(cm, fb_info->bot_linebuf[plane], stride,
+ xd->plane[plane].dst.buf, offset, 0,
+ xd->plane[plane].dst.stride, CDEF_VBORDER, stride);
+ }
}
-static void cdef_fb_row(AV1_COMMON *cm, MACROBLOCKD *xd, CdefBlockInfo *fb_info,
- uint16_t **linebuf, int fbr,
- unsigned char *curr_row_cdef,
- unsigned char *prev_row_cdef) {
+void av1_cdef_fb_row(const AV1_COMMON *const cm, MACROBLOCKD *xd,
+ uint16_t **const linebuf, uint16_t **const colbuf,
+ uint16_t *const src, int fbr,
+ cdef_init_fb_row_t cdef_init_fb_row_fn,
+ struct AV1CdefSyncData *const cdef_sync) {
+ CdefBlockInfo fb_info;
int cdef_left = 1;
const int nhfb = (cm->mi_params.mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
- cdef_init_fb_row(fb_info, cm->mi_params.mi_rows, fbr);
+ cdef_init_fb_row_fn(cm, xd, &fb_info, linebuf, src, cdef_sync, fbr);
for (int fbc = 0; fbc < nhfb; fbc++) {
- fb_info->frame_boundary[LEFT] = (MI_SIZE_64X64 * fbc == 0) ? 1 : 0;
+ fb_info.frame_boundary[LEFT] = (MI_SIZE_64X64 * fbc == 0) ? 1 : 0;
if (fbc != nhfb - 1)
- fb_info->frame_boundary[RIGHT] =
+ fb_info.frame_boundary[RIGHT] =
(MI_SIZE_64X64 * (fbc + 1) == cm->mi_params.mi_cols) ? 1 : 0;
else
- fb_info->frame_boundary[RIGHT] = 1;
- curr_row_cdef[fbc] = cdef_fb_col(cm, xd, fb_info, fbc, fbr, &cdef_left,
- linebuf, prev_row_cdef);
- }
-}
-
-// Initialize the frame-level CDEF parameters.
-// Inputs:
-// frame: Pointer to input frame buffer.
-// cm: Pointer to common structure.
-// xd: Pointer to common current coding block structure.
-// fb_info: Pointer to the CDEF block-level parameter structure.
-// src: Intermediate input buffer for CDEF.
-// colbuf: Left feedback buffer for CDEF.
-// linebuf: Top feedback buffer for CDEF.
-// Returns:
-// Nothing will be returned.
-static void cdef_prepare_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
- MACROBLOCKD *xd, CdefBlockInfo *fb_info,
- uint16_t *src, uint16_t **colbuf,
- uint16_t **linebuf) {
- const int num_planes = av1_num_planes(cm);
- const int stride = (cm->mi_params.mi_cols << MI_SIZE_LOG2) + 2 * CDEF_HBORDER;
- av1_setup_dst_planes(xd->plane, cm->seq_params.sb_size, frame, 0, 0, 0,
- num_planes);
-
- for (uint8_t plane = 0; plane < num_planes; plane++) {
- linebuf[plane] = aom_malloc(sizeof(*linebuf) * CDEF_VBORDER * stride);
- const int mi_high_l2 = MI_SIZE_LOG2 - xd->plane[plane].subsampling_y;
- const int block_height = (MI_SIZE_64X64 << mi_high_l2) + 2 * CDEF_VBORDER;
- colbuf[plane] = aom_malloc(
- sizeof(*colbuf) *
- ((CDEF_BLOCKSIZE << (MI_SIZE_LOG2 - xd->plane[plane].subsampling_y)) +
- 2 * CDEF_VBORDER) *
- CDEF_HBORDER);
- fill_rect(colbuf[plane], CDEF_HBORDER, block_height, CDEF_HBORDER,
- CDEF_VERY_LARGE);
- fb_info->colbuf[plane] = colbuf[plane];
- }
-
- fb_info->src = src;
- fb_info->damping = cm->cdef_info.cdef_damping;
- fb_info->coeff_shift = AOMMAX(cm->seq_params.bit_depth - 8, 0);
- memset(fb_info->dir, 0, sizeof(fb_info->dir));
- memset(fb_info->var, 0, sizeof(fb_info->var));
-}
-
-static void cdef_free(unsigned char *row_cdef, uint16_t **colbuf,
- uint16_t **linebuf, const int num_planes) {
- aom_free(row_cdef);
- for (uint8_t plane = 0; plane < num_planes; plane++) {
- aom_free(colbuf[plane]);
- aom_free(linebuf[plane]);
+ fb_info.frame_boundary[RIGHT] = 1;
+ cdef_fb_col(cm, xd, &fb_info, colbuf, &cdef_left, fbc, fbr);
}
}
@@ -461,29 +414,15 @@ static void cdef_free(unsigned char *row_cdef, uint16_t **colbuf,
// xd: Pointer to common current coding block structure.
// Returns:
// Nothing will be returned.
-void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
- MACROBLOCKD *xd) {
- DECLARE_ALIGNED(16, uint16_t, src[CDEF_INBUF_SIZE]);
- uint16_t *colbuf[MAX_MB_PLANE] = { NULL };
- uint16_t *linebuf[MAX_MB_PLANE] = { NULL };
- CdefBlockInfo fb_info;
- unsigned char *row_cdef, *prev_row_cdef, *curr_row_cdef;
+void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *const cm,
+ MACROBLOCKD *xd, cdef_init_fb_row_t cdef_init_fb_row_fn) {
const int num_planes = av1_num_planes(cm);
const int nvfb = (cm->mi_params.mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
- const int nhfb = (cm->mi_params.mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
- row_cdef = aom_malloc(sizeof(*row_cdef) * (nhfb + 2) * 2);
- memset(row_cdef, 1, sizeof(*row_cdef) * (nhfb + 2) * 2);
- prev_row_cdef = row_cdef + 1;
- curr_row_cdef = prev_row_cdef + nhfb + 2;
- cdef_prepare_frame(frame, cm, xd, &fb_info, src, colbuf, linebuf);
-
- for (int fbr = 0; fbr < nvfb; fbr++) {
- unsigned char *tmp;
- cdef_fb_row(cm, xd, &fb_info, linebuf, fbr, curr_row_cdef, prev_row_cdef);
- tmp = prev_row_cdef;
- prev_row_cdef = curr_row_cdef;
- curr_row_cdef = tmp;
- }
- cdef_free(row_cdef, colbuf, linebuf, num_planes);
+ av1_setup_dst_planes(xd->plane, cm->seq_params->sb_size, frame, 0, 0, 0,
+ num_planes);
+
+ for (int fbr = 0; fbr < nvfb; fbr++)
+ av1_cdef_fb_row(cm, xd, cm->cdef_info.linebuf, cm->cdef_info.colbuf,
+ cm->cdef_info.srcbuf, fbr, cdef_init_fb_row_fn, NULL);
}
diff --git a/third_party/libaom/source/libaom/av1/common/cdef.h b/third_party/libaom/source/libaom/av1/common/cdef.h
index 4d6e60023b..194117884e 100644
--- a/third_party/libaom/source/libaom/av1/common/cdef.h
+++ b/third_party/libaom/source/libaom/av1/common/cdef.h
@@ -23,6 +23,40 @@
#include "av1/common/av1_common_int.h"
#include "av1/common/cdef_block.h"
+enum { TOP, LEFT, BOTTOM, RIGHT, BOUNDARIES } UENUM1BYTE(BOUNDARY);
+
+struct AV1CdefSyncData;
+
+/*!\brief Parameters related to CDEF Block */
+typedef struct {
+ uint16_t *src; /*!< CDEF intermediate buffer */
+ uint16_t *top_linebuf[MAX_MB_PLANE]; /*!< CDEF top line buffer */
+ uint16_t *bot_linebuf[MAX_MB_PLANE]; /*!< CDEF bottom line buffer */
+ uint8_t *dst; /*!< CDEF destination buffer */
+ cdef_list
+ dlist[MI_SIZE_64X64 * MI_SIZE_64X64]; /*!< CDEF 8x8 block positions */
+
+ int xdec; /*!< Sub-sampling X */
+ int ydec; /*!< Sub-sampling Y */
+ int mi_wide_l2; /*!< Pixels per mi unit in width */
+ int mi_high_l2; /*!< Pixels per mi unit in height */
+ int frame_boundary[BOUNDARIES]; /*!< frame boundaries */
+
+ int damping; /*!< CDEF damping factor */
+ int coeff_shift; /*!< Bit-depth based shift for calculating filter strength */
+ int level; /*!< CDEF filtering level */
+ int sec_strength; /*!< CDEF secondary strength */
+ int cdef_count; /*!< Number of CDEF sub-blocks in superblock */
+ int is_zero_level; /*!< CDEF filtering level ON/OFF */
+ int dir[CDEF_NBLOCKS]
+ [CDEF_NBLOCKS]; /*!< CDEF filter direction for all 8x8 sub-blocks*/
+ int var[CDEF_NBLOCKS][CDEF_NBLOCKS]; /*!< variance for all 8x8 sub-blocks */
+
+ int dst_stride; /*!< CDEF destination buffer stride */
+ int coffset; /*!< current superblock offset in a row */
+ int roffset; /*!< current row offset */
+} CdefBlockInfo;
+
static INLINE int sign(int i) { return i < 0 ? -1 : 1; }
static INLINE int constrain(int diff, int threshold, int damping) {
@@ -41,19 +75,36 @@ int av1_cdef_compute_sb_list(const CommonModeInfoParams *const mi_params,
int mi_row, int mi_col, cdef_list *dlist,
BLOCK_SIZE bsize);
+typedef void (*cdef_init_fb_row_t)(
+ const AV1_COMMON *const cm, const MACROBLOCKD *const xd,
+ CdefBlockInfo *const fb_info, uint16_t **const linebuf, uint16_t *const src,
+ struct AV1CdefSyncData *const cdef_sync, int fbr);
+
/*!\brief Function for applying CDEF to a frame
*
* \ingroup in_loop_cdef
* This function applies CDEF to a frame.
*
- * \param[in, out] frame Compressed frame buffer
- * \param[in, out] cm Pointer to top level common structure
- * \param[in] xd Pointer to common current coding block structure
+ * \param[in, out] frame Compressed frame buffer
+ * \param[in, out] cm Pointer to top level common structure
+ * \param[in] xd Pointer to common current coding block structure
+ * \param[in] cdef_init_fb_row_fn Function Pointer
*
* \return Nothing is returned. Instead, the filtered frame is output in
* \c frame.
*/
-void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm, MACROBLOCKD *xd);
+void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *const cm,
+ MACROBLOCKD *xd, cdef_init_fb_row_t cdef_init_fb_row_fn);
+void av1_cdef_fb_row(const AV1_COMMON *const cm, MACROBLOCKD *xd,
+ uint16_t **const linebuf, uint16_t **const colbuf,
+ uint16_t *const src, int fbr,
+ cdef_init_fb_row_t cdef_init_fb_row_fn,
+ struct AV1CdefSyncData *const cdef_sync);
+void av1_cdef_init_fb_row(const AV1_COMMON *const cm,
+ const MACROBLOCKD *const xd,
+ CdefBlockInfo *const fb_info,
+ uint16_t **const linebuf, uint16_t *const src,
+ struct AV1CdefSyncData *const cdef_sync, int fbr);
#ifdef __cplusplus
} // extern "C"
diff --git a/third_party/libaom/source/libaom/av1/common/cdef_block.h b/third_party/libaom/source/libaom/av1/common/cdef_block.h
index 6b0ae0a9db..574df2d0de 100644
--- a/third_party/libaom/source/libaom/av1/common/cdef_block.h
+++ b/third_party/libaom/source/libaom/av1/common/cdef_block.h
@@ -19,8 +19,8 @@
#define CDEF_NBLOCKS ((1 << MAX_SB_SIZE_LOG2) / 8)
#define CDEF_SB_SHIFT (MAX_SB_SIZE_LOG2 - CDEF_BLOCKSIZE_LOG2)
-/* We need to buffer three vertical lines. */
-#define CDEF_VBORDER (3)
+/* We need to buffer two vertical lines. */
+#define CDEF_VBORDER (2)
/* We only need to buffer three horizontal pixels too, but let's align to
16 bytes (8 x 16 bits) to make vectorization easier. */
#define CDEF_HBORDER (8)
diff --git a/third_party/libaom/source/libaom/av1/common/cfl.h b/third_party/libaom/source/libaom/av1/common/cfl.h
index 0062e9f7ba..0d53764f28 100644
--- a/third_party/libaom/source/libaom/av1/common/cfl.h
+++ b/third_party/libaom/source/libaom/av1/common/cfl.h
@@ -39,7 +39,7 @@ static INLINE CFL_ALLOWED_TYPE store_cfl_required(const AV1_COMMON *cm,
const MACROBLOCKD *xd) {
const MB_MODE_INFO *mbmi = xd->mi[0];
- if (cm->seq_params.monochrome) return CFL_DISALLOWED;
+ if (cm->seq_params->monochrome) return CFL_DISALLOWED;
if (!xd->is_chroma_ref) {
// For non-chroma-reference blocks, we should always store the luma pixels,
diff --git a/third_party/libaom/source/libaom/av1/common/common.h b/third_party/libaom/source/libaom/av1/common/common.h
index bed6083db2..cc2da98a16 100644
--- a/third_party/libaom/source/libaom/av1/common/common.h
+++ b/third_party/libaom/source/libaom/av1/common/common.h
@@ -50,7 +50,7 @@ static INLINE int get_unsigned_bits(unsigned int num_values) {
}
#define CHECK_MEM_ERROR(cm, lval, expr) \
- AOM_CHECK_MEM_ERROR(&cm->error, lval, expr)
+ AOM_CHECK_MEM_ERROR(cm->error, lval, expr)
#define AOM_FRAME_MARKER 0x2
diff --git a/third_party/libaom/source/libaom/av1/common/common_data.h b/third_party/libaom/source/libaom/av1/common/common_data.h
index 402845cafe..38e14714c0 100644
--- a/third_party/libaom/source/libaom/av1/common/common_data.h
+++ b/third_party/libaom/source/libaom/av1/common/common_data.h
@@ -434,9 +434,12 @@ static const int intra_mode_context[INTRA_MODES] = {
static const int quant_dist_weight[4][2] = {
{ 2, 3 }, { 2, 5 }, { 2, 7 }, { 1, MAX_FRAME_DISTANCE }
};
-static const int quant_dist_lookup_table[2][4][2] = {
- { { 9, 7 }, { 11, 5 }, { 12, 4 }, { 13, 3 } },
- { { 7, 9 }, { 5, 11 }, { 4, 12 }, { 3, 13 } },
+
+static const int quant_dist_lookup_table[4][2] = {
+ { 9, 7 },
+ { 11, 5 },
+ { 12, 4 },
+ { 13, 3 },
};
#ifdef __cplusplus
diff --git a/third_party/libaom/source/libaom/av1/common/enums.h b/third_party/libaom/source/libaom/av1/common/enums.h
index 9c2976b08d..0e1e744daf 100644
--- a/third_party/libaom/source/libaom/av1/common/enums.h
+++ b/third_party/libaom/source/libaom/av1/common/enums.h
@@ -321,6 +321,7 @@ enum { PLANE_TYPE_Y, PLANE_TYPE_UV, PLANE_TYPES } UENUM1BYTE(PLANE_TYPE);
#define CFL_ALPHABET_SIZE_LOG2 4
#define CFL_ALPHABET_SIZE (1 << CFL_ALPHABET_SIZE_LOG2)
#define CFL_MAGS_SIZE ((2 << CFL_ALPHABET_SIZE_LOG2) + 1)
+#define CFL_INDEX_ZERO CFL_ALPHABET_SIZE
#define CFL_IDX_U(idx) (idx >> CFL_ALPHABET_SIZE_LOG2)
#define CFL_IDX_V(idx) (idx & (CFL_ALPHABET_SIZE - 1))
@@ -451,6 +452,14 @@ enum {
UV_MODE_INVALID, // For uv_mode in inter blocks
} UENUM1BYTE(UV_PREDICTION_MODE);
+// Number of top model rd to store for pruning y modes in intra mode decision
+#define TOP_INTRA_MODEL_COUNT 4
+// Total number of luma intra prediction modes (include both directional and
+// non-directional modes)
+// 61 = PAETH_PRED - DC_PRED + 1 + 6 * 8
+// Because there are 8 directional modes, each has additional 6 delta angles.
+#define LUMA_MODE_COUNT 61
+
enum {
SIMPLE_TRANSLATION,
OBMC_CAUSAL, // 2-sided OBMC
diff --git a/third_party/libaom/source/libaom/av1/common/loopfiltermask.c b/third_party/libaom/source/libaom/av1/common/loopfiltermask.c
index 1ae0b112ce..22ab0adf2c 100644
--- a/third_party/libaom/source/libaom/av1/common/loopfiltermask.c
+++ b/third_party/libaom/source/libaom/av1/common/loopfiltermask.c
@@ -1002,11 +1002,11 @@ void av1_filter_block_plane_bitmask_vert(
}
#if CONFIG_AV1_HIGHBITDEPTH
- if (cm->seq_params.use_highbitdepth)
+ if (cm->seq_params->use_highbitdepth)
highbd_filter_selectively_vert_row2(
ssx, CONVERT_TO_SHORTPTR(dst->buf), dst->stride, pl, mask_16x16_0,
mask_8x8_0, mask_4x4_0, mask_16x16_1, mask_8x8_1, mask_4x4_1,
- &cm->lf_info, lfl, lfl2, (int)cm->seq_params.bit_depth);
+ &cm->lf_info, lfl, lfl2, (int)cm->seq_params->bit_depth);
else
filter_selectively_vert_row2(
ssx, dst->buf, dst->stride, pl, mask_16x16_0, mask_8x8_0, mask_4x4_0,
@@ -1075,10 +1075,11 @@ void av1_filter_block_plane_bitmask_horz(
mask_4x4 = (mask_4x4 >> shift) & mask_cutoff;
#if CONFIG_AV1_HIGHBITDEPTH
- if (cm->seq_params.use_highbitdepth)
- highbd_filter_selectively_horiz(
- CONVERT_TO_SHORTPTR(dst->buf), dst->stride, pl, ssx, mask_16x16,
- mask_8x8, mask_4x4, &cm->lf_info, lfl, (int)cm->seq_params.bit_depth);
+ if (cm->seq_params->use_highbitdepth)
+ highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf),
+ dst->stride, pl, ssx, mask_16x16,
+ mask_8x8, mask_4x4, &cm->lf_info, lfl,
+ (int)cm->seq_params->bit_depth);
else
filter_selectively_horiz(dst->buf, dst->stride, pl, ssx, mask_16x16,
mask_8x8, mask_4x4, &cm->lf_info, lfl);
@@ -1109,10 +1110,10 @@ void av1_filter_block_plane_ver(AV1_COMMON *const cm,
uint8_t *lfl2;
// filter two rows at a time
- for (r = 0; r < cm->seq_params.mib_size &&
+ for (r = 0; r < cm->seq_params->mib_size &&
((mi_row + r) << MI_SIZE_LOG2 < cm->height);
r += r_step) {
- for (c = 0; c < cm->seq_params.mib_size &&
+ for (c = 0; c < cm->seq_params->mib_size &&
((mi_col + c) << MI_SIZE_LOG2 < cm->width);
c += MI_SIZE_64X64) {
dst->buf += ((c << MI_SIZE_LOG2) >> ssx);
@@ -1159,11 +1160,11 @@ void av1_filter_block_plane_ver(AV1_COMMON *const cm,
uint64_t mask_4x4_1 = (mask_4x4 >> shift_next) & mask_cutoff;
#if CONFIG_AV1_HIGHBITDEPTH
- if (cm->seq_params.use_highbitdepth)
+ if (cm->seq_params->use_highbitdepth)
highbd_filter_selectively_vert_row2(
ssx, CONVERT_TO_SHORTPTR(dst->buf), dst->stride, pl, mask_16x16_0,
mask_8x8_0, mask_4x4_0, mask_16x16_1, mask_8x8_1, mask_4x4_1,
- &cm->lf_info, lfl, lfl2, (int)cm->seq_params.bit_depth);
+ &cm->lf_info, lfl, lfl2, (int)cm->seq_params->bit_depth);
else
filter_selectively_vert_row2(ssx, dst->buf, dst->stride, pl,
mask_16x16_0, mask_8x8_0, mask_4x4_0,
@@ -1194,10 +1195,10 @@ void av1_filter_block_plane_hor(AV1_COMMON *const cm,
uint64_t mask_4x4 = 0;
uint8_t *lfl;
- for (r = 0; r < cm->seq_params.mib_size &&
+ for (r = 0; r < cm->seq_params->mib_size &&
((mi_row + r) << MI_SIZE_LOG2 < cm->height);
r += r_step) {
- for (c = 0; c < cm->seq_params.mib_size &&
+ for (c = 0; c < cm->seq_params->mib_size &&
((mi_col + c) << MI_SIZE_LOG2 < cm->width);
c += MI_SIZE_64X64) {
if (mi_row + r == 0) continue;
@@ -1235,11 +1236,11 @@ void av1_filter_block_plane_hor(AV1_COMMON *const cm,
mask_4x4 = (mask_4x4 >> shift) & mask_cutoff;
#if CONFIG_AV1_HIGHBITDEPTH
- if (cm->seq_params.use_highbitdepth)
+ if (cm->seq_params->use_highbitdepth)
highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf),
dst->stride, pl, ssx, mask_16x16,
mask_8x8, mask_4x4, &cm->lf_info, lfl,
- (int)cm->seq_params.bit_depth);
+ (int)cm->seq_params->bit_depth);
else
filter_selectively_horiz(dst->buf, dst->stride, pl, ssx, mask_16x16,
mask_8x8, mask_4x4, &cm->lf_info, lfl);
@@ -1260,9 +1261,11 @@ void av1_store_bitmask_vartx(AV1_COMMON *cm, int mi_row, int mi_col,
const TX_SIZE tx_size_y_vert = txsize_vert_map[tx_size];
const TX_SIZE tx_size_y_horz = txsize_horz_map[tx_size];
const TX_SIZE tx_size_uv_vert = txsize_vert_map[av1_get_max_uv_txsize(
- mbmi->bsize, cm->seq_params.subsampling_x, cm->seq_params.subsampling_y)];
+ mbmi->bsize, cm->seq_params->subsampling_x,
+ cm->seq_params->subsampling_y)];
const TX_SIZE tx_size_uv_horz = txsize_horz_map[av1_get_max_uv_txsize(
- mbmi->bsize, cm->seq_params.subsampling_x, cm->seq_params.subsampling_y)];
+ mbmi->bsize, cm->seq_params->subsampling_x,
+ cm->seq_params->subsampling_y)];
const int is_square_transform_size = tx_size <= TX_64X64;
int mask_id = 0;
int offset = 0;
@@ -1330,9 +1333,11 @@ void av1_store_bitmask_univariant_tx(AV1_COMMON *cm, int mi_row, int mi_col,
const TX_SIZE tx_size_y_vert = txsize_vert_map[mbmi->tx_size];
const TX_SIZE tx_size_y_horz = txsize_horz_map[mbmi->tx_size];
const TX_SIZE tx_size_uv_vert = txsize_vert_map[av1_get_max_uv_txsize(
- mbmi->bsize, cm->seq_params.subsampling_x, cm->seq_params.subsampling_y)];
+ mbmi->bsize, cm->seq_params->subsampling_x,
+ cm->seq_params->subsampling_y)];
const TX_SIZE tx_size_uv_horz = txsize_horz_map[av1_get_max_uv_txsize(
- mbmi->bsize, cm->seq_params.subsampling_x, cm->seq_params.subsampling_y)];
+ mbmi->bsize, cm->seq_params->subsampling_x,
+ cm->seq_params->subsampling_y)];
const int is_square_transform_size = mbmi->tx_size <= TX_64X64;
int mask_id = 0;
int offset = 0;
diff --git a/third_party/libaom/source/libaom/av1/common/mv.h b/third_party/libaom/source/libaom/av1/common/mv.h
index be539e8201..3203bf7278 100644
--- a/third_party/libaom/source/libaom/av1/common/mv.h
+++ b/third_party/libaom/source/libaom/av1/common/mv.h
@@ -12,6 +12,8 @@
#ifndef AOM_AV1_COMMON_MV_H_
#define AOM_AV1_COMMON_MV_H_
+#include <stdlib.h>
+
#include "av1/common/common.h"
#include "av1/common/common_data.h"
#include "aom_dsp/aom_filter.h"
diff --git a/third_party/libaom/source/libaom/av1/common/mvref_common.c b/third_party/libaom/source/libaom/av1/common/mvref_common.c
index 04e050a691..3431e7d6ad 100644
--- a/third_party/libaom/source/libaom/av1/common/mvref_common.c
+++ b/third_party/libaom/source/libaom/av1/common/mvref_common.c
@@ -258,7 +258,7 @@ static AOM_INLINE void scan_blk_mbmi(
static int has_top_right(const AV1_COMMON *cm, const MACROBLOCKD *xd,
int mi_row, int mi_col, int bs) {
- const int sb_mi_size = mi_size_wide[cm->seq_params.sb_size];
+ const int sb_mi_size = mi_size_wide[cm->seq_params->sb_size];
const int mask_row = mi_row & (sb_mi_size - 1);
const int mask_col = mi_col & (sb_mi_size - 1);
@@ -347,7 +347,7 @@ static int add_tpl_ref_mv(const AV1_COMMON *cm, const MACROBLOCKD *xd,
const int cur_frame_index = cm->cur_frame->order_hint;
const RefCntBuffer *const buf_0 = get_ref_frame_buf(cm, rf[0]);
const int frame0_index = buf_0->order_hint;
- const int cur_offset_0 = get_relative_dist(&cm->seq_params.order_hint_info,
+ const int cur_offset_0 = get_relative_dist(&cm->seq_params->order_hint_info,
cur_frame_index, frame0_index);
int idx;
const int allow_high_precision_mv = cm->features.allow_high_precision_mv;
@@ -380,7 +380,7 @@ static int add_tpl_ref_mv(const AV1_COMMON *cm, const MACROBLOCKD *xd,
// Process compound inter mode
const RefCntBuffer *const buf_1 = get_ref_frame_buf(cm, rf[1]);
const int frame1_index = buf_1->order_hint;
- const int cur_offset_1 = get_relative_dist(&cm->seq_params.order_hint_info,
+ const int cur_offset_1 = get_relative_dist(&cm->seq_params->order_hint_info,
cur_frame_index, frame1_index);
int_mv comp_refmv;
get_mv_projection(&comp_refmv.as_mv, prev_frame_mvs->mfmv0.as_mv,
@@ -838,7 +838,9 @@ void av1_find_best_ref_mvs(int allow_hp, int_mv *mvlist, int_mv *nearest_mv,
void av1_setup_frame_buf_refs(AV1_COMMON *cm) {
cm->cur_frame->order_hint = cm->current_frame.order_hint;
cm->cur_frame->display_order_hint = cm->current_frame.display_order_hint;
-
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ cm->cur_frame->pyramid_level = cm->current_frame.pyramid_level;
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
MV_REFERENCE_FRAME ref_frame;
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref_frame);
@@ -854,10 +856,10 @@ void av1_setup_frame_sign_bias(AV1_COMMON *cm) {
MV_REFERENCE_FRAME ref_frame;
for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref_frame);
- if (cm->seq_params.order_hint_info.enable_order_hint && buf != NULL) {
+ if (cm->seq_params->order_hint_info.enable_order_hint && buf != NULL) {
const int ref_order_hint = buf->order_hint;
cm->ref_frame_sign_bias[ref_frame] =
- (get_relative_dist(&cm->seq_params.order_hint_info, ref_order_hint,
+ (get_relative_dist(&cm->seq_params->order_hint_info, ref_order_hint,
(int)cm->current_frame.order_hint) <= 0)
? 0
: 1;
@@ -930,10 +932,10 @@ static int motion_field_projection(AV1_COMMON *cm,
&start_frame_buf->ref_order_hints[0];
const int cur_order_hint = cm->cur_frame->order_hint;
int start_to_current_frame_offset = get_relative_dist(
- &cm->seq_params.order_hint_info, start_frame_order_hint, cur_order_hint);
+ &cm->seq_params->order_hint_info, start_frame_order_hint, cur_order_hint);
for (MV_REFERENCE_FRAME rf = LAST_FRAME; rf <= INTER_REFS_PER_FRAME; ++rf) {
- ref_offset[rf] = get_relative_dist(&cm->seq_params.order_hint_info,
+ ref_offset[rf] = get_relative_dist(&cm->seq_params->order_hint_info,
start_frame_order_hint,
ref_order_hints[rf - LAST_FRAME]);
}
@@ -981,7 +983,7 @@ static int motion_field_projection(AV1_COMMON *cm,
}
void av1_setup_motion_field(AV1_COMMON *cm) {
- const OrderHintInfo *const order_hint_info = &cm->seq_params.order_hint_info;
+ const OrderHintInfo *const order_hint_info = &cm->seq_params->order_hint_info;
memset(cm->ref_frame_side, 0, sizeof(cm->ref_frame_side));
if (!order_hint_info->enable_order_hint) return;
@@ -1219,7 +1221,7 @@ uint8_t av1_findSamples(const AV1_COMMON *cm, MACROBLOCKD *xd, int *pts,
}
void av1_setup_skip_mode_allowed(AV1_COMMON *cm) {
- const OrderHintInfo *const order_hint_info = &cm->seq_params.order_hint_info;
+ const OrderHintInfo *const order_hint_info = &cm->seq_params->order_hint_info;
SkipModeInfo *const skip_mode_info = &cm->current_frame.skip_mode_info;
skip_mode_info->skip_mode_allowed = 0;
@@ -1323,11 +1325,11 @@ void av1_set_frame_refs(AV1_COMMON *const cm, int *remapped_ref_idx,
int lst_frame_sort_idx = -1;
int gld_frame_sort_idx = -1;
- assert(cm->seq_params.order_hint_info.enable_order_hint);
- assert(cm->seq_params.order_hint_info.order_hint_bits_minus_1 >= 0);
+ assert(cm->seq_params->order_hint_info.enable_order_hint);
+ assert(cm->seq_params->order_hint_info.order_hint_bits_minus_1 >= 0);
const int cur_order_hint = (int)cm->current_frame.order_hint;
const int cur_frame_sort_idx =
- 1 << cm->seq_params.order_hint_info.order_hint_bits_minus_1;
+ 1 << cm->seq_params->order_hint_info.order_hint_bits_minus_1;
REF_FRAME_INFO ref_frame_info[REF_FRAMES];
int ref_flag_list[INTER_REFS_PER_FRAME] = { 0, 0, 0, 0, 0, 0, 0 };
@@ -1349,7 +1351,7 @@ void av1_set_frame_refs(AV1_COMMON *const cm, int *remapped_ref_idx,
ref_frame_info[i].sort_idx =
(offset == -1) ? -1
: cur_frame_sort_idx +
- get_relative_dist(&cm->seq_params.order_hint_info,
+ get_relative_dist(&cm->seq_params->order_hint_info,
offset, cur_order_hint);
assert(ref_frame_info[i].sort_idx >= -1);
@@ -1360,11 +1362,11 @@ void av1_set_frame_refs(AV1_COMMON *const cm, int *remapped_ref_idx,
// Confirm both LAST_FRAME and GOLDEN_FRAME are valid forward reference
// frames.
if (lst_frame_sort_idx == -1 || lst_frame_sort_idx >= cur_frame_sort_idx) {
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ aom_internal_error(cm->error, AOM_CODEC_CORRUPT_FRAME,
"Inter frame requests a look-ahead frame as LAST");
}
if (gld_frame_sort_idx == -1 || gld_frame_sort_idx >= cur_frame_sort_idx) {
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ aom_internal_error(cm->error, AOM_CODEC_CORRUPT_FRAME,
"Inter frame requests a look-ahead frame as GOLDEN");
}
diff --git a/third_party/libaom/source/libaom/av1/common/pred_common.h b/third_party/libaom/source/libaom/av1/common/pred_common.h
index 12bcce84f2..3db9dd69ef 100644
--- a/third_party/libaom/source/libaom/av1/common/pred_common.h
+++ b/third_party/libaom/source/libaom/av1/common/pred_common.h
@@ -107,9 +107,9 @@ static INLINE int get_comp_index_context(const AV1_COMMON *cm,
if (bck_buf != NULL) bck_frame_index = bck_buf->order_hint;
if (fwd_buf != NULL) fwd_frame_index = fwd_buf->order_hint;
- int fwd = abs(get_relative_dist(&cm->seq_params.order_hint_info,
+ int fwd = abs(get_relative_dist(&cm->seq_params->order_hint_info,
fwd_frame_index, cur_frame_index));
- int bck = abs(get_relative_dist(&cm->seq_params.order_hint_info,
+ int bck = abs(get_relative_dist(&cm->seq_params->order_hint_info,
cur_frame_index, bck_frame_index));
const MB_MODE_INFO *const above_mi = xd->above_mbmi;
diff --git a/third_party/libaom/source/libaom/av1/common/reconinter.c b/third_party/libaom/source/libaom/av1/common/reconinter.c
index ad155b26ae..70f4c6d5ee 100644
--- a/third_party/libaom/source/libaom/av1/common/reconinter.c
+++ b/third_party/libaom/source/libaom/av1/common/reconinter.c
@@ -713,8 +713,8 @@ void av1_build_one_inter_predictor(
}
void av1_dist_wtd_comp_weight_assign(const AV1_COMMON *cm,
- const MB_MODE_INFO *mbmi, int order_idx,
- int *fwd_offset, int *bck_offset,
+ const MB_MODE_INFO *mbmi, int *fwd_offset,
+ int *bck_offset,
int *use_dist_wtd_comp_avg,
int is_compound) {
assert(fwd_offset != NULL && bck_offset != NULL);
@@ -734,18 +734,18 @@ void av1_dist_wtd_comp_weight_assign(const AV1_COMMON *cm,
if (bck_buf != NULL) bck_frame_index = bck_buf->order_hint;
if (fwd_buf != NULL) fwd_frame_index = fwd_buf->order_hint;
- int d0 = clamp(abs(get_relative_dist(&cm->seq_params.order_hint_info,
+ int d0 = clamp(abs(get_relative_dist(&cm->seq_params->order_hint_info,
fwd_frame_index, cur_frame_index)),
0, MAX_FRAME_DISTANCE);
- int d1 = clamp(abs(get_relative_dist(&cm->seq_params.order_hint_info,
+ int d1 = clamp(abs(get_relative_dist(&cm->seq_params->order_hint_info,
cur_frame_index, bck_frame_index)),
0, MAX_FRAME_DISTANCE);
const int order = d0 <= d1;
if (d0 == 0 || d1 == 0) {
- *fwd_offset = quant_dist_lookup_table[order_idx][3][order];
- *bck_offset = quant_dist_lookup_table[order_idx][3][1 - order];
+ *fwd_offset = quant_dist_lookup_table[3][order];
+ *bck_offset = quant_dist_lookup_table[3][1 - order];
return;
}
@@ -758,8 +758,8 @@ void av1_dist_wtd_comp_weight_assign(const AV1_COMMON *cm,
if ((d0 > d1 && d0_c0 < d1_c1) || (d0 <= d1 && d0_c0 > d1_c1)) break;
}
- *fwd_offset = quant_dist_lookup_table[order_idx][i][order];
- *bck_offset = quant_dist_lookup_table[order_idx][i][1 - order];
+ *fwd_offset = quant_dist_lookup_table[i][order];
+ *bck_offset = quant_dist_lookup_table[i][1 - order];
}
// True if the following hold:
@@ -911,7 +911,7 @@ static void build_inter_predictors_8x8_and_bigger(
ref, plane, xd->tmp_conv_dst, MAX_SB_SIZE, is_compound, xd->bd);
av1_dist_wtd_comp_weight_assign(
- cm, mi, 0, &inter_pred_params.conv_params.fwd_offset,
+ cm, mi, &inter_pred_params.conv_params.fwd_offset,
&inter_pred_params.conv_params.bck_offset,
&inter_pred_params.conv_params.use_dist_wtd_comp_avg, is_compound);
@@ -1189,7 +1189,6 @@ void av1_build_obmc_inter_prediction(const AV1_COMMON *cm, MACROBLOCKD *xd,
void av1_setup_obmc_dst_bufs(MACROBLOCKD *xd, uint8_t **dst_buf1,
uint8_t **dst_buf2) {
-#if CONFIG_AV1_HIGHBITDEPTH
if (is_cur_buf_hbd(xd)) {
int len = sizeof(uint16_t);
dst_buf1[0] = CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[0]);
@@ -1203,16 +1202,13 @@ void av1_setup_obmc_dst_bufs(MACROBLOCKD *xd, uint8_t **dst_buf1,
dst_buf2[2] =
CONVERT_TO_BYTEPTR(xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE * 2 * len);
} else {
-#endif // CONFIG_AV1_HIGHBITDEPTH
dst_buf1[0] = xd->tmp_obmc_bufs[0];
dst_buf1[1] = xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE;
dst_buf1[2] = xd->tmp_obmc_bufs[0] + MAX_SB_SQUARE * 2;
dst_buf2[0] = xd->tmp_obmc_bufs[1];
dst_buf2[1] = xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE;
dst_buf2[2] = xd->tmp_obmc_bufs[1] + MAX_SB_SQUARE * 2;
-#if CONFIG_AV1_HIGHBITDEPTH
}
-#endif // CONFIG_AV1_HIGHBITDEPTH
}
void av1_setup_build_prediction_by_above_pred(
@@ -1363,10 +1359,12 @@ void av1_build_intra_predictors_for_interintra(const AV1_COMMON *cm,
assert(xd->mi[0]->angle_delta[PLANE_TYPE_UV] == 0);
assert(xd->mi[0]->filter_intra_mode_info.use_filter_intra == 0);
assert(xd->mi[0]->use_intrabc == 0);
+ const SequenceHeader *seq_params = cm->seq_params;
- av1_predict_intra_block(cm, xd, pd->width, pd->height,
- max_txsize_rect_lookup[plane_bsize], mode, 0, 0,
- FILTER_INTRA_MODES, ctx->plane[plane],
+ av1_predict_intra_block(xd, seq_params->sb_size,
+ seq_params->enable_intra_edge_filter, pd->width,
+ pd->height, max_txsize_rect_lookup[plane_bsize], mode,
+ 0, 0, FILTER_INTRA_MODES, ctx->plane[plane],
ctx->stride[plane], dst, dst_stride, 0, 0, plane);
}
diff --git a/third_party/libaom/source/libaom/av1/common/reconinter.h b/third_party/libaom/source/libaom/av1/common/reconinter.h
index c8696160b6..056dc67d07 100644
--- a/third_party/libaom/source/libaom/av1/common/reconinter.h
+++ b/third_party/libaom/source/libaom/av1/common/reconinter.h
@@ -368,8 +368,8 @@ static INLINE const uint8_t *av1_get_contiguous_soft_mask(int8_t wedge_index,
}
void av1_dist_wtd_comp_weight_assign(const AV1_COMMON *cm,
- const MB_MODE_INFO *mbmi, int order_idx,
- int *fwd_offset, int *bck_offset,
+ const MB_MODE_INFO *mbmi, int *fwd_offset,
+ int *bck_offset,
int *use_dist_wtd_comp_avg,
int is_compound);
diff --git a/third_party/libaom/source/libaom/av1/common/reconintra.c b/third_party/libaom/source/libaom/av1/common/reconintra.c
index 0c01f92183..51b01786ea 100644
--- a/third_party/libaom/source/libaom/av1/common/reconintra.c
+++ b/third_party/libaom/source/libaom/av1/common/reconintra.c
@@ -193,7 +193,7 @@ static const uint8_t *get_has_tr_table(PARTITION_TYPE partition,
return ret;
}
-static int has_top_right(const AV1_COMMON *cm, BLOCK_SIZE bsize, int mi_row,
+static int has_top_right(BLOCK_SIZE sb_size, BLOCK_SIZE bsize, int mi_row,
int mi_col, int top_available, int right_available,
PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
int col_off, int ss_x, int ss_y) {
@@ -223,7 +223,7 @@ static int has_top_right(const AV1_COMMON *cm, BLOCK_SIZE bsize, int mi_row,
const int bw_in_mi_log2 = mi_size_wide_log2[bsize];
const int bh_in_mi_log2 = mi_size_high_log2[bsize];
- const int sb_mi_size = mi_size_high[cm->seq_params.sb_size];
+ const int sb_mi_size = mi_size_high[sb_size];
const int blk_row_in_sb = (mi_row & (sb_mi_size - 1)) >> bh_in_mi_log2;
const int blk_col_in_sb = (mi_col & (sb_mi_size - 1)) >> bw_in_mi_log2;
@@ -378,7 +378,7 @@ static const uint8_t *get_has_bl_table(PARTITION_TYPE partition,
return ret;
}
-static int has_bottom_left(const AV1_COMMON *cm, BLOCK_SIZE bsize, int mi_row,
+static int has_bottom_left(BLOCK_SIZE sb_size, BLOCK_SIZE bsize, int mi_row,
int mi_col, int bottom_available, int left_available,
PARTITION_TYPE partition, TX_SIZE txsz, int row_off,
int col_off, int ss_x, int ss_y) {
@@ -415,7 +415,7 @@ static int has_bottom_left(const AV1_COMMON *cm, BLOCK_SIZE bsize, int mi_row,
const int bw_in_mi_log2 = mi_size_wide_log2[bsize];
const int bh_in_mi_log2 = mi_size_high_log2[bsize];
- const int sb_mi_size = mi_size_high[cm->seq_params.sb_size];
+ const int sb_mi_size = mi_size_high[sb_size];
const int blk_row_in_sb = (mi_row & (sb_mi_size - 1)) >> bh_in_mi_log2;
const int blk_col_in_sb = (mi_col & (sb_mi_size - 1)) >> bw_in_mi_log2;
@@ -971,7 +971,7 @@ static int is_smooth(const MB_MODE_INFO *mbmi, int plane) {
}
}
-static int get_filt_type(const MACROBLOCKD *xd, int plane) {
+static int get_intra_edge_filter_type(const MACROBLOCKD *xd, int plane) {
int ab_sm, le_sm;
if (plane == 0) {
@@ -1144,11 +1144,11 @@ void av1_upsample_intra_edge_high_c(uint16_t *p, int sz, int bd) {
}
#if CONFIG_AV1_HIGHBITDEPTH
static void build_intra_predictors_high(
- const MACROBLOCKD *xd, const uint8_t *ref8, int ref_stride, uint8_t *dst8,
- int dst_stride, PREDICTION_MODE mode, int angle_delta,
- FILTER_INTRA_MODE filter_intra_mode, TX_SIZE tx_size,
- int disable_edge_filter, int n_top_px, int n_topright_px, int n_left_px,
- int n_bottomleft_px, int plane) {
+ const uint8_t *ref8, int ref_stride, uint8_t *dst8, int dst_stride,
+ PREDICTION_MODE mode, int angle_delta, FILTER_INTRA_MODE filter_intra_mode,
+ TX_SIZE tx_size, int disable_edge_filter, int n_top_px, int n_topright_px,
+ int n_left_px, int n_bottomleft_px, int intra_edge_filter_type,
+ int bit_depth) {
int i;
uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
@@ -1166,7 +1166,7 @@ static void build_intra_predictors_high(
int p_angle = 0;
const int is_dr_mode = av1_is_directional_mode(mode);
const int use_filter_intra = filter_intra_mode != FILTER_INTRA_MODES;
- int base = 128 << (xd->bd - 8);
+ int base = 128 << (bit_depth - 8);
// The left_data, above_data buffers must be zeroed to fix some intermittent
// valgrind errors. Uninitialized reads in intra pred modules (e.g. width = 4
// path in av1_highbd_dr_prediction_z2_avx2()) from left_data, above_data are
@@ -1270,7 +1270,7 @@ static void build_intra_predictors_high(
if (use_filter_intra) {
highbd_filter_intra_predictor(dst, dst_stride, tx_size, above_row, left_col,
- filter_intra_mode, xd->bd);
+ filter_intra_mode, bit_depth);
return;
}
@@ -1280,61 +1280,57 @@ static void build_intra_predictors_high(
if (!disable_edge_filter) {
const int need_right = p_angle < 90;
const int need_bottom = p_angle > 180;
- const int filt_type = get_filt_type(xd, plane);
if (p_angle != 90 && p_angle != 180) {
const int ab_le = need_above_left ? 1 : 0;
if (need_above && need_left && (txwpx + txhpx >= 24)) {
filter_intra_edge_corner_high(above_row, left_col);
}
if (need_above && n_top_px > 0) {
- const int strength =
- intra_edge_filter_strength(txwpx, txhpx, p_angle - 90, filt_type);
+ const int strength = intra_edge_filter_strength(
+ txwpx, txhpx, p_angle - 90, intra_edge_filter_type);
const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
av1_filter_intra_edge_high(above_row - ab_le, n_px, strength);
}
if (need_left && n_left_px > 0) {
const int strength = intra_edge_filter_strength(
- txhpx, txwpx, p_angle - 180, filt_type);
+ txhpx, txwpx, p_angle - 180, intra_edge_filter_type);
const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
av1_filter_intra_edge_high(left_col - ab_le, n_px, strength);
}
}
- upsample_above =
- av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90, filt_type);
+ upsample_above = av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90,
+ intra_edge_filter_type);
if (need_above && upsample_above) {
const int n_px = txwpx + (need_right ? txhpx : 0);
- av1_upsample_intra_edge_high(above_row, n_px, xd->bd);
+ av1_upsample_intra_edge_high(above_row, n_px, bit_depth);
}
- upsample_left =
- av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180, filt_type);
+ upsample_left = av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180,
+ intra_edge_filter_type);
if (need_left && upsample_left) {
const int n_px = txhpx + (need_bottom ? txwpx : 0);
- av1_upsample_intra_edge_high(left_col, n_px, xd->bd);
+ av1_upsample_intra_edge_high(left_col, n_px, bit_depth);
}
}
highbd_dr_predictor(dst, dst_stride, tx_size, above_row, left_col,
- upsample_above, upsample_left, p_angle, xd->bd);
+ upsample_above, upsample_left, p_angle, bit_depth);
return;
}
// predict
if (mode == DC_PRED) {
dc_pred_high[n_left_px > 0][n_top_px > 0][tx_size](
- dst, dst_stride, above_row, left_col, xd->bd);
+ dst, dst_stride, above_row, left_col, bit_depth);
} else {
- pred_high[mode][tx_size](dst, dst_stride, above_row, left_col, xd->bd);
+ pred_high[mode][tx_size](dst, dst_stride, above_row, left_col, bit_depth);
}
}
#endif // CONFIG_AV1_HIGHBITDEPTH
-static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
- int ref_stride, uint8_t *dst, int dst_stride,
- PREDICTION_MODE mode, int angle_delta,
- FILTER_INTRA_MODE filter_intra_mode,
- TX_SIZE tx_size, int disable_edge_filter,
- int n_top_px, int n_topright_px,
- int n_left_px, int n_bottomleft_px,
- int plane) {
+static void build_intra_predictors(
+ const uint8_t *ref, int ref_stride, uint8_t *dst, int dst_stride,
+ PREDICTION_MODE mode, int angle_delta, FILTER_INTRA_MODE filter_intra_mode,
+ TX_SIZE tx_size, int disable_edge_filter, int n_top_px, int n_topright_px,
+ int n_left_px, int n_bottomleft_px, int intra_edge_filter_type) {
int i;
const uint8_t *above_ref = ref - ref_stride;
const uint8_t *left_ref = ref - 1;
@@ -1462,33 +1458,32 @@ static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref,
if (!disable_edge_filter) {
const int need_right = p_angle < 90;
const int need_bottom = p_angle > 180;
- const int filt_type = get_filt_type(xd, plane);
if (p_angle != 90 && p_angle != 180) {
const int ab_le = need_above_left ? 1 : 0;
if (need_above && need_left && (txwpx + txhpx >= 24)) {
filter_intra_edge_corner(above_row, left_col);
}
if (need_above && n_top_px > 0) {
- const int strength =
- intra_edge_filter_strength(txwpx, txhpx, p_angle - 90, filt_type);
+ const int strength = intra_edge_filter_strength(
+ txwpx, txhpx, p_angle - 90, intra_edge_filter_type);
const int n_px = n_top_px + ab_le + (need_right ? txhpx : 0);
av1_filter_intra_edge(above_row - ab_le, n_px, strength);
}
if (need_left && n_left_px > 0) {
const int strength = intra_edge_filter_strength(
- txhpx, txwpx, p_angle - 180, filt_type);
+ txhpx, txwpx, p_angle - 180, intra_edge_filter_type);
const int n_px = n_left_px + ab_le + (need_bottom ? txwpx : 0);
av1_filter_intra_edge(left_col - ab_le, n_px, strength);
}
}
- upsample_above =
- av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90, filt_type);
+ upsample_above = av1_use_intra_edge_upsample(txwpx, txhpx, p_angle - 90,
+ intra_edge_filter_type);
if (need_above && upsample_above) {
const int n_px = txwpx + (need_right ? txhpx : 0);
av1_upsample_intra_edge(above_row, n_px);
}
- upsample_left =
- av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180, filt_type);
+ upsample_left = av1_use_intra_edge_upsample(txhpx, txwpx, p_angle - 180,
+ intra_edge_filter_type);
if (need_left && upsample_left) {
const int n_px = txhpx + (need_bottom ? txwpx : 0);
av1_upsample_intra_edge(left_col, n_px);
@@ -1559,11 +1554,14 @@ static INLINE BLOCK_SIZE scale_chroma_bsize(BLOCK_SIZE bsize, int subsampling_x,
return bs;
}
-void av1_predict_intra_block(
- const AV1_COMMON *cm, const MACROBLOCKD *xd, int wpx, int hpx,
- TX_SIZE tx_size, PREDICTION_MODE mode, int angle_delta, int use_palette,
- FILTER_INTRA_MODE filter_intra_mode, const uint8_t *ref, int ref_stride,
- uint8_t *dst, int dst_stride, int col_off, int row_off, int plane) {
+void av1_predict_intra_block(const MACROBLOCKD *xd, BLOCK_SIZE sb_size,
+ int enable_intra_edge_filter, int wpx, int hpx,
+ TX_SIZE tx_size, PREDICTION_MODE mode,
+ int angle_delta, int use_palette,
+ FILTER_INTRA_MODE filter_intra_mode,
+ const uint8_t *ref, int ref_stride, uint8_t *dst,
+ int dst_stride, int col_off, int row_off,
+ int plane) {
const MB_MODE_INFO *const mbmi = xd->mi[0];
const int txwpx = tx_size_wide[tx_size];
const int txhpx = tx_size_high[tx_size];
@@ -1626,32 +1624,32 @@ void av1_predict_intra_block(
}
const int have_top_right =
- has_top_right(cm, bsize, mi_row, mi_col, have_top, right_available,
+ has_top_right(sb_size, bsize, mi_row, mi_col, have_top, right_available,
partition, tx_size, row_off, col_off, ss_x, ss_y);
- const int have_bottom_left =
- has_bottom_left(cm, bsize, mi_row, mi_col, bottom_available, have_left,
- partition, tx_size, row_off, col_off, ss_x, ss_y);
+ const int have_bottom_left = has_bottom_left(
+ sb_size, bsize, mi_row, mi_col, bottom_available, have_left, partition,
+ tx_size, row_off, col_off, ss_x, ss_y);
- const int disable_edge_filter = !cm->seq_params.enable_intra_edge_filter;
+ const int disable_edge_filter = !enable_intra_edge_filter;
+ const int intra_edge_filter_type = get_intra_edge_filter_type(xd, plane);
#if CONFIG_AV1_HIGHBITDEPTH
if (is_cur_buf_hbd(xd)) {
build_intra_predictors_high(
- xd, ref, ref_stride, dst, dst_stride, mode, angle_delta,
- filter_intra_mode, tx_size, disable_edge_filter,
- have_top ? AOMMIN(txwpx, xr + txwpx) : 0,
+ ref, ref_stride, dst, dst_stride, mode, angle_delta, filter_intra_mode,
+ tx_size, disable_edge_filter, have_top ? AOMMIN(txwpx, xr + txwpx) : 0,
have_top_right ? AOMMIN(txwpx, xr) : 0,
have_left ? AOMMIN(txhpx, yd + txhpx) : 0,
- have_bottom_left ? AOMMIN(txhpx, yd) : 0, plane);
+ have_bottom_left ? AOMMIN(txhpx, yd) : 0, intra_edge_filter_type,
+ xd->bd);
return;
}
#endif
- build_intra_predictors(xd, ref, ref_stride, dst, dst_stride, mode,
- angle_delta, filter_intra_mode, tx_size,
- disable_edge_filter,
- have_top ? AOMMIN(txwpx, xr + txwpx) : 0,
- have_top_right ? AOMMIN(txwpx, xr) : 0,
- have_left ? AOMMIN(txhpx, yd + txhpx) : 0,
- have_bottom_left ? AOMMIN(txhpx, yd) : 0, plane);
+ build_intra_predictors(
+ ref, ref_stride, dst, dst_stride, mode, angle_delta, filter_intra_mode,
+ tx_size, disable_edge_filter, have_top ? AOMMIN(txwpx, xr + txwpx) : 0,
+ have_top_right ? AOMMIN(txwpx, xr) : 0,
+ have_left ? AOMMIN(txhpx, yd + txhpx) : 0,
+ have_bottom_left ? AOMMIN(txhpx, yd) : 0, intra_edge_filter_type);
}
void av1_predict_intra_block_facade(const AV1_COMMON *cm, MACROBLOCKD *xd,
@@ -1669,6 +1667,7 @@ void av1_predict_intra_block_facade(const AV1_COMMON *cm, MACROBLOCKD *xd,
? mbmi->filter_intra_mode_info.filter_intra_mode
: FILTER_INTRA_MODES;
const int angle_delta = mbmi->angle_delta[plane != AOM_PLANE_Y] * ANGLE_STEP;
+ const SequenceHeader *seq_params = cm->seq_params;
if (plane != AOM_PLANE_Y && mbmi->uv_mode == UV_CFL_PRED) {
#if CONFIG_DEBUG
@@ -1687,10 +1686,11 @@ void av1_predict_intra_block_facade(const AV1_COMMON *cm, MACROBLOCKD *xd,
CFL_CTX *const cfl = &xd->cfl;
CFL_PRED_TYPE pred_plane = get_cfl_pred_type(plane);
if (cfl->dc_pred_is_cached[pred_plane] == 0) {
- av1_predict_intra_block(cm, xd, pd->width, pd->height, tx_size, mode,
- angle_delta, use_palette, filter_intra_mode, dst,
- dst_stride, dst, dst_stride, blk_col, blk_row,
- plane);
+ av1_predict_intra_block(xd, seq_params->sb_size,
+ seq_params->enable_intra_edge_filter, pd->width,
+ pd->height, tx_size, mode, angle_delta,
+ use_palette, filter_intra_mode, dst, dst_stride,
+ dst, dst_stride, blk_col, blk_row, plane);
if (cfl->use_dc_pred_cache) {
cfl_store_dc_pred(xd, dst, pred_plane, tx_size_wide[tx_size]);
cfl->dc_pred_is_cached[pred_plane] = 1;
@@ -1701,9 +1701,10 @@ void av1_predict_intra_block_facade(const AV1_COMMON *cm, MACROBLOCKD *xd,
cfl_predict_block(xd, dst, dst_stride, tx_size, plane);
return;
}
- av1_predict_intra_block(cm, xd, pd->width, pd->height, tx_size, mode,
- angle_delta, use_palette, filter_intra_mode, dst,
- dst_stride, dst, dst_stride, blk_col, blk_row, plane);
+ av1_predict_intra_block(
+ xd, seq_params->sb_size, seq_params->enable_intra_edge_filter, pd->width,
+ pd->height, tx_size, mode, angle_delta, use_palette, filter_intra_mode,
+ dst, dst_stride, dst, dst_stride, blk_col, blk_row, plane);
}
void av1_init_intra_predictors(void) {
diff --git a/third_party/libaom/source/libaom/av1/common/reconintra.h b/third_party/libaom/source/libaom/av1/common/reconintra.h
index 907db5daf8..fa66ccd541 100644
--- a/third_party/libaom/source/libaom/av1/common/reconintra.h
+++ b/third_party/libaom/source/libaom/av1/common/reconintra.h
@@ -26,11 +26,14 @@ void av1_init_intra_predictors(void);
void av1_predict_intra_block_facade(const AV1_COMMON *cm, MACROBLOCKD *xd,
int plane, int blk_col, int blk_row,
TX_SIZE tx_size);
-void av1_predict_intra_block(
- const AV1_COMMON *cm, const MACROBLOCKD *xd, int wpx, int hpx,
- TX_SIZE tx_size, PREDICTION_MODE mode, int angle_delta, int use_palette,
- FILTER_INTRA_MODE filter_intra_mode, const uint8_t *ref, int ref_stride,
- uint8_t *dst, int dst_stride, int col_off, int row_off, int plane);
+void av1_predict_intra_block(const MACROBLOCKD *xd, BLOCK_SIZE sb_size,
+ int enable_intra_edge_filter, int wpx, int hpx,
+ TX_SIZE tx_size, PREDICTION_MODE mode,
+ int angle_delta, int use_palette,
+ FILTER_INTRA_MODE filter_intra_mode,
+ const uint8_t *ref, int ref_stride, uint8_t *dst,
+ int dst_stride, int col_off, int row_off,
+ int plane);
// Mapping of interintra to intra mode for use in the intra component
static const PREDICTION_MODE interintra_to_intra_mode[INTERINTRA_MODES] = {
@@ -64,7 +67,7 @@ static INLINE int av1_allow_intrabc(const AV1_COMMON *const cm) {
static INLINE int av1_filter_intra_allowed_bsize(const AV1_COMMON *const cm,
BLOCK_SIZE bs) {
- if (!cm->seq_params.enable_filter_intra || bs == BLOCK_INVALID) return 0;
+ if (!cm->seq_params->enable_filter_intra || bs == BLOCK_INVALID) return 0;
return block_size_wide[bs] <= 32 && block_size_high[bs] <= 32;
}
diff --git a/third_party/libaom/source/libaom/av1/common/resize.c b/third_party/libaom/source/libaom/av1/common/resize.c
index 0cfb5a29b8..112a08a539 100644
--- a/third_party/libaom/source/libaom/av1/common/resize.c
+++ b/third_party/libaom/source/libaom/av1/common/resize.c
@@ -1263,7 +1263,7 @@ void av1_upscale_normative_rows(const AV1_COMMON *cm, const uint8_t *src,
int src_stride, uint8_t *dst, int dst_stride,
int plane, int rows) {
const int is_uv = (plane > 0);
- const int ss_x = is_uv && cm->seq_params.subsampling_x;
+ const int ss_x = is_uv && cm->seq_params->subsampling_x;
const int downscaled_plane_width = ROUND_POWER_OF_TWO(cm->width, ss_x);
const int upscaled_plane_width =
ROUND_POWER_OF_TWO(cm->superres_upscaled_width, ss_x);
@@ -1305,11 +1305,11 @@ void av1_upscale_normative_rows(const AV1_COMMON *cm, const uint8_t *src,
const int pad_right = (j == cm->tiles.cols - 1);
#if CONFIG_AV1_HIGHBITDEPTH
- if (cm->seq_params.use_highbitdepth)
+ if (cm->seq_params->use_highbitdepth)
highbd_upscale_normative_rect(src_ptr, rows, src_width, src_stride,
dst_ptr, rows, dst_width, dst_stride,
x_step_qn, x0_qn, pad_left, pad_right,
- cm->seq_params.bit_depth);
+ cm->seq_params->bit_depth);
else
upscale_normative_rect(src_ptr, rows, src_width, src_stride, dst_ptr,
rows, dst_width, dst_stride, x_step_qn, x0_qn,
@@ -1354,18 +1354,18 @@ YV12_BUFFER_CONFIG *av1_scale_if_required(
if (scaling_required) {
const int num_planes = av1_num_planes(cm);
#if CONFIG_AV1_HIGHBITDEPTH
- if (use_optimized_scaler && cm->seq_params.bit_depth == AOM_BITS_8) {
+ if (use_optimized_scaler && cm->seq_params->bit_depth == AOM_BITS_8) {
av1_resize_and_extend_frame(unscaled, scaled, filter, phase, num_planes);
} else {
av1_resize_and_extend_frame_nonnormative(
- unscaled, scaled, (int)cm->seq_params.bit_depth, num_planes);
+ unscaled, scaled, (int)cm->seq_params->bit_depth, num_planes);
}
#else
if (use_optimized_scaler) {
av1_resize_and_extend_frame(unscaled, scaled, filter, phase, num_planes);
} else {
av1_resize_and_extend_frame_nonnormative(
- unscaled, scaled, (int)cm->seq_params.bit_depth, num_planes);
+ unscaled, scaled, (int)cm->seq_params->bit_depth, num_planes);
}
#endif
return scaled;
@@ -1432,7 +1432,7 @@ static void copy_buffer_config(const YV12_BUFFER_CONFIG *const src,
void av1_superres_upscale(AV1_COMMON *cm, BufferPool *const pool) {
const int num_planes = av1_num_planes(cm);
if (!av1_superres_scaled(cm)) return;
- const SequenceHeader *const seq_params = &cm->seq_params;
+ const SequenceHeader *const seq_params = cm->seq_params;
const int byte_alignment = cm->features.byte_alignment;
YV12_BUFFER_CONFIG copy_buffer;
@@ -1445,7 +1445,7 @@ void av1_superres_upscale(AV1_COMMON *cm, BufferPool *const pool) {
&copy_buffer, aligned_width, cm->height, seq_params->subsampling_x,
seq_params->subsampling_y, seq_params->use_highbitdepth,
AOM_BORDER_IN_PIXELS, byte_alignment))
- aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
+ aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate copy buffer for superres upscaling");
// Copy function assumes the frames are the same size.
@@ -1468,7 +1468,7 @@ void av1_superres_upscale(AV1_COMMON *cm, BufferPool *const pool) {
if (release_fb_cb(cb_priv, fb)) {
unlock_buffer_pool(pool);
aom_internal_error(
- &cm->error, AOM_CODEC_MEM_ERROR,
+ cm->error, AOM_CODEC_MEM_ERROR,
"Failed to free current frame buffer before superres upscaling");
}
// aom_realloc_frame_buffer() leaves config data for frame_to_show intact
@@ -1479,7 +1479,7 @@ void av1_superres_upscale(AV1_COMMON *cm, BufferPool *const pool) {
AOM_BORDER_IN_PIXELS, byte_alignment, fb, cb, cb_priv, 0)) {
unlock_buffer_pool(pool);
aom_internal_error(
- &cm->error, AOM_CODEC_MEM_ERROR,
+ cm->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate current frame buffer for superres upscaling");
}
unlock_buffer_pool(pool);
@@ -1495,7 +1495,7 @@ void av1_superres_upscale(AV1_COMMON *cm, BufferPool *const pool) {
seq_params->subsampling_y, seq_params->use_highbitdepth,
AOM_BORDER_IN_PIXELS, byte_alignment))
aom_internal_error(
- &cm->error, AOM_CODEC_MEM_ERROR,
+ cm->error, AOM_CODEC_MEM_ERROR,
"Failed to reallocate current frame buffer for superres upscaling");
// Restore config data back to frame_to_show
diff --git a/third_party/libaom/source/libaom/av1/common/restoration.c b/third_party/libaom/source/libaom/av1/common/restoration.c
index 41d0e22501..202953c889 100644
--- a/third_party/libaom/source/libaom/av1/common/restoration.c
+++ b/third_party/libaom/source/libaom/av1/common/restoration.c
@@ -42,8 +42,8 @@ const sgr_params_type av1_sgr_params[SGRPROJ_PARAMS] = {
AV1PixelRect av1_whole_frame_rect(const AV1_COMMON *cm, int is_uv) {
AV1PixelRect rect;
- int ss_x = is_uv && cm->seq_params.subsampling_x;
- int ss_y = is_uv && cm->seq_params.subsampling_y;
+ int ss_x = is_uv && cm->seq_params->subsampling_x;
+ int ss_y = is_uv && cm->seq_params->subsampling_y;
rect.top = 0;
rect.bottom = ROUND_POWER_OF_TWO(cm->height, ss_y);
@@ -1107,7 +1107,7 @@ void av1_loop_restoration_filter_frame_init(AV1LrStruct *lr_ctxt,
YV12_BUFFER_CONFIG *frame,
AV1_COMMON *cm, int optimized_lr,
int num_planes) {
- const SequenceHeader *const seq_params = &cm->seq_params;
+ const SequenceHeader *const seq_params = cm->seq_params;
const int bit_depth = seq_params->bit_depth;
const int highbd = seq_params->use_highbitdepth;
lr_ctxt->dst = &cm->rst_frame;
@@ -1118,7 +1118,7 @@ void av1_loop_restoration_filter_frame_init(AV1LrStruct *lr_ctxt,
lr_ctxt->dst, frame_width, frame_height, seq_params->subsampling_x,
seq_params->subsampling_y, highbd, AOM_RESTORATION_FRAME_BORDER,
cm->features.byte_alignment, NULL, NULL, NULL, 0) < 0)
- aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
+ aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate restoration dst buffer");
lr_ctxt->on_rest_unit = filter_frame_on_unit;
@@ -1299,7 +1299,7 @@ void av1_foreach_rest_unit_in_plane(const struct AV1Common *cm, int plane,
int32_t *tmpbuf,
RestorationLineBuffers *rlbs) {
const int is_uv = plane > 0;
- const int ss_y = is_uv && cm->seq_params.subsampling_y;
+ const int ss_y = is_uv && cm->seq_params->subsampling_y;
const RestorationInfo *rsi = &cm->rst_info[plane];
@@ -1315,7 +1315,7 @@ int av1_loop_restoration_corners_in_sb(const struct AV1Common *cm, int plane,
int *rrow1) {
assert(rcol0 && rcol1 && rrow0 && rrow1);
- if (bsize != cm->seq_params.sb_size) return 0;
+ if (bsize != cm->seq_params->sb_size) return 0;
if (cm->rst_info[plane].frame_restoration_type == RESTORE_NONE) return 0;
assert(!cm->features.all_lossless);
@@ -1345,8 +1345,8 @@ int av1_loop_restoration_corners_in_sb(const struct AV1Common *cm, int plane,
const int vert_units = av1_lr_count_units_in_tile(size, tile_h);
// The size of an MI-unit on this plane of the image
- const int ss_x = is_uv && cm->seq_params.subsampling_x;
- const int ss_y = is_uv && cm->seq_params.subsampling_y;
+ const int ss_x = is_uv && cm->seq_params->subsampling_x;
+ const int ss_y = is_uv && cm->seq_params->subsampling_y;
const int mi_size_x = MI_SIZE >> ss_x;
const int mi_size_y = MI_SIZE >> ss_y;
@@ -1427,7 +1427,7 @@ static void save_deblock_boundary_lines(
int upscaled_width;
int line_bytes;
if (av1_superres_scaled(cm)) {
- const int ss_x = is_uv && cm->seq_params.subsampling_x;
+ const int ss_x = is_uv && cm->seq_params->subsampling_x;
upscaled_width = (cm->superres_upscaled_width + ss_x) >> ss_x;
line_bytes = upscaled_width << use_highbd;
if (use_highbd)
@@ -1474,7 +1474,7 @@ static void save_cdef_boundary_lines(const YV12_BUFFER_CONFIG *frame,
// At the point where this function is called, we've already applied
// superres. So we don't need to extend the lines here, we can just
// pull directly from the topmost row of the upscaled frame.
- const int ss_x = is_uv && cm->seq_params.subsampling_x;
+ const int ss_x = is_uv && cm->seq_params->subsampling_x;
const int upscaled_width = av1_superres_scaled(cm)
? (cm->superres_upscaled_width + ss_x) >> ss_x
: src_width;
@@ -1494,7 +1494,7 @@ static void save_tile_row_boundary_lines(const YV12_BUFFER_CONFIG *frame,
int use_highbd, int plane,
AV1_COMMON *cm, int after_cdef) {
const int is_uv = plane > 0;
- const int ss_y = is_uv && cm->seq_params.subsampling_y;
+ const int ss_y = is_uv && cm->seq_params->subsampling_y;
const int stripe_height = RESTORATION_PROC_UNIT_SIZE >> ss_y;
const int stripe_off = RESTORATION_UNIT_OFFSET >> ss_y;
@@ -1559,7 +1559,7 @@ static void save_tile_row_boundary_lines(const YV12_BUFFER_CONFIG *frame,
void av1_loop_restoration_save_boundary_lines(const YV12_BUFFER_CONFIG *frame,
AV1_COMMON *cm, int after_cdef) {
const int num_planes = av1_num_planes(cm);
- const int use_highbd = cm->seq_params.use_highbitdepth;
+ const int use_highbd = cm->seq_params->use_highbitdepth;
for (int p = 0; p < num_planes; ++p) {
save_tile_row_boundary_lines(frame, use_highbd, p, cm, after_cdef);
}
diff --git a/third_party/libaom/source/libaom/av1/common/thread_common.c b/third_party/libaom/source/libaom/av1/common/thread_common.c
index 638dc4c951..0c45749de1 100644
--- a/third_party/libaom/source/libaom/av1/common/thread_common.c
+++ b/third_party/libaom/source/libaom/av1/common/thread_common.c
@@ -152,6 +152,61 @@ static void loop_filter_data_reset(LFWorkerData *lf_data,
}
}
+void av1_alloc_cdef_sync(AV1_COMMON *const cm, AV1CdefSync *cdef_sync,
+ int num_workers) {
+ if (num_workers < 1) return;
+#if CONFIG_MULTITHREAD
+ if (cdef_sync->mutex_ == NULL) {
+ CHECK_MEM_ERROR(cm, cdef_sync->mutex_,
+ aom_malloc(sizeof(*(cdef_sync->mutex_))));
+ if (cdef_sync->mutex_) pthread_mutex_init(cdef_sync->mutex_, NULL);
+ }
+#else
+ (void)cm;
+ (void)cdef_sync;
+#endif // CONFIG_MULTITHREAD
+}
+
+void av1_free_cdef_sync(AV1CdefSync *cdef_sync) {
+ if (cdef_sync == NULL) return;
+#if CONFIG_MULTITHREAD
+ if (cdef_sync->mutex_ != NULL) {
+ pthread_mutex_destroy(cdef_sync->mutex_);
+ aom_free(cdef_sync->mutex_);
+ }
+#endif // CONFIG_MULTITHREAD
+}
+
+static INLINE void cdef_row_mt_sync_read(AV1CdefSync *const cdef_sync,
+ int row) {
+ if (!row) return;
+#if CONFIG_MULTITHREAD
+ AV1CdefRowSync *const cdef_row_mt = cdef_sync->cdef_row_mt;
+ pthread_mutex_lock(cdef_row_mt[row - 1].row_mutex_);
+ while (cdef_row_mt[row - 1].is_row_done != 1)
+ pthread_cond_wait(cdef_row_mt[row - 1].row_cond_,
+ cdef_row_mt[row - 1].row_mutex_);
+ cdef_row_mt[row - 1].is_row_done = 0;
+ pthread_mutex_unlock(cdef_row_mt[row - 1].row_mutex_);
+#else
+ (void)cdef_sync;
+#endif // CONFIG_MULTITHREAD
+}
+
+static INLINE void cdef_row_mt_sync_write(AV1CdefSync *const cdef_sync,
+ int row) {
+#if CONFIG_MULTITHREAD
+ AV1CdefRowSync *const cdef_row_mt = cdef_sync->cdef_row_mt;
+ pthread_mutex_lock(cdef_row_mt[row].row_mutex_);
+ pthread_cond_signal(cdef_row_mt[row].row_cond_);
+ cdef_row_mt[row].is_row_done = 1;
+ pthread_mutex_unlock(cdef_row_mt[row].row_mutex_);
+#else
+ (void)cdef_sync;
+ (void)row;
+#endif // CONFIG_MULTITHREAD
+}
+
static INLINE void sync_read(AV1LfSync *const lf_sync, int r, int c,
int plane) {
#if CONFIG_MULTITHREAD
@@ -211,7 +266,7 @@ static void enqueue_lf_jobs(AV1LfSync *lf_sync, AV1_COMMON *cm, int start,
#if CONFIG_LPF_MASK
int is_decoding,
#endif
- int plane_start, int plane_end) {
+ int plane_start, int plane_end, int is_realtime) {
int mi_row, plane, dir;
AV1LfMTInfo *lf_job_queue = lf_sync->job_queue;
lf_sync->jobs_enqueued = 0;
@@ -238,6 +293,7 @@ static void enqueue_lf_jobs(AV1LfSync *lf_sync, AV1_COMMON *cm, int start,
lf_job_queue->mi_row = mi_row;
lf_job_queue->plane = plane;
lf_job_queue->dir = dir;
+ lf_job_queue->is_realtime = is_realtime;
lf_job_queue++;
lf_sync->jobs_enqueued++;
}
@@ -272,7 +328,7 @@ static INLINE void thread_loop_filter_rows(
const int sb_cols =
ALIGN_POWER_OF_TWO(cm->mi_params.mi_cols, MAX_MIB_SIZE_LOG2) >>
MAX_MIB_SIZE_LOG2;
- int mi_row, mi_col, plane, dir;
+ int mi_row, mi_col, plane, dir, is_realtime;
int r, c;
while (1) {
@@ -283,17 +339,29 @@ static INLINE void thread_loop_filter_rows(
plane = cur_job_info->plane;
dir = cur_job_info->dir;
r = mi_row >> MAX_MIB_SIZE_LOG2;
+ is_realtime = cur_job_info->is_realtime && !plane;
if (dir == 0) {
for (mi_col = 0; mi_col < cm->mi_params.mi_cols;
mi_col += MAX_MIB_SIZE) {
c = mi_col >> MAX_MIB_SIZE_LOG2;
- av1_setup_dst_planes(planes, cm->seq_params.sb_size, frame_buffer,
+ av1_setup_dst_planes(planes, cm->seq_params->sb_size, frame_buffer,
mi_row, mi_col, plane, plane + 1);
-
+#if CONFIG_AV1_HIGHBITDEPTH
+ (void)is_realtime;
av1_filter_block_plane_vert(cm, xd, plane, &planes[plane], mi_row,
mi_col);
+#else
+ if (is_realtime) {
+ av1_filter_block_plane_vert_rt(cm, xd, plane, &planes[plane],
+ mi_row, mi_col);
+
+ } else {
+ av1_filter_block_plane_vert(cm, xd, plane, &planes[plane], mi_row,
+ mi_col);
+ }
+#endif
sync_write(lf_sync, r, c, sb_cols, plane);
}
} else if (dir == 1) {
@@ -309,10 +377,21 @@ static INLINE void thread_loop_filter_rows(
// completed
sync_read(lf_sync, r + 1, c, plane);
- av1_setup_dst_planes(planes, cm->seq_params.sb_size, frame_buffer,
+ av1_setup_dst_planes(planes, cm->seq_params->sb_size, frame_buffer,
mi_row, mi_col, plane, plane + 1);
+#if CONFIG_AV1_HIGHBITDEPTH
+ (void)is_realtime;
av1_filter_block_plane_horz(cm, xd, plane, &planes[plane], mi_row,
mi_col);
+#else
+ if (is_realtime) {
+ av1_filter_block_plane_horz_rt(cm, xd, plane, &planes[plane],
+ mi_row, mi_col);
+ } else {
+ av1_filter_block_plane_horz(cm, xd, plane, &planes[plane], mi_row,
+ mi_col);
+ }
+#endif
}
}
} else {
@@ -405,7 +484,7 @@ static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
int is_decoding,
#endif
AVxWorker *workers, int nworkers,
- AV1LfSync *lf_sync) {
+ AV1LfSync *lf_sync, int is_realtime) {
const AVxWorkerInterface *const winterface = aom_get_worker_interface();
#if CONFIG_LPF_MASK
int sb_rows;
@@ -441,7 +520,7 @@ static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
#if CONFIG_LPF_MASK
is_decoding,
#endif
- plane_start, plane_end);
+ plane_start, plane_end, is_realtime);
// Set up loopfilter thread data.
for (i = num_workers - 1; i >= 0; --i) {
@@ -484,7 +563,7 @@ void av1_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
int is_decoding,
#endif
AVxWorker *workers, int num_workers,
- AV1LfSync *lf_sync) {
+ AV1LfSync *lf_sync, int is_realtime) {
int start_mi_row, end_mi_row, mi_rows_to_filter;
start_mi_row = 0;
@@ -512,7 +591,7 @@ void av1_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
// TODO(chengchen): can we remove this?
struct macroblockd_plane *pd = xd->plane;
- av1_setup_dst_planes(pd, cm->seq_params.sb_size, frame, 0, 0, plane,
+ av1_setup_dst_planes(pd, cm->seq_params->sb_size, frame, 0, 0, plane,
plane + 1);
av1_build_bitmask_vert_info(cm, &pd[plane], plane);
@@ -526,7 +605,7 @@ void av1_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
}
#else
loop_filter_rows_mt(frame, cm, xd, start_mi_row, end_mi_row, plane_start,
- plane_end, workers, num_workers, lf_sync);
+ plane_end, workers, num_workers, lf_sync, is_realtime);
#endif
}
@@ -720,7 +799,7 @@ static void enqueue_lr_jobs(AV1LrSync *lr_sync, AV1LrStruct *lr_ctxt,
for (int plane = 0; plane < num_planes; plane++) {
if (cm->rst_info[plane].frame_restoration_type == RESTORE_NONE) continue;
const int is_uv = plane > 0;
- const int ss_y = is_uv && cm->seq_params.subsampling_y;
+ const int ss_y = is_uv && cm->seq_params->subsampling_y;
AV1PixelRect tile_rect = ctxt[plane].tile_rect;
const int unit_size = ctxt[plane].rsi->restoration_unit_size;
@@ -932,3 +1011,198 @@ void av1_loop_restoration_filter_frame_mt(YV12_BUFFER_CONFIG *frame,
cm);
}
#endif
+
+// Initializes cdef_sync parameters.
+static AOM_INLINE void reset_cdef_job_info(AV1CdefSync *const cdef_sync) {
+ cdef_sync->end_of_frame = 0;
+ cdef_sync->fbr = 0;
+ cdef_sync->fbc = 0;
+}
+
+static AOM_INLINE void launch_cdef_workers(AVxWorker *const workers,
+ int num_workers) {
+ const AVxWorkerInterface *const winterface = aom_get_worker_interface();
+ for (int i = num_workers - 1; i >= 0; i--) {
+ AVxWorker *const worker = &workers[i];
+ if (i == 0)
+ winterface->execute(worker);
+ else
+ winterface->launch(worker);
+ }
+}
+
+static AOM_INLINE void sync_cdef_workers(AVxWorker *const workers,
+ AV1_COMMON *const cm,
+ int num_workers) {
+ const AVxWorkerInterface *const winterface = aom_get_worker_interface();
+ int had_error = 0;
+
+ // Wait for completion of Cdef frame.
+ for (int i = num_workers - 1; i >= 0; i--) {
+ AVxWorker *const worker = &workers[i];
+ had_error |= !winterface->sync(worker);
+ }
+ if (had_error)
+ aom_internal_error(cm->error, AOM_CODEC_ERROR,
+ "Failed to process cdef frame");
+}
+
+// Updates the row index of the next job to be processed.
+// Also updates end_of_frame flag when the processing of all rows is complete.
+static void update_cdef_row_next_job_info(AV1CdefSync *const cdef_sync,
+ const int nvfb) {
+ cdef_sync->fbr++;
+ if (cdef_sync->fbr == nvfb) {
+ cdef_sync->end_of_frame = 1;
+ }
+}
+
+// Checks if a job is available. If job is available,
+// populates next job information and returns 1, else returns 0.
+static AOM_INLINE int get_cdef_row_next_job(AV1CdefSync *const cdef_sync,
+ int *cur_fbr, const int nvfb) {
+#if CONFIG_MULTITHREAD
+ pthread_mutex_lock(cdef_sync->mutex_);
+#endif // CONFIG_MULTITHREAD
+ int do_next_row = 0;
+ // Populates information needed for current job and update the row
+ // index of the next row to be processed.
+ if (cdef_sync->end_of_frame == 0) {
+ do_next_row = 1;
+ *cur_fbr = cdef_sync->fbr;
+ update_cdef_row_next_job_info(cdef_sync, nvfb);
+ }
+#if CONFIG_MULTITHREAD
+ pthread_mutex_unlock(cdef_sync->mutex_);
+#endif // CONFIG_MULTITHREAD
+ return do_next_row;
+}
+
+// Hook function for each thread in CDEF multi-threading.
+static int cdef_sb_row_worker_hook(void *arg1, void *arg2) {
+ AV1CdefSync *const cdef_sync = (AV1CdefSync *)arg1;
+ AV1CdefWorkerData *const cdef_worker = (AV1CdefWorkerData *)arg2;
+ const int nvfb =
+ (cdef_worker->cm->mi_params.mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
+ int cur_fbr;
+ while (get_cdef_row_next_job(cdef_sync, &cur_fbr, nvfb)) {
+ av1_cdef_fb_row(cdef_worker->cm, cdef_worker->xd, cdef_worker->linebuf,
+ cdef_worker->colbuf, cdef_worker->srcbuf, cur_fbr,
+ cdef_worker->cdef_init_fb_row_fn, cdef_sync);
+ }
+ return 1;
+}
+
+// Assigns CDEF hook function and thread data to each worker.
+static void prepare_cdef_frame_workers(
+ AV1_COMMON *const cm, MACROBLOCKD *xd, AV1CdefWorkerData *const cdef_worker,
+ AVxWorkerHook hook, AVxWorker *const workers, AV1CdefSync *const cdef_sync,
+ int num_workers, cdef_init_fb_row_t cdef_init_fb_row_fn) {
+ const int num_planes = av1_num_planes(cm);
+
+ cdef_worker[0].srcbuf = cm->cdef_info.srcbuf;
+ for (int plane = 0; plane < num_planes; plane++)
+ cdef_worker[0].colbuf[plane] = cm->cdef_info.colbuf[plane];
+ for (int i = num_workers - 1; i >= 0; i--) {
+ AVxWorker *const worker = &workers[i];
+ cdef_worker[i].cm = cm;
+ cdef_worker[i].xd = xd;
+ cdef_worker[i].cdef_init_fb_row_fn = cdef_init_fb_row_fn;
+ for (int plane = 0; plane < num_planes; plane++)
+ cdef_worker[i].linebuf[plane] = cm->cdef_info.linebuf[plane];
+
+ worker->hook = hook;
+ worker->data1 = cdef_sync;
+ worker->data2 = &cdef_worker[i];
+ }
+}
+
+// Initializes row-level parameters for CDEF frame.
+void av1_cdef_init_fb_row_mt(const AV1_COMMON *const cm,
+ const MACROBLOCKD *const xd,
+ CdefBlockInfo *const fb_info,
+ uint16_t **const linebuf, uint16_t *const src,
+ struct AV1CdefSyncData *const cdef_sync, int fbr) {
+ const int num_planes = av1_num_planes(cm);
+ const int nvfb = (cm->mi_params.mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
+ const int luma_stride =
+ ALIGN_POWER_OF_TWO(cm->mi_params.mi_cols << MI_SIZE_LOG2, 4);
+
+ // for the current filter block, it's top left corner mi structure (mi_tl)
+ // is first accessed to check whether the top and left boundaries are
+ // frame boundaries. Then bottom-left and top-right mi structures are
+ // accessed to check whether the bottom and right boundaries
+ // (respectively) are frame boundaries.
+ //
+ // Note that we can't just check the bottom-right mi structure - eg. if
+ // we're at the right-hand edge of the frame but not the bottom, then
+ // the bottom-right mi is NULL but the bottom-left is not.
+ fb_info->frame_boundary[TOP] = (MI_SIZE_64X64 * fbr == 0) ? 1 : 0;
+ if (fbr != nvfb - 1)
+ fb_info->frame_boundary[BOTTOM] =
+ (MI_SIZE_64X64 * (fbr + 1) == cm->mi_params.mi_rows) ? 1 : 0;
+ else
+ fb_info->frame_boundary[BOTTOM] = 1;
+
+ fb_info->src = src;
+ fb_info->damping = cm->cdef_info.cdef_damping;
+ fb_info->coeff_shift = AOMMAX(cm->seq_params->bit_depth - 8, 0);
+ av1_zero(fb_info->dir);
+ av1_zero(fb_info->var);
+
+ for (int plane = 0; plane < num_planes; plane++) {
+ const int stride = luma_stride >> xd->plane[plane].subsampling_x;
+ uint16_t *top_linebuf = &linebuf[plane][0];
+ uint16_t *bot_linebuf = &linebuf[plane][nvfb * CDEF_VBORDER * stride];
+ {
+ const int mi_high_l2 = MI_SIZE_LOG2 - xd->plane[plane].subsampling_y;
+ const int top_offset = MI_SIZE_64X64 * (fbr + 1) << mi_high_l2;
+ const int bot_offset = MI_SIZE_64X64 * (fbr + 1) << mi_high_l2;
+
+ if (fbr != nvfb - 1) // if (fbr != 0) // top line buffer copy
+ av1_cdef_copy_sb8_16(
+ cm, &top_linebuf[(fbr + 1) * CDEF_VBORDER * stride], stride,
+ xd->plane[plane].dst.buf, top_offset - CDEF_VBORDER, 0,
+ xd->plane[plane].dst.stride, CDEF_VBORDER, stride);
+ if (fbr != nvfb - 1) // bottom line buffer copy
+ av1_cdef_copy_sb8_16(cm, &bot_linebuf[fbr * CDEF_VBORDER * stride],
+ stride, xd->plane[plane].dst.buf, bot_offset, 0,
+ xd->plane[plane].dst.stride, CDEF_VBORDER, stride);
+ }
+
+ fb_info->top_linebuf[plane] = &linebuf[plane][fbr * CDEF_VBORDER * stride];
+ fb_info->bot_linebuf[plane] =
+ &linebuf[plane]
+ [nvfb * CDEF_VBORDER * stride + (fbr * CDEF_VBORDER * stride)];
+ }
+
+ cdef_row_mt_sync_write(cdef_sync, fbr);
+ cdef_row_mt_sync_read(cdef_sync, fbr);
+}
+
+// Implements multi-threading for CDEF.
+// Perform CDEF on input frame.
+// Inputs:
+// frame: Pointer to input frame buffer.
+// cm: Pointer to common structure.
+// xd: Pointer to common current coding block structure.
+// Returns:
+// Nothing will be returned.
+void av1_cdef_frame_mt(AV1_COMMON *const cm, MACROBLOCKD *const xd,
+ AV1CdefWorkerData *const cdef_worker,
+ AVxWorker *const workers, AV1CdefSync *const cdef_sync,
+ int num_workers,
+ cdef_init_fb_row_t cdef_init_fb_row_fn) {
+ YV12_BUFFER_CONFIG *frame = &cm->cur_frame->buf;
+ const int num_planes = av1_num_planes(cm);
+
+ av1_setup_dst_planes(xd->plane, cm->seq_params->sb_size, frame, 0, 0, 0,
+ num_planes);
+
+ reset_cdef_job_info(cdef_sync);
+ prepare_cdef_frame_workers(cm, xd, cdef_worker, cdef_sb_row_worker_hook,
+ workers, cdef_sync, num_workers,
+ cdef_init_fb_row_fn);
+ launch_cdef_workers(workers, num_workers);
+ sync_cdef_workers(workers, cm, num_workers);
+}
diff --git a/third_party/libaom/source/libaom/av1/common/thread_common.h b/third_party/libaom/source/libaom/av1/common/thread_common.h
index 97b8abcff6..bcb4b879c1 100644
--- a/third_party/libaom/source/libaom/av1/common/thread_common.h
+++ b/third_party/libaom/source/libaom/av1/common/thread_common.h
@@ -15,6 +15,7 @@
#include "config/aom_config.h"
#include "av1/common/av1_loopfilter.h"
+#include "av1/common/cdef.h"
#include "aom_util/aom_thread.h"
#ifdef __cplusplus
@@ -27,6 +28,7 @@ typedef struct AV1LfMTInfo {
int mi_row;
int plane;
int dir;
+ int is_realtime;
} AV1LfMTInfo;
// Loopfilter row synchronization
@@ -97,6 +99,55 @@ typedef struct AV1LrSyncData {
int jobs_dequeued;
} AV1LrSync;
+typedef struct AV1CdefWorker {
+ AV1_COMMON *cm;
+ MACROBLOCKD *xd;
+ uint16_t *colbuf[MAX_MB_PLANE];
+ uint16_t *srcbuf;
+ uint16_t *linebuf[MAX_MB_PLANE];
+ cdef_init_fb_row_t cdef_init_fb_row_fn;
+} AV1CdefWorkerData;
+
+typedef struct AV1CdefRowSync {
+#if CONFIG_MULTITHREAD
+ pthread_mutex_t *row_mutex_;
+ pthread_cond_t *row_cond_;
+#endif // CONFIG_MULTITHREAD
+ int is_row_done;
+} AV1CdefRowSync;
+
+// Data related to CDEF search multi-thread synchronization.
+typedef struct AV1CdefSyncData {
+#if CONFIG_MULTITHREAD
+ // Mutex lock used while dispatching jobs.
+ pthread_mutex_t *mutex_;
+#endif // CONFIG_MULTITHREAD
+ // Data related to CDEF row mt sync information
+ AV1CdefRowSync *cdef_row_mt;
+ // Flag to indicate all blocks are processed and end of frame is reached
+ int end_of_frame;
+ // Row index in units of 64x64 block
+ int fbr;
+ // Column index in units of 64x64 block
+ int fbc;
+} AV1CdefSync;
+
+void av1_cdef_frame_mt(AV1_COMMON *const cm, MACROBLOCKD *const xd,
+ AV1CdefWorkerData *const cdef_worker,
+ AVxWorker *const workers, AV1CdefSync *const cdef_sync,
+ int num_workers, cdef_init_fb_row_t cdef_init_fb_row_fn);
+void av1_cdef_init_fb_row_mt(const AV1_COMMON *const cm,
+ const MACROBLOCKD *const xd,
+ CdefBlockInfo *const fb_info,
+ uint16_t **const linebuf, uint16_t *const src,
+ struct AV1CdefSyncData *const cdef_sync, int fbr);
+void av1_cdef_copy_sb8_16(const AV1_COMMON *const cm, uint16_t *const dst,
+ int dstride, const uint8_t *src, int src_voffset,
+ int src_hoffset, int sstride, int vsize, int hsize);
+void av1_alloc_cdef_sync(AV1_COMMON *const cm, AV1CdefSync *cdef_sync,
+ int num_workers);
+void av1_free_cdef_sync(AV1CdefSync *cdef_sync);
+
// Deallocate loopfilter synchronization related mutex and data.
void av1_loop_filter_dealloc(AV1LfSync *lf_sync);
@@ -107,7 +158,7 @@ void av1_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, struct AV1Common *cm,
int is_decoding,
#endif
AVxWorker *workers, int num_workers,
- AV1LfSync *lf_sync);
+ AV1LfSync *lf_sync, int is_realtime);
#if !CONFIG_REALTIME_ONLY
void av1_loop_restoration_filter_frame_mt(YV12_BUFFER_CONFIG *frame,
diff --git a/third_party/libaom/source/libaom/av1/common/tile_common.c b/third_party/libaom/source/libaom/av1/common/tile_common.c
index 1b11bd7606..8f5d2a6316 100644
--- a/third_party/libaom/source/libaom/av1/common/tile_common.c
+++ b/third_party/libaom/source/libaom/av1/common/tile_common.c
@@ -28,7 +28,7 @@ static int tile_log2(int blk_size, int target) {
}
void av1_get_tile_limits(AV1_COMMON *const cm) {
- const SequenceHeader *const seq_params = &cm->seq_params;
+ const SequenceHeader *const seq_params = cm->seq_params;
CommonTileParams *const tiles = &cm->tiles;
const int mi_cols =
ALIGN_POWER_OF_TWO(cm->mi_params.mi_cols, seq_params->mib_size_log2);
@@ -130,9 +130,9 @@ void av1_calculate_tile_rows(const SequenceHeader *const seq_params,
void av1_tile_set_row(TileInfo *tile, const AV1_COMMON *cm, int row) {
assert(row < cm->tiles.rows);
int mi_row_start = cm->tiles.row_start_sb[row]
- << cm->seq_params.mib_size_log2;
+ << cm->seq_params->mib_size_log2;
int mi_row_end = cm->tiles.row_start_sb[row + 1]
- << cm->seq_params.mib_size_log2;
+ << cm->seq_params->mib_size_log2;
tile->tile_row = row;
tile->mi_row_start = mi_row_start;
tile->mi_row_end = AOMMIN(mi_row_end, cm->mi_params.mi_rows);
@@ -142,9 +142,9 @@ void av1_tile_set_row(TileInfo *tile, const AV1_COMMON *cm, int row) {
void av1_tile_set_col(TileInfo *tile, const AV1_COMMON *cm, int col) {
assert(col < cm->tiles.cols);
int mi_col_start = cm->tiles.col_start_sb[col]
- << cm->seq_params.mib_size_log2;
+ << cm->seq_params->mib_size_log2;
int mi_col_end = cm->tiles.col_start_sb[col + 1]
- << cm->seq_params.mib_size_log2;
+ << cm->seq_params->mib_size_log2;
tile->tile_col = col;
tile->mi_col_start = mi_col_start;
tile->mi_col_end = AOMMIN(mi_col_end, cm->mi_params.mi_cols);
@@ -153,16 +153,16 @@ void av1_tile_set_col(TileInfo *tile, const AV1_COMMON *cm, int col) {
int av1_get_sb_rows_in_tile(AV1_COMMON *cm, TileInfo tile) {
int mi_rows_aligned_to_sb = ALIGN_POWER_OF_TWO(
- tile.mi_row_end - tile.mi_row_start, cm->seq_params.mib_size_log2);
- int sb_rows = mi_rows_aligned_to_sb >> cm->seq_params.mib_size_log2;
+ tile.mi_row_end - tile.mi_row_start, cm->seq_params->mib_size_log2);
+ int sb_rows = mi_rows_aligned_to_sb >> cm->seq_params->mib_size_log2;
return sb_rows;
}
int av1_get_sb_cols_in_tile(AV1_COMMON *cm, TileInfo tile) {
int mi_cols_aligned_to_sb = ALIGN_POWER_OF_TWO(
- tile.mi_col_end - tile.mi_col_start, cm->seq_params.mib_size_log2);
- int sb_cols = mi_cols_aligned_to_sb >> cm->seq_params.mib_size_log2;
+ tile.mi_col_end - tile.mi_col_start, cm->seq_params->mib_size_log2);
+ int sb_cols = mi_cols_aligned_to_sb >> cm->seq_params->mib_size_log2;
return sb_cols;
}
@@ -195,8 +195,8 @@ AV1PixelRect av1_get_tile_rect(const TileInfo *tile_info, const AV1_COMMON *cm,
r.bottom = AOMMIN(r.bottom, frame_h);
// Convert to coordinates in the appropriate plane
- const int ss_x = is_uv && cm->seq_params.subsampling_x;
- const int ss_y = is_uv && cm->seq_params.subsampling_y;
+ const int ss_x = is_uv && cm->seq_params->subsampling_x;
+ const int ss_y = is_uv && cm->seq_params->subsampling_y;
r.left = ROUND_POWER_OF_TWO(r.left, ss_x);
r.right = ROUND_POWER_OF_TWO(r.right, ss_x);
@@ -215,7 +215,7 @@ void av1_get_uniform_tile_size(const AV1_COMMON *cm, int *w, int *h) {
for (int i = 0; i < tiles->cols; ++i) {
const int tile_width_sb =
tiles->col_start_sb[i + 1] - tiles->col_start_sb[i];
- const int tile_w = tile_width_sb * cm->seq_params.mib_size;
+ const int tile_w = tile_width_sb * cm->seq_params->mib_size;
assert(i == 0 || tile_w == *w); // ensure all tiles have same dimension
*w = tile_w;
}
@@ -223,7 +223,7 @@ void av1_get_uniform_tile_size(const AV1_COMMON *cm, int *w, int *h) {
for (int i = 0; i < tiles->rows; ++i) {
const int tile_height_sb =
tiles->row_start_sb[i + 1] - tiles->row_start_sb[i];
- const int tile_h = tile_height_sb * cm->seq_params.mib_size;
+ const int tile_h = tile_height_sb * cm->seq_params->mib_size;
assert(i == 0 || tile_h == *h); // ensure all tiles have same dimension
*h = tile_h;
}
diff --git a/third_party/libaom/source/libaom/av1/decoder/decodeframe.c b/third_party/libaom/source/libaom/av1/decoder/decodeframe.c
index b364714e0a..9ca7d3cd35 100644
--- a/third_party/libaom/source/libaom/av1/decoder/decodeframe.c
+++ b/third_party/libaom/source/libaom/av1/decoder/decodeframe.c
@@ -76,12 +76,11 @@
// Checks that the remaining bits start with a 1 and ends with 0s.
// It consumes an additional byte, if already byte aligned before the check.
int av1_check_trailing_bits(AV1Decoder *pbi, struct aom_read_bit_buffer *rb) {
- AV1_COMMON *const cm = &pbi->common;
// bit_offset is set to 0 (mod 8) when the reader is already byte aligned
int bits_before_alignment = 8 - rb->bit_offset % 8;
int trailing = aom_rb_read_literal(rb, bits_before_alignment);
if (trailing != (1 << (bits_before_alignment - 1))) {
- cm->error.error_code = AOM_CODEC_CORRUPT_FRAME;
+ pbi->error.error_code = AOM_CODEC_CORRUPT_FRAME;
return -1;
}
return 0;
@@ -304,16 +303,18 @@ static AOM_INLINE void decode_reconstruct_tx(
const int bsw = tx_size_wide_unit[sub_txs];
const int bsh = tx_size_high_unit[sub_txs];
const int sub_step = bsw * bsh;
+ const int row_end =
+ AOMMIN(tx_size_high_unit[tx_size], max_blocks_high - blk_row);
+ const int col_end =
+ AOMMIN(tx_size_wide_unit[tx_size], max_blocks_wide - blk_col);
assert(bsw > 0 && bsh > 0);
- for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh) {
- for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) {
- const int offsetr = blk_row + row;
+ for (int row = 0; row < row_end; row += bsh) {
+ const int offsetr = blk_row + row;
+ for (int col = 0; col < col_end; col += bsw) {
const int offsetc = blk_col + col;
- if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
-
decode_reconstruct_tx(cm, td, r, mbmi, plane, plane_bsize, offsetr,
offsetc, block, sub_txs, eob_total);
block += sub_step;
@@ -362,7 +363,7 @@ static AOM_INLINE void decode_mbmi_block(AV1Decoder *const pbi,
PARTITION_TYPE partition,
BLOCK_SIZE bsize) {
AV1_COMMON *const cm = &pbi->common;
- const SequenceHeader *const seq_params = &cm->seq_params;
+ const SequenceHeader *const seq_params = cm->seq_params;
const int bw = mi_size_wide[bsize];
const int bh = mi_size_high[bsize];
const int x_mis = AOMMIN(bw, cm->mi_params.mi_cols - mi_col);
@@ -914,6 +915,16 @@ static AOM_INLINE void decode_token_recon_block(AV1Decoder *const pbi,
if (plane && !xd->is_chroma_ref) break;
const struct macroblockd_plane *const pd = &xd->plane[plane];
const TX_SIZE tx_size = av1_get_tx_size(plane, xd);
+#if CONFIG_REALTIME_ONLY
+ // Realtime only build doesn't support 4x rectangular txfm sizes.
+ if (tx_size == TX_4X16 || tx_size == TX_16X4 || tx_size == TX_8X32 ||
+ tx_size == TX_32X8 || tx_size == TX_16X64 ||
+ tx_size == TX_64X16) {
+ aom_internal_error(
+ xd->error_info, AOM_CODEC_UNSUP_FEATURE,
+ "Realtime only build doesn't support rectangular txfm sizes");
+ }
+#endif
const int stepr = tx_size_high_unit[tx_size];
const int stepc = tx_size_wide_unit[tx_size];
@@ -1219,9 +1230,9 @@ static AOM_INLINE void parse_decode_block(AV1Decoder *const pbi,
: (j == 1 ? quant_params->u_ac_delta_q
: quant_params->v_ac_delta_q);
xd->plane[j].seg_dequant_QTX[i][0] = av1_dc_quant_QTX(
- current_qindex, dc_delta_q, cm->seq_params.bit_depth);
+ current_qindex, dc_delta_q, cm->seq_params->bit_depth);
xd->plane[j].seg_dequant_QTX[i][1] = av1_ac_quant_QTX(
- current_qindex, ac_delta_q, cm->seq_params.bit_depth);
+ current_qindex, ac_delta_q, cm->seq_params->bit_depth);
}
}
}
@@ -1554,9 +1565,9 @@ static AOM_INLINE void decode_restoration_mode(AV1_COMMON *cm,
}
}
if (!all_none) {
- assert(cm->seq_params.sb_size == BLOCK_64X64 ||
- cm->seq_params.sb_size == BLOCK_128X128);
- const int sb_size = cm->seq_params.sb_size == BLOCK_128X128 ? 128 : 64;
+ assert(cm->seq_params->sb_size == BLOCK_64X64 ||
+ cm->seq_params->sb_size == BLOCK_128X128);
+ const int sb_size = cm->seq_params->sb_size == BLOCK_128X128 ? 128 : 64;
for (int p = 0; p < num_planes; ++p)
cm->rst_info[p].restoration_unit_size = sb_size;
@@ -1576,7 +1587,8 @@ static AOM_INLINE void decode_restoration_mode(AV1_COMMON *cm,
}
if (num_planes > 1) {
- int s = AOMMIN(cm->seq_params.subsampling_x, cm->seq_params.subsampling_y);
+ int s =
+ AOMMIN(cm->seq_params->subsampling_x, cm->seq_params->subsampling_y);
if (s && !chroma_none) {
cm->rst_info[1].restoration_unit_size =
cm->rst_info[0].restoration_unit_size >> (aom_rb_read_bit(rb) * s);
@@ -1847,7 +1859,7 @@ static AOM_INLINE void setup_quantization(CommonQuantParams *quant_params,
// Build y/uv dequant values based on segmentation.
static AOM_INLINE void setup_segmentation_dequant(AV1_COMMON *const cm,
MACROBLOCKD *const xd) {
- const int bit_depth = cm->seq_params.bit_depth;
+ const int bit_depth = cm->seq_params->bit_depth;
// When segmentation is disabled, only the first value is used. The
// remaining are don't cares.
const int max_segments = cm->seg.enabled ? MAX_SEGMENTS : 1;
@@ -1909,7 +1921,7 @@ static AOM_INLINE void setup_superres(AV1_COMMON *const cm,
cm->superres_upscaled_width = *width;
cm->superres_upscaled_height = *height;
- const SequenceHeader *const seq_params = &cm->seq_params;
+ const SequenceHeader *const seq_params = cm->seq_params;
if (!seq_params->enable_superres) return;
if (aom_rb_read_bit(rb)) {
@@ -1930,7 +1942,7 @@ static AOM_INLINE void resize_context_buffers(AV1_COMMON *cm, int width,
int height) {
#if CONFIG_SIZE_LIMIT
if (width > DECODE_WIDTH_LIMIT || height > DECODE_HEIGHT_LIMIT)
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ aom_internal_error(cm->error, AOM_CODEC_CORRUPT_FRAME,
"Dimensions of %dx%d beyond allowed size of %dx%d.",
width, height, DECODE_WIDTH_LIMIT, DECODE_HEIGHT_LIMIT);
#endif
@@ -1950,7 +1962,7 @@ static AOM_INLINE void resize_context_buffers(AV1_COMMON *cm, int width,
// consistent and to force a realloc next time.
cm->width = 0;
cm->height = 0;
- aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
+ aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate context buffers");
}
} else {
@@ -1968,7 +1980,7 @@ static AOM_INLINE void resize_context_buffers(AV1_COMMON *cm, int width,
static AOM_INLINE void setup_buffer_pool(AV1_COMMON *cm) {
BufferPool *const pool = cm->buffer_pool;
- const SequenceHeader *const seq_params = &cm->seq_params;
+ const SequenceHeader *const seq_params = cm->seq_params;
lock_buffer_pool(pool);
if (aom_realloc_frame_buffer(
@@ -1978,7 +1990,7 @@ static AOM_INLINE void setup_buffer_pool(AV1_COMMON *cm) {
&cm->cur_frame->raw_frame_buffer, pool->get_fb_cb, pool->cb_priv,
0)) {
unlock_buffer_pool(pool);
- aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
+ aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate frame buffer");
}
unlock_buffer_pool(pool);
@@ -1999,7 +2011,7 @@ static AOM_INLINE void setup_buffer_pool(AV1_COMMON *cm) {
static AOM_INLINE void setup_frame_size(AV1_COMMON *cm,
int frame_size_override_flag,
struct aom_read_bit_buffer *rb) {
- const SequenceHeader *const seq_params = &cm->seq_params;
+ const SequenceHeader *const seq_params = cm->seq_params;
int width, height;
if (frame_size_override_flag) {
@@ -2008,7 +2020,7 @@ static AOM_INLINE void setup_frame_size(AV1_COMMON *cm,
av1_read_frame_size(rb, num_bits_width, num_bits_height, &width, &height);
if (width > seq_params->max_frame_width ||
height > seq_params->max_frame_height) {
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ aom_internal_error(cm->error, AOM_CODEC_CORRUPT_FRAME,
"Frame dimensions are larger than the maximum values");
}
} else {
@@ -2049,7 +2061,7 @@ static AOM_INLINE void setup_frame_size_with_refs(
// the middle of a stream, and static analysis will error if we don't do
// a null check here.
if (ref_buf == NULL) {
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ aom_internal_error(cm->error, AOM_CODEC_CORRUPT_FRAME,
"Invalid condition: invalid reference buffer");
} else {
const YV12_BUFFER_CONFIG *const buf = &ref_buf->buf;
@@ -2065,7 +2077,7 @@ static AOM_INLINE void setup_frame_size_with_refs(
}
}
- const SequenceHeader *const seq_params = &cm->seq_params;
+ const SequenceHeader *const seq_params = cm->seq_params;
if (!found) {
int num_bits_width = seq_params->num_bits_width;
int num_bits_height = seq_params->num_bits_height;
@@ -2077,7 +2089,7 @@ static AOM_INLINE void setup_frame_size_with_refs(
}
if (width <= 0 || height <= 0)
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ aom_internal_error(cm->error, AOM_CODEC_CORRUPT_FRAME,
"Invalid frame size");
// Check to make sure at least one of frames that this frame references
@@ -2089,7 +2101,7 @@ static AOM_INLINE void setup_frame_size_with_refs(
ref_frame->buf.y_crop_height, width, height);
}
if (!has_valid_ref_frame)
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ aom_internal_error(cm->error, AOM_CODEC_CORRUPT_FRAME,
"Referenced frame has invalid size");
for (int i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
const RefCntBuffer *const ref_frame = get_ref_frame_buf(cm, i);
@@ -2097,7 +2109,7 @@ static AOM_INLINE void setup_frame_size_with_refs(
ref_frame->buf.bit_depth, ref_frame->buf.subsampling_x,
ref_frame->buf.subsampling_y, seq_params->bit_depth,
seq_params->subsampling_x, seq_params->subsampling_y))
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ aom_internal_error(cm->error, AOM_CODEC_CORRUPT_FRAME,
"Referenced frame has incompatible color format");
}
setup_buffer_pool(cm);
@@ -2117,7 +2129,7 @@ static int rb_read_uniform(struct aom_read_bit_buffer *const rb, int n) {
static AOM_INLINE void read_tile_info_max_tile(
AV1_COMMON *const cm, struct aom_read_bit_buffer *const rb) {
- const SequenceHeader *const seq_params = &cm->seq_params;
+ const SequenceHeader *const seq_params = cm->seq_params;
CommonTileParams *const tiles = &cm->tiles;
int width_mi =
ALIGN_POWER_OF_TWO(cm->mi_params.mi_cols, seq_params->mib_size_log2);
@@ -2213,7 +2225,7 @@ static AOM_INLINE void read_tile_info(AV1Decoder *const pbi,
pbi->context_update_tile_id =
aom_rb_read_literal(rb, cm->tiles.log2_rows + cm->tiles.log2_cols);
if (pbi->context_update_tile_id >= cm->tiles.rows * cm->tiles.cols) {
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ aom_internal_error(&pbi->error, AOM_CODEC_CORRUPT_FRAME,
"Invalid context_update_tile_id");
}
// tile size magnitude
@@ -2366,7 +2378,7 @@ static const uint8_t *get_ls_tile_buffers(
// Get the whole of the last column, otherwise stop at the required tile.
for (int r = 0; r < (is_last ? tile_rows : tile_rows_end); ++r) {
- get_ls_tile_buffer(tile_col_data_end[c], &pbi->common.error, &data,
+ get_ls_tile_buffer(tile_col_data_end[c], &pbi->error, &data,
tile_buffers, tile_size_bytes, c, r, tile_copy_mode);
}
}
@@ -2378,7 +2390,7 @@ static const uint8_t *get_ls_tile_buffers(
data = tile_col_data_end[c - 1];
for (int r = 0; r < tile_rows; ++r) {
- get_ls_tile_buffer(tile_col_data_end[c], &pbi->common.error, &data,
+ get_ls_tile_buffer(tile_col_data_end[c], &pbi->error, &data,
tile_buffers, tile_size_bytes, c, r, tile_copy_mode);
}
}
@@ -2446,11 +2458,11 @@ static AOM_INLINE void get_tile_buffers(
if (tc < start_tile || tc > end_tile) continue;
if (data + hdr_offset >= data_end)
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ aom_internal_error(&pbi->error, AOM_CODEC_CORRUPT_FRAME,
"Data ended before all tiles were read.");
data += hdr_offset;
- get_tile_buffer(data_end, pbi->tile_size_bytes, is_last,
- &pbi->common.error, &data, buf);
+ get_tile_buffer(data_end, pbi->tile_size_bytes, is_last, &pbi->error,
+ &data, buf);
}
}
}
@@ -2460,7 +2472,7 @@ static AOM_INLINE void set_cb_buffer(AV1Decoder *pbi, DecoderCodingBlock *dcb,
const int num_planes, int mi_row,
int mi_col) {
AV1_COMMON *const cm = &pbi->common;
- int mib_size_log2 = cm->seq_params.mib_size_log2;
+ int mib_size_log2 = cm->seq_params->mib_size_log2;
int stride = (cm->mi_params.mi_cols >> mib_size_log2) + 1;
int offset = (mi_row >> mib_size_log2) * stride + (mi_col >> mib_size_log2);
CB_BUFFER *cb_buffer = cb_buffer_base + offset;
@@ -2629,11 +2641,11 @@ static AOM_INLINE void decode_tile_sb_row(AV1Decoder *pbi, ThreadData *const td,
pbi->tile_data + tile_info.tile_row * cm->tiles.cols + tile_info.tile_col;
const int sb_cols_in_tile = av1_get_sb_cols_in_tile(cm, tile_info);
const int sb_row_in_tile =
- (mi_row - tile_info.mi_row_start) >> cm->seq_params.mib_size_log2;
+ (mi_row - tile_info.mi_row_start) >> cm->seq_params->mib_size_log2;
int sb_col_in_tile = 0;
for (int mi_col = tile_info.mi_col_start; mi_col < tile_info.mi_col_end;
- mi_col += cm->seq_params.mib_size, sb_col_in_tile++) {
+ mi_col += cm->seq_params->mib_size, sb_col_in_tile++) {
set_cb_buffer(pbi, &td->dcb, pbi->cb_buffer_base, num_planes, mi_row,
mi_col);
@@ -2641,7 +2653,7 @@ static AOM_INLINE void decode_tile_sb_row(AV1Decoder *pbi, ThreadData *const td,
// Decoding of the super-block
decode_partition(pbi, td, mi_row, mi_col, td->bit_reader,
- cm->seq_params.sb_size, 0x2);
+ cm->seq_params->sb_size, 0x2);
sync_write(&tile_data->dec_row_mt_sync, sb_row_in_tile, sb_col_in_tile,
sb_cols_in_tile);
@@ -2711,16 +2723,16 @@ static AOM_INLINE void decode_tile(AV1Decoder *pbi, ThreadData *const td,
av1_reset_loop_restoration(xd, num_planes);
for (int mi_row = tile_info.mi_row_start; mi_row < tile_info.mi_row_end;
- mi_row += cm->seq_params.mib_size) {
+ mi_row += cm->seq_params->mib_size) {
av1_zero_left_context(xd);
for (int mi_col = tile_info.mi_col_start; mi_col < tile_info.mi_col_end;
- mi_col += cm->seq_params.mib_size) {
+ mi_col += cm->seq_params->mib_size) {
set_cb_buffer(pbi, dcb, &td->cb_buffer_base, num_planes, 0, 0);
// Bit-stream parsing and decoding of the superblock
decode_partition(pbi, td, mi_row, mi_col, td->bit_reader,
- cm->seq_params.sb_size, 0x3);
+ cm->seq_params->sb_size, 0x3);
if (aom_reader_has_overflowed(td->bit_reader)) {
aom_merge_corrupted_flag(&dcb->corrupted, 1);
@@ -2801,6 +2813,10 @@ static const uint8_t *decode_tiles(AV1Decoder *pbi, const uint8_t *data,
if (pbi->tile_data == NULL || n_tiles != pbi->allocated_tiles) {
decoder_alloc_tile_data(pbi, n_tiles);
}
+ if (pbi->dcb.xd.seg_mask == NULL)
+ CHECK_MEM_ERROR(cm, pbi->dcb.xd.seg_mask,
+ (uint8_t *)aom_memalign(
+ 16, 2 * MAX_SB_SQUARE * sizeof(*pbi->dcb.xd.seg_mask)));
#if CONFIG_ACCOUNTING
if (pbi->acct_enabled) {
aom_accounting_reset(&pbi->accounting);
@@ -2837,7 +2853,7 @@ static const uint8_t *decode_tiles(AV1Decoder *pbi, const uint8_t *data,
av1_tile_init(&td->dcb.xd.tile, cm, row, col);
td->dcb.xd.current_base_qindex = cm->quant_params.base_qindex;
setup_bool_decoder(tile_bs_buf->data, data_end, tile_bs_buf->size,
- &cm->error, td->bit_reader, allow_update_cdf);
+ &pbi->error, td->bit_reader, allow_update_cdf);
#if CONFIG_ACCOUNTING
if (pbi->acct_enabled) {
td->bit_reader->accounting = &pbi->accounting;
@@ -2859,7 +2875,7 @@ static const uint8_t *decode_tiles(AV1Decoder *pbi, const uint8_t *data,
decode_tile(pbi, td, row, col);
aom_merge_corrupted_flag(&pbi->dcb.corrupted, td->dcb.corrupted);
if (pbi->dcb.corrupted)
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ aom_internal_error(&pbi->error, AOM_CODEC_CORRUPT_FRAME,
"Failed to decode tile data");
}
}
@@ -3017,7 +3033,7 @@ static int get_next_job_info(AV1Decoder *const pbi,
const int tile_cols_end = frame_row_mt_info->tile_cols_end;
const int start_tile = frame_row_mt_info->start_tile;
const int end_tile = frame_row_mt_info->end_tile;
- const int sb_mi_size = mi_size_wide[cm->seq_params.sb_size];
+ const int sb_mi_size = mi_size_wide[cm->seq_params->sb_size];
int num_mis_to_decode, num_threads_working;
int num_mis_waiting_for_decode;
int min_threads_working = INT_MAX;
@@ -3135,7 +3151,7 @@ static INLINE void signal_parse_sb_row_done(AV1Decoder *const pbi,
static AOM_INLINE void parse_tile_row_mt(AV1Decoder *pbi, ThreadData *const td,
TileDataDec *const tile_data) {
AV1_COMMON *const cm = &pbi->common;
- const int sb_mi_size = mi_size_wide[cm->seq_params.sb_size];
+ const int sb_mi_size = mi_size_wide[cm->seq_params->sb_size];
const int num_planes = av1_num_planes(cm);
TileInfo tile_info = tile_data->tile_info;
int tile_row = tile_info.tile_row;
@@ -3148,16 +3164,16 @@ static AOM_INLINE void parse_tile_row_mt(AV1Decoder *pbi, ThreadData *const td,
av1_reset_loop_restoration(xd, num_planes);
for (int mi_row = tile_info.mi_row_start; mi_row < tile_info.mi_row_end;
- mi_row += cm->seq_params.mib_size) {
+ mi_row += cm->seq_params->mib_size) {
av1_zero_left_context(xd);
for (int mi_col = tile_info.mi_col_start; mi_col < tile_info.mi_col_end;
- mi_col += cm->seq_params.mib_size) {
+ mi_col += cm->seq_params->mib_size) {
set_cb_buffer(pbi, dcb, pbi->cb_buffer_base, num_planes, mi_row, mi_col);
// Bit-stream parsing of the superblock
decode_partition(pbi, td, mi_row, mi_col, td->bit_reader,
- cm->seq_params.sb_size, 0x1);
+ cm->seq_params->sb_size, 0x1);
if (aom_reader_has_overflowed(td->bit_reader)) {
aom_merge_corrupted_flag(&dcb->corrupted, 1);
@@ -3357,6 +3373,8 @@ void av1_free_mc_tmp_buf(ThreadData *thread_data) {
aom_free(thread_data->tmp_conv_dst);
thread_data->tmp_conv_dst = NULL;
+ aom_free(thread_data->seg_mask);
+ thread_data->seg_mask = NULL;
for (int i = 0; i < 2; ++i) {
aom_free(thread_data->tmp_obmc_bufs[i]);
thread_data->tmp_obmc_bufs[i] = NULL;
@@ -3389,6 +3407,10 @@ static AOM_INLINE void allocate_mc_tmp_buf(AV1_COMMON *const cm,
CHECK_MEM_ERROR(cm, thread_data->tmp_conv_dst,
aom_memalign(32, MAX_SB_SIZE * MAX_SB_SIZE *
sizeof(*thread_data->tmp_conv_dst)));
+ CHECK_MEM_ERROR(cm, thread_data->seg_mask,
+ (uint8_t *)aom_memalign(
+ 16, 2 * MAX_SB_SQUARE * sizeof(*thread_data->seg_mask)));
+
for (int i = 0; i < 2; ++i) {
CHECK_MEM_ERROR(
cm, thread_data->tmp_obmc_bufs[i],
@@ -3411,6 +3433,8 @@ static AOM_INLINE void reset_dec_workers(AV1Decoder *pbi,
thread_data->td->dcb.mc_buf[0] = thread_data->td->mc_buf[0];
thread_data->td->dcb.mc_buf[1] = thread_data->td->mc_buf[1];
thread_data->td->dcb.xd.tmp_conv_dst = thread_data->td->tmp_conv_dst;
+ if (worker_idx)
+ thread_data->td->dcb.xd.seg_mask = thread_data->td->seg_mask;
for (int j = 0; j < 2; ++j) {
thread_data->td->dcb.xd.tmp_obmc_bufs[j] =
thread_data->td->tmp_obmc_bufs[j];
@@ -3481,7 +3505,7 @@ static AOM_INLINE void decode_mt_init(AV1Decoder *pbi) {
winterface->init(worker);
worker->thread_name = "aom tile worker";
if (worker_idx != 0 && !winterface->reset(worker)) {
- aom_internal_error(&cm->error, AOM_CODEC_ERROR,
+ aom_internal_error(&pbi->error, AOM_CODEC_ERROR,
"Tile decoder thread creation failed");
}
@@ -3498,7 +3522,7 @@ static AOM_INLINE void decode_mt_init(AV1Decoder *pbi) {
thread_data->error_info.setjmp = 0;
}
}
- const int use_highbd = cm->seq_params.use_highbitdepth;
+ const int use_highbd = cm->seq_params->use_highbitdepth;
const int buf_size = MC_TEMP_BUF_PELS << use_highbd;
for (worker_idx = 1; worker_idx < pbi->max_threads; ++worker_idx) {
DecWorkerData *const thread_data = pbi->thread_data + worker_idx;
@@ -3590,6 +3614,10 @@ static const uint8_t *decode_tiles_mt(AV1Decoder *pbi, const uint8_t *data,
if (pbi->tile_data == NULL || n_tiles != pbi->allocated_tiles) {
decoder_alloc_tile_data(pbi, n_tiles);
}
+ if (pbi->dcb.xd.seg_mask == NULL)
+ CHECK_MEM_ERROR(cm, pbi->dcb.xd.seg_mask,
+ (uint8_t *)aom_memalign(
+ 16, 2 * MAX_SB_SQUARE * sizeof(*pbi->dcb.xd.seg_mask)));
for (int row = 0; row < tile_rows; row++) {
for (int col = 0; col < tile_cols; col++) {
@@ -3606,7 +3634,7 @@ static const uint8_t *decode_tiles_mt(AV1Decoder *pbi, const uint8_t *data,
sync_dec_workers(pbi, num_workers);
if (pbi->dcb.corrupted)
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ aom_internal_error(&pbi->error, AOM_CODEC_CORRUPT_FRAME,
"Failed to decode tile data");
if (tiles->large_scale) {
@@ -3624,8 +3652,8 @@ static const uint8_t *decode_tiles_mt(AV1Decoder *pbi, const uint8_t *data,
static AOM_INLINE void dec_alloc_cb_buf(AV1Decoder *pbi) {
AV1_COMMON *const cm = &pbi->common;
- int size = ((cm->mi_params.mi_rows >> cm->seq_params.mib_size_log2) + 1) *
- ((cm->mi_params.mi_cols >> cm->seq_params.mib_size_log2) + 1);
+ int size = ((cm->mi_params.mi_rows >> cm->seq_params->mib_size_log2) + 1) *
+ ((cm->mi_params.mi_cols >> cm->seq_params->mib_size_log2) + 1);
if (pbi->cb_buffer_alloc_size < size) {
av1_dec_free_cb_buf(pbi);
@@ -3669,10 +3697,10 @@ static AOM_INLINE void row_mt_frame_init(AV1Decoder *pbi, int tile_rows_start,
tile_data->dec_row_mt_sync.num_threads_working = 0;
tile_data->dec_row_mt_sync.mi_rows =
ALIGN_POWER_OF_TWO(tile_info.mi_row_end - tile_info.mi_row_start,
- cm->seq_params.mib_size_log2);
+ cm->seq_params->mib_size_log2);
tile_data->dec_row_mt_sync.mi_cols =
ALIGN_POWER_OF_TWO(tile_info.mi_col_end - tile_info.mi_col_start,
- cm->seq_params.mib_size_log2);
+ cm->seq_params->mib_size_log2);
frame_row_mt_info->mi_rows_to_decode +=
tile_data->dec_row_mt_sync.mi_rows;
@@ -3776,6 +3804,10 @@ static const uint8_t *decode_tiles_row_mt(AV1Decoder *pbi, const uint8_t *data,
}
decoder_alloc_tile_data(pbi, n_tiles);
}
+ if (pbi->dcb.xd.seg_mask == NULL)
+ CHECK_MEM_ERROR(cm, pbi->dcb.xd.seg_mask,
+ (uint8_t *)aom_memalign(
+ 16, 2 * MAX_SB_SQUARE * sizeof(*pbi->dcb.xd.seg_mask)));
for (int row = 0; row < tile_rows; row++) {
for (int col = 0; col < tile_cols; col++) {
@@ -3811,7 +3843,7 @@ static const uint8_t *decode_tiles_row_mt(AV1Decoder *pbi, const uint8_t *data,
sync_dec_workers(pbi, num_workers);
if (pbi->dcb.corrupted)
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ aom_internal_error(&pbi->error, AOM_CODEC_CORRUPT_FRAME,
"Failed to decode tile data");
if (tiles->large_scale) {
@@ -3829,7 +3861,7 @@ static const uint8_t *decode_tiles_row_mt(AV1Decoder *pbi, const uint8_t *data,
static AOM_INLINE void error_handler(void *data) {
AV1_COMMON *const cm = (AV1_COMMON *)data;
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME, "Truncated packet");
+ aom_internal_error(cm->error, AOM_CODEC_CORRUPT_FRAME, "Truncated packet");
}
// Reads the high_bitdepth and twelve_bit fields in color_config() and sets
@@ -3860,7 +3892,7 @@ static AOM_INLINE void read_bitdepth(
void av1_read_film_grain_params(AV1_COMMON *cm,
struct aom_read_bit_buffer *rb) {
aom_film_grain_t *pars = &cm->film_grain_params;
- const SequenceHeader *const seq_params = &cm->seq_params;
+ const SequenceHeader *const seq_params = cm->seq_params;
pars->apply_grain = aom_rb_read_bit(rb);
if (!pars->apply_grain) {
@@ -3890,7 +3922,7 @@ void av1_read_film_grain_params(AV1_COMMON *cm,
}
}
if (!found) {
- aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
+ aom_internal_error(cm->error, AOM_CODEC_UNSUP_BITSTREAM,
"Invalid film grain reference idx %d. ref_frame_idx = "
"{%d, %d, %d, %d, %d, %d, %d}",
film_grain_params_ref_idx, cm->remapped_ref_idx[0],
@@ -3900,11 +3932,11 @@ void av1_read_film_grain_params(AV1_COMMON *cm,
}
RefCntBuffer *const buf = cm->ref_frame_map[film_grain_params_ref_idx];
if (buf == NULL) {
- aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
+ aom_internal_error(cm->error, AOM_CODEC_UNSUP_BITSTREAM,
"Invalid Film grain reference idx");
}
if (!buf->film_grain_params_present) {
- aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
+ aom_internal_error(cm->error, AOM_CODEC_UNSUP_BITSTREAM,
"Film grain reference parameters not available");
}
uint16_t random_seed = pars->random_seed;
@@ -3916,13 +3948,13 @@ void av1_read_film_grain_params(AV1_COMMON *cm,
// Scaling functions parameters
pars->num_y_points = aom_rb_read_literal(rb, 4); // max 14
if (pars->num_y_points > 14)
- aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
+ aom_internal_error(cm->error, AOM_CODEC_UNSUP_BITSTREAM,
"Number of points for film grain luma scaling function "
"exceeds the maximum value.");
for (int i = 0; i < pars->num_y_points; i++) {
pars->scaling_points_y[i][0] = aom_rb_read_literal(rb, 8);
if (i && pars->scaling_points_y[i - 1][0] >= pars->scaling_points_y[i][0])
- aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
+ aom_internal_error(cm->error, AOM_CODEC_UNSUP_BITSTREAM,
"First coordinate of the scaling function points "
"shall be increasing.");
pars->scaling_points_y[i][1] = aom_rb_read_literal(rb, 8);
@@ -3941,14 +3973,14 @@ void av1_read_film_grain_params(AV1_COMMON *cm,
} else {
pars->num_cb_points = aom_rb_read_literal(rb, 4); // max 10
if (pars->num_cb_points > 10)
- aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
+ aom_internal_error(cm->error, AOM_CODEC_UNSUP_BITSTREAM,
"Number of points for film grain cb scaling function "
"exceeds the maximum value.");
for (int i = 0; i < pars->num_cb_points; i++) {
pars->scaling_points_cb[i][0] = aom_rb_read_literal(rb, 8);
if (i &&
pars->scaling_points_cb[i - 1][0] >= pars->scaling_points_cb[i][0])
- aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
+ aom_internal_error(cm->error, AOM_CODEC_UNSUP_BITSTREAM,
"First coordinate of the scaling function points "
"shall be increasing.");
pars->scaling_points_cb[i][1] = aom_rb_read_literal(rb, 8);
@@ -3956,14 +3988,14 @@ void av1_read_film_grain_params(AV1_COMMON *cm,
pars->num_cr_points = aom_rb_read_literal(rb, 4); // max 10
if (pars->num_cr_points > 10)
- aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
+ aom_internal_error(cm->error, AOM_CODEC_UNSUP_BITSTREAM,
"Number of points for film grain cr scaling function "
"exceeds the maximum value.");
for (int i = 0; i < pars->num_cr_points; i++) {
pars->scaling_points_cr[i][0] = aom_rb_read_literal(rb, 8);
if (i &&
pars->scaling_points_cr[i - 1][0] >= pars->scaling_points_cr[i][0])
- aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
+ aom_internal_error(cm->error, AOM_CODEC_UNSUP_BITSTREAM,
"First coordinate of the scaling function points "
"shall be increasing.");
pars->scaling_points_cr[i][1] = aom_rb_read_literal(rb, 8);
@@ -3972,7 +4004,7 @@ void av1_read_film_grain_params(AV1_COMMON *cm,
if ((seq_params->subsampling_x == 1) && (seq_params->subsampling_y == 1) &&
(((pars->num_cb_points == 0) && (pars->num_cr_points != 0)) ||
((pars->num_cb_points != 0) && (pars->num_cr_points == 0))))
- aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
+ aom_internal_error(cm->error, AOM_CODEC_UNSUP_BITSTREAM,
"In YCbCr 4:2:0, film grain shall be applied "
"to both chroma components or neither.");
}
@@ -4024,13 +4056,13 @@ void av1_read_film_grain_params(AV1_COMMON *cm,
static AOM_INLINE void read_film_grain(AV1_COMMON *cm,
struct aom_read_bit_buffer *rb) {
- if (cm->seq_params.film_grain_params_present &&
+ if (cm->seq_params->film_grain_params_present &&
(cm->show_frame || cm->showable_frame)) {
av1_read_film_grain_params(cm, rb);
} else {
memset(&cm->film_grain_params, 0, sizeof(cm->film_grain_params));
}
- cm->film_grain_params.bit_depth = cm->seq_params.bit_depth;
+ cm->film_grain_params.bit_depth = cm->seq_params->bit_depth;
memcpy(&cm->cur_frame->film_grain_params, &cm->film_grain_params,
sizeof(aom_film_grain_t));
}
@@ -4164,7 +4196,7 @@ void av1_read_op_parameters_info(aom_dec_model_op_parameters_t *op_params,
static AOM_INLINE void read_temporal_point_info(
AV1_COMMON *const cm, struct aom_read_bit_buffer *rb) {
cm->frame_presentation_time = aom_rb_read_unsigned_literal(
- rb, cm->seq_params.decoder_model_info.frame_presentation_time_length);
+ rb, cm->seq_params->decoder_model_info.frame_presentation_time_length);
}
void av1_read_sequence_header(AV1_COMMON *cm, struct aom_read_bit_buffer *rb,
@@ -4192,7 +4224,7 @@ void av1_read_sequence_header(AV1_COMMON *cm, struct aom_read_bit_buffer *rb,
seq_params->frame_id_length =
aom_rb_read_literal(rb, 3) + seq_params->delta_frame_id_length + 1;
if (seq_params->frame_id_length > 16)
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ aom_internal_error(cm->error, AOM_CODEC_CORRUPT_FRAME,
"Invalid frame_id_length");
}
@@ -4446,7 +4478,7 @@ static INLINE void reset_frame_buffers(AV1_COMMON *cm) {
static int read_uncompressed_header(AV1Decoder *pbi,
struct aom_read_bit_buffer *rb) {
AV1_COMMON *const cm = &pbi->common;
- const SequenceHeader *const seq_params = &cm->seq_params;
+ const SequenceHeader *const seq_params = cm->seq_params;
CurrentFrame *const current_frame = &cm->current_frame;
FeatureFlags *const features = &cm->features;
MACROBLOCKD *const xd = &pbi->dcb.xd;
@@ -4457,7 +4489,7 @@ static int read_uncompressed_header(AV1Decoder *pbi,
sframe_info->is_s_frame_at_altref = 0;
if (!pbi->sequence_header_ready) {
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ aom_internal_error(&pbi->error, AOM_CODEC_CORRUPT_FRAME,
"No sequence header");
}
@@ -4479,14 +4511,14 @@ static int read_uncompressed_header(AV1Decoder *pbi,
if (cm->show_existing_frame) {
if (pbi->sequence_header_changed) {
aom_internal_error(
- &cm->error, AOM_CODEC_CORRUPT_FRAME,
+ &pbi->error, AOM_CODEC_CORRUPT_FRAME,
"New sequence header starts with a show_existing_frame.");
}
// Show an existing frame directly.
const int existing_frame_idx = aom_rb_read_literal(rb, 3);
RefCntBuffer *const frame_to_show = cm->ref_frame_map[existing_frame_idx];
if (frame_to_show == NULL) {
- aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
+ aom_internal_error(&pbi->error, AOM_CODEC_UNSUP_BITSTREAM,
"Buffer does not contain a decoded frame");
}
if (seq_params->decoder_model_info_present_flag &&
@@ -4500,7 +4532,7 @@ static int read_uncompressed_header(AV1Decoder *pbi,
* referencing */
if (display_frame_id != cm->ref_frame_id[existing_frame_idx] ||
pbi->valid_for_referencing[existing_frame_idx] == 0)
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ aom_internal_error(&pbi->error, AOM_CODEC_CORRUPT_FRAME,
"Reference buffer frame ID mismatch");
}
lock_buffer_pool(pool);
@@ -4526,7 +4558,7 @@ static int read_uncompressed_header(AV1Decoder *pbi,
// show_existing_frame is used to show a previous frame, that the value
// of showable_frame for the previous frame was equal to 1.
if (!frame_to_show->showable_frame) {
- aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
+ aom_internal_error(&pbi->error, AOM_CODEC_UNSUP_BITSTREAM,
"Buffer does not contain a showable frame");
}
// Section 6.8.2: It is a requirement of bitstream conformance that when
@@ -4554,7 +4586,7 @@ static int read_uncompressed_header(AV1Decoder *pbi,
pbi->decoding_first_frame = 1;
reset_frame_buffers(cm);
} else {
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ aom_internal_error(&pbi->error, AOM_CODEC_CORRUPT_FRAME,
"Sequence header has changed without a keyframe.");
}
}
@@ -4569,7 +4601,7 @@ static int read_uncompressed_header(AV1Decoder *pbi,
}
if (seq_params->still_picture &&
(current_frame->frame_type != KEY_FRAME || !cm->show_frame)) {
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ aom_internal_error(&pbi->error, AOM_CODEC_CORRUPT_FRAME,
"Still pictures must be coded as shown keyframes");
}
cm->showable_frame = current_frame->frame_type != KEY_FRAME;
@@ -4641,7 +4673,7 @@ static int read_uncompressed_header(AV1Decoder *pbi,
/* Check current_frame_id for conformance */
if (prev_frame_id == cm->current_frame_id ||
diff_frame_id >= (1 << (frame_id_length - 1))) {
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ aom_internal_error(&pbi->error, AOM_CODEC_CORRUPT_FRAME,
"Invalid value of current_frame_id");
}
}
@@ -4672,18 +4704,18 @@ static int read_uncompressed_header(AV1Decoder *pbi,
}
if (seq_params->decoder_model_info_present_flag) {
- cm->buffer_removal_time_present = aom_rb_read_bit(rb);
- if (cm->buffer_removal_time_present) {
+ pbi->buffer_removal_time_present = aom_rb_read_bit(rb);
+ if (pbi->buffer_removal_time_present) {
for (int op_num = 0;
op_num < seq_params->operating_points_cnt_minus_1 + 1; op_num++) {
if (seq_params->op_params[op_num].decoder_model_param_present_flag) {
- if ((((seq_params->operating_point_idc[op_num] >>
+ if (seq_params->operating_point_idc[op_num] == 0 ||
+ (((seq_params->operating_point_idc[op_num] >>
cm->temporal_layer_id) &
0x1) &&
((seq_params->operating_point_idc[op_num] >>
(cm->spatial_layer_id + 8)) &
- 0x1)) ||
- seq_params->operating_point_idc[op_num] == 0) {
+ 0x1))) {
cm->buffer_removal_times[op_num] = aom_rb_read_unsigned_literal(
rb, seq_params->decoder_model_info.buffer_removal_time_length);
} else {
@@ -4713,7 +4745,7 @@ static int read_uncompressed_header(AV1Decoder *pbi,
if (current_frame->frame_type == INTRA_ONLY_FRAME) {
current_frame->refresh_frame_flags = aom_rb_read_literal(rb, REF_FRAMES);
if (current_frame->refresh_frame_flags == 0xFF) {
- aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
+ aom_internal_error(&pbi->error, AOM_CODEC_UNSUP_BITSTREAM,
"Intra only frames cannot have refresh flags 0xFF");
}
if (pbi->need_resync) {
@@ -4747,7 +4779,7 @@ static int read_uncompressed_header(AV1Decoder *pbi,
// pixels set to neutral grey.
int buf_idx = get_free_fb(cm);
if (buf_idx == INVALID_IDX) {
- aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
+ aom_internal_error(&pbi->error, AOM_CODEC_MEM_ERROR,
"Unable to find free frame buffer");
}
buf = &frame_bufs[buf_idx];
@@ -4760,7 +4792,7 @@ static int read_uncompressed_header(AV1Decoder *pbi,
&buf->raw_frame_buffer, pool->get_fb_cb, pool->cb_priv, 0)) {
decrease_ref_count(buf, pool);
unlock_buffer_pool(pool);
- aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
+ aom_internal_error(&pbi->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate frame buffer");
}
unlock_buffer_pool(pool);
@@ -4827,10 +4859,10 @@ static int read_uncompressed_header(AV1Decoder *pbi,
// reference to a slot that hasn't been set yet. That's what we are
// checking here.
if (lst_buf == NULL)
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ aom_internal_error(&pbi->error, AOM_CODEC_CORRUPT_FRAME,
"Inter frame requests nonexistent reference");
if (gld_buf == NULL)
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ aom_internal_error(&pbi->error, AOM_CODEC_CORRUPT_FRAME,
"Inter frame requests nonexistent reference");
av1_set_frame_refs(cm, cm->remapped_ref_idx, lst_ref, gld_ref);
@@ -4848,7 +4880,7 @@ static int read_uncompressed_header(AV1Decoder *pbi,
// reference to a slot that hasn't been set yet. That's what we are
// checking here.
if (cm->ref_frame_map[ref] == NULL)
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ aom_internal_error(&pbi->error, AOM_CODEC_CORRUPT_FRAME,
"Inter frame requests nonexistent reference");
cm->remapped_ref_idx[i] = ref;
} else {
@@ -4856,7 +4888,7 @@ static int read_uncompressed_header(AV1Decoder *pbi,
}
// Check valid for referencing
if (pbi->valid_for_referencing[ref] == 0)
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ aom_internal_error(&pbi->error, AOM_CODEC_CORRUPT_FRAME,
"Reference frame not valid for referencing");
cm->ref_frame_sign_bias[LAST_FRAME + i] = 0;
@@ -4872,7 +4904,7 @@ static int read_uncompressed_header(AV1Decoder *pbi,
// Compare values derived from delta_frame_id_minus_1 and
// refresh_frame_flags.
if (ref_frame_id != cm->ref_frame_id[ref])
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ aom_internal_error(&pbi->error, AOM_CODEC_CORRUPT_FRAME,
"Reference buffer frame ID mismatch");
}
}
@@ -4895,7 +4927,7 @@ static int read_uncompressed_header(AV1Decoder *pbi,
cm->prev_frame = get_primary_ref_frame_buf(cm);
if (features->primary_ref_frame != PRIMARY_REF_NONE &&
get_primary_ref_frame_buf(cm) == NULL) {
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ aom_internal_error(&pbi->error, AOM_CODEC_CORRUPT_FRAME,
"Reference frame containing this frame's initial "
"frame context is unavailable.");
}
@@ -4915,7 +4947,7 @@ static int read_uncompressed_header(AV1Decoder *pbi,
ref_scale_factors, ref_buf->buf.y_crop_width,
ref_buf->buf.y_crop_height, cm->width, cm->height);
if ((!av1_is_valid_scale(ref_scale_factors)))
- aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
+ aom_internal_error(&pbi->error, AOM_CODEC_UNSUP_BITSTREAM,
"Reference frame has invalid dimensions");
}
}
@@ -4952,7 +4984,7 @@ static int read_uncompressed_header(AV1Decoder *pbi,
cm->cur_frame->buf.render_height = cm->render_height;
if (pbi->need_resync) {
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ aom_internal_error(&pbi->error, AOM_CODEC_CORRUPT_FRAME,
"Keyframe / intra-only frame required to reset decoder"
" state");
}
@@ -4973,13 +5005,13 @@ static int read_uncompressed_header(AV1Decoder *pbi,
read_tile_info(pbi, rb);
if (!av1_is_min_tile_width_satisfied(cm)) {
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ aom_internal_error(&pbi->error, AOM_CODEC_CORRUPT_FRAME,
"Minimum tile width requirement not satisfied");
}
CommonQuantParams *const quant_params = &cm->quant_params;
setup_quantization(quant_params, av1_num_planes(cm),
- cm->seq_params.separate_uv_delta_q, rb);
+ cm->seq_params->separate_uv_delta_q, rb);
xd->bd = (int)seq_params->bit_depth;
CommonContexts *const above_contexts = &cm->above_contexts;
@@ -4990,7 +5022,7 @@ static int read_uncompressed_header(AV1Decoder *pbi,
if (av1_alloc_above_context_buffers(above_contexts, cm->tiles.rows,
cm->mi_params.mi_cols,
av1_num_planes(cm))) {
- aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
+ aom_internal_error(&pbi->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate context buffers");
}
}
@@ -5070,7 +5102,7 @@ static int read_uncompressed_header(AV1Decoder *pbi,
features->reduced_tx_set_used = aom_rb_read_bit(rb);
if (features->allow_ref_frame_mvs && !frame_might_allow_ref_frame_mvs(cm)) {
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ aom_internal_error(&pbi->error, AOM_CODEC_CORRUPT_FRAME,
"Frame wrongly requests reference frame MVs");
}
@@ -5170,7 +5202,7 @@ uint32_t av1_decode_frame_headers_and_setup(AV1Decoder *pbi,
// Use the default frame context values.
*cm->fc = *cm->default_frame_context;
if (!cm->fc->initialized)
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ aom_internal_error(&pbi->error, AOM_CODEC_CORRUPT_FRAME,
"Uninitialized entropy context.");
}
return uncomp_hdr_size;
@@ -5180,8 +5212,8 @@ uint32_t av1_decode_frame_headers_and_setup(AV1Decoder *pbi,
av1_setup_motion_field(cm);
- av1_setup_block_planes(xd, cm->seq_params.subsampling_x,
- cm->seq_params.subsampling_y, num_planes);
+ av1_setup_block_planes(xd, cm->seq_params->subsampling_x,
+ cm->seq_params->subsampling_y, num_planes);
if (cm->features.primary_ref_frame == PRIMARY_REF_NONE) {
// use the default frame context values
*cm->fc = *cm->default_frame_context;
@@ -5189,7 +5221,7 @@ uint32_t av1_decode_frame_headers_and_setup(AV1Decoder *pbi,
*cm->fc = get_primary_ref_frame_buf(cm)->frame_context;
}
if (!cm->fc->initialized)
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ aom_internal_error(&pbi->error, AOM_CODEC_CORRUPT_FRAME,
"Uninitialized entropy context.");
pbi->dcb.corrupted = 0;
@@ -5207,7 +5239,7 @@ static AOM_INLINE void setup_frame_info(AV1Decoder *pbi) {
av1_alloc_restoration_buffers(cm);
}
#endif
- const int use_highbd = cm->seq_params.use_highbitdepth;
+ const int use_highbd = cm->seq_params->use_highbitdepth;
const int buf_size = MC_TEMP_BUF_PELS << use_highbd;
if (pbi->td.mc_buf_size != buf_size) {
av1_free_mc_tmp_buf(&pbi->td);
@@ -5242,13 +5274,17 @@ void av1_decode_tg_tiles_and_wrapup(AV1Decoder *pbi, const uint8_t *data,
// If the bit stream is monochrome, set the U and V buffers to a constant.
if (num_planes < 3) {
- set_planes_to_neutral_grey(&cm->seq_params, xd->cur_buf, 1);
+ set_planes_to_neutral_grey(cm->seq_params, xd->cur_buf, 1);
}
if (end_tile != tiles->rows * tiles->cols - 1) {
return;
}
+ av1_alloc_cdef_buffers(cm, &pbi->cdef_worker, &pbi->cdef_sync,
+ pbi->num_workers);
+ av1_alloc_cdef_sync(cm, &pbi->cdef_sync, pbi->num_workers);
+
if (!cm->features.allow_intrabc && !tiles->single_tile_decoding) {
if (cm->lf.filter_level[0] || cm->lf.filter_level[1]) {
if (pbi->num_workers > 1) {
@@ -5257,13 +5293,13 @@ void av1_decode_tg_tiles_and_wrapup(AV1Decoder *pbi, const uint8_t *data,
#if CONFIG_LPF_MASK
1,
#endif
- pbi->tile_workers, pbi->num_workers, &pbi->lf_row_sync);
+ pbi->tile_workers, pbi->num_workers, &pbi->lf_row_sync, 0);
} else {
av1_loop_filter_frame(&cm->cur_frame->buf, cm, &pbi->dcb.xd,
#if CONFIG_LPF_MASK
1,
#endif
- 0, num_planes, 0);
+ 0, num_planes, 0, 0);
}
}
@@ -5285,7 +5321,14 @@ void av1_decode_tg_tiles_and_wrapup(AV1Decoder *pbi, const uint8_t *data,
cm, 0);
if (do_cdef) {
- av1_cdef_frame(&pbi->common.cur_frame->buf, cm, &pbi->dcb.xd);
+ if (pbi->num_workers > 1) {
+ av1_cdef_frame_mt(cm, &pbi->dcb.xd, pbi->cdef_worker,
+ pbi->tile_workers, &pbi->cdef_sync,
+ pbi->num_workers, av1_cdef_init_fb_row_mt);
+ } else {
+ av1_cdef_frame(&pbi->common.cur_frame->buf, cm, &pbi->dcb.xd,
+ av1_cdef_init_fb_row);
+ }
}
superres_post_decode(pbi);
@@ -5323,7 +5366,14 @@ void av1_decode_tg_tiles_and_wrapup(AV1Decoder *pbi, const uint8_t *data,
#else
if (!optimized_loop_restoration) {
if (do_cdef) {
- av1_cdef_frame(&pbi->common.cur_frame->buf, cm, &pbi->dcb.xd);
+ if (pbi->num_workers > 1) {
+ av1_cdef_frame_mt(cm, &pbi->dcb.xd, pbi->cdef_worker,
+ pbi->tile_workers, &pbi->cdef_sync,
+ pbi->num_workers, av1_cdef_init_fb_row_mt);
+ } else {
+ av1_cdef_frame(&pbi->common.cur_frame->buf, cm, &pbi->dcb.xd,
+ av1_cdef_init_fb_row);
+ }
}
}
#endif // !CONFIG_REALTIME_ONLY
@@ -5339,7 +5389,7 @@ void av1_decode_tg_tiles_and_wrapup(AV1Decoder *pbi, const uint8_t *data,
av1_reset_cdf_symbol_counters(cm->fc);
}
} else {
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ aom_internal_error(&pbi->error, AOM_CODEC_CORRUPT_FRAME,
"Decode failed. Frame data is corrupted.");
}
diff --git a/third_party/libaom/source/libaom/av1/decoder/decodemv.c b/third_party/libaom/source/libaom/av1/decoder/decodemv.c
index 412be86989..839bda2be6 100644
--- a/third_party/libaom/source/libaom/av1/decoder/decodemv.c
+++ b/third_party/libaom/source/libaom/av1/decoder/decodemv.c
@@ -46,7 +46,7 @@ static void read_cdef(AV1_COMMON *cm, aom_reader *r, MACROBLOCKD *const xd) {
// At the start of a superblock, mark that we haven't yet read CDEF strengths
// for any of the CDEF units contained in this superblock.
- const int sb_mask = (cm->seq_params.mib_size - 1);
+ const int sb_mask = (cm->seq_params->mib_size - 1);
const int mi_row_in_sb = (xd->mi_row & sb_mask);
const int mi_col_in_sb = (xd->mi_col & sb_mask);
if (mi_row_in_sb == 0 && mi_col_in_sb == 0) {
@@ -61,7 +61,7 @@ static void read_cdef(AV1_COMMON *cm, aom_reader *r, MACROBLOCKD *const xd) {
const int index_mask = cdef_size;
const int cdef_unit_row_in_sb = ((xd->mi_row & index_mask) != 0);
const int cdef_unit_col_in_sb = ((xd->mi_col & index_mask) != 0);
- const int index = (cm->seq_params.sb_size == BLOCK_128X128)
+ const int index = (cm->seq_params->sb_size == BLOCK_128X128)
? cdef_unit_col_in_sb + 2 * cdef_unit_row_in_sb
: 0;
@@ -85,12 +85,12 @@ static int read_delta_qindex(AV1_COMMON *cm, const MACROBLOCKD *xd,
aom_reader *r, MB_MODE_INFO *const mbmi) {
int sign, abs, reduced_delta_qindex = 0;
BLOCK_SIZE bsize = mbmi->bsize;
- const int b_col = xd->mi_col & (cm->seq_params.mib_size - 1);
- const int b_row = xd->mi_row & (cm->seq_params.mib_size - 1);
+ const int b_col = xd->mi_col & (cm->seq_params->mib_size - 1);
+ const int b_row = xd->mi_row & (cm->seq_params->mib_size - 1);
const int read_delta_q_flag = (b_col == 0 && b_row == 0);
FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
- if ((bsize != cm->seq_params.sb_size || mbmi->skip_txfm == 0) &&
+ if ((bsize != cm->seq_params->sb_size || mbmi->skip_txfm == 0) &&
read_delta_q_flag) {
abs = aom_read_symbol(r, ec_ctx->delta_q_cdf, DELTA_Q_PROBS + 1, ACCT_STR);
const int smallval = (abs < DELTA_Q_SMALL);
@@ -117,11 +117,11 @@ static int read_delta_lflevel(const AV1_COMMON *const cm, aom_reader *r,
int mi_row) {
int reduced_delta_lflevel = 0;
const BLOCK_SIZE bsize = mbmi->bsize;
- const int b_col = mi_col & (cm->seq_params.mib_size - 1);
- const int b_row = mi_row & (cm->seq_params.mib_size - 1);
+ const int b_col = mi_col & (cm->seq_params->mib_size - 1);
+ const int b_row = mi_row & (cm->seq_params->mib_size - 1);
const int read_delta_lf_flag = (b_col == 0 && b_row == 0);
- if ((bsize != cm->seq_params.sb_size || mbmi->skip_txfm == 0) &&
+ if ((bsize != cm->seq_params->sb_size || mbmi->skip_txfm == 0) &&
read_delta_lf_flag) {
int abs = aom_read_symbol(r, cdf, DELTA_LF_PROBS + 1, ACCT_STR);
const int smallval = (abs < DELTA_LF_SMALL);
@@ -579,7 +579,7 @@ static void read_palette_mode_info(AV1_COMMON *const cm, MACROBLOCKD *const xd,
aom_read_symbol(r, xd->tile_ctx->palette_y_size_cdf[bsize_ctx],
PALETTE_SIZES, ACCT_STR) +
2;
- read_palette_colors_y(xd, cm->seq_params.bit_depth, pmi, r);
+ read_palette_colors_y(xd, cm->seq_params->bit_depth, pmi, r);
}
}
if (num_planes > 1 && mbmi->uv_mode == UV_DC_PRED && xd->is_chroma_ref) {
@@ -591,7 +591,7 @@ static void read_palette_mode_info(AV1_COMMON *const cm, MACROBLOCKD *const xd,
aom_read_symbol(r, xd->tile_ctx->palette_uv_size_cdf[bsize_ctx],
PALETTE_SIZES, ACCT_STR) +
2;
- read_palette_colors_uv(xd, cm->seq_params.bit_depth, pmi, r);
+ read_palette_colors_uv(xd, cm->seq_params->bit_depth, pmi, r);
}
}
}
@@ -682,7 +682,7 @@ static INLINE int assign_dv(AV1_COMMON *cm, MACROBLOCKD *xd, int_mv *mv,
mv->as_mv.row = (mv->as_mv.row >> 3) * 8;
int valid = is_mv_valid(&mv->as_mv) &&
av1_is_dv_valid(mv->as_mv, cm, xd, mi_row, mi_col, bsize,
- cm->seq_params.mib_size_log2);
+ cm->seq_params->mib_size_log2);
return valid;
}
@@ -711,7 +711,7 @@ static void read_intrabc_info(AV1_COMMON *const cm, DecoderCodingBlock *dcb,
av1_find_best_ref_mvs(0, ref_mvs[INTRA_FRAME], &nearestmv, &nearmv, 0);
int_mv dv_ref = nearestmv.as_int == 0 ? nearmv : nearestmv;
if (dv_ref.as_int == 0)
- av1_find_ref_dv(&dv_ref, &xd->tile, cm->seq_params.mib_size, xd->mi_row);
+ av1_find_ref_dv(&dv_ref, &xd->tile, cm->seq_params->mib_size, xd->mi_row);
// Ref DV should not have sub-pel.
int valid_dv = (dv_ref.as_mv.col & 7) == 0 && (dv_ref.as_mv.row & 7) == 0;
dv_ref.as_mv.col = (dv_ref.as_mv.col >> 3) * 8;
@@ -816,7 +816,7 @@ static void read_intra_frame_mode_info(AV1_COMMON *const cm,
? read_angle_delta(r, ec_ctx->angle_delta_cdf[mbmi->mode - V_PRED])
: 0;
- if (!cm->seq_params.monochrome && xd->is_chroma_ref) {
+ if (!cm->seq_params->monochrome && xd->is_chroma_ref) {
mbmi->uv_mode =
read_intra_mode_uv(ec_ctx, r, is_cfl_allowed(xd), mbmi->mode);
if (mbmi->uv_mode == UV_CFL_PRED) {
@@ -1076,7 +1076,7 @@ static void read_intra_block_mode_info(AV1_COMMON *const cm,
use_angle_delta && av1_is_directional_mode(mbmi->mode)
? read_angle_delta(r, ec_ctx->angle_delta_cdf[mbmi->mode - V_PRED])
: 0;
- if (!cm->seq_params.monochrome && xd->is_chroma_ref) {
+ if (!cm->seq_params->monochrome && xd->is_chroma_ref) {
mbmi->uv_mode =
read_intra_mode_uv(ec_ctx, r, is_cfl_allowed(xd), mbmi->mode);
if (mbmi->uv_mode == UV_CFL_PRED) {
@@ -1375,7 +1375,7 @@ static void read_inter_block_mode_info(AV1Decoder *const pbi,
aom_merge_corrupted_flag(&dcb->corrupted, mv_corrupted_flag);
mbmi->use_wedge_interintra = 0;
- if (cm->seq_params.enable_interintra_compound && !mbmi->skip_mode &&
+ if (cm->seq_params->enable_interintra_compound && !mbmi->skip_mode &&
is_interintra_allowed(mbmi)) {
const int bsize_group = size_group_lookup[bsize];
const int interintra =
@@ -1423,7 +1423,7 @@ static void read_inter_block_mode_info(AV1Decoder *const pbi,
if (has_second_ref(mbmi) && !mbmi->skip_mode) {
// Read idx to indicate current compound inter prediction mode group
const int masked_compound_used = is_any_masked_compound_used(bsize) &&
- cm->seq_params.enable_masked_compound;
+ cm->seq_params->enable_masked_compound;
if (masked_compound_used) {
const int ctx_comp_group_idx = get_comp_group_idx_context(xd);
@@ -1432,7 +1432,7 @@ static void read_inter_block_mode_info(AV1Decoder *const pbi,
}
if (mbmi->comp_group_idx == 0) {
- if (cm->seq_params.order_hint_info.enable_dist_wtd_comp) {
+ if (cm->seq_params->order_hint_info.enable_dist_wtd_comp) {
const int comp_index_ctx = get_comp_index_context(cm, xd);
mbmi->compound_idx = (uint8_t)aom_read_symbol(
r, ec_ctx->compound_index_cdf[comp_index_ctx], 2, ACCT_STR);
@@ -1473,7 +1473,7 @@ static void read_inter_block_mode_info(AV1Decoder *const pbi,
}
read_mb_interp_filter(xd, features->interp_filter,
- cm->seq_params.enable_dual_filter, mbmi, r);
+ cm->seq_params->enable_dual_filter, mbmi, r);
#if !CONFIG_REALTIME_ONLY
if (mbmi->motion_mode == WARPED_CAUSAL) {
@@ -1573,11 +1573,11 @@ void av1_read_mode_info(AV1Decoder *const pbi, DecoderCodingBlock *dcb,
if (frame_is_intra_only(cm)) {
read_intra_frame_mode_info(cm, dcb, r);
- if (cm->seq_params.order_hint_info.enable_ref_frame_mvs)
+ if (cm->seq_params->order_hint_info.enable_ref_frame_mvs)
intra_copy_frame_mvs(cm, xd->mi_row, xd->mi_col, x_mis, y_mis);
} else {
read_inter_frame_mode_info(pbi, dcb, r);
- if (cm->seq_params.order_hint_info.enable_ref_frame_mvs)
+ if (cm->seq_params->order_hint_info.enable_ref_frame_mvs)
av1_copy_frame_mvs(cm, mi, xd->mi_row, xd->mi_col, x_mis, y_mis);
}
}
diff --git a/third_party/libaom/source/libaom/av1/decoder/decoder.c b/third_party/libaom/source/libaom/av1/decoder/decoder.c
index 1680734a09..40dd71cea2 100644
--- a/third_party/libaom/source/libaom/av1/decoder/decoder.c
+++ b/third_party/libaom/source/libaom/av1/decoder/decoder.c
@@ -97,17 +97,19 @@ AV1Decoder *av1_decoder_create(BufferPool *const pool) {
av1_zero(*pbi);
AV1_COMMON *volatile const cm = &pbi->common;
+ cm->seq_params = &pbi->seq_params;
+ cm->error = &pbi->error;
// The jmp_buf is valid only for the duration of the function that calls
// setjmp(). Therefore, this function must reset the 'setjmp' field to 0
// before it returns.
- if (setjmp(cm->error.jmp)) {
- cm->error.setjmp = 0;
+ if (setjmp(pbi->error.jmp)) {
+ pbi->error.setjmp = 0;
av1_decoder_remove(pbi);
return NULL;
}
- cm->error.setjmp = 1;
+ pbi->error.setjmp = 1;
CHECK_MEM_ERROR(cm, cm->fc,
(FRAME_CONTEXT *)aom_memalign(32, sizeof(*cm->fc)));
@@ -129,7 +131,7 @@ AV1Decoder *av1_decoder_create(BufferPool *const pool) {
pbi->decoding_first_frame = 1;
pbi->common.buffer_pool = pool;
- cm->seq_params.bit_depth = AOM_BITS_8;
+ cm->seq_params->bit_depth = AOM_BITS_8;
cm->mi_params.free_mi = dec_free_mi;
cm->mi_params.setup_mi = dec_setup_mi;
@@ -146,7 +148,7 @@ AV1Decoder *av1_decoder_create(BufferPool *const pool) {
aom_accounting_init(&pbi->accounting);
#endif
- cm->error.setjmp = 0;
+ pbi->error.setjmp = 0;
aom_get_worker_interface()->init(&pbi->lf_worker);
pbi->lf_worker.thread_name = "aom lf worker";
@@ -194,6 +196,7 @@ void av1_decoder_remove(AV1Decoder *pbi) {
}
aom_free(pbi->thread_data);
}
+ aom_free(pbi->dcb.xd.seg_mask);
for (i = 0; i < pbi->num_workers; ++i) {
AVxWorker *const worker = &pbi->tile_workers[i];
@@ -261,16 +264,16 @@ aom_codec_err_t av1_copy_reference_dec(AV1Decoder *pbi, int idx,
const YV12_BUFFER_CONFIG *const cfg = get_ref_frame(cm, idx);
if (cfg == NULL) {
- aom_internal_error(&cm->error, AOM_CODEC_ERROR, "No reference frame");
+ aom_internal_error(&pbi->error, AOM_CODEC_ERROR, "No reference frame");
return AOM_CODEC_ERROR;
}
if (!equal_dimensions(cfg, sd))
- aom_internal_error(&cm->error, AOM_CODEC_ERROR,
+ aom_internal_error(&pbi->error, AOM_CODEC_ERROR,
"Incorrect buffer dimensions");
else
aom_yv12_copy_frame(cfg, sd, num_planes);
- return cm->error.error_code;
+ return pbi->error.error_code;
}
static int equal_dimensions_and_border(const YV12_BUFFER_CONFIG *a,
@@ -293,13 +296,13 @@ aom_codec_err_t av1_set_reference_dec(AV1_COMMON *cm, int idx,
ref_buf = get_ref_frame(cm, idx);
if (ref_buf == NULL) {
- aom_internal_error(&cm->error, AOM_CODEC_ERROR, "No reference frame");
+ aom_internal_error(cm->error, AOM_CODEC_ERROR, "No reference frame");
return AOM_CODEC_ERROR;
}
if (!use_external_ref) {
if (!equal_dimensions(ref_buf, sd)) {
- aom_internal_error(&cm->error, AOM_CODEC_ERROR,
+ aom_internal_error(cm->error, AOM_CODEC_ERROR,
"Incorrect buffer dimensions");
} else {
// Overwrite the reference frame buffer.
@@ -307,7 +310,7 @@ aom_codec_err_t av1_set_reference_dec(AV1_COMMON *cm, int idx,
}
} else {
if (!equal_dimensions_and_border(ref_buf, sd)) {
- aom_internal_error(&cm->error, AOM_CODEC_ERROR,
+ aom_internal_error(cm->error, AOM_CODEC_ERROR,
"Incorrect buffer dimensions");
} else {
// Overwrite the reference frame buffer pointers.
@@ -323,7 +326,7 @@ aom_codec_err_t av1_set_reference_dec(AV1_COMMON *cm, int idx,
}
}
- return cm->error.error_code;
+ return cm->error->error_code;
}
aom_codec_err_t av1_copy_new_frame_dec(AV1_COMMON *cm,
@@ -332,12 +335,12 @@ aom_codec_err_t av1_copy_new_frame_dec(AV1_COMMON *cm,
const int num_planes = av1_num_planes(cm);
if (!equal_dimensions_and_border(new_frame, sd))
- aom_internal_error(&cm->error, AOM_CODEC_ERROR,
+ aom_internal_error(cm->error, AOM_CODEC_ERROR,
"Incorrect buffer dimensions");
else
aom_yv12_copy_frame(new_frame, sd, num_planes);
- return cm->error.error_code;
+ return cm->error->error_code;
}
static void release_current_frame(AV1Decoder *pbi) {
@@ -355,7 +358,7 @@ static void release_current_frame(AV1Decoder *pbi) {
// Consumes a reference to cm->cur_frame.
//
// This functions returns void. It reports failure by setting
-// cm->error.error_code.
+// pbi->error.error_code.
static void update_frame_buffers(AV1Decoder *pbi, int frame_decoded) {
int ref_index = 0, mask;
AV1_COMMON *const cm = &pbi->common;
@@ -388,7 +391,7 @@ static void update_frame_buffers(AV1Decoder *pbi, int frame_decoded) {
// error
cm->cur_frame->buf.corrupted = 1;
decrease_ref_count(cm->cur_frame, pool);
- cm->error.error_code = AOM_CODEC_UNSUP_BITSTREAM;
+ pbi->error.error_code = AOM_CODEC_UNSUP_BITSTREAM;
} else {
pbi->output_frames[pbi->num_output_frames] = cm->cur_frame;
pbi->num_output_frames++;
@@ -427,8 +430,8 @@ int av1_receive_compressed_data(AV1Decoder *pbi, size_t size,
const uint8_t **psource) {
AV1_COMMON *volatile const cm = &pbi->common;
const uint8_t *source = *psource;
- cm->error.error_code = AOM_CODEC_OK;
- cm->error.has_detail = 0;
+ pbi->error.error_code = AOM_CODEC_OK;
+ pbi->error.has_detail = 0;
if (size == 0) {
// This is used to signal that we are missing frames.
@@ -444,18 +447,18 @@ int av1_receive_compressed_data(AV1Decoder *pbi, size_t size,
}
if (assign_cur_frame_new_fb(cm) == NULL) {
- cm->error.error_code = AOM_CODEC_MEM_ERROR;
+ pbi->error.error_code = AOM_CODEC_MEM_ERROR;
return 1;
}
// The jmp_buf is valid only for the duration of the function that calls
// setjmp(). Therefore, this function must reset the 'setjmp' field to 0
// before it returns.
- if (setjmp(cm->error.jmp)) {
+ if (setjmp(pbi->error.jmp)) {
const AVxWorkerInterface *const winterface = aom_get_worker_interface();
int i;
- cm->error.setjmp = 0;
+ pbi->error.setjmp = 0;
// Synchronize all threads immediately as a subsequent decode call may
// cause a resize invalidating some allocations.
@@ -469,15 +472,15 @@ int av1_receive_compressed_data(AV1Decoder *pbi, size_t size,
return -1;
}
- cm->error.setjmp = 1;
+ pbi->error.setjmp = 1;
int frame_decoded =
aom_decode_frame_from_obus(pbi, source, source + size, psource);
if (frame_decoded < 0) {
- assert(cm->error.error_code != AOM_CODEC_OK);
+ assert(pbi->error.error_code != AOM_CODEC_OK);
release_current_frame(pbi);
- cm->error.setjmp = 0;
+ pbi->error.setjmp = 0;
return 1;
}
@@ -498,8 +501,8 @@ int av1_receive_compressed_data(AV1Decoder *pbi, size_t size,
pbi->decoding_first_frame = 0;
}
- if (cm->error.error_code != AOM_CODEC_OK) {
- cm->error.setjmp = 0;
+ if (pbi->error.error_code != AOM_CODEC_OK) {
+ pbi->error.setjmp = 0;
return 1;
}
@@ -518,7 +521,7 @@ int av1_receive_compressed_data(AV1Decoder *pbi, size_t size,
}
// Update progress in frame parallel decode.
- cm->error.setjmp = 0;
+ pbi->error.setjmp = 0;
return 0;
}
diff --git a/third_party/libaom/source/libaom/av1/decoder/decoder.h b/third_party/libaom/source/libaom/av1/decoder/decoder.h
index b20e9c1dda..226b9dca85 100644
--- a/third_party/libaom/source/libaom/av1/decoder/decoder.h
+++ b/third_party/libaom/source/libaom/av1/decoder/decoder.h
@@ -112,6 +112,8 @@ typedef struct ThreadData {
// Motion compensation buffer used to get a prediction buffer with extended
// borders. One buffer for each of the two possible references.
uint8_t *mc_buf[2];
+ // Mask for this block used for compound prediction.
+ uint8_t *seg_mask;
// Allocated size of 'mc_buf'.
int32_t mc_buf_size;
// If true, the pointers in 'mc_buf' were converted from highbd pointers.
@@ -227,6 +229,8 @@ typedef struct AV1Decoder {
AV1LfSync lf_row_sync;
AV1LrSync lr_row_sync;
AV1LrStruct lr_ctxt;
+ AV1CdefSync cdef_sync;
+ AV1CdefWorkerData *cdef_worker;
AVxWorker *tile_workers;
int num_workers;
DecWorkerData *thread_data;
@@ -330,6 +334,32 @@ typedef struct AV1Decoder {
int is_arf_frame_present;
int num_tile_groups;
aom_s_frame_info sframe_info;
+
+ /*!
+ * Elements part of the sequence header, that are applicable for all the
+ * frames in the video.
+ */
+ SequenceHeader seq_params;
+
+ /*!
+ * If true, buffer removal times are present.
+ */
+ bool buffer_removal_time_present;
+
+ /*!
+ * Code and details about current error status.
+ */
+ struct aom_internal_error_info error;
+
+ /*!
+ * Number of temporal layers: may be > 1 for SVC (scalable vector coding).
+ */
+ unsigned int number_temporal_layers;
+
+ /*!
+ * Number of spatial layers: may be > 1 for SVC (scalable vector coding).
+ */
+ unsigned int number_spatial_layers;
} AV1Decoder;
// Returns 0 on success. Sets pbi->common.error.error_code to a nonzero error
diff --git a/third_party/libaom/source/libaom/av1/decoder/obu.c b/third_party/libaom/source/libaom/av1/decoder/obu.c
index d3d1f0e8be..6c80148cc9 100644
--- a/third_party/libaom/source/libaom/av1/decoder/obu.c
+++ b/third_party/libaom/source/libaom/av1/decoder/obu.c
@@ -69,7 +69,7 @@ static int byte_alignment(AV1_COMMON *const cm,
struct aom_read_bit_buffer *const rb) {
while (rb->bit_offset & 7) {
if (aom_rb_read_bit(rb)) {
- cm->error.error_code = AOM_CODEC_CORRUPT_FRAME;
+ cm->error->error_code = AOM_CODEC_CORRUPT_FRAME;
return -1;
}
}
@@ -110,12 +110,12 @@ static uint32_t read_sequence_header_obu(AV1Decoder *pbi,
// Use a local variable to store the information as we decode. At the end,
// if no errors have occurred, cm->seq_params is updated.
- SequenceHeader sh = cm->seq_params;
+ SequenceHeader sh = *cm->seq_params;
SequenceHeader *const seq_params = &sh;
seq_params->profile = av1_read_profile(rb);
if (seq_params->profile > CONFIG_MAX_DECODE_PROFILE) {
- cm->error.error_code = AOM_CODEC_UNSUP_BITSTREAM;
+ pbi->error.error_code = AOM_CODEC_UNSUP_BITSTREAM;
return 0;
}
@@ -124,7 +124,7 @@ static uint32_t read_sequence_header_obu(AV1Decoder *pbi,
seq_params->reduced_still_picture_hdr = aom_rb_read_bit(rb);
// Video must have reduced_still_picture_hdr = 0
if (!seq_params->still_picture && seq_params->reduced_still_picture_hdr) {
- cm->error.error_code = AOM_CODEC_UNSUP_BITSTREAM;
+ pbi->error.error_code = AOM_CODEC_UNSUP_BITSTREAM;
return 0;
}
@@ -135,7 +135,7 @@ static uint32_t read_sequence_header_obu(AV1Decoder *pbi,
seq_params->operating_points_cnt_minus_1 = 0;
seq_params->operating_point_idc[0] = 0;
if (!read_bitstream_level(&seq_params->seq_level_idx[0], rb)) {
- cm->error.error_code = AOM_CODEC_UNSUP_BITSTREAM;
+ pbi->error.error_code = AOM_CODEC_UNSUP_BITSTREAM;
return 0;
}
seq_params->tier[0] = 0;
@@ -144,7 +144,7 @@ static uint32_t read_sequence_header_obu(AV1Decoder *pbi,
} else {
seq_params->timing_info_present = aom_rb_read_bit(rb);
if (seq_params->timing_info_present) {
- av1_read_timing_info_header(&seq_params->timing_info, &cm->error, rb);
+ av1_read_timing_info_header(&seq_params->timing_info, &pbi->error, rb);
seq_params->decoder_model_info_present_flag = aom_rb_read_bit(rb);
if (seq_params->decoder_model_info_present_flag)
@@ -159,7 +159,7 @@ static uint32_t read_sequence_header_obu(AV1Decoder *pbi,
seq_params->operating_point_idc[i] =
aom_rb_read_literal(rb, OP_POINTS_IDC_BITS);
if (!read_bitstream_level(&seq_params->seq_level_idx[i], rb)) {
- cm->error.error_code = AOM_CODEC_UNSUP_BITSTREAM;
+ pbi->error.error_code = AOM_CODEC_UNSUP_BITSTREAM;
return 0;
}
// This is the seq_level_idx[i] > 7 check in the spec. seq_level_idx 7
@@ -188,7 +188,7 @@ static uint32_t read_sequence_header_obu(AV1Decoder *pbi,
// Level with seq_level_idx = 31 returns a high "dummy" bitrate to pass
// the check
if (seq_params->op_params[i].bitrate == 0)
- aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
+ aom_internal_error(&pbi->error, AOM_CODEC_UNSUP_BITSTREAM,
"AV1 does not support this combination of "
"profile, level, and tier.");
// Buffer size in bits/s is bitrate in bits/s * 1 s
@@ -212,7 +212,7 @@ static uint32_t read_sequence_header_obu(AV1Decoder *pbi,
aom_rb_read_literal(rb, 4) + 1;
if (seq_params->op_params[i].initial_display_delay > 10)
aom_internal_error(
- &cm->error, AOM_CODEC_UNSUP_BITSTREAM,
+ &pbi->error, AOM_CODEC_UNSUP_BITSTREAM,
"AV1 does not support more than 10 decoded frames delay");
} else {
seq_params->op_params[i].initial_display_delay = 10;
@@ -232,19 +232,19 @@ static uint32_t read_sequence_header_obu(AV1Decoder *pbi,
pbi->current_operating_point =
seq_params->operating_point_idc[operating_point];
if (aom_get_num_layers_from_operating_point_idc(
- pbi->current_operating_point, &cm->number_spatial_layers,
- &cm->number_temporal_layers) != AOM_CODEC_OK) {
- cm->error.error_code = AOM_CODEC_ERROR;
+ pbi->current_operating_point, &pbi->number_spatial_layers,
+ &pbi->number_temporal_layers) != AOM_CODEC_OK) {
+ pbi->error.error_code = AOM_CODEC_ERROR;
return 0;
}
av1_read_sequence_header(cm, rb, seq_params);
- av1_read_color_config(rb, pbi->allow_lowbitdepth, seq_params, &cm->error);
+ av1_read_color_config(rb, pbi->allow_lowbitdepth, seq_params, &pbi->error);
if (!(seq_params->subsampling_x == 0 && seq_params->subsampling_y == 0) &&
!(seq_params->subsampling_x == 1 && seq_params->subsampling_y == 1) &&
!(seq_params->subsampling_x == 1 && seq_params->subsampling_y == 0)) {
- aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
+ aom_internal_error(&pbi->error, AOM_CODEC_UNSUP_BITSTREAM,
"Only 4:4:4, 4:2:2 and 4:2:0 are currently supported, "
"%d %d subsampling is not supported.\n",
seq_params->subsampling_x, seq_params->subsampling_y);
@@ -253,18 +253,18 @@ static uint32_t read_sequence_header_obu(AV1Decoder *pbi,
seq_params->film_grain_params_present = aom_rb_read_bit(rb);
if (av1_check_trailing_bits(pbi, rb) != 0) {
- // cm->error.error_code is already set.
+ // pbi->error.error_code is already set.
return 0;
}
// If a sequence header has been decoded before, we check if the new
// one is consistent with the old one.
if (pbi->sequence_header_ready) {
- if (!are_seq_headers_consistent(&cm->seq_params, seq_params))
+ if (!are_seq_headers_consistent(cm->seq_params, seq_params))
pbi->sequence_header_changed = 1;
}
- cm->seq_params = *seq_params;
+ *cm->seq_params = *seq_params;
pbi->sequence_header_ready = 1;
return ((rb->bit_offset - saved_bit_offset + 7) >> 3);
@@ -303,7 +303,7 @@ static int32_t read_tile_group_header(AV1Decoder *pbi,
tile_start_and_end_present_flag = aom_rb_read_bit(rb);
if (tile_start_implicit && tile_start_and_end_present_flag) {
aom_internal_error(
- &cm->error, AOM_CODEC_UNSUP_BITSTREAM,
+ &pbi->error, AOM_CODEC_UNSUP_BITSTREAM,
"For OBU_FRAME type obu tile_start_and_end_present_flag must be 0");
return -1;
}
@@ -318,20 +318,20 @@ static int32_t read_tile_group_header(AV1Decoder *pbi,
*end_tile = aom_rb_read_literal(rb, tile_bits);
}
if (*start_tile != pbi->next_start_tile) {
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ aom_internal_error(&pbi->error, AOM_CODEC_CORRUPT_FRAME,
"tg_start (%d) must be equal to %d", *start_tile,
pbi->next_start_tile);
return -1;
}
if (*start_tile > *end_tile) {
aom_internal_error(
- &cm->error, AOM_CODEC_CORRUPT_FRAME,
+ &pbi->error, AOM_CODEC_CORRUPT_FRAME,
"tg_end (%d) must be greater than or equal to tg_start (%d)", *end_tile,
*start_tile);
return -1;
}
if (*end_tile >= num_tiles) {
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ aom_internal_error(&pbi->error, AOM_CODEC_CORRUPT_FRAME,
"tg_end (%d) must be less than NumTiles (%d)", *end_tile,
num_tiles);
return -1;
@@ -388,15 +388,16 @@ static void alloc_tile_list_buffer(AV1Decoder *pbi) {
(pbi->output_frame_height_in_tiles_minus_1 + 1));
// Allocate the tile list output buffer.
- // Note: if cm->seq_params.use_highbitdepth is 1 and cm->seq_params.bit_depth
- // is 8, we could allocate less memory, namely, 8 bits/pixel.
+ // Note: if cm->seq_params->use_highbitdepth is 1 and
+ // cm->seq_params->bit_depth is 8, we could allocate less memory, namely, 8
+ // bits/pixel.
if (aom_alloc_frame_buffer(&pbi->tile_list_outbuf, output_frame_width,
- output_frame_height, cm->seq_params.subsampling_x,
- cm->seq_params.subsampling_y,
- (cm->seq_params.use_highbitdepth &&
- (cm->seq_params.bit_depth > AOM_BITS_8)),
+ output_frame_height, cm->seq_params->subsampling_x,
+ cm->seq_params->subsampling_y,
+ (cm->seq_params->use_highbitdepth &&
+ (cm->seq_params->bit_depth > AOM_BITS_8)),
0, cm->features.byte_alignment))
- aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
+ aom_internal_error(&pbi->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate the tile list output buffer");
}
@@ -430,8 +431,8 @@ static void copy_decoded_tile_to_tile_list_buffer(AV1Decoder *pbi,
av1_get_uniform_tile_size(cm, &tile_width, &tile_height);
const int tile_width_in_pixels = tile_width * MI_SIZE;
const int tile_height_in_pixels = tile_height * MI_SIZE;
- const int ssy = cm->seq_params.subsampling_y;
- const int ssx = cm->seq_params.subsampling_x;
+ const int ssy = cm->seq_params->subsampling_y;
+ const int ssx = cm->seq_params->subsampling_x;
const int num_planes = av1_num_planes(cm);
YV12_BUFFER_CONFIG *cur_frame = &cm->cur_frame->buf;
@@ -455,8 +456,8 @@ static void copy_decoded_tile_to_tile_list_buffer(AV1Decoder *pbi,
int vstart2 = tr * h;
int hstart2 = tc * w;
- if (cm->seq_params.use_highbitdepth &&
- cm->seq_params.bit_depth == AOM_BITS_8) {
+ if (cm->seq_params->use_highbitdepth &&
+ cm->seq_params->bit_depth == AOM_BITS_8) {
yv12_tile_copy(cur_frame, hstart1, hend1, vstart1, vend1,
&pbi->tile_list_outbuf, hstart2, vstart2, plane);
} else {
@@ -501,7 +502,7 @@ static uint32_t read_and_decode_one_tile_list(AV1Decoder *pbi,
pbi->output_frame_height_in_tiles_minus_1 = aom_rb_read_literal(rb, 8);
pbi->tile_count_minus_1 = aom_rb_read_literal(rb, 16);
if (pbi->tile_count_minus_1 > MAX_TILES - 1) {
- cm->error.error_code = AOM_CODEC_CORRUPT_FRAME;
+ pbi->error.error_code = AOM_CODEC_CORRUPT_FRAME;
return 0;
}
@@ -524,7 +525,7 @@ static uint32_t read_and_decode_one_tile_list(AV1Decoder *pbi,
// Set reference for each tile.
int ref_idx = aom_rb_read_literal(rb, 8);
if (ref_idx >= MAX_EXTERNAL_REFERENCES) {
- cm->error.error_code = AOM_CODEC_CORRUPT_FRAME;
+ pbi->error.error_code = AOM_CODEC_CORRUPT_FRAME;
return 0;
}
av1_set_reference_dec(cm, cm->remapped_ref_idx[0], 1,
@@ -535,14 +536,14 @@ static uint32_t read_and_decode_one_tile_list(AV1Decoder *pbi,
if (pbi->dec_tile_row < 0 || pbi->dec_tile_col < 0 ||
pbi->dec_tile_row >= cm->tiles.rows ||
pbi->dec_tile_col >= cm->tiles.cols) {
- cm->error.error_code = AOM_CODEC_CORRUPT_FRAME;
+ pbi->error.error_code = AOM_CODEC_CORRUPT_FRAME;
return 0;
}
pbi->coded_tile_data_size = aom_rb_read_literal(rb, 16) + 1;
data += tile_info_bytes;
if ((size_t)(data_end - data) < pbi->coded_tile_data_size) {
- cm->error.error_code = AOM_CODEC_CORRUPT_FRAME;
+ pbi->error.error_code = AOM_CODEC_CORRUPT_FRAME;
return 0;
}
@@ -581,18 +582,17 @@ static void alloc_read_metadata(AV1Decoder *const pbi,
OBU_METADATA_TYPE metadata_type,
const uint8_t *data, size_t sz,
aom_metadata_insert_flags_t insert_flag) {
- AV1_COMMON *const cm = &pbi->common;
if (!pbi->metadata) {
pbi->metadata = aom_img_metadata_array_alloc(0);
if (!pbi->metadata) {
- aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
+ aom_internal_error(&pbi->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate metadata array");
}
}
aom_metadata_t *metadata =
aom_img_metadata_alloc(metadata_type, data, sz, insert_flag);
if (!metadata) {
- aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
+ aom_internal_error(&pbi->error, AOM_CODEC_MEM_ERROR,
"Error allocating metadata");
}
aom_metadata_t **metadata_array =
@@ -600,7 +600,7 @@ static void alloc_read_metadata(AV1Decoder *const pbi,
(pbi->metadata->sz + 1) * sizeof(metadata));
if (!metadata_array) {
aom_img_metadata_free(metadata);
- aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
+ aom_internal_error(&pbi->error, AOM_CODEC_MEM_ERROR,
"Error growing metadata array");
}
pbi->metadata->metadata_array = metadata_array;
@@ -611,22 +611,21 @@ static void alloc_read_metadata(AV1Decoder *const pbi,
// On failure, calls aom_internal_error() and does not return.
static void read_metadata_itut_t35(AV1Decoder *const pbi, const uint8_t *data,
size_t sz) {
- AV1_COMMON *const cm = &pbi->common;
if (sz == 0) {
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ aom_internal_error(&pbi->error, AOM_CODEC_CORRUPT_FRAME,
"itu_t_t35_country_code is missing");
}
int country_code_size = 1;
if (*data == 0xFF) {
if (sz == 1) {
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ aom_internal_error(&pbi->error, AOM_CODEC_CORRUPT_FRAME,
"itu_t_t35_country_code_extension_byte is missing");
}
++country_code_size;
}
int end_index = get_last_nonzero_byte_index(data, sz);
if (end_index < country_code_size) {
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ aom_internal_error(&pbi->error, AOM_CODEC_CORRUPT_FRAME,
"No trailing bits found in ITU-T T.35 metadata OBU");
}
// itu_t_t35_payload_bytes is byte aligned. Section 6.7.2 of the spec says:
@@ -634,7 +633,7 @@ static void read_metadata_itut_t35(AV1Decoder *const pbi, const uint8_t *data,
// specified in Recommendation ITU-T T.35.
// Therefore the first trailing byte should be 0x80.
if (data[end_index] != 0x80) {
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ aom_internal_error(&pbi->error, AOM_CODEC_CORRUPT_FRAME,
"The last nonzero byte of the ITU-T T.35 metadata OBU "
"is 0x%02x, should be 0x80.",
data[end_index]);
@@ -648,9 +647,8 @@ static void read_metadata_itut_t35(AV1Decoder *const pbi, const uint8_t *data,
static size_t read_metadata_hdr_cll(AV1Decoder *const pbi, const uint8_t *data,
size_t sz) {
const size_t kHdrCllPayloadSize = 4;
- AV1_COMMON *const cm = &pbi->common;
if (sz < kHdrCllPayloadSize) {
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ aom_internal_error(&pbi->error, AOM_CODEC_CORRUPT_FRAME,
"Incorrect HDR CLL metadata payload size");
}
alloc_read_metadata(pbi, OBU_METADATA_TYPE_HDR_CLL, data, kHdrCllPayloadSize,
@@ -663,9 +661,8 @@ static size_t read_metadata_hdr_cll(AV1Decoder *const pbi, const uint8_t *data,
static size_t read_metadata_hdr_mdcv(AV1Decoder *const pbi, const uint8_t *data,
size_t sz) {
const size_t kMdcvPayloadSize = 24;
- AV1_COMMON *const cm = &pbi->common;
if (sz < kMdcvPayloadSize) {
- aom_internal_error(&cm->error, AOM_CODEC_CORRUPT_FRAME,
+ aom_internal_error(&pbi->error, AOM_CODEC_CORRUPT_FRAME,
"Incorrect HDR MDCV metadata payload size");
}
alloc_read_metadata(pbi, OBU_METADATA_TYPE_HDR_MDCV, data, kMdcvPayloadSize,
@@ -770,11 +767,10 @@ static uint8_t get_last_nonzero_byte(const uint8_t *data, size_t sz) {
// pbi->common.error.error_code and returns 0, or calls aom_internal_error()
// and does not return.
static size_t read_metadata(AV1Decoder *pbi, const uint8_t *data, size_t sz) {
- AV1_COMMON *const cm = &pbi->common;
size_t type_length;
uint64_t type_value;
if (aom_uleb_decode(data, sz, &type_value, &type_length) < 0) {
- cm->error.error_code = AOM_CODEC_CORRUPT_FRAME;
+ pbi->error.error_code = AOM_CODEC_CORRUPT_FRAME;
return 0;
}
const OBU_METADATA_TYPE metadata_type = (OBU_METADATA_TYPE)type_value;
@@ -782,7 +778,7 @@ static size_t read_metadata(AV1Decoder *pbi, const uint8_t *data, size_t sz) {
// If metadata_type is reserved for future use or a user private value,
// ignore the entire OBU and just check trailing bits.
if (get_last_nonzero_byte(data + type_length, sz - type_length) == 0) {
- cm->error.error_code = AOM_CODEC_CORRUPT_FRAME;
+ pbi->error.error_code = AOM_CODEC_CORRUPT_FRAME;
return 0;
}
return sz;
@@ -796,7 +792,7 @@ static size_t read_metadata(AV1Decoder *pbi, const uint8_t *data, size_t sz) {
type_length +
read_metadata_hdr_cll(pbi, data + type_length, sz - type_length);
if (get_last_nonzero_byte(data + bytes_read, sz - bytes_read) != 0x80) {
- cm->error.error_code = AOM_CODEC_CORRUPT_FRAME;
+ pbi->error.error_code = AOM_CODEC_CORRUPT_FRAME;
return 0;
}
return sz;
@@ -805,7 +801,7 @@ static size_t read_metadata(AV1Decoder *pbi, const uint8_t *data, size_t sz) {
type_length +
read_metadata_hdr_mdcv(pbi, data + type_length, sz - type_length);
if (get_last_nonzero_byte(data + bytes_read, sz - bytes_read) != 0x80) {
- cm->error.error_code = AOM_CODEC_CORRUPT_FRAME;
+ pbi->error.error_code = AOM_CODEC_CORRUPT_FRAME;
return 0;
}
return sz;
@@ -820,7 +816,7 @@ static size_t read_metadata(AV1Decoder *pbi, const uint8_t *data, size_t sz) {
read_metadata_timecode(&rb);
}
if (av1_check_trailing_bits(pbi, &rb) != 0) {
- // cm->error.error_code is already set.
+ // pbi->error.error_code is already set.
return 0;
}
assert((rb.bit_offset & 7) == 0);
@@ -838,7 +834,7 @@ static size_t read_padding(AV1_COMMON *const cm, const uint8_t *data,
// trailing byte should be 0x80. See https://crbug.com/aomedia/2393.
const uint8_t last_nonzero_byte = get_last_nonzero_byte(data, sz);
if (last_nonzero_byte != 0x80) {
- cm->error.error_code = AOM_CODEC_CORRUPT_FRAME;
+ cm->error->error_code = AOM_CODEC_CORRUPT_FRAME;
return 0;
}
}
@@ -846,7 +842,7 @@ static size_t read_padding(AV1_COMMON *const cm, const uint8_t *data,
}
// On success, returns a boolean that indicates whether the decoding of the
-// current frame is finished. On failure, sets cm->error.error_code and
+// current frame is finished. On failure, sets pbi->error.error_code and
// returns -1.
int aom_decode_frame_from_obus(struct AV1Decoder *pbi, const uint8_t *data,
const uint8_t *data_end,
@@ -872,7 +868,7 @@ int aom_decode_frame_from_obus(struct AV1Decoder *pbi, const uint8_t *data,
pbi->num_tile_groups = 0;
if (data_end < data) {
- cm->error.error_code = AOM_CODEC_CORRUPT_FRAME;
+ pbi->error.error_code = AOM_CODEC_CORRUPT_FRAME;
return -1;
}
@@ -880,7 +876,7 @@ int aom_decode_frame_from_obus(struct AV1Decoder *pbi, const uint8_t *data,
if (!cm->tiles.large_scale) pbi->camera_frame_header_ready = 0;
// decode frame as a series of OBUs
- while (!frame_decoding_finished && cm->error.error_code == AOM_CODEC_OK) {
+ while (!frame_decoding_finished && pbi->error.error_code == AOM_CODEC_OK) {
struct aom_read_bit_buffer rb;
size_t payload_size = 0;
size_t decoded_payload_size = 0;
@@ -890,7 +886,7 @@ int aom_decode_frame_from_obus(struct AV1Decoder *pbi, const uint8_t *data,
if (bytes_available == 0 && !pbi->seen_frame_header) {
*p_data_end = data;
- cm->error.error_code = AOM_CODEC_OK;
+ pbi->error.error_code = AOM_CODEC_OK;
break;
}
@@ -899,7 +895,7 @@ int aom_decode_frame_from_obus(struct AV1Decoder *pbi, const uint8_t *data,
&obu_header, &payload_size, &bytes_read);
if (status != AOM_CODEC_OK) {
- cm->error.error_code = status;
+ pbi->error.error_code = status;
return -1;
}
@@ -912,7 +908,7 @@ int aom_decode_frame_from_obus(struct AV1Decoder *pbi, const uint8_t *data,
data += bytes_read;
if ((size_t)(data_end - data) < payload_size) {
- cm->error.error_code = AOM_CODEC_CORRUPT_FRAME;
+ pbi->error.error_code = AOM_CODEC_CORRUPT_FRAME;
return -1;
}
@@ -936,16 +932,16 @@ int aom_decode_frame_from_obus(struct AV1Decoder *pbi, const uint8_t *data,
if (pbi->seen_frame_header) {
// A new temporal unit has started, but the frame in the previous
// temporal unit is incomplete.
- cm->error.error_code = AOM_CODEC_CORRUPT_FRAME;
+ pbi->error.error_code = AOM_CODEC_CORRUPT_FRAME;
return -1;
}
break;
case OBU_SEQUENCE_HEADER:
decoded_payload_size = read_sequence_header_obu(pbi, &rb);
- if (cm->error.error_code != AOM_CODEC_OK) return -1;
+ if (pbi->error.error_code != AOM_CODEC_OK) return -1;
// The sequence header should not change in the middle of a frame.
if (pbi->sequence_header_changed && pbi->seen_frame_header) {
- cm->error.error_code = AOM_CODEC_CORRUPT_FRAME;
+ pbi->error.error_code = AOM_CODEC_CORRUPT_FRAME;
return -1;
}
break;
@@ -954,13 +950,13 @@ int aom_decode_frame_from_obus(struct AV1Decoder *pbi, const uint8_t *data,
case OBU_FRAME:
if (obu_header.type == OBU_REDUNDANT_FRAME_HEADER) {
if (!pbi->seen_frame_header) {
- cm->error.error_code = AOM_CODEC_CORRUPT_FRAME;
+ pbi->error.error_code = AOM_CODEC_CORRUPT_FRAME;
return -1;
}
} else {
// OBU_FRAME_HEADER or OBU_FRAME.
if (pbi->seen_frame_header) {
- cm->error.error_code = AOM_CODEC_CORRUPT_FRAME;
+ pbi->error.error_code = AOM_CODEC_CORRUPT_FRAME;
return -1;
}
}
@@ -978,7 +974,7 @@ int aom_decode_frame_from_obus(struct AV1Decoder *pbi, const uint8_t *data,
// frame_header_obu.
if (frame_header_size > payload_size ||
memcmp(data, frame_header, frame_header_size) != 0) {
- cm->error.error_code = AOM_CODEC_CORRUPT_FRAME;
+ pbi->error.error_code = AOM_CODEC_CORRUPT_FRAME;
return -1;
}
assert(rb.bit_offset == 0);
@@ -990,7 +986,7 @@ int aom_decode_frame_from_obus(struct AV1Decoder *pbi, const uint8_t *data,
if (cm->show_existing_frame) {
if (obu_header.type == OBU_FRAME) {
- cm->error.error_code = AOM_CODEC_UNSUP_BITSTREAM;
+ pbi->error.error_code = AOM_CODEC_UNSUP_BITSTREAM;
return -1;
}
frame_decoding_finished = 1;
@@ -1012,23 +1008,23 @@ int aom_decode_frame_from_obus(struct AV1Decoder *pbi, const uint8_t *data,
if (obu_header.type != OBU_FRAME) break;
obu_payload_offset = frame_header_size;
// Byte align the reader before reading the tile group.
- // byte_alignment() has set cm->error.error_code if it returns -1.
+ // byte_alignment() has set pbi->error.error_code if it returns -1.
if (byte_alignment(cm, &rb)) return -1;
AOM_FALLTHROUGH_INTENDED; // fall through to read tile group.
case OBU_TILE_GROUP:
if (!pbi->seen_frame_header) {
- cm->error.error_code = AOM_CODEC_CORRUPT_FRAME;
+ pbi->error.error_code = AOM_CODEC_CORRUPT_FRAME;
return -1;
}
if (obu_payload_offset > payload_size) {
- cm->error.error_code = AOM_CODEC_CORRUPT_FRAME;
+ pbi->error.error_code = AOM_CODEC_CORRUPT_FRAME;
return -1;
}
decoded_payload_size += read_one_tile_group_obu(
pbi, &rb, is_first_tg_obu_received, data + obu_payload_offset,
data + payload_size, p_data_end, &frame_decoding_finished,
obu_header.type == OBU_FRAME);
- if (cm->error.error_code != AOM_CODEC_OK) return -1;
+ if (pbi->error.error_code != AOM_CODEC_OK) return -1;
is_first_tg_obu_received = 0;
if (frame_decoding_finished) {
pbi->seen_frame_header = 0;
@@ -1038,18 +1034,18 @@ int aom_decode_frame_from_obus(struct AV1Decoder *pbi, const uint8_t *data,
break;
case OBU_METADATA:
decoded_payload_size = read_metadata(pbi, data, payload_size);
- if (cm->error.error_code != AOM_CODEC_OK) return -1;
+ if (pbi->error.error_code != AOM_CODEC_OK) return -1;
break;
case OBU_TILE_LIST:
if (CONFIG_NORMAL_TILE_MODE) {
- cm->error.error_code = AOM_CODEC_UNSUP_BITSTREAM;
+ pbi->error.error_code = AOM_CODEC_UNSUP_BITSTREAM;
return -1;
}
// This OBU type is purely for the large scale tile coding mode.
// The common camera frame header has to be already decoded.
if (!pbi->camera_frame_header_ready) {
- cm->error.error_code = AOM_CODEC_CORRUPT_FRAME;
+ pbi->error.error_code = AOM_CODEC_CORRUPT_FRAME;
return -1;
}
@@ -1058,17 +1054,17 @@ int aom_decode_frame_from_obus(struct AV1Decoder *pbi, const uint8_t *data,
decoded_payload_size =
read_and_decode_one_tile_list(pbi, &rb, data, data + payload_size,
p_data_end, &frame_decoding_finished);
- if (cm->error.error_code != AOM_CODEC_OK) return -1;
+ if (pbi->error.error_code != AOM_CODEC_OK) return -1;
break;
case OBU_PADDING:
decoded_payload_size = read_padding(cm, data, payload_size);
- if (cm->error.error_code != AOM_CODEC_OK) return -1;
+ if (pbi->error.error_code != AOM_CODEC_OK) return -1;
break;
default:
// Skip unrecognized OBUs
if (payload_size > 0 &&
get_last_nonzero_byte(data, payload_size) == 0) {
- cm->error.error_code = AOM_CODEC_CORRUPT_FRAME;
+ pbi->error.error_code = AOM_CODEC_CORRUPT_FRAME;
return -1;
}
decoded_payload_size = payload_size;
@@ -1077,7 +1073,7 @@ int aom_decode_frame_from_obus(struct AV1Decoder *pbi, const uint8_t *data,
// Check that the signalled OBU size matches the actual amount of data read
if (decoded_payload_size > payload_size) {
- cm->error.error_code = AOM_CODEC_CORRUPT_FRAME;
+ pbi->error.error_code = AOM_CODEC_CORRUPT_FRAME;
return -1;
}
@@ -1085,7 +1081,7 @@ int aom_decode_frame_from_obus(struct AV1Decoder *pbi, const uint8_t *data,
while (decoded_payload_size < payload_size) {
uint8_t padding_byte = data[decoded_payload_size++];
if (padding_byte != 0) {
- cm->error.error_code = AOM_CODEC_CORRUPT_FRAME;
+ pbi->error.error_code = AOM_CODEC_CORRUPT_FRAME;
return -1;
}
}
@@ -1093,6 +1089,6 @@ int aom_decode_frame_from_obus(struct AV1Decoder *pbi, const uint8_t *data,
data += payload_size;
}
- if (cm->error.error_code != AOM_CODEC_OK) return -1;
+ if (pbi->error.error_code != AOM_CODEC_OK) return -1;
return frame_decoding_finished;
}
diff --git a/third_party/libaom/source/libaom/av1/encoder/aq_complexity.c b/third_party/libaom/source/libaom/av1/encoder/aq_complexity.c
index 3ea5f63020..278e1ca92f 100644
--- a/third_party/libaom/source/libaom/av1/encoder/aq_complexity.c
+++ b/third_party/libaom/source/libaom/av1/encoder/aq_complexity.c
@@ -81,7 +81,7 @@ void av1_setup_in_frame_q_adj(AV1_COMP *cpi) {
if (is_frame_aq_enabled(cpi)) {
int segment;
const int aq_strength =
- get_aq_c_strength(base_qindex, cm->seq_params.bit_depth);
+ get_aq_c_strength(base_qindex, cm->seq_params->bit_depth);
// Clear down the segment map.
memset(cpi->enc_seg.map, DEFAULT_AQ2_SEG,
@@ -108,7 +108,7 @@ void av1_setup_in_frame_q_adj(AV1_COMP *cpi) {
qindex_delta = av1_compute_qdelta_by_rate(
&cpi->rc, cm->current_frame.frame_type, base_qindex,
aq_c_q_adj_factor[aq_strength][segment], cpi->is_screen_content_type,
- cm->seq_params.bit_depth);
+ cm->seq_params->bit_depth);
// For AQ complexity mode, we dont allow Q0 in a segment if the base
// Q is not 0. Q0 (lossless) implies 4x4 only and in AQ mode 2 a segment
@@ -150,17 +150,17 @@ void av1_caq_select_segment(const AV1_COMP *cpi, MACROBLOCK *mb, BLOCK_SIZE bs,
// It is converted to bits << AV1_PROB_COST_SHIFT units.
const int64_t num = (int64_t)(cpi->rc.sb64_target_rate * xmis * ymis)
<< AV1_PROB_COST_SHIFT;
- const int denom = cm->seq_params.mib_size * cm->seq_params.mib_size;
+ const int denom = cm->seq_params->mib_size * cm->seq_params->mib_size;
const int target_rate = (int)(num / denom);
double logvar;
double low_var_thresh;
const int aq_strength = get_aq_c_strength(cm->quant_params.base_qindex,
- cm->seq_params.bit_depth);
+ cm->seq_params->bit_depth);
aom_clear_system_state();
low_var_thresh =
(is_stat_consumption_stage_twopass(cpi))
- ? AOMMAX(exp(cpi->twopass.mb_av_energy), MIN_DEFAULT_LV_THRESH)
+ ? AOMMAX(exp(cpi->ppi->twopass.mb_av_energy), MIN_DEFAULT_LV_THRESH)
: DEFAULT_LV_THRESH;
av1_setup_src_planes(mb, cpi->source, mi_row, mi_col, num_planes, bs);
diff --git a/third_party/libaom/source/libaom/av1/encoder/aq_cyclicrefresh.c b/third_party/libaom/source/libaom/av1/encoder/aq_cyclicrefresh.c
index c7abe43c87..40b8c254d4 100644
--- a/third_party/libaom/source/libaom/av1/encoder/aq_cyclicrefresh.c
+++ b/third_party/libaom/source/libaom/av1/encoder/aq_cyclicrefresh.c
@@ -12,6 +12,7 @@
#include <limits.h>
#include <math.h>
+#include "av1/common/pred_common.h"
#include "av1/common/seg_common.h"
#include "av1/encoder/aq_cyclicrefresh.h"
#include "av1/encoder/ratectrl.h"
@@ -82,7 +83,7 @@ static int compute_deltaq(const AV1_COMP *cpi, int q, double rate_factor) {
const RATE_CONTROL *const rc = &cpi->rc;
int deltaq = av1_compute_qdelta_by_rate(
rc, cpi->common.current_frame.frame_type, q, rate_factor,
- cpi->is_screen_content_type, cpi->common.seq_params.bit_depth);
+ cpi->is_screen_content_type, cpi->common.seq_params->bit_depth);
if ((-deltaq) > cr->max_qdelta_perc * q / 100) {
deltaq = -cr->max_qdelta_perc * q / 100;
}
@@ -94,7 +95,7 @@ int av1_cyclic_refresh_estimate_bits_at_q(const AV1_COMP *cpi,
const AV1_COMMON *const cm = &cpi->common;
const FRAME_TYPE frame_type = cm->current_frame.frame_type;
const int base_qindex = cm->quant_params.base_qindex;
- const int bit_depth = cm->seq_params.bit_depth;
+ const int bit_depth = cm->seq_params->bit_depth;
const CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
const int mbs = cm->mi_params.MBs;
const int num4x4bl = mbs << 4;
@@ -138,15 +139,51 @@ int av1_cyclic_refresh_rc_bits_per_mb(const AV1_COMP *cpi, int i,
bits_per_mb =
(int)((1.0 - weight_segment) *
av1_rc_bits_per_mb(cm->current_frame.frame_type, i,
- correction_factor, cm->seq_params.bit_depth,
+ correction_factor, cm->seq_params->bit_depth,
cpi->is_screen_content_type) +
weight_segment * av1_rc_bits_per_mb(cm->current_frame.frame_type,
i + deltaq, correction_factor,
- cm->seq_params.bit_depth,
+ cm->seq_params->bit_depth,
cpi->is_screen_content_type));
return bits_per_mb;
}
+void av1_cyclic_reset_segment_skip(const AV1_COMP *cpi, MACROBLOCK *const x,
+ int mi_row, int mi_col, BLOCK_SIZE bsize) {
+ int cdf_num;
+ const AV1_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MB_MODE_INFO *const mbmi = xd->mi[0];
+ const int prev_segment_id = mbmi->segment_id;
+ mbmi->segment_id = av1_get_spatial_seg_pred(cm, xd, &cdf_num);
+ if (prev_segment_id != mbmi->segment_id) {
+ CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
+ const int bw = mi_size_wide[bsize];
+ const int bh = mi_size_high[bsize];
+ const int xmis = AOMMIN(cm->mi_params.mi_cols - mi_col, bw);
+ const int ymis = AOMMIN(cm->mi_params.mi_rows - mi_row, bh);
+ const int block_index = mi_row * cm->mi_params.mi_cols + mi_col;
+ for (int mi_y = 0; mi_y < ymis; mi_y++) {
+ for (int mi_x = 0; mi_x < xmis; mi_x++) {
+ const int map_offset =
+ block_index + mi_y * cm->mi_params.mi_cols + mi_x;
+ cr->map[map_offset] = 0;
+ cpi->enc_seg.map[map_offset] = mbmi->segment_id;
+ cm->cur_frame->seg_map[map_offset] = mbmi->segment_id;
+ }
+ }
+ if (cyclic_refresh_segment_id(prev_segment_id) == CR_SEGMENT_ID_BOOST1)
+ x->actual_num_seg1_blocks -= xmis * ymis;
+ else if (cyclic_refresh_segment_id(prev_segment_id) == CR_SEGMENT_ID_BOOST2)
+ x->actual_num_seg2_blocks -= xmis * ymis;
+ if (cyclic_refresh_segment_id(mbmi->segment_id) == CR_SEGMENT_ID_BOOST1)
+ x->actual_num_seg1_blocks += xmis * ymis;
+ else if (cyclic_refresh_segment_id(mbmi->segment_id) ==
+ CR_SEGMENT_ID_BOOST2)
+ x->actual_num_seg2_blocks += xmis * ymis;
+ }
+}
+
void av1_cyclic_refresh_update_segment(const AV1_COMP *cpi, MACROBLOCK *const x,
int mi_row, int mi_col, BLOCK_SIZE bsize,
int64_t rate, int64_t dist, int skip,
@@ -191,22 +228,21 @@ void av1_cyclic_refresh_update_segment(const AV1_COMP *cpi, MACROBLOCK *const x,
// Update entries in the cyclic refresh map with new_map_value, and
// copy mbmi->segment_id into global segmentation map.
- // 8x8 is smallest coding block size for non-key frames.
- const int sh = bw << 1;
- for (int mi_y = 0; mi_y < ymis; mi_y += 2) {
- for (int mi_x = 0; mi_x < xmis; mi_x += 2) {
- int map_offset = block_index + mi_y * cm->mi_params.mi_cols + mi_x;
+ for (int mi_y = 0; mi_y < ymis; mi_y++) {
+ for (int mi_x = 0; mi_x < xmis; mi_x++) {
+ const int map_offset = block_index + mi_y * cm->mi_params.mi_cols + mi_x;
cr->map[map_offset] = new_map_value;
cpi->enc_seg.map[map_offset] = mbmi->segment_id;
+ cm->cur_frame->seg_map[map_offset] = mbmi->segment_id;
}
- // Accumulate cyclic refresh update counters.
- if (!dry_run && !frame_is_intra_only(cm)) {
- if (cyclic_refresh_segment_id(mbmi->segment_id) == CR_SEGMENT_ID_BOOST1)
- x->actual_num_seg1_blocks += sh;
- else if (cyclic_refresh_segment_id(mbmi->segment_id) ==
- CR_SEGMENT_ID_BOOST2)
- x->actual_num_seg2_blocks += sh;
- }
+ }
+ // Accumulate cyclic refresh update counters.
+ if (!dry_run) {
+ if (cyclic_refresh_segment_id(mbmi->segment_id) == CR_SEGMENT_ID_BOOST1)
+ x->actual_num_seg1_blocks += xmis * ymis;
+ else if (cyclic_refresh_segment_id(mbmi->segment_id) ==
+ CR_SEGMENT_ID_BOOST2)
+ x->actual_num_seg2_blocks += xmis * ymis;
}
}
@@ -234,15 +270,15 @@ void av1_cyclic_refresh_postencode(AV1_COMP *const cpi) {
const int avg_cnt_zeromv =
100 * cr->cnt_zeromv / (mi_params->mi_rows * mi_params->mi_cols);
- if (!cpi->use_svc ||
- (cpi->use_svc &&
+ if (!cpi->ppi->use_svc ||
+ (cpi->ppi->use_svc &&
!cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame &&
cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)) {
rc->avg_frame_low_motion =
(3 * rc->avg_frame_low_motion + avg_cnt_zeromv) / 4;
// For SVC: set avg_frame_low_motion (only computed on top spatial layer)
// to all lower spatial layers.
- if (cpi->use_svc &&
+ if (cpi->ppi->use_svc &&
svc->spatial_layer_id == svc->number_spatial_layers - 1) {
for (int i = 0; i < svc->number_spatial_layers - 1; ++i) {
const int layer = LAYER_IDS_TO_IDX(i, svc->temporal_layer_id,
@@ -257,15 +293,16 @@ void av1_cyclic_refresh_postencode(AV1_COMP *const cpi) {
void av1_cyclic_refresh_set_golden_update(AV1_COMP *const cpi) {
RATE_CONTROL *const rc = &cpi->rc;
+ PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
// Set minimum gf_interval for GF update to a multiple of the refresh period,
// with some max limit. Depending on past encoding stats, GF flag may be
// reset and update may not occur until next baseline_gf_interval.
if (cr->percent_refresh > 0)
- rc->baseline_gf_interval = AOMMIN(2 * (100 / cr->percent_refresh), 40);
+ p_rc->baseline_gf_interval = AOMMIN(2 * (100 / cr->percent_refresh), 40);
else
- rc->baseline_gf_interval = 20;
- if (rc->avg_frame_low_motion < 40) rc->baseline_gf_interval = 8;
+ p_rc->baseline_gf_interval = 20;
+ if (rc->avg_frame_low_motion < 40) p_rc->baseline_gf_interval = 8;
}
// Update the segmentation map, and related quantities: cyclic refresh map,
@@ -282,10 +319,10 @@ static void cyclic_refresh_update_map(AV1_COMP *const cpi) {
int i, block_count, bl_index, sb_rows, sb_cols, sbs_in_frame;
int xmis, ymis, x, y;
memset(seg_map, CR_SEGMENT_ID_BASE, mi_params->mi_rows * mi_params->mi_cols);
- sb_cols = (mi_params->mi_cols + cm->seq_params.mib_size - 1) /
- cm->seq_params.mib_size;
- sb_rows = (mi_params->mi_rows + cm->seq_params.mib_size - 1) /
- cm->seq_params.mib_size;
+ sb_cols = (mi_params->mi_cols + cm->seq_params->mib_size - 1) /
+ cm->seq_params->mib_size;
+ sb_rows = (mi_params->mi_rows + cm->seq_params->mib_size - 1) /
+ cm->seq_params->mib_size;
sbs_in_frame = sb_cols * sb_rows;
// Number of target blocks to get the q delta (segment 1).
block_count =
@@ -302,8 +339,8 @@ static void cyclic_refresh_update_map(AV1_COMP *const cpi) {
// Get the mi_row/mi_col corresponding to superblock index i.
int sb_row_index = (i / sb_cols);
int sb_col_index = i - sb_row_index * sb_cols;
- int mi_row = sb_row_index * cm->seq_params.mib_size;
- int mi_col = sb_col_index * cm->seq_params.mib_size;
+ int mi_row = sb_row_index * cm->seq_params->mib_size;
+ int mi_col = sb_col_index * cm->seq_params->mib_size;
// TODO(any): Ensure the population of
// cpi->common.features.allow_screen_content_tools and use the same instead
// of cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN
@@ -315,8 +352,8 @@ static void cyclic_refresh_update_map(AV1_COMP *const cpi) {
assert(mi_col >= 0 && mi_col < mi_params->mi_cols);
bl_index = mi_row * mi_params->mi_cols + mi_col;
// Loop through all MI blocks in superblock and update map.
- xmis = AOMMIN(mi_params->mi_cols - mi_col, cm->seq_params.mib_size);
- ymis = AOMMIN(mi_params->mi_rows - mi_row, cm->seq_params.mib_size);
+ xmis = AOMMIN(mi_params->mi_cols - mi_col, cm->seq_params->mib_size);
+ ymis = AOMMIN(mi_params->mi_rows - mi_row, cm->seq_params->mib_size);
// cr_map only needed at 8x8 blocks.
for (y = 0; y < ymis; y += 2) {
for (x = 0; x < xmis; x += 2) {
@@ -361,11 +398,20 @@ void av1_cyclic_refresh_update_parameters(AV1_COMP *const cpi) {
int qp_thresh = AOMMIN(20, rc->best_quality << 1);
int qp_max_thresh = 118 * MAXQ >> 7;
cr->apply_cyclic_refresh = 1;
+ int avg_frame_qindex_inter_frame;
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ avg_frame_qindex_inter_frame =
+ (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0)
+ ? cpi->ppi->temp_avg_frame_qindex[INTER_FRAME]
+ : rc->avg_frame_qindex[INTER_FRAME];
+#else
+ avg_frame_qindex_inter_frame = rc->avg_frame_qindex[INTER_FRAME];
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
if (frame_is_intra_only(cm) || is_lossless_requested(&cpi->oxcf.rc_cfg) ||
cpi->svc.temporal_layer_id > 0 ||
- rc->avg_frame_qindex[INTER_FRAME] < qp_thresh ||
+ avg_frame_qindex_inter_frame < qp_thresh ||
(rc->frames_since_key > 20 &&
- rc->avg_frame_qindex[INTER_FRAME] > qp_max_thresh) ||
+ avg_frame_qindex_inter_frame > qp_max_thresh) ||
(rc->avg_frame_low_motion < 45 && rc->frames_since_key > 40)) {
cr->apply_cyclic_refresh = 0;
return;
@@ -446,7 +492,7 @@ void av1_cyclic_refresh_setup(AV1_COMP *const cpi) {
return;
} else {
const double q = av1_convert_qindex_to_q(cm->quant_params.base_qindex,
- cm->seq_params.bit_depth);
+ cm->seq_params->bit_depth);
aom_clear_system_state();
// Set rate threshold to some multiple (set to 2 for now) of the target
// rate (target is given by sb64_target_rate and scaled by 256).
diff --git a/third_party/libaom/source/libaom/av1/encoder/aq_cyclicrefresh.h b/third_party/libaom/source/libaom/av1/encoder/aq_cyclicrefresh.h
index 97bd6f26b1..1c0d5cb4d7 100644
--- a/third_party/libaom/source/libaom/av1/encoder/aq_cyclicrefresh.h
+++ b/third_party/libaom/source/libaom/av1/encoder/aq_cyclicrefresh.h
@@ -161,6 +161,30 @@ int av1_cyclic_refresh_estimate_bits_at_q(const struct AV1_COMP *cpi,
int av1_cyclic_refresh_rc_bits_per_mb(const struct AV1_COMP *cpi, int i,
double correction_factor);
+/*!\brief Update segment_id for blocks that are skipped.
+ *
+ * After encoding a given prediction block, of size bsize at (mi_row, mi_col),
+ * check if we should reset the segment_id based on skip_txfm,
+ * and update the cyclic_refresh map and segmentation counters.
+ *
+ * \ingroup cyclic_refresh
+ * \callgraph
+ * \callergraph
+ *
+ * \param[in] cpi Top level encoder structure
+ * \param[in] x Pointer to MACROBLOCK structure
+ * \param[in] mi_row Row coordinate of the block in a step size of MI_SIZE
+ * \param[in] mi_col Col coordinate of the block in a step size of MI_SIZE
+ * \param[in] bsize Block size
+ *
+ * \return Update the \c mbmi->segment_id, the \c cpi->cyclic_refresh and
+ * the \c cpi->enc_seg.map.
+ */
+
+void av1_cyclic_reset_segment_skip(const struct AV1_COMP *cpi,
+ MACROBLOCK *const x, int mi_row, int mi_col,
+ BLOCK_SIZE bsize);
+
/*!\brief Update segment_id for block based on mode selected.
*
* Prior to coding a given prediction block, of size bsize at (mi_row, mi_col),
diff --git a/third_party/libaom/source/libaom/av1/encoder/aq_variance.c b/third_party/libaom/source/libaom/av1/encoder/aq_variance.c
index 92d7ad172d..79bf9f8419 100644
--- a/third_party/libaom/source/libaom/av1/encoder/aq_variance.c
+++ b/third_party/libaom/source/libaom/av1/encoder/aq_variance.c
@@ -52,7 +52,7 @@ void av1_vaq_frame_setup(AV1_COMP *cpi) {
int resolution_change =
cm->prev_frame && (cm->width != cm->prev_frame->width ||
cm->height != cm->prev_frame->height);
- int avg_energy = (int)(cpi->twopass.mb_av_energy - 2);
+ int avg_energy = (int)(cpi->ppi->twopass.mb_av_energy - 2);
double avg_ratio;
if (avg_energy > 7) avg_energy = 7;
if (avg_energy < 0) avg_energy = 0;
@@ -81,7 +81,7 @@ void av1_vaq_frame_setup(AV1_COMP *cpi) {
int qindex_delta = av1_compute_qdelta_by_rate(
&cpi->rc, cm->current_frame.frame_type, base_qindex,
rate_ratio[i] / avg_ratio, cpi->is_screen_content_type,
- cm->seq_params.bit_depth);
+ cm->seq_params->bit_depth);
// We don't allow qindex 0 in a segment if the base value is not 0.
// Q index 0 (lossless) implies 4x4 encoding only and in AQ mode a segment
@@ -126,14 +126,14 @@ int av1_log_block_var(const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs) {
for (j = 0; j < bw; j += 4) {
if (is_cur_buf_hbd(xd)) {
var +=
- log(1.0 + cpi->fn_ptr[BLOCK_4X4].vf(
+ log(1.0 + cpi->ppi->fn_ptr[BLOCK_4X4].vf(
x->plane[0].src.buf + i * x->plane[0].src.stride + j,
x->plane[0].src.stride,
CONVERT_TO_BYTEPTR(av1_highbd_all_zeros), 0, &sse) /
16);
} else {
var +=
- log(1.0 + cpi->fn_ptr[BLOCK_4X4].vf(
+ log(1.0 + cpi->ppi->fn_ptr[BLOCK_4X4].vf(
x->plane[0].src.buf + i * x->plane[0].src.stride + j,
x->plane[0].src.stride, av1_all_zeros, 0, &sse) /
16);
@@ -154,15 +154,12 @@ static unsigned int haar_ac_energy(MACROBLOCK *x, BLOCK_SIZE bs) {
MACROBLOCKD *xd = &x->e_mbd;
int stride = x->plane[0].src.stride;
uint8_t *buf = x->plane[0].src.buf;
- const int bw = MI_SIZE * mi_size_wide[bs];
- const int bh = MI_SIZE * mi_size_high[bs];
+ const int num_8x8_cols = block_size_wide[bs] / 8;
+ const int num_8x8_rows = block_size_high[bs] / 8;
const int hbd = is_cur_buf_hbd(xd);
- int var = 0;
- for (int r = 0; r < bh; r += 8)
- for (int c = 0; c < bw; c += 8) {
- var += av1_haar_ac_sad_8x8_uint8_input(buf + c + r * stride, stride, hbd);
- }
+ int64_t var = av1_haar_ac_sad_mxn_uint8_input(buf, stride, hbd, num_8x8_rows,
+ num_8x8_cols);
return (unsigned int)((uint64_t)var * 256) >> num_pels_log2_lookup[bs];
}
@@ -178,7 +175,7 @@ int av1_block_wavelet_energy_level(const AV1_COMP *cpi, MACROBLOCK *x,
double energy, energy_midpoint;
aom_clear_system_state();
energy_midpoint = (is_stat_consumption_stage_twopass(cpi))
- ? cpi->twopass.frame_avg_haar_energy
+ ? cpi->ppi->twopass.frame_avg_haar_energy
: DEFAULT_E_MIDPOINT;
energy = av1_log_block_wavelet_energy(x, bs) - energy_midpoint;
return clamp((int)round(energy), ENERGY_MIN, ENERGY_MAX);
@@ -199,7 +196,7 @@ int av1_compute_q_from_energy_level_deltaq_mode(const AV1_COMP *const cpi,
int qindex_delta = av1_compute_qdelta_by_rate(
&cpi->rc, cm->current_frame.frame_type, base_qindex,
deltaq_rate_ratio[rate_level], cpi->is_screen_content_type,
- cm->seq_params.bit_depth);
+ cm->seq_params->bit_depth);
if ((base_qindex != 0) && ((base_qindex + qindex_delta) == 0)) {
qindex_delta = -base_qindex + 1;
diff --git a/third_party/libaom/source/libaom/av1/encoder/av1_noise_estimate.c b/third_party/libaom/source/libaom/av1/encoder/av1_noise_estimate.c
index dbc86c5034..8b2fc38923 100644
--- a/third_party/libaom/source/libaom/av1/encoder/av1_noise_estimate.c
+++ b/third_party/libaom/source/libaom/av1/encoder/av1_noise_estimate.c
@@ -27,8 +27,8 @@
#if CONFIG_AV1_TEMPORAL_DENOISING
// For SVC: only do noise estimation on top spatial layer.
static INLINE int noise_est_svc(const struct AV1_COMP *const cpi) {
- return (!cpi->use_svc ||
- (cpi->use_svc &&
+ return (!cpi->ppi->use_svc ||
+ (cpi->ppi->use_svc &&
cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1));
}
#endif
@@ -61,7 +61,7 @@ static int enable_noise_estimation(AV1_COMP *const cpi) {
cpi->common.height != resize_pending_params->height));
#if CONFIG_AV1_HIGHBITDEPTH
- if (cpi->common.seq_params.use_highbitdepth) return 0;
+ if (cpi->common.seq_params->use_highbitdepth) return 0;
#endif
// Enable noise estimation if denoising is on.
#if CONFIG_AV1_TEMPORAL_DENOISING
@@ -75,7 +75,7 @@ static int enable_noise_estimation(AV1_COMP *const cpi) {
// Not enabled for low resolutions.
if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_cfg.mode == AOM_CBR &&
cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ && cpi->oxcf.speed >= 5 &&
- resize_pending == 0 && !cpi->use_svc &&
+ resize_pending == 0 && !cpi->ppi->use_svc &&
cpi->oxcf.tune_cfg.content != AOM_CONTENT_SCREEN &&
cpi->common.width * cpi->common.height >= 640 * 360)
return 1;
@@ -227,7 +227,7 @@ void av1_update_noise_estimate(AV1_COMP *const cpi) {
unsigned int sse;
// Compute variance between co-located blocks from current and
// last input frames.
- unsigned int variance = cpi->fn_ptr[bsize].vf(
+ unsigned int variance = cpi->ppi->fn_ptr[bsize].vf(
src_y, src_ystride, last_src_y, last_src_ystride, &sse);
unsigned int hist_index = variance / bin_size;
if (hist_index < MAX_VAR_HIST_BINS)
diff --git a/third_party/libaom/source/libaom/av1/encoder/av1_quantize.c b/third_party/libaom/source/libaom/av1/encoder/av1_quantize.c
index 9d38e2d77d..2b07e4c71b 100644
--- a/third_party/libaom/source/libaom/av1/encoder/av1_quantize.c
+++ b/third_party/libaom/source/libaom/av1/encoder/av1_quantize.c
@@ -33,6 +33,40 @@ void av1_quantize_skip(intptr_t n_coeffs, tran_low_t *qcoeff_ptr,
*eob_ptr = 0;
}
+int av1_quantize_fp_no_qmatrix(const int16_t quant_ptr[2],
+ const int16_t dequant_ptr[2],
+ const int16_t round_ptr[2], int log_scale,
+ const int16_t *scan, int coeff_count,
+ const tran_low_t *coeff_ptr,
+ tran_low_t *qcoeff_ptr,
+ tran_low_t *dqcoeff_ptr) {
+ memset(qcoeff_ptr, 0, coeff_count * sizeof(*qcoeff_ptr));
+ memset(dqcoeff_ptr, 0, coeff_count * sizeof(*dqcoeff_ptr));
+ const int rounding[2] = { ROUND_POWER_OF_TWO(round_ptr[0], log_scale),
+ ROUND_POWER_OF_TWO(round_ptr[1], log_scale) };
+ int eob = 0;
+ for (int i = 0; i < coeff_count; i++) {
+ const int rc = scan[i];
+ const int32_t thresh = (int32_t)(dequant_ptr[rc != 0]);
+ const int coeff = coeff_ptr[rc];
+ const int coeff_sign = AOMSIGN(coeff);
+ int64_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
+ int tmp32 = 0;
+ if ((abs_coeff << (1 + log_scale)) >= thresh) {
+ abs_coeff = clamp64(abs_coeff + rounding[rc != 0], INT16_MIN, INT16_MAX);
+ tmp32 = (int)((abs_coeff * quant_ptr[rc != 0]) >> (16 - log_scale));
+ if (tmp32) {
+ qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
+ const tran_low_t abs_dqcoeff =
+ (tmp32 * dequant_ptr[rc != 0]) >> log_scale;
+ dqcoeff_ptr[rc] = (abs_dqcoeff ^ coeff_sign) - coeff_sign;
+ }
+ }
+ if (tmp32) eob = i + 1;
+ }
+ return eob;
+}
+
static void quantize_fp_helper_c(
const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr,
const int16_t *round_ptr, const int16_t *quant_ptr,
@@ -53,26 +87,9 @@ static void quantize_fp_helper_c(
memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));
if (qm_ptr == NULL && iqm_ptr == NULL) {
- for (i = 0; i < n_coeffs; i++) {
- const int rc = scan[i];
- const int32_t thresh = (int32_t)(dequant_ptr[rc != 0]);
- const int coeff = coeff_ptr[rc];
- const int coeff_sign = AOMSIGN(coeff);
- int64_t abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
- int tmp32 = 0;
- if ((abs_coeff << (1 + log_scale)) >= thresh) {
- abs_coeff =
- clamp64(abs_coeff + rounding[rc != 0], INT16_MIN, INT16_MAX);
- tmp32 = (int)((abs_coeff * quant_ptr[rc != 0]) >> (16 - log_scale));
- if (tmp32) {
- qcoeff_ptr[rc] = (tmp32 ^ coeff_sign) - coeff_sign;
- const tran_low_t abs_dqcoeff =
- (tmp32 * dequant_ptr[rc != 0]) >> log_scale;
- dqcoeff_ptr[rc] = (abs_dqcoeff ^ coeff_sign) - coeff_sign;
- }
- }
- if (tmp32) eob = i;
- }
+ *eob_ptr = av1_quantize_fp_no_qmatrix(quant_ptr, dequant_ptr, round_ptr,
+ log_scale, scan, (int)n_coeffs,
+ coeff_ptr, qcoeff_ptr, dqcoeff_ptr);
} else {
// Quantization pass: All coefficients with index >= zero_flag are
// skippable. Note: zero_flag can be zero.
@@ -100,8 +117,8 @@ static void quantize_fp_helper_c(
if (tmp32) eob = i;
}
+ *eob_ptr = eob + 1;
}
- *eob_ptr = eob + 1;
}
#if CONFIG_AV1_HIGHBITDEPTH
@@ -767,7 +784,7 @@ void av1_set_quantizer(AV1_COMMON *const cm, int min_qmlevel, int max_qmlevel,
aom_get_qmlevel(quant_params->base_qindex + quant_params->u_ac_delta_q,
min_qmlevel, max_qmlevel);
- if (!cm->seq_params.separate_uv_delta_q)
+ if (!cm->seq_params->separate_uv_delta_q)
quant_params->qmatrix_level_v = quant_params->qmatrix_level_u;
else
quant_params->qmatrix_level_v =
diff --git a/third_party/libaom/source/libaom/av1/encoder/av1_quantize.h b/third_party/libaom/source/libaom/av1/encoder/av1_quantize.h
index ad9619747a..215feb0603 100644
--- a/third_party/libaom/source/libaom/av1/encoder/av1_quantize.h
+++ b/third_party/libaom/source/libaom/av1/encoder/av1_quantize.h
@@ -118,6 +118,32 @@ int av1_qindex_to_quantizer(int qindex);
void av1_quantize_skip(intptr_t n_coeffs, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr);
+/*!\brief Quantize transform coefficients without using qmatrix
+ *
+ * quant_ptr, dequant_ptr and round_ptr are size 2 arrays,
+ * where index 0 corresponds to dc coeff and index 1 corresponds to ac coeffs.
+ *
+ * \param[in] quant_ptr 16-bit fixed point representation of inverse
+ * quantize step size, i.e. 2^16/dequant
+ * \param[in] dequant_ptr quantize step size
+ * \param[in] round_ptr rounding
+ * \param[in] log_scale the relative log scale of the transform
+ * coefficients
+ * \param[in] scan scan[i] indicates the position of ith to-be-coded
+ * coefficient
+ * \param[in] coeff_count number of coefficients
+ * \param[out] qcoeff_ptr quantized coefficients
+ * \param[out] dqcoeff_ptr dequantized coefficients
+ *
+ * \return The last non-zero coefficient's scan index plus 1
+ */
+int av1_quantize_fp_no_qmatrix(const int16_t quant_ptr[2],
+ const int16_t dequant_ptr[2],
+ const int16_t round_ptr[2], int log_scale,
+ const int16_t *scan, int coeff_count,
+ const tran_low_t *coeff_ptr,
+ tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr);
+
void av1_quantize_fp_facade(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
const MACROBLOCK_PLANE *p, tran_low_t *qcoeff_ptr,
tran_low_t *dqcoeff_ptr, uint16_t *eob_ptr,
diff --git a/third_party/libaom/source/libaom/av1/encoder/av1_temporal_denoiser.c b/third_party/libaom/source/libaom/av1/encoder/av1_temporal_denoiser.c
index 6c5bb930e1..96f3d7dcfe 100644
--- a/third_party/libaom/source/libaom/av1/encoder/av1_temporal_denoiser.c
+++ b/third_party/libaom/source/libaom/av1/encoder/av1_temporal_denoiser.c
@@ -349,7 +349,7 @@ void av1_denoiser_denoise(AV1_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col,
&cpi->common, denoiser, mb, bs, increase_denoising, mi_row, mi_col, ctx,
motion_magnitude, &zeromv_filter, cpi->svc.number_spatial_layers,
cpi->source->y_width, cpi->svc.ref_idx[0], cpi->svc.ref_idx[3],
- cpi->use_svc, cpi->svc.spatial_layer_id, use_gf_temporal_ref);
+ cpi->ppi->use_svc, cpi->svc.spatial_layer_id, use_gf_temporal_ref);
if (decision == FILTER_BLOCK) {
decision = av1_denoiser_filter(src.buf, src.stride, mc_avg_start,
@@ -415,7 +415,7 @@ void av1_denoiser_update_frame_info(
return;
}
- if (svc->external_ref_frame_config) {
+ if (svc->set_ref_frame_config) {
int i;
for (i = 0; i < REF_FRAMES; i++) {
if (svc->refresh[svc->spatial_layer_id] & (1 << i))
@@ -485,8 +485,8 @@ static int av1_denoiser_realloc_svc_helper(AV1_COMMON *cm,
if (denoiser->running_avg_y[fb_idx].buffer_alloc == NULL) {
fail = aom_alloc_frame_buffer(
&denoiser->running_avg_y[fb_idx], cm->width, cm->height,
- cm->seq_params.subsampling_x, cm->seq_params.subsampling_y,
- cm->seq_params.use_highbitdepth, AOM_BORDER_IN_PIXELS,
+ cm->seq_params->subsampling_x, cm->seq_params->subsampling_y,
+ cm->seq_params->use_highbitdepth, AOM_BORDER_IN_PIXELS,
cm->features.byte_alignment);
if (fail) {
av1_denoiser_free(denoiser);
@@ -501,7 +501,7 @@ int av1_denoiser_realloc_svc(AV1_COMMON *cm, AV1_DENOISER *denoiser,
int refresh_alt, int refresh_gld, int refresh_lst,
int alt_fb_idx, int gld_fb_idx, int lst_fb_idx) {
int fail = 0;
- if (svc->external_ref_frame_config) {
+ if (svc->set_ref_frame_config) {
int i;
for (i = 0; i < REF_FRAMES; i++) {
if (cm->current_frame.frame_type == KEY_FRAME ||
@@ -724,7 +724,7 @@ void av1_denoiser_update_ref_frame(AV1_COMP *const cpi) {
(cpi->common.width != cpi->resize_pending_params.width ||
cpi->common.height != cpi->resize_pending_params.height));
- if (cpi->use_svc) {
+ if (cpi->ppi->use_svc) {
// TODO(kyslov) Enable when SVC temporal denosing is implemented
#if 0
const int svc_buf_shift =
@@ -746,7 +746,7 @@ void av1_denoiser_update_ref_frame(AV1_COMP *const cpi) {
cpi->refresh_golden_frame,
cpi->refresh_last_frame, cpi->alt_fb_idx,
cpi->gld_fb_idx, cpi->lst_fb_idx))
- aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
+ aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
"Failed to re-allocate denoiser for SVC");
#endif
}
diff --git a/third_party/libaom/source/libaom/av1/encoder/bitstream.c b/third_party/libaom/source/libaom/av1/encoder/bitstream.c
index 2b583790ff..85c0183b17 100644
--- a/third_party/libaom/source/libaom/av1/encoder/bitstream.c
+++ b/third_party/libaom/source/libaom/av1/encoder/bitstream.c
@@ -41,6 +41,7 @@
#include "av1/encoder/cost.h"
#include "av1/encoder/encodemv.h"
#include "av1/encoder/encodetxb.h"
+#include "av1/encoder/ethread.h"
#include "av1/encoder/mcomp.h"
#include "av1/encoder/palette.h"
#include "av1/encoder/segmentation.h"
@@ -185,12 +186,13 @@ static AOM_INLINE void write_tx_size_vartx(MACROBLOCKD *xd,
}
assert(bsw > 0 && bsh > 0);
- for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh)
+ for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh) {
+ const int offsetr = blk_row + row;
for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) {
- int offsetr = blk_row + row;
- int offsetc = blk_col + col;
+ const int offsetc = blk_col + col;
write_tx_size_vartx(xd, mbmi, sub_txs, depth + 1, offsetr, offsetc, w);
}
+ }
}
}
@@ -313,14 +315,16 @@ static AOM_INLINE void write_delta_qindex(const MACROBLOCKD *xd,
static AOM_INLINE void write_delta_lflevel(const AV1_COMMON *cm,
const MACROBLOCKD *xd, int lf_id,
- int delta_lflevel, aom_writer *w) {
+ int delta_lflevel,
+ int delta_lf_multi, aom_writer *w) {
int sign = delta_lflevel < 0;
int abs = sign ? -delta_lflevel : delta_lflevel;
int rem_bits, thr;
int smallval = abs < DELTA_LF_SMALL ? 1 : 0;
FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
+ (void)cm;
- if (cm->delta_q_info.delta_lf_multi) {
+ if (delta_lf_multi) {
assert(lf_id >= 0 && lf_id < (av1_num_planes(cm) > 1 ? FRAME_LF_COUNT
: FRAME_LF_COUNT - 2));
aom_write_symbol(w, AOMMIN(abs, DELTA_LF_SMALL),
@@ -380,7 +384,6 @@ static AOM_INLINE void pack_txb_tokens(
#if CONFIG_RD_DEBUG
TOKEN_STATS tmp_token_stats;
init_token_stats(&tmp_token_stats);
- token_stats->txb_coeff_cost_map[blk_row][blk_col] = tmp_token_stats.cost;
token_stats->cost += tmp_token_stats.cost;
#endif
} else {
@@ -388,14 +391,17 @@ static AOM_INLINE void pack_txb_tokens(
const int bsw = tx_size_wide_unit[sub_txs];
const int bsh = tx_size_high_unit[sub_txs];
const int step = bsh * bsw;
+ const int row_end =
+ AOMMIN(tx_size_high_unit[tx_size], max_blocks_high - blk_row);
+ const int col_end =
+ AOMMIN(tx_size_wide_unit[tx_size], max_blocks_wide - blk_col);
assert(bsw > 0 && bsh > 0);
- for (int r = 0; r < tx_size_high_unit[tx_size]; r += bsh) {
- for (int c = 0; c < tx_size_wide_unit[tx_size]; c += bsw) {
- const int offsetr = blk_row + r;
+ for (int r = 0; r < row_end; r += bsh) {
+ const int offsetr = blk_row + r;
+ for (int c = 0; c < col_end; c += bsw) {
const int offsetc = blk_col + c;
- if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
pack_txb_tokens(w, cm, x, tp, tok_end, xd, mbmi, plane, plane_bsize,
bit_depth, block, offsetr, offsetc, sub_txs,
token_stats);
@@ -445,7 +451,7 @@ int av1_neg_interleave(int x, int ref, int max) {
}
}
-static AOM_INLINE void write_segment_id(AV1_COMP *cpi,
+static AOM_INLINE void write_segment_id(AV1_COMP *cpi, MACROBLOCKD *const xd,
const MB_MODE_INFO *const mbmi,
aom_writer *w,
const struct segmentation *seg,
@@ -454,7 +460,6 @@ static AOM_INLINE void write_segment_id(AV1_COMP *cpi,
if (!seg->enabled || !seg->update_map) return;
AV1_COMMON *const cm = &cpi->common;
- MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
int cdf_num;
const int pred = av1_get_spatial_seg_pred(cm, xd, &cdf_num);
const int mi_row = xd->mi_row;
@@ -613,8 +618,8 @@ static AOM_INLINE void write_angle_delta(aom_writer *w, int angle_delta,
}
static AOM_INLINE void write_mb_interp_filter(AV1_COMMON *const cm,
- const MACROBLOCKD *xd,
- aom_writer *w) {
+ ThreadData *td, aom_writer *w) {
+ const MACROBLOCKD *xd = &td->mb.e_mbd;
const MB_MODE_INFO *const mbmi = xd->mi[0];
FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
@@ -633,8 +638,8 @@ static AOM_INLINE void write_mb_interp_filter(AV1_COMMON *const cm,
av1_extract_interp_filter(mbmi->interp_filters, dir);
aom_write_symbol(w, filter, ec_ctx->switchable_interp_cdf[ctx],
SWITCHABLE_FILTERS);
- ++cm->cur_frame->interp_filter_selected[filter];
- if (cm->seq_params.enable_dual_filter == 0) return;
+ ++td->interp_filter_selected[filter];
+ if (cm->seq_params->enable_dual_filter == 0) return;
}
}
}
@@ -777,7 +782,7 @@ static AOM_INLINE void write_palette_mode_info(const AV1_COMMON *cm,
aom_write_symbol(w, n - PALETTE_MIN_SIZE,
xd->tile_ctx->palette_y_size_cdf[bsize_ctx],
PALETTE_SIZES);
- write_palette_colors_y(xd, pmi, cm->seq_params.bit_depth, w);
+ write_palette_colors_y(xd, pmi, cm->seq_params->bit_depth, w);
}
}
@@ -792,7 +797,7 @@ static AOM_INLINE void write_palette_mode_info(const AV1_COMMON *cm,
aom_write_symbol(w, n - PALETTE_MIN_SIZE,
xd->tile_ctx->palette_uv_size_cdf[bsize_ctx],
PALETTE_SIZES);
- write_palette_colors_uv(xd, pmi, cm->seq_params.bit_depth, w);
+ write_palette_colors_uv(xd, pmi, cm->seq_params->bit_depth, w);
}
}
}
@@ -874,7 +879,7 @@ static AOM_INLINE void write_cdef(AV1_COMMON *cm, MACROBLOCKD *const xd,
// At the start of a superblock, mark that we haven't yet written CDEF
// strengths for any of the CDEF units contained in this superblock.
- const int sb_mask = (cm->seq_params.mib_size - 1);
+ const int sb_mask = (cm->seq_params->mib_size - 1);
const int mi_row_in_sb = (xd->mi_row & sb_mask);
const int mi_col_in_sb = (xd->mi_col & sb_mask);
if (mi_row_in_sb == 0 && mi_col_in_sb == 0) {
@@ -889,7 +894,7 @@ static AOM_INLINE void write_cdef(AV1_COMMON *cm, MACROBLOCKD *const xd,
const int index_mask = cdef_size;
const int cdef_unit_row_in_sb = ((xd->mi_row & index_mask) != 0);
const int cdef_unit_col_in_sb = ((xd->mi_col & index_mask) != 0);
- const int index = (cm->seq_params.sb_size == BLOCK_128X128)
+ const int index = (cm->seq_params->sb_size == BLOCK_128X128)
? cdef_unit_col_in_sb + 2 * cdef_unit_row_in_sb
: 0;
@@ -909,9 +914,9 @@ static AOM_INLINE void write_cdef(AV1_COMMON *cm, MACROBLOCKD *const xd,
}
static AOM_INLINE void write_inter_segment_id(
- AV1_COMP *cpi, aom_writer *w, const struct segmentation *const seg,
- struct segmentation_probs *const segp, int skip, int preskip) {
- MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
+ AV1_COMP *cpi, MACROBLOCKD *const xd, aom_writer *w,
+ const struct segmentation *const seg, struct segmentation_probs *const segp,
+ int skip, int preskip) {
MB_MODE_INFO *const mbmi = xd->mi[0];
AV1_COMMON *const cm = &cpi->common;
const int mi_row = xd->mi_row;
@@ -923,7 +928,7 @@ static AOM_INLINE void write_inter_segment_id(
} else {
if (seg->segid_preskip) return;
if (skip) {
- write_segment_id(cpi, mbmi, w, seg, segp, 1);
+ write_segment_id(cpi, xd, mbmi, w, seg, segp, 1);
if (seg->temporal_update) mbmi->seg_id_predicted = 0;
return;
}
@@ -933,35 +938,33 @@ static AOM_INLINE void write_inter_segment_id(
aom_cdf_prob *pred_cdf = av1_get_pred_cdf_seg_id(segp, xd);
aom_write_symbol(w, pred_flag, pred_cdf, 2);
if (!pred_flag) {
- write_segment_id(cpi, mbmi, w, seg, segp, 0);
+ write_segment_id(cpi, xd, mbmi, w, seg, segp, 0);
}
if (pred_flag) {
set_spatial_segment_id(&cm->mi_params, cm->cur_frame->seg_map,
mbmi->bsize, mi_row, mi_col, mbmi->segment_id);
}
} else {
- write_segment_id(cpi, mbmi, w, seg, segp, 0);
+ write_segment_id(cpi, xd, mbmi, w, seg, segp, 0);
}
}
}
// If delta q is present, writes delta_q index.
// Also writes delta_q loop filter levels, if present.
-static AOM_INLINE void write_delta_q_params(AV1_COMP *cpi, int skip,
+static AOM_INLINE void write_delta_q_params(AV1_COMMON *const cm,
+ MACROBLOCKD *const xd, int skip,
aom_writer *w) {
- AV1_COMMON *const cm = &cpi->common;
const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
if (delta_q_info->delta_q_present_flag) {
- MACROBLOCK *const x = &cpi->td.mb;
- MACROBLOCKD *const xd = &x->e_mbd;
const MB_MODE_INFO *const mbmi = xd->mi[0];
const BLOCK_SIZE bsize = mbmi->bsize;
const int super_block_upper_left =
- ((xd->mi_row & (cm->seq_params.mib_size - 1)) == 0) &&
- ((xd->mi_col & (cm->seq_params.mib_size - 1)) == 0);
+ ((xd->mi_row & (cm->seq_params->mib_size - 1)) == 0) &&
+ ((xd->mi_col & (cm->seq_params->mib_size - 1)) == 0);
- if ((bsize != cm->seq_params.sb_size || skip == 0) &&
+ if ((bsize != cm->seq_params->sb_size || skip == 0) &&
super_block_upper_left) {
assert(mbmi->current_qindex > 0);
const int reduced_delta_qindex =
@@ -977,14 +980,14 @@ static AOM_INLINE void write_delta_q_params(AV1_COMP *cpi, int skip,
int reduced_delta_lflevel =
(mbmi->delta_lf[lf_id] - xd->delta_lf[lf_id]) /
delta_q_info->delta_lf_res;
- write_delta_lflevel(cm, xd, lf_id, reduced_delta_lflevel, w);
+ write_delta_lflevel(cm, xd, lf_id, reduced_delta_lflevel, 1, w);
xd->delta_lf[lf_id] = mbmi->delta_lf[lf_id];
}
} else {
int reduced_delta_lflevel =
(mbmi->delta_lf_from_base - xd->delta_lf_from_base) /
delta_q_info->delta_lf_res;
- write_delta_lflevel(cm, xd, -1, reduced_delta_lflevel, w);
+ write_delta_lflevel(cm, xd, -1, reduced_delta_lflevel, 0, w);
xd->delta_lf_from_base = mbmi->delta_lf_from_base;
}
}
@@ -992,12 +995,10 @@ static AOM_INLINE void write_delta_q_params(AV1_COMP *cpi, int skip,
}
}
-static AOM_INLINE void write_intra_prediction_modes(AV1_COMP *cpi,
+static AOM_INLINE void write_intra_prediction_modes(const AV1_COMMON *cm,
+ MACROBLOCKD *const xd,
int is_keyframe,
aom_writer *w) {
- const AV1_COMMON *const cm = &cpi->common;
- MACROBLOCK *const x = &cpi->td.mb;
- MACROBLOCKD *const xd = &x->e_mbd;
FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
const MB_MODE_INFO *const mbmi = xd->mi[0];
const PREDICTION_MODE mode = mbmi->mode;
@@ -1020,7 +1021,7 @@ static AOM_INLINE void write_intra_prediction_modes(AV1_COMP *cpi,
}
// UV mode and UV angle delta.
- if (!cm->seq_params.monochrome && xd->is_chroma_ref) {
+ if (!cm->seq_params->monochrome && xd->is_chroma_ref) {
const UV_PREDICTION_MODE uv_mode = mbmi->uv_mode;
write_intra_uv_mode(ec_ctx, uv_mode, mode, is_cfl_allowed(xd), w);
if (uv_mode == UV_CFL_PRED)
@@ -1082,9 +1083,10 @@ static INLINE int_mv get_ref_mv(const MACROBLOCK *x, int ref_idx) {
x->mbmi_ext_frame);
}
-static AOM_INLINE void pack_inter_mode_mvs(AV1_COMP *cpi, aom_writer *w) {
+static AOM_INLINE void pack_inter_mode_mvs(AV1_COMP *cpi, ThreadData *const td,
+ aom_writer *w) {
AV1_COMMON *const cm = &cpi->common;
- MACROBLOCK *const x = &cpi->td.mb;
+ MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
FRAME_CONTEXT *ec_ctx = xd->tile_ctx;
const struct segmentation *const seg = &cm->seg;
@@ -1099,7 +1101,7 @@ static AOM_INLINE void pack_inter_mode_mvs(AV1_COMP *cpi, aom_writer *w) {
const int is_compound = has_second_ref(mbmi);
int ref;
- write_inter_segment_id(cpi, w, seg, segp, 0, 1);
+ write_inter_segment_id(cpi, xd, w, seg, segp, 0, 1);
write_skip_mode(cm, xd, segment_id, mbmi, w);
@@ -1107,18 +1109,18 @@ static AOM_INLINE void pack_inter_mode_mvs(AV1_COMP *cpi, aom_writer *w) {
const int skip =
mbmi->skip_mode ? 1 : write_skip(cm, xd, segment_id, mbmi, w);
- write_inter_segment_id(cpi, w, seg, segp, skip, 0);
+ write_inter_segment_id(cpi, xd, w, seg, segp, skip, 0);
write_cdef(cm, xd, w, skip);
- write_delta_q_params(cpi, skip, w);
+ write_delta_q_params(cm, xd, skip, w);
if (!mbmi->skip_mode) write_is_inter(cm, xd, mbmi->segment_id, w, is_inter);
if (mbmi->skip_mode) return;
if (!is_inter) {
- write_intra_prediction_modes(cpi, 0, w);
+ write_intra_prediction_modes(cm, xd, 0, w);
} else {
int16_t mode_ctx;
@@ -1146,21 +1148,23 @@ static AOM_INLINE void pack_inter_mode_mvs(AV1_COMP *cpi, aom_writer *w) {
for (ref = 0; ref < 1 + is_compound; ++ref) {
nmv_context *nmvc = &ec_ctx->nmvc;
const int_mv ref_mv = get_ref_mv(x, ref);
- av1_encode_mv(cpi, w, &mbmi->mv[ref].as_mv, &ref_mv.as_mv, nmvc,
+ av1_encode_mv(cpi, w, td, &mbmi->mv[ref].as_mv, &ref_mv.as_mv, nmvc,
allow_hp);
}
} else if (mode == NEAREST_NEWMV || mode == NEAR_NEWMV) {
nmv_context *nmvc = &ec_ctx->nmvc;
const int_mv ref_mv = get_ref_mv(x, 1);
- av1_encode_mv(cpi, w, &mbmi->mv[1].as_mv, &ref_mv.as_mv, nmvc, allow_hp);
+ av1_encode_mv(cpi, w, td, &mbmi->mv[1].as_mv, &ref_mv.as_mv, nmvc,
+ allow_hp);
} else if (mode == NEW_NEARESTMV || mode == NEW_NEARMV) {
nmv_context *nmvc = &ec_ctx->nmvc;
const int_mv ref_mv = get_ref_mv(x, 0);
- av1_encode_mv(cpi, w, &mbmi->mv[0].as_mv, &ref_mv.as_mv, nmvc, allow_hp);
+ av1_encode_mv(cpi, w, td, &mbmi->mv[0].as_mv, &ref_mv.as_mv, nmvc,
+ allow_hp);
}
if (cpi->common.current_frame.reference_mode != COMPOUND_REFERENCE &&
- cpi->common.seq_params.enable_interintra_compound &&
+ cpi->common.seq_params->enable_interintra_compound &&
is_interintra_allowed(mbmi)) {
const int interintra = mbmi->ref_frame[1] == INTRA_FRAME;
const int bsize_group = size_group_lookup[bsize];
@@ -1187,7 +1191,7 @@ static AOM_INLINE void pack_inter_mode_mvs(AV1_COMP *cpi, aom_writer *w) {
// Group B (1): interintra, compound_diffwtd, wedge
if (has_second_ref(mbmi)) {
const int masked_compound_used = is_any_masked_compound_used(bsize) &&
- cm->seq_params.enable_masked_compound;
+ cm->seq_params->enable_masked_compound;
if (masked_compound_used) {
const int ctx_comp_group_idx = get_comp_group_idx_context(xd);
@@ -1201,7 +1205,7 @@ static AOM_INLINE void pack_inter_mode_mvs(AV1_COMP *cpi, aom_writer *w) {
if (mbmi->compound_idx)
assert(mbmi->interinter_comp.type == COMPOUND_AVERAGE);
- if (cm->seq_params.order_hint_info.enable_dist_wtd_comp) {
+ if (cm->seq_params->order_hint_info.enable_dist_wtd_comp) {
const int comp_index_ctx = get_comp_index_context(cm, xd);
aom_write_symbol(w, mbmi->compound_idx,
ec_ctx->compound_index_cdf[comp_index_ctx], 2);
@@ -1234,7 +1238,7 @@ static AOM_INLINE void pack_inter_mode_mvs(AV1_COMP *cpi, aom_writer *w) {
}
}
}
- write_mb_interp_filter(cm, xd, w);
+ write_mb_interp_filter(cm, td, w);
}
}
@@ -1264,23 +1268,23 @@ static AOM_INLINE void write_mb_modes_kf(
const MB_MODE_INFO *const mbmi = xd->mi[0];
if (seg->segid_preskip && seg->update_map)
- write_segment_id(cpi, mbmi, w, seg, segp, 0);
+ write_segment_id(cpi, xd, mbmi, w, seg, segp, 0);
const int skip = write_skip(cm, xd, mbmi->segment_id, mbmi, w);
if (!seg->segid_preskip && seg->update_map)
- write_segment_id(cpi, mbmi, w, seg, segp, skip);
+ write_segment_id(cpi, xd, mbmi, w, seg, segp, skip);
write_cdef(cm, xd, w, skip);
- write_delta_q_params(cpi, skip, w);
+ write_delta_q_params(cm, xd, skip, w);
if (av1_allow_intrabc(cm)) {
write_intrabc_info(xd, mbmi_ext_frame, w);
if (is_intrabc_block(mbmi)) return;
}
- write_intra_prediction_modes(cpi, 1, w);
+ write_intra_prediction_modes(cm, xd, 1, w);
}
#if CONFIG_RD_DEBUG
@@ -1295,24 +1299,8 @@ static AOM_INLINE void dump_mode_info(MB_MODE_INFO *mi) {
static int rd_token_stats_mismatch(RD_STATS *rd_stats, TOKEN_STATS *token_stats,
int plane) {
if (rd_stats->txb_coeff_cost[plane] != token_stats->cost) {
- int r, c;
printf("\nplane %d rd_stats->txb_coeff_cost %d token_stats->cost %d\n",
plane, rd_stats->txb_coeff_cost[plane], token_stats->cost);
- printf("rd txb_coeff_cost_map\n");
- for (r = 0; r < TXB_COEFF_COST_MAP_SIZE; ++r) {
- for (c = 0; c < TXB_COEFF_COST_MAP_SIZE; ++c) {
- printf("%d ", rd_stats->txb_coeff_cost_map[plane][r][c]);
- }
- printf("\n");
- }
-
- printf("pack txb_coeff_cost_map\n");
- for (r = 0; r < TXB_COEFF_COST_MAP_SIZE; ++r) {
- for (c = 0; c < TXB_COEFF_COST_MAP_SIZE; ++c) {
- printf("%d ", token_stats->txb_coeff_cost_map[r][c]);
- }
- printf("\n");
- }
return 1;
}
return 0;
@@ -1376,13 +1364,14 @@ static AOM_INLINE void enc_dump_logs(
}
#endif // ENC_MISMATCH_DEBUG
-static AOM_INLINE void write_mbmi_b(AV1_COMP *cpi, aom_writer *w) {
+static AOM_INLINE void write_mbmi_b(AV1_COMP *cpi, ThreadData *const td,
+ aom_writer *w) {
AV1_COMMON *const cm = &cpi->common;
- MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
+ MACROBLOCKD *const xd = &td->mb.e_mbd;
MB_MODE_INFO *m = xd->mi[0];
if (frame_is_intra_only(cm)) {
- write_mb_modes_kf(cpi, xd, cpi->td.mb.mbmi_ext_frame, w);
+ write_mb_modes_kf(cpi, xd, td->mb.mbmi_ext_frame, w);
} else {
// has_subpel_mv_component needs the ref frame buffers set up to look
// up if they are scaled. has_subpel_mv_component is in turn needed by
@@ -1393,7 +1382,7 @@ static AOM_INLINE void write_mbmi_b(AV1_COMP *cpi, aom_writer *w) {
enc_dump_logs(cm, &cpi->mbmi_ext_info, xd->mi_row, xd->mi_col);
#endif // ENC_MISMATCH_DEBUG
- pack_inter_mode_mvs(cpi, w);
+ pack_inter_mode_mvs(cpi, td, w);
}
}
@@ -1426,18 +1415,17 @@ static AOM_INLINE void write_inter_txb_coeff(
for (int blk_row = row >> ss_y; blk_row < unit_height; blk_row += bkh) {
for (int blk_col = col >> ss_x; blk_col < unit_width; blk_col += bkw) {
pack_txb_tokens(w, cm, x, tok, tok_end, xd, mbmi, plane, plane_bsize,
- cm->seq_params.bit_depth, *block, blk_row, blk_col,
+ cm->seq_params->bit_depth, *block, blk_row, blk_col,
max_tx_size, token_stats);
*block += step;
}
}
}
-static AOM_INLINE void write_tokens_b(AV1_COMP *cpi, aom_writer *w,
- const TokenExtra **tok,
+static AOM_INLINE void write_tokens_b(AV1_COMP *cpi, MACROBLOCK *const x,
+ aom_writer *w, const TokenExtra **tok,
const TokenExtra *const tok_end) {
AV1_COMMON *const cm = &cpi->common;
- MACROBLOCK *const x = &cpi->td.mb;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = xd->mi[0];
const BLOCK_SIZE bsize = mbmi->bsize;
@@ -1487,17 +1475,18 @@ static AOM_INLINE void write_tokens_b(AV1_COMP *cpi, aom_writer *w,
}
}
-static AOM_INLINE void write_modes_b(AV1_COMP *cpi, const TileInfo *const tile,
- aom_writer *w, const TokenExtra **tok,
+static AOM_INLINE void write_modes_b(AV1_COMP *cpi, ThreadData *const td,
+ const TileInfo *const tile, aom_writer *w,
+ const TokenExtra **tok,
const TokenExtra *const tok_end,
int mi_row, int mi_col) {
const AV1_COMMON *cm = &cpi->common;
const CommonModeInfoParams *const mi_params = &cm->mi_params;
- MACROBLOCKD *xd = &cpi->td.mb.e_mbd;
+ MACROBLOCKD *xd = &td->mb.e_mbd;
FRAME_CONTEXT *tile_ctx = xd->tile_ctx;
const int grid_idx = mi_row * mi_params->mi_stride + mi_col;
xd->mi = mi_params->mi_grid_base + grid_idx;
- cpi->td.mb.mbmi_ext_frame =
+ td->mb.mbmi_ext_frame =
cpi->mbmi_ext_info.frame_base +
get_mi_ext_idx(mi_row, mi_col, cm->mi_params.mi_alloc_bsize,
cpi->mbmi_ext_info.stride);
@@ -1506,7 +1495,7 @@ static AOM_INLINE void write_modes_b(AV1_COMP *cpi, const TileInfo *const tile,
const MB_MODE_INFO *mbmi = xd->mi[0];
const BLOCK_SIZE bsize = mbmi->bsize;
- assert(bsize <= cm->seq_params.sb_size ||
+ assert(bsize <= cm->seq_params->sb_size ||
(bsize >= BLOCK_SIZES && bsize < BLOCK_SIZES_ALL));
const int bh = mi_size_high[bsize];
@@ -1518,7 +1507,7 @@ static AOM_INLINE void write_modes_b(AV1_COMP *cpi, const TileInfo *const tile,
xd->left_txfm_context =
xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
- write_mbmi_b(cpi, w);
+ write_mbmi_b(cpi, td, w);
for (int plane = 0; plane < AOMMIN(2, av1_num_planes(cm)); ++plane) {
const uint8_t palette_size_plane =
@@ -1567,10 +1556,10 @@ static AOM_INLINE void write_modes_b(AV1_COMP *cpi, const TileInfo *const tile,
if (!mbmi->skip_txfm) {
int start = aom_tell_size(w);
- write_tokens_b(cpi, w, tok, tok_end);
+ write_tokens_b(cpi, &td->mb, w, tok, tok_end);
const int end = aom_tell_size(w);
- cpi->rc.coefficient_size += end - start;
+ td->coefficient_size += end - start;
}
}
@@ -1612,12 +1601,12 @@ static AOM_INLINE void write_partition(const AV1_COMMON *const cm,
}
static AOM_INLINE void write_modes_sb(
- AV1_COMP *const cpi, const TileInfo *const tile, aom_writer *const w,
- const TokenExtra **tok, const TokenExtra *const tok_end, int mi_row,
- int mi_col, BLOCK_SIZE bsize) {
+ AV1_COMP *const cpi, ThreadData *const td, const TileInfo *const tile,
+ aom_writer *const w, const TokenExtra **tok,
+ const TokenExtra *const tok_end, int mi_row, int mi_col, BLOCK_SIZE bsize) {
const AV1_COMMON *const cm = &cpi->common;
const CommonModeInfoParams *const mi_params = &cm->mi_params;
- MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
+ MACROBLOCKD *const xd = &td->mb.e_mbd;
assert(bsize < BLOCK_SIZES_ALL);
const int hbs = mi_size_wide[bsize] / 2;
const int quarter_step = mi_size_wide[bsize] / 4;
@@ -1639,8 +1628,7 @@ static AOM_INLINE void write_modes_sb(
const int runit_idx = rcol + rrow * rstride;
const RestorationUnitInfo *rui =
&cm->rst_info[plane].unit_info[runit_idx];
- loop_restoration_write_sb_coeffs(cm, xd, rui, w, plane,
- cpi->td.counts);
+ loop_restoration_write_sb_coeffs(cm, xd, rui, w, plane, td->counts);
}
}
}
@@ -1650,51 +1638,53 @@ static AOM_INLINE void write_modes_sb(
write_partition(cm, xd, hbs, mi_row, mi_col, partition, bsize, w);
switch (partition) {
case PARTITION_NONE:
- write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col);
+ write_modes_b(cpi, td, tile, w, tok, tok_end, mi_row, mi_col);
break;
case PARTITION_HORZ:
- write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col);
+ write_modes_b(cpi, td, tile, w, tok, tok_end, mi_row, mi_col);
if (mi_row + hbs < mi_params->mi_rows)
- write_modes_b(cpi, tile, w, tok, tok_end, mi_row + hbs, mi_col);
+ write_modes_b(cpi, td, tile, w, tok, tok_end, mi_row + hbs, mi_col);
break;
case PARTITION_VERT:
- write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col);
+ write_modes_b(cpi, td, tile, w, tok, tok_end, mi_row, mi_col);
if (mi_col + hbs < mi_params->mi_cols)
- write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col + hbs);
+ write_modes_b(cpi, td, tile, w, tok, tok_end, mi_row, mi_col + hbs);
break;
case PARTITION_SPLIT:
- write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col, subsize);
- write_modes_sb(cpi, tile, w, tok, tok_end, mi_row, mi_col + hbs, subsize);
- write_modes_sb(cpi, tile, w, tok, tok_end, mi_row + hbs, mi_col, subsize);
- write_modes_sb(cpi, tile, w, tok, tok_end, mi_row + hbs, mi_col + hbs,
+ write_modes_sb(cpi, td, tile, w, tok, tok_end, mi_row, mi_col, subsize);
+ write_modes_sb(cpi, td, tile, w, tok, tok_end, mi_row, mi_col + hbs,
+ subsize);
+ write_modes_sb(cpi, td, tile, w, tok, tok_end, mi_row + hbs, mi_col,
+ subsize);
+ write_modes_sb(cpi, td, tile, w, tok, tok_end, mi_row + hbs, mi_col + hbs,
subsize);
break;
case PARTITION_HORZ_A:
- write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col);
- write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col + hbs);
- write_modes_b(cpi, tile, w, tok, tok_end, mi_row + hbs, mi_col);
+ write_modes_b(cpi, td, tile, w, tok, tok_end, mi_row, mi_col);
+ write_modes_b(cpi, td, tile, w, tok, tok_end, mi_row, mi_col + hbs);
+ write_modes_b(cpi, td, tile, w, tok, tok_end, mi_row + hbs, mi_col);
break;
case PARTITION_HORZ_B:
- write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col);
- write_modes_b(cpi, tile, w, tok, tok_end, mi_row + hbs, mi_col);
- write_modes_b(cpi, tile, w, tok, tok_end, mi_row + hbs, mi_col + hbs);
+ write_modes_b(cpi, td, tile, w, tok, tok_end, mi_row, mi_col);
+ write_modes_b(cpi, td, tile, w, tok, tok_end, mi_row + hbs, mi_col);
+ write_modes_b(cpi, td, tile, w, tok, tok_end, mi_row + hbs, mi_col + hbs);
break;
case PARTITION_VERT_A:
- write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col);
- write_modes_b(cpi, tile, w, tok, tok_end, mi_row + hbs, mi_col);
- write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col + hbs);
+ write_modes_b(cpi, td, tile, w, tok, tok_end, mi_row, mi_col);
+ write_modes_b(cpi, td, tile, w, tok, tok_end, mi_row + hbs, mi_col);
+ write_modes_b(cpi, td, tile, w, tok, tok_end, mi_row, mi_col + hbs);
break;
case PARTITION_VERT_B:
- write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col);
- write_modes_b(cpi, tile, w, tok, tok_end, mi_row, mi_col + hbs);
- write_modes_b(cpi, tile, w, tok, tok_end, mi_row + hbs, mi_col + hbs);
+ write_modes_b(cpi, td, tile, w, tok, tok_end, mi_row, mi_col);
+ write_modes_b(cpi, td, tile, w, tok, tok_end, mi_row, mi_col + hbs);
+ write_modes_b(cpi, td, tile, w, tok, tok_end, mi_row + hbs, mi_col + hbs);
break;
case PARTITION_HORZ_4:
for (i = 0; i < 4; ++i) {
int this_mi_row = mi_row + i * quarter_step;
if (i > 0 && this_mi_row >= mi_params->mi_rows) break;
- write_modes_b(cpi, tile, w, tok, tok_end, this_mi_row, mi_col);
+ write_modes_b(cpi, td, tile, w, tok, tok_end, this_mi_row, mi_col);
}
break;
case PARTITION_VERT_4:
@@ -1702,7 +1692,7 @@ static AOM_INLINE void write_modes_sb(
int this_mi_col = mi_col + i * quarter_step;
if (i > 0 && this_mi_col >= mi_params->mi_cols) break;
- write_modes_b(cpi, tile, w, tok, tok_end, mi_row, this_mi_col);
+ write_modes_b(cpi, td, tile, w, tok, tok_end, mi_row, this_mi_col);
}
break;
default: assert(0);
@@ -1712,12 +1702,12 @@ static AOM_INLINE void write_modes_sb(
update_ext_partition_context(xd, mi_row, mi_col, subsize, bsize, partition);
}
-static AOM_INLINE void write_modes(AV1_COMP *const cpi,
+static AOM_INLINE void write_modes(AV1_COMP *const cpi, ThreadData *const td,
const TileInfo *const tile,
aom_writer *const w, int tile_row,
int tile_col) {
AV1_COMMON *const cm = &cpi->common;
- MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
+ MACROBLOCKD *const xd = &td->mb.e_mbd;
const int mi_row_start = tile->mi_row_start;
const int mi_row_end = tile->mi_row_end;
const int mi_col_start = tile->mi_col_start;
@@ -1735,9 +1725,9 @@ static AOM_INLINE void write_modes(AV1_COMP *const cpi,
}
for (int mi_row = mi_row_start; mi_row < mi_row_end;
- mi_row += cm->seq_params.mib_size) {
+ mi_row += cm->seq_params->mib_size) {
const int sb_row_in_tile =
- (mi_row - tile->mi_row_start) >> cm->seq_params.mib_size_log2;
+ (mi_row - tile->mi_row_start) >> cm->seq_params->mib_size_log2;
const TokenExtra *tok =
cpi->token_info.tplist[tile_row][tile_col][sb_row_in_tile].start;
const TokenExtra *tok_end =
@@ -1746,10 +1736,10 @@ static AOM_INLINE void write_modes(AV1_COMP *const cpi,
av1_zero_left_context(xd);
for (int mi_col = mi_col_start; mi_col < mi_col_end;
- mi_col += cm->seq_params.mib_size) {
- cpi->td.mb.cb_coef_buff = av1_get_cb_coeff_buffer(cpi, mi_row, mi_col);
- write_modes_sb(cpi, tile, w, &tok, tok_end, mi_row, mi_col,
- cm->seq_params.sb_size);
+ mi_col += cm->seq_params->mib_size) {
+ td->mb.cb_coef_buff = av1_get_cb_coeff_buffer(cpi, mi_row, mi_col);
+ write_modes_sb(cpi, td, tile, w, &tok, tok_end, mi_row, mi_col,
+ cm->seq_params->sb_size);
}
assert(tok == tok_end);
}
@@ -1758,7 +1748,7 @@ static AOM_INLINE void write_modes(AV1_COMP *const cpi,
static AOM_INLINE void encode_restoration_mode(
AV1_COMMON *cm, struct aom_write_bit_buffer *wb) {
assert(!cm->features.all_lossless);
- if (!cm->seq_params.enable_restoration) return;
+ if (!cm->seq_params->enable_restoration) return;
if (cm->features.allow_intrabc) return;
const int num_planes = av1_num_planes(cm);
int all_none = 1, chroma_none = 1;
@@ -1789,9 +1779,9 @@ static AOM_INLINE void encode_restoration_mode(
}
}
if (!all_none) {
- assert(cm->seq_params.sb_size == BLOCK_64X64 ||
- cm->seq_params.sb_size == BLOCK_128X128);
- const int sb_size = cm->seq_params.sb_size == BLOCK_128X128 ? 128 : 64;
+ assert(cm->seq_params->sb_size == BLOCK_64X64 ||
+ cm->seq_params->sb_size == BLOCK_128X128);
+ const int sb_size = cm->seq_params->sb_size == BLOCK_128X128 ? 128 : 64;
RestorationInfo *rsi = &cm->rst_info[0];
@@ -1807,7 +1797,8 @@ static AOM_INLINE void encode_restoration_mode(
}
if (num_planes > 1) {
- int s = AOMMIN(cm->seq_params.subsampling_x, cm->seq_params.subsampling_y);
+ int s =
+ AOMMIN(cm->seq_params->subsampling_x, cm->seq_params->subsampling_y);
if (s && !chroma_none) {
aom_wb_write_bit(wb, cm->rst_info[1].restoration_unit_size !=
cm->rst_info[0].restoration_unit_size);
@@ -2040,7 +2031,7 @@ static AOM_INLINE void encode_loopfilter(AV1_COMMON *cm,
static AOM_INLINE void encode_cdef(const AV1_COMMON *cm,
struct aom_write_bit_buffer *wb) {
assert(!cm->features.coded_lossless);
- if (!cm->seq_params.enable_cdef) return;
+ if (!cm->seq_params->enable_cdef) return;
if (cm->features.allow_intrabc) return;
const int num_planes = av1_num_planes(cm);
int i;
@@ -2093,7 +2084,7 @@ static AOM_INLINE void encode_quantization(
}
}
-static AOM_INLINE void encode_segmentation(AV1_COMMON *cm, MACROBLOCKD *xd,
+static AOM_INLINE void encode_segmentation(AV1_COMMON *cm,
struct aom_write_bit_buffer *wb) {
int i, j;
struct segmentation *seg = &cm->seg;
@@ -2102,17 +2093,9 @@ static AOM_INLINE void encode_segmentation(AV1_COMMON *cm, MACROBLOCKD *xd,
if (!seg->enabled) return;
// Write update flags
- if (cm->features.primary_ref_frame == PRIMARY_REF_NONE) {
- assert(seg->update_map == 1);
- seg->temporal_update = 0;
- assert(seg->update_data == 1);
- } else {
+ if (cm->features.primary_ref_frame != PRIMARY_REF_NONE) {
aom_wb_write_bit(wb, seg->update_map);
- if (seg->update_map) {
- // Select the coding strategy (temporal or spatial)
- av1_choose_segmap_coding_method(cm, xd);
- aom_wb_write_bit(wb, seg->temporal_update);
- }
+ if (seg->update_map) aom_wb_write_bit(wb, seg->temporal_update);
aom_wb_write_bit(wb, seg->update_data);
}
@@ -2163,11 +2146,11 @@ static AOM_INLINE void wb_write_uniform(struct aom_write_bit_buffer *wb, int n,
static AOM_INLINE void write_tile_info_max_tile(
const AV1_COMMON *const cm, struct aom_write_bit_buffer *wb) {
int width_mi =
- ALIGN_POWER_OF_TWO(cm->mi_params.mi_cols, cm->seq_params.mib_size_log2);
+ ALIGN_POWER_OF_TWO(cm->mi_params.mi_cols, cm->seq_params->mib_size_log2);
int height_mi =
- ALIGN_POWER_OF_TWO(cm->mi_params.mi_rows, cm->seq_params.mib_size_log2);
- int width_sb = width_mi >> cm->seq_params.mib_size_log2;
- int height_sb = height_mi >> cm->seq_params.mib_size_log2;
+ ALIGN_POWER_OF_TWO(cm->mi_params.mi_rows, cm->seq_params->mib_size_log2);
+ int width_sb = width_mi >> cm->seq_params->mib_size_log2;
+ int height_sb = height_mi >> cm->seq_params->mib_size_log2;
int size_sb, i;
const CommonTileParams *const tiles = &cm->tiles;
@@ -2244,13 +2227,6 @@ static AOM_INLINE void write_ext_tile_info(
}
}
-// Stores the location and size of a tile's data in the bitstream. Used for
-// later identifying identical tiles
-typedef struct TileBufferEnc {
- uint8_t *data;
- size_t size;
-} TileBufferEnc;
-
static INLINE int find_identical_tile(
const int tile_row, const int tile_col,
TileBufferEnc (*const tile_buffers)[MAX_TILE_COLS]) {
@@ -2314,7 +2290,7 @@ static AOM_INLINE void write_render_size(const AV1_COMMON *cm,
static AOM_INLINE void write_superres_scale(const AV1_COMMON *const cm,
struct aom_write_bit_buffer *wb) {
- const SequenceHeader *const seq_params = &cm->seq_params;
+ const SequenceHeader *const seq_params = cm->seq_params;
if (!seq_params->enable_superres) {
assert(cm->superres_scale_denominator == SCALE_NUMERATOR);
return;
@@ -2341,7 +2317,7 @@ static AOM_INLINE void write_frame_size(const AV1_COMMON *cm,
const int coded_height = cm->superres_upscaled_height - 1;
if (frame_size_override) {
- const SequenceHeader *seq_params = &cm->seq_params;
+ const SequenceHeader *seq_params = cm->seq_params;
int num_bits_width = seq_params->num_bits_width;
int num_bits_height = seq_params->num_bits_height;
aom_wb_write_literal(wb, coded_width, num_bits_width);
@@ -2499,7 +2475,7 @@ static AOM_INLINE void write_tu_pts_info(AV1_COMMON *const cm,
struct aom_write_bit_buffer *wb) {
aom_wb_write_unsigned_literal(
wb, cm->frame_presentation_time,
- cm->seq_params.decoder_model_info.frame_presentation_time_length);
+ cm->seq_params->decoder_model_info.frame_presentation_time_length);
}
static AOM_INLINE void write_film_grain_params(
@@ -2537,15 +2513,15 @@ static AOM_INLINE void write_film_grain_params(
aom_wb_write_literal(wb, pars->scaling_points_y[i][1], 8);
}
- if (!cm->seq_params.monochrome) {
+ if (!cm->seq_params->monochrome) {
aom_wb_write_bit(wb, pars->chroma_scaling_from_luma);
} else {
assert(!pars->chroma_scaling_from_luma);
}
- if (cm->seq_params.monochrome || pars->chroma_scaling_from_luma ||
- ((cm->seq_params.subsampling_x == 1) &&
- (cm->seq_params.subsampling_y == 1) && (pars->num_y_points == 0))) {
+ if (cm->seq_params->monochrome || pars->chroma_scaling_from_luma ||
+ ((cm->seq_params->subsampling_x == 1) &&
+ (cm->seq_params->subsampling_y == 1) && (pars->num_y_points == 0))) {
assert(pars->num_cb_points == 0 && pars->num_cr_points == 0);
} else {
aom_wb_write_literal(wb, pars->num_cb_points, 4); // max 10
@@ -2841,12 +2817,11 @@ static int check_frame_refs_short_signaling(AV1_COMMON *const cm) {
// New function based on HLS R18
static AOM_INLINE void write_uncompressed_header_obu(
- AV1_COMP *cpi, struct aom_write_bit_buffer *saved_wb,
+ AV1_COMP *cpi, MACROBLOCKD *const xd, struct aom_write_bit_buffer *saved_wb,
struct aom_write_bit_buffer *wb) {
AV1_COMMON *const cm = &cpi->common;
- const SequenceHeader *const seq_params = &cm->seq_params;
+ const SequenceHeader *const seq_params = cm->seq_params;
const CommonQuantParams *quant_params = &cm->quant_params;
- MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
CurrentFrame *const current_frame = &cm->current_frame;
FeatureFlags *const features = &cm->features;
@@ -2925,7 +2900,7 @@ static AOM_INLINE void write_uncompressed_header_obu(
if (cm->superres_upscaled_width > seq_params->max_frame_width ||
cm->superres_upscaled_height > seq_params->max_frame_height) {
- aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
+ aom_internal_error(cm->error, AOM_CODEC_UNSUP_BITSTREAM,
"Frame dimensions are larger than the maximum values");
}
@@ -2947,24 +2922,24 @@ static AOM_INLINE void write_uncompressed_header_obu(
}
if (seq_params->decoder_model_info_present_flag) {
- aom_wb_write_bit(wb, cm->buffer_removal_time_present);
- if (cm->buffer_removal_time_present) {
+ aom_wb_write_bit(wb, cpi->ppi->buffer_removal_time_present);
+ if (cpi->ppi->buffer_removal_time_present) {
for (int op_num = 0;
op_num < seq_params->operating_points_cnt_minus_1 + 1; op_num++) {
if (seq_params->op_params[op_num].decoder_model_param_present_flag) {
- if (((seq_params->operating_point_idc[op_num] >>
+ if (seq_params->operating_point_idc[op_num] == 0 ||
+ ((seq_params->operating_point_idc[op_num] >>
cm->temporal_layer_id) &
0x1 &&
(seq_params->operating_point_idc[op_num] >>
(cm->spatial_layer_id + 8)) &
- 0x1) ||
- seq_params->operating_point_idc[op_num] == 0) {
+ 0x1)) {
aom_wb_write_unsigned_literal(
wb, cm->buffer_removal_times[op_num],
seq_params->decoder_model_info.buffer_removal_time_length);
cm->buffer_removal_times[op_num]++;
if (cm->buffer_removal_times[op_num] == 0) {
- aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
+ aom_internal_error(cm->error, AOM_CODEC_UNSUP_BITSTREAM,
"buffer_removal_time overflowed");
}
}
@@ -3051,7 +3026,7 @@ static AOM_INLINE void write_uncompressed_header_obu(
1;
if (delta_frame_id_minus_1 < 0 ||
delta_frame_id_minus_1 >= (1 << diff_len)) {
- aom_internal_error(&cpi->common.error, AOM_CODEC_ERROR,
+ aom_internal_error(cm->error, AOM_CODEC_ERROR,
"Invalid delta_frame_id_minus_1");
}
aom_wb_write_literal(wb, delta_frame_id_minus_1, diff_len);
@@ -3088,8 +3063,8 @@ static AOM_INLINE void write_uncompressed_header_obu(
write_tile_info(cm, saved_wb, wb);
encode_quantization(quant_params, av1_num_planes(cm),
- cm->seq_params.separate_uv_delta_q, wb);
- encode_segmentation(cm, xd, wb);
+ cm->seq_params->separate_uv_delta_q, wb);
+ encode_segmentation(cm, wb);
const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
if (delta_q_info->delta_q_present_flag) assert(quant_params->base_qindex > 0);
@@ -3288,11 +3263,11 @@ static int remux_tiles(const CommonTileParams *const tiles, uint8_t *dst,
}
uint32_t av1_write_obu_header(AV1LevelParams *const level_params,
- OBU_TYPE obu_type, int obu_extension,
- uint8_t *const dst) {
+ int *frame_header_count, OBU_TYPE obu_type,
+ int obu_extension, uint8_t *const dst) {
if (level_params->keep_level_stats &&
(obu_type == OBU_FRAME || obu_type == OBU_FRAME_HEADER))
- ++level_params->frame_header_count;
+ ++(*frame_header_count);
struct aom_write_bit_buffer wb = { dst, 0 };
uint32_t size = 0;
@@ -3326,8 +3301,8 @@ int av1_write_uleb_obu_size(size_t obu_header_size, size_t obu_payload_size,
return AOM_CODEC_OK;
}
-static size_t obu_memmove(size_t obu_header_size, size_t obu_payload_size,
- uint8_t *data) {
+size_t av1_obu_memmove(size_t obu_header_size, size_t obu_payload_size,
+ uint8_t *data) {
const size_t length_field_size = aom_uleb_size_in_bytes(obu_payload_size);
const size_t move_dst_offset = length_field_size + obu_header_size;
const size_t move_src_offset = obu_header_size;
@@ -3426,12 +3401,12 @@ uint32_t av1_write_sequence_header_obu(const SequenceHeader *seq_params,
return size;
}
-static uint32_t write_frame_header_obu(AV1_COMP *cpi,
+static uint32_t write_frame_header_obu(AV1_COMP *cpi, MACROBLOCKD *const xd,
struct aom_write_bit_buffer *saved_wb,
uint8_t *const dst,
int append_trailing_bits) {
struct aom_write_bit_buffer wb = { dst, 0 };
- write_uncompressed_header_obu(cpi, saved_wb, &wb);
+ write_uncompressed_header_obu(cpi, xd, saved_wb, &wb);
if (append_trailing_bits) add_trailing_bits(&wb);
return aom_wb_bytes_written(&wb);
}
@@ -3455,12 +3430,6 @@ static uint32_t write_tile_group_header(uint8_t *const dst, int start_tile,
return size;
}
-typedef struct {
- uint8_t *frame_header;
- size_t obu_header_byte_offset;
- size_t total_length;
-} FrameHeaderInfo;
-
extern void av1_print_uncompressed_frame_header(const uint8_t *data, int size,
const char *filename);
@@ -3473,16 +3442,17 @@ typedef struct {
static uint32_t init_large_scale_tile_obu_header(
AV1_COMP *const cpi, uint8_t **data, struct aom_write_bit_buffer *saved_wb,
LargeTileFrameOBU *lst_obu) {
- AV1LevelParams *const level_params = &cpi->level_params;
+ AV1LevelParams *const level_params = &cpi->ppi->level_params;
CurrentFrame *const current_frame = &cpi->common.current_frame;
// For large_scale_tile case, we always have only one tile group, so it can
// be written as an OBU_FRAME.
const OBU_TYPE obu_type = OBU_FRAME;
- lst_obu->tg_hdr_size = av1_write_obu_header(level_params, obu_type, 0, *data);
+ lst_obu->tg_hdr_size = av1_write_obu_header(
+ level_params, &cpi->frame_header_count, obu_type, 0, *data);
*data += lst_obu->tg_hdr_size;
const uint32_t frame_header_size =
- write_frame_header_obu(cpi, saved_wb, *data, 0);
+ write_frame_header_obu(cpi, &cpi->td.mb.e_mbd, saved_wb, *data, 0);
*data += frame_header_size;
lst_obu->frame_header_size = frame_header_size;
// (yunqing) This test ensures the correctness of large scale tile coding.
@@ -3520,7 +3490,7 @@ static void write_large_scale_tile_obu_size(
*total_size += lst_obu->tg_hdr_size;
const uint32_t obu_payload_size = *total_size - lst_obu->tg_hdr_size;
const size_t length_field_size =
- obu_memmove(lst_obu->tg_hdr_size, obu_payload_size, dst);
+ av1_obu_memmove(lst_obu->tg_hdr_size, obu_payload_size, dst);
if (av1_write_uleb_obu_size(lst_obu->tg_hdr_size, obu_payload_size, dst) !=
AOM_CODEC_OK)
assert(0);
@@ -3551,6 +3521,7 @@ static void write_large_scale_tile_obu(
const int tile_rows = tiles->rows;
unsigned int tile_size = 0;
+ av1_reset_pack_bs_thread_data(&cpi->td);
for (int tile_col = 0; tile_col < tile_cols; tile_col++) {
TileInfo tile_info;
const int is_last_col = (tile_col == tile_cols - 1);
@@ -3579,7 +3550,7 @@ static void write_large_scale_tile_obu(
mode_bc.allow_update_cdf =
mode_bc.allow_update_cdf && !cm->features.disable_cdf_update;
aom_start_encode(&mode_bc, buf->data + data_offset);
- write_modes(cpi, &tile_info, &mode_bc, tile_row, tile_col);
+ write_modes(cpi, &cpi->td, &tile_info, &mode_bc, tile_row, tile_col);
aom_stop_encode(&mode_bc);
tile_size = mode_bc.pos;
buf->size = tile_size;
@@ -3627,6 +3598,7 @@ static void write_large_scale_tile_obu(
*max_tile_col_size = AOMMAX(*max_tile_col_size, col_size);
}
}
+ av1_accumulate_pack_bs_thread_data(cpi, &cpi->td);
}
// Packs information in the obu header for large scale tiles.
@@ -3656,147 +3628,236 @@ static INLINE uint32_t pack_large_scale_tiles_in_tg_obus(
return total_size;
}
+// Writes obu, tile group and uncompressed headers to bitstream.
+void av1_write_obu_tg_tile_headers(AV1_COMP *const cpi, MACROBLOCKD *const xd,
+ PackBSParams *const pack_bs_params,
+ const int tile_idx) {
+ AV1_COMMON *const cm = &cpi->common;
+ const CommonTileParams *const tiles = &cm->tiles;
+ int *const curr_tg_hdr_size = &pack_bs_params->curr_tg_hdr_size;
+ const int tg_size =
+ (tiles->rows * tiles->cols + cpi->num_tg - 1) / cpi->num_tg;
+
+ // Write Tile group, frame and OBU header
+ // A new tile group begins at this tile. Write the obu header and
+ // tile group header
+ const OBU_TYPE obu_type = (cpi->num_tg == 1) ? OBU_FRAME : OBU_TILE_GROUP;
+ *curr_tg_hdr_size = av1_write_obu_header(
+ &cpi->ppi->level_params, &cpi->frame_header_count, obu_type,
+ pack_bs_params->obu_extn_header, pack_bs_params->tile_data_curr);
+ pack_bs_params->obu_header_size = *curr_tg_hdr_size;
+
+ if (cpi->num_tg == 1)
+ *curr_tg_hdr_size += write_frame_header_obu(
+ cpi, xd, pack_bs_params->saved_wb,
+ pack_bs_params->tile_data_curr + *curr_tg_hdr_size, 0);
+ *curr_tg_hdr_size += write_tile_group_header(
+ pack_bs_params->tile_data_curr + *curr_tg_hdr_size, tile_idx,
+ AOMMIN(tile_idx + tg_size - 1, tiles->cols * tiles->rows - 1),
+ (tiles->log2_rows + tiles->log2_cols), cpi->num_tg > 1);
+ *pack_bs_params->total_size += *curr_tg_hdr_size;
+}
+
+// Pack tile data in the bitstream with tile_group, frame
+// and OBU header.
+void av1_pack_tile_info(AV1_COMP *const cpi, ThreadData *const td,
+ PackBSParams *const pack_bs_params) {
+ aom_writer mode_bc;
+ AV1_COMMON *const cm = &cpi->common;
+ int tile_row = pack_bs_params->tile_row;
+ int tile_col = pack_bs_params->tile_col;
+ uint32_t *const total_size = pack_bs_params->total_size;
+ TileInfo tile_info;
+ av1_tile_set_col(&tile_info, cm, tile_col);
+ av1_tile_set_row(&tile_info, cm, tile_row);
+ mode_bc.allow_update_cdf = 1;
+ mode_bc.allow_update_cdf =
+ mode_bc.allow_update_cdf && !cm->features.disable_cdf_update;
+
+ unsigned int tile_size;
+
+ const int num_planes = av1_num_planes(cm);
+ av1_reset_loop_restoration(&td->mb.e_mbd, num_planes);
+
+ pack_bs_params->buf.data = pack_bs_params->dst + *total_size;
+
+ // The last tile of the tile group does not have a header.
+ if (!pack_bs_params->is_last_tile_in_tg) *total_size += 4;
+
+ // Pack tile data
+ aom_start_encode(&mode_bc, pack_bs_params->dst + *total_size);
+ write_modes(cpi, td, &tile_info, &mode_bc, tile_row, tile_col);
+ aom_stop_encode(&mode_bc);
+ tile_size = mode_bc.pos;
+ assert(tile_size >= AV1_MIN_TILE_SIZE_BYTES);
+
+ pack_bs_params->buf.size = tile_size;
+
+ // Write tile size
+ if (!pack_bs_params->is_last_tile_in_tg) {
+ // size of this tile
+ mem_put_le32(pack_bs_params->buf.data, tile_size - AV1_MIN_TILE_SIZE_BYTES);
+ }
+}
+
+void av1_write_last_tile_info(
+ AV1_COMP *const cpi, const FrameHeaderInfo *fh_info,
+ struct aom_write_bit_buffer *saved_wb, size_t *curr_tg_data_size,
+ uint8_t *curr_tg_start, uint32_t *const total_size,
+ uint8_t **tile_data_start, int *const largest_tile_id,
+ int *const is_first_tg, uint32_t obu_header_size, uint8_t obu_extn_header) {
+ // write current tile group size
+ const uint32_t obu_payload_size =
+ (uint32_t)(*curr_tg_data_size) - obu_header_size;
+ const size_t length_field_size =
+ av1_obu_memmove(obu_header_size, obu_payload_size, curr_tg_start);
+ if (av1_write_uleb_obu_size(obu_header_size, obu_payload_size,
+ curr_tg_start) != AOM_CODEC_OK) {
+ assert(0);
+ }
+ *curr_tg_data_size += (int)length_field_size;
+ *total_size += (uint32_t)length_field_size;
+ *tile_data_start += length_field_size;
+ if (cpi->num_tg == 1) {
+ // if this tg is combined with the frame header then update saved
+ // frame header base offset according to length field size
+ saved_wb->bit_buffer += length_field_size;
+ }
+
+ if (!(*is_first_tg) && cpi->common.features.error_resilient_mode) {
+ // Make room for a duplicate Frame Header OBU.
+ memmove(curr_tg_start + fh_info->total_length, curr_tg_start,
+ *curr_tg_data_size);
+
+ // Insert a copy of the Frame Header OBU.
+ memcpy(curr_tg_start, fh_info->frame_header, fh_info->total_length);
+
+ // Force context update tile to be the first tile in error
+ // resilient mode as the duplicate frame headers will have
+ // context_update_tile_id set to 0
+ *largest_tile_id = 0;
+
+ // Rewrite the OBU header to change the OBU type to Redundant Frame
+ // Header.
+ av1_write_obu_header(&cpi->ppi->level_params, &cpi->frame_header_count,
+ OBU_REDUNDANT_FRAME_HEADER, obu_extn_header,
+ &curr_tg_start[fh_info->obu_header_byte_offset]);
+
+ *curr_tg_data_size += (int)(fh_info->total_length);
+ *total_size += (uint32_t)(fh_info->total_length);
+ }
+ *is_first_tg = 0;
+}
+
+void av1_reset_pack_bs_thread_data(ThreadData *const td) {
+ td->coefficient_size = 0;
+ td->max_mv_magnitude = 0;
+ av1_zero(td->interp_filter_selected);
+}
+
+void av1_accumulate_pack_bs_thread_data(AV1_COMP *const cpi,
+ ThreadData const *td) {
+ int do_max_mv_magnitude_update = 1;
+ cpi->rc.coefficient_size += td->coefficient_size;
+
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ // Disable max_mv_magnitude update for parallel frames based on update flag.
+ if (!cpi->do_frame_data_update) do_max_mv_magnitude_update = 0;
+#endif
+
+ if (cpi->sf.mv_sf.auto_mv_step_size && do_max_mv_magnitude_update)
+ cpi->mv_search_params.max_mv_magnitude =
+ AOMMAX(cpi->mv_search_params.max_mv_magnitude, td->max_mv_magnitude);
+
+ for (InterpFilter filter = EIGHTTAP_REGULAR; filter < SWITCHABLE; filter++)
+ cpi->common.cur_frame->interp_filter_selected[filter] +=
+ td->interp_filter_selected[filter];
+}
+
// Store information related to each default tile in the OBU header.
static void write_tile_obu(
AV1_COMP *const cpi, uint8_t *const dst, uint32_t *total_size,
- struct aom_write_bit_buffer *saved_wb, uint8_t obu_extension_header,
+ struct aom_write_bit_buffer *saved_wb, uint8_t obu_extn_header,
const FrameHeaderInfo *fh_info, int *const largest_tile_id,
unsigned int *max_tile_size, uint32_t *const obu_header_size,
uint8_t **tile_data_start) {
AV1_COMMON *const cm = &cpi->common;
+ MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
const CommonTileParams *const tiles = &cm->tiles;
- AV1LevelParams *const level_params = &cpi->level_params;
- TileBufferEnc tile_buffers[MAX_TILE_ROWS][MAX_TILE_COLS];
const int tile_cols = tiles->cols;
const int tile_rows = tiles->rows;
- unsigned int tile_size = 0;
// Fixed size tile groups for the moment
const int num_tg_hdrs = cpi->num_tg;
const int tg_size = (tile_rows * tile_cols + num_tg_hdrs - 1) / num_tg_hdrs;
int tile_count = 0;
- int curr_tg_data_size = 0;
- uint8_t *data = dst;
+ size_t curr_tg_data_size = 0;
+ uint8_t *tile_data_curr = dst;
int new_tg = 1;
- int first_tg = 1;
+ int is_first_tg = 1;
+ av1_reset_pack_bs_thread_data(&cpi->td);
for (int tile_row = 0; tile_row < tile_rows; tile_row++) {
for (int tile_col = 0; tile_col < tile_cols; tile_col++) {
- aom_writer mode_bc;
const int tile_idx = tile_row * tile_cols + tile_col;
- TileBufferEnc *const buf = &tile_buffers[tile_row][tile_col];
TileDataEnc *this_tile = &cpi->tile_data[tile_idx];
- int is_last_tile_in_tg = 0;
+ int is_last_tile_in_tg = 0;
if (new_tg) {
- data = dst + *total_size;
-
- // A new tile group begins at this tile. Write the obu header and
- // tile group header
- const OBU_TYPE obu_type =
- (num_tg_hdrs == 1) ? OBU_FRAME : OBU_TILE_GROUP;
- curr_tg_data_size = av1_write_obu_header(level_params, obu_type,
- obu_extension_header, data);
- *obu_header_size = curr_tg_data_size;
-
- if (num_tg_hdrs == 1)
- curr_tg_data_size += write_frame_header_obu(
- cpi, saved_wb, data + curr_tg_data_size, 0);
- curr_tg_data_size += write_tile_group_header(
- data + curr_tg_data_size, tile_idx,
- AOMMIN(tile_idx + tg_size - 1, tile_cols * tile_rows - 1),
- (tiles->log2_rows + tiles->log2_cols), cpi->num_tg > 1);
- *total_size += curr_tg_data_size;
- *tile_data_start += curr_tg_data_size;
- new_tg = 0;
+ tile_data_curr = dst + *total_size;
tile_count = 0;
}
tile_count++;
- TileInfo tile_info;
- av1_tile_set_col(&tile_info, cm, tile_col);
- av1_tile_set_row(&tile_info, cm, tile_row);
- if (tile_count == tg_size || tile_idx == (tile_cols * tile_rows - 1)) {
+ if (tile_count == tg_size || tile_idx == (tile_cols * tile_rows - 1))
is_last_tile_in_tg = 1;
- new_tg = 1;
- } else {
- is_last_tile_in_tg = 0;
- }
- buf->data = dst + *total_size;
+ xd->tile_ctx = &this_tile->tctx;
- // The last tile of the tile group does not have a header.
- if (!is_last_tile_in_tg) *total_size += 4;
+ // PackBSParams stores all parameters required to pack tile and header
+ // info.
+ PackBSParams pack_bs_params;
+ pack_bs_params.dst = dst;
+ pack_bs_params.curr_tg_hdr_size = 0;
+ pack_bs_params.is_last_tile_in_tg = is_last_tile_in_tg;
+ pack_bs_params.new_tg = new_tg;
+ pack_bs_params.obu_extn_header = obu_extn_header;
+ pack_bs_params.obu_header_size = 0;
+ pack_bs_params.saved_wb = saved_wb;
+ pack_bs_params.tile_col = tile_col;
+ pack_bs_params.tile_row = tile_row;
+ pack_bs_params.tile_data_curr = tile_data_curr;
+ pack_bs_params.total_size = total_size;
- cpi->td.mb.e_mbd.tile_ctx = &this_tile->tctx;
- mode_bc.allow_update_cdf = 1;
- mode_bc.allow_update_cdf =
- mode_bc.allow_update_cdf && !cm->features.disable_cdf_update;
- const int num_planes = av1_num_planes(cm);
- av1_reset_loop_restoration(&cpi->td.mb.e_mbd, num_planes);
+ if (new_tg)
+ av1_write_obu_tg_tile_headers(cpi, xd, &pack_bs_params, tile_idx);
- aom_start_encode(&mode_bc, dst + *total_size);
- write_modes(cpi, &tile_info, &mode_bc, tile_row, tile_col);
- aom_stop_encode(&mode_bc);
- tile_size = mode_bc.pos;
- assert(tile_size >= AV1_MIN_TILE_SIZE_BYTES);
+ av1_pack_tile_info(cpi, &cpi->td, &pack_bs_params);
- curr_tg_data_size += (tile_size + (is_last_tile_in_tg ? 0 : 4));
- buf->size = tile_size;
- if (tile_size > *max_tile_size) {
- *largest_tile_id = tile_cols * tile_row + tile_col;
- *max_tile_size = tile_size;
+ if (new_tg) {
+ curr_tg_data_size = pack_bs_params.curr_tg_hdr_size;
+ *tile_data_start += pack_bs_params.curr_tg_hdr_size;
+ *obu_header_size = pack_bs_params.obu_header_size;
+ new_tg = 0;
}
+ if (is_last_tile_in_tg) new_tg = 1;
- if (!is_last_tile_in_tg) {
- // size of this tile
- mem_put_le32(buf->data, tile_size - AV1_MIN_TILE_SIZE_BYTES);
- } else {
- // write current tile group size
- const uint32_t obu_payload_size = curr_tg_data_size - *obu_header_size;
- const size_t length_field_size =
- obu_memmove(*obu_header_size, obu_payload_size, data);
- if (av1_write_uleb_obu_size(*obu_header_size, obu_payload_size, data) !=
- AOM_CODEC_OK) {
- assert(0);
- }
- curr_tg_data_size += (int)length_field_size;
- *total_size += (uint32_t)length_field_size;
- *tile_data_start += length_field_size;
- if (num_tg_hdrs == 1) {
- // if this tg is combined with the frame header then update saved
- // frame header base offset accroding to length field size
- saved_wb->bit_buffer += length_field_size;
- }
+ curr_tg_data_size +=
+ (pack_bs_params.buf.size + (is_last_tile_in_tg ? 0 : 4));
- if (!first_tg && cm->features.error_resilient_mode) {
- // Make room for a duplicate Frame Header OBU.
- memmove(data + fh_info->total_length, data, curr_tg_data_size);
-
- // Insert a copy of the Frame Header OBU.
- memcpy(data, fh_info->frame_header, fh_info->total_length);
-
- // Force context update tile to be the first tile in error
- // resiliant mode as the duplicate frame headers will have
- // context_update_tile_id set to 0
- *largest_tile_id = 0;
-
- // Rewrite the OBU header to change the OBU type to Redundant Frame
- // Header.
- av1_write_obu_header(level_params, OBU_REDUNDANT_FRAME_HEADER,
- obu_extension_header,
- &data[fh_info->obu_header_byte_offset]);
-
- data += fh_info->total_length;
-
- curr_tg_data_size += (int)(fh_info->total_length);
- *total_size += (uint32_t)(fh_info->total_length);
- }
- first_tg = 0;
+ if (pack_bs_params.buf.size > *max_tile_size) {
+ *largest_tile_id = tile_idx;
+ *max_tile_size = (unsigned int)pack_bs_params.buf.size;
}
- *total_size += tile_size;
+ if (is_last_tile_in_tg)
+ av1_write_last_tile_info(cpi, fh_info, saved_wb, &curr_tg_data_size,
+ tile_data_curr, total_size, tile_data_start,
+ largest_tile_id, &is_first_tg,
+ *obu_header_size, obu_extn_header);
+ *total_size += (uint32_t)pack_bs_params.buf.size;
}
}
+ av1_accumulate_pack_bs_thread_data(cpi, &cpi->td);
}
// Write total buffer size and related information into the OBU header for
@@ -3854,6 +3915,24 @@ static void write_tile_obu_size(AV1_COMP *const cpi, uint8_t *const dst,
}
}
+// As per the experiments, single-thread bitstream packing is better for
+// frames with a smaller bitstream size. This behavior is due to setup time
+// overhead of multithread function would be more than that of time required
+// to pack the smaller bitstream of such frames. We set a threshold on the
+// total absolute sum of transform coeffs to detect such frames and disable
+// Multithreading.
+int enable_pack_bitstream_mt(const TileDataEnc *tile_data, int num_tiles,
+ int num_workers) {
+ if (AOMMIN(num_workers, num_tiles) <= 1) return 0;
+
+ const int num_work_sqr = num_workers * num_workers;
+ const uint64_t thresh = 50;
+ uint64_t frame_abs_sum_level = 0;
+ for (int idx = 0; idx < num_tiles; idx++)
+ frame_abs_sum_level += tile_data[idx].abs_sum_level;
+ return ((frame_abs_sum_level > (num_work_sqr * thresh) / (num_workers - 1)));
+}
+
static INLINE uint32_t pack_tiles_in_tg_obus(
AV1_COMP *const cpi, uint8_t *const dst,
struct aom_write_bit_buffer *saved_wb, uint8_t obu_extension_header,
@@ -3863,16 +3942,25 @@ static INLINE uint32_t pack_tiles_in_tg_obus(
unsigned int max_tile_size = 0;
uint32_t obu_header_size = 0;
uint8_t *tile_data_start = dst;
-
- write_tile_obu(cpi, dst, &total_size, saved_wb, obu_extension_header, fh_info,
- largest_tile_id, &max_tile_size, &obu_header_size,
- &tile_data_start);
-
+ const int num_workers = cpi->mt_info.num_mod_workers[MOD_PACK_BS];
const int tile_cols = tiles->cols;
const int tile_rows = tiles->rows;
- const int have_tiles = tile_cols * tile_rows > 1;
+ const int num_tiles = tile_rows * tile_cols;
+
+ const int enable_mt =
+ enable_pack_bitstream_mt(cpi->tile_data, num_tiles, num_workers);
- if (have_tiles)
+ if (enable_mt) {
+ av1_write_tile_obu_mt(cpi, dst, &total_size, saved_wb, obu_extension_header,
+ fh_info, largest_tile_id, &max_tile_size,
+ &obu_header_size, &tile_data_start);
+ } else {
+ write_tile_obu(cpi, dst, &total_size, saved_wb, obu_extension_header,
+ fh_info, largest_tile_id, &max_tile_size, &obu_header_size,
+ &tile_data_start);
+ }
+
+ if (num_tiles > 1)
write_tile_obu_size(cpi, dst, saved_wb, *largest_tile_id, &total_size,
max_tile_size, obu_header_size, tile_data_start);
return total_size;
@@ -3887,6 +3975,9 @@ static uint32_t write_tiles_in_tg_obus(AV1_COMP *const cpi, uint8_t *const dst,
const CommonTileParams *const tiles = &cm->tiles;
*largest_tile_id = 0;
+ // Select the coding strategy (temporal or spatial)
+ if (cm->seg.enabled) av1_choose_segmap_coding_method(cm, &cpi->td.mb.e_mbd);
+
if (tiles->large_scale)
return pack_large_scale_tiles_in_tg_obus(cpi, dst, saved_wb,
largest_tile_id);
@@ -3926,18 +4017,20 @@ static size_t av1_write_metadata_array(AV1_COMP *const cpi, uint8_t *dst) {
(cm->current_frame.frame_type != KEY_FRAME &&
current_metadata->insert_flag == AOM_MIF_NON_KEY_FRAME) ||
current_metadata->insert_flag == AOM_MIF_ANY_FRAME) {
- obu_header_size =
- av1_write_obu_header(&cpi->level_params, OBU_METADATA, 0, dst);
+ obu_header_size = av1_write_obu_header(&cpi->ppi->level_params,
+ &cpi->frame_header_count,
+ OBU_METADATA, 0, dst);
obu_payload_size =
av1_write_metadata_obu(current_metadata, dst + obu_header_size);
- length_field_size = obu_memmove(obu_header_size, obu_payload_size, dst);
+ length_field_size =
+ av1_obu_memmove(obu_header_size, obu_payload_size, dst);
if (av1_write_uleb_obu_size(obu_header_size, obu_payload_size, dst) ==
AOM_CODEC_OK) {
const size_t obu_size = obu_header_size + obu_payload_size;
dst += obu_size + length_field_size;
total_bytes_written += obu_size + length_field_size;
} else {
- aom_internal_error(&cpi->common.error, AOM_CODEC_ERROR,
+ aom_internal_error(cpi->common.error, AOM_CODEC_ERROR,
"Error writing metadata OBU size");
}
}
@@ -3951,7 +4044,7 @@ int av1_pack_bitstream(AV1_COMP *const cpi, uint8_t *dst, size_t *size,
uint8_t *data = dst;
uint32_t data_size;
AV1_COMMON *const cm = &cpi->common;
- AV1LevelParams *const level_params = &cpi->level_params;
+ AV1LevelParams *const level_params = &cpi->ppi->level_params;
uint32_t obu_header_size = 0;
uint32_t obu_payload_size = 0;
FrameHeaderInfo fh_info = { NULL, 0, 0 };
@@ -3967,19 +4060,19 @@ int av1_pack_bitstream(AV1_COMP *const cpi, uint8_t *dst, size_t *size,
bitstream_queue_reset_write();
#endif
- level_params->frame_header_count = 0;
+ cpi->frame_header_count = 0;
// The TD is now written outside the frame encode loop
// write sequence header obu if KEY_FRAME, preceded by 4-byte size
if (cm->current_frame.frame_type == KEY_FRAME && !cpi->no_show_fwd_kf) {
- obu_header_size =
- av1_write_obu_header(level_params, OBU_SEQUENCE_HEADER, 0, data);
+ obu_header_size = av1_write_obu_header(
+ level_params, &cpi->frame_header_count, OBU_SEQUENCE_HEADER, 0, data);
obu_payload_size =
- av1_write_sequence_header_obu(&cm->seq_params, data + obu_header_size);
+ av1_write_sequence_header_obu(cm->seq_params, data + obu_header_size);
const size_t length_field_size =
- obu_memmove(obu_header_size, obu_payload_size, data);
+ av1_obu_memmove(obu_header_size, obu_payload_size, data);
if (av1_write_uleb_obu_size(obu_header_size, obu_payload_size, data) !=
AOM_CODEC_OK) {
return AOM_CODEC_ERROR;
@@ -3998,12 +4091,13 @@ int av1_pack_bitstream(AV1_COMP *const cpi, uint8_t *dst, size_t *size,
if (write_frame_header) {
// Write Frame Header OBU.
fh_info.frame_header = data;
- obu_header_size = av1_write_obu_header(level_params, OBU_FRAME_HEADER,
- obu_extension_header, data);
- obu_payload_size =
- write_frame_header_obu(cpi, &saved_wb, data + obu_header_size, 1);
+ obu_header_size =
+ av1_write_obu_header(level_params, &cpi->frame_header_count,
+ OBU_FRAME_HEADER, obu_extension_header, data);
+ obu_payload_size = write_frame_header_obu(cpi, &cpi->td.mb.e_mbd, &saved_wb,
+ data + obu_header_size, 1);
- length_field = obu_memmove(obu_header_size, obu_payload_size, data);
+ length_field = av1_obu_memmove(obu_header_size, obu_payload_size, data);
if (av1_write_uleb_obu_size(obu_header_size, obu_payload_size, data) !=
AOM_CODEC_OK) {
return AOM_CODEC_ERROR;
diff --git a/third_party/libaom/source/libaom/av1/encoder/bitstream.h b/third_party/libaom/source/libaom/av1/encoder/bitstream.h
index df35ecccfa..e32cd3bd19 100644
--- a/third_party/libaom/source/libaom/av1/encoder/bitstream.h
+++ b/third_party/libaom/source/libaom/av1/encoder/bitstream.h
@@ -16,9 +16,67 @@
extern "C" {
#endif
-#include "av1/encoder/encoder.h"
+#include "av1/common/av1_common_int.h"
+#include "av1/common/blockd.h"
+#include "av1/common/enums.h"
+#include "av1/encoder/level.h"
+#include "aom_dsp/bitwriter.h"
struct aom_write_bit_buffer;
+struct AV1_COMP;
+struct ThreadData;
+
+/*!\cond */
+
+// Stores the location and size of a tile's data in the bitstream. Used for
+// later identifying identical tiles
+typedef struct {
+ uint8_t *data;
+ size_t size;
+} TileBufferEnc;
+
+typedef struct {
+ uint8_t *frame_header;
+ size_t obu_header_byte_offset;
+ size_t total_length;
+} FrameHeaderInfo;
+
+typedef struct {
+ struct aom_write_bit_buffer *saved_wb; // Bit stream buffer writer structure
+ TileBufferEnc buf; // Structure to hold bitstream buffer and size
+ uint32_t *total_size; // Size of the bitstream buffer for the tile in bytes
+ uint8_t *dst; // Base address of tile bitstream buffer
+ uint8_t *tile_data_curr; // Base address of tile-group bitstream buffer
+ size_t tile_buf_size; // Available bitstream buffer for the tile in bytes
+ uint8_t obu_extn_header; // Presence of OBU extension header
+ uint32_t obu_header_size; // Size of the OBU header
+ int curr_tg_hdr_size; // Size of the obu, tg, frame headers
+ int tile_size_mi; // Tile size in mi units
+ int tile_row; // Number of tile rows
+ int tile_col; // Number of tile columns
+ int is_last_tile_in_tg; // Flag to indicate last tile in a tile-group
+ int new_tg; // Flag to indicate starting of a new tile-group
+} PackBSParams;
+
+typedef struct {
+ uint64_t abs_sum_level;
+ uint16_t tile_idx;
+} PackBSTileOrder;
+
+// Pack bitstream data for pack bitstream multi-threading.
+typedef struct {
+#if CONFIG_MULTITHREAD
+ // Mutex lock used while dispatching jobs.
+ pthread_mutex_t *mutex_;
+#endif
+ // Tile order structure of pack bitstream multithreading.
+ PackBSTileOrder pack_bs_tile_order[MAX_TILES];
+
+ // Index of next job to be processed.
+ int next_job_idx;
+} AV1EncPackBSSync;
+
+/*!\endcond */
// Writes only the OBU Sequence Header payload, and returns the size of the
// payload written to 'dst'. This function does not write the OBU header, the
@@ -29,23 +87,44 @@ uint32_t av1_write_sequence_header_obu(const SequenceHeader *seq_params,
// Writes the OBU header byte, and the OBU header extension byte when
// 'obu_extension' is non-zero. Returns number of bytes written to 'dst'.
uint32_t av1_write_obu_header(AV1LevelParams *const level_params,
- OBU_TYPE obu_type, int obu_extension,
- uint8_t *const dst);
+ int *frame_header_count, OBU_TYPE obu_type,
+ int obu_extension, uint8_t *const dst);
int av1_write_uleb_obu_size(size_t obu_header_size, size_t obu_payload_size,
uint8_t *dest);
+// Pack tile data in the bitstream with tile_group, frame
+// and OBU header.
+void av1_pack_tile_info(struct AV1_COMP *const cpi, struct ThreadData *const td,
+ PackBSParams *const pack_bs_params);
+
+void av1_write_last_tile_info(
+ struct AV1_COMP *const cpi, const FrameHeaderInfo *fh_info,
+ struct aom_write_bit_buffer *saved_wb, size_t *curr_tg_data_size,
+ uint8_t *curr_tg_start, uint32_t *const total_size,
+ uint8_t **tile_data_start, int *const largest_tile_id,
+ int *const is_first_tg, uint32_t obu_header_size, uint8_t obu_extn_header);
+
/*!\brief Pack the bitstream for one frame
*
* \ingroup high_level_algo
* \callgraph
*/
-int av1_pack_bitstream(AV1_COMP *const cpi, uint8_t *dst, size_t *size,
+int av1_pack_bitstream(struct AV1_COMP *const cpi, uint8_t *dst, size_t *size,
int *const largest_tile_id);
void av1_write_tx_type(const AV1_COMMON *const cm, const MACROBLOCKD *xd,
TX_TYPE tx_type, TX_SIZE tx_size, aom_writer *w);
+void av1_reset_pack_bs_thread_data(struct ThreadData *const td);
+
+void av1_accumulate_pack_bs_thread_data(struct AV1_COMP *const cpi,
+ struct ThreadData const *td);
+
+void av1_write_obu_tg_tile_headers(struct AV1_COMP *const cpi,
+ MACROBLOCKD *const xd,
+ PackBSParams *const pack_bs_params,
+ const int tile_idx);
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/third_party/libaom/source/libaom/av1/encoder/block.h b/third_party/libaom/source/libaom/av1/encoder/block.h
index 59353cfac3..aaf3654a5f 100644
--- a/third_party/libaom/source/libaom/av1/encoder/block.h
+++ b/third_party/libaom/source/libaom/av1/encoder/block.h
@@ -102,7 +102,7 @@ typedef struct {
*/
typedef struct macroblock_plane {
//! Stores source - pred so the txfm can be computed later
- DECLARE_ALIGNED(32, int16_t, src_diff[MAX_SB_SQUARE]);
+ int16_t *src_diff;
//! Dequantized coefficients
tran_low_t *dqcoeff;
//! Quantized coefficients
@@ -778,6 +778,23 @@ typedef struct {
/**@}*/
} MvCosts;
+/*! \brief Holds mv costs for intrabc.
+ */
+typedef struct {
+ /*! Costs for coding the joint mv. */
+ int joint_mv[MV_JOINTS];
+
+ /*! \brief Cost of transmitting the actual motion vector.
+ * dv_costs_alloc[0][i] is the cost of motion vector with horizontal
+ * component (mv_row) equal to i - MV_MAX. dv_costs_alloc[1][i] is the cost of
+ * motion vector with vertical component (mv_col) equal to i - MV_MAX.
+ */
+ int dv_costs_alloc[2][MV_VALS];
+
+ /*! Points to the middle of \ref dv_costs_alloc. */
+ int *dv_costs[2];
+} IntraBCMVCosts;
+
/*! \brief Holds the costs needed to encode the coefficients
*/
typedef struct {
@@ -817,6 +834,14 @@ typedef struct {
int lighting_change;
int low_sumdiff;
} CONTENT_STATE_SB;
+
+// Structure to hold pixel level gradient info.
+typedef struct {
+ uint16_t abs_dx_abs_dy_sum;
+ int8_t hist_bin_idx;
+ bool is_dx_zero;
+} PixelLevelGradientInfo;
+
/*!\endcond */
/*! \brief Encoder's parameters related to the current coding block.
@@ -945,6 +970,11 @@ typedef struct macroblock {
//! multipliers for motion search.
MvCosts *mv_costs;
+ /*! The rate needed to encode a new motion vector to the bitstream in intrabc
+ * mode.
+ */
+ IntraBCMVCosts *dv_costs;
+
//! The rate needed to signal the txfm coefficients to the bitstream.
CoeffCosts coeff_costs;
/**@}*/
@@ -1014,6 +1044,10 @@ typedef struct macroblock {
int pred_mv_sad[REF_FRAMES];
//! The minimum of \ref pred_mv_sad.
int best_pred_mv_sad;
+ //! The sad of the 1st mv ref (nearest).
+ int pred_mv0_sad[REF_FRAMES];
+ //! The sad of the 2nd mv ref (near).
+ int pred_mv1_sad[REF_FRAMES];
/*! \brief Disables certain ref frame pruning based on tpl.
*
@@ -1092,8 +1126,7 @@ typedef struct macroblock {
* In the second pass, we retry the winner modes with more thorough txfm
* options.
*/
- WinnerModeStats winner_mode_stats[AOMMAX(MAX_WINNER_MODE_COUNT_INTRA,
- MAX_WINNER_MODE_COUNT_INTER)];
+ WinnerModeStats *winner_mode_stats;
//! Tracks how many winner modes there are.
int winner_mode_count;
@@ -1147,10 +1180,20 @@ typedef struct macroblock {
*/
IntraBCHashInfo intrabc_hash_info;
- /*! \brief Whether to reuse the mode stored in intermode_cache. */
- int use_intermode_cache;
- /*! \brief The mode to reuse during \ref av1_rd_pick_inter_mode. */
- const MB_MODE_INFO *intermode_cache;
+ /*! \brief Whether to reuse the mode stored in mb_mode_cache. */
+ int use_mb_mode_cache;
+ /*! \brief The mode to reuse during \ref av1_rd_pick_intra_mode_sb and
+ * \ref av1_rd_pick_inter_mode. */
+ const MB_MODE_INFO *mb_mode_cache;
+ /*! \brief Pointer to the buffer which caches gradient information.
+ *
+ * Pointer to the array of structures to store gradient information of each
+ * pixel in a superblock. The buffer constitutes of MAX_SB_SQUARE pixel level
+ * structures for each of the plane types (PLANE_TYPE_Y and PLANE_TYPE_UV).
+ */
+ PixelLevelGradientInfo *pixel_gradient_info;
+ /*! \brief Flags indicating the availability of cached gradient info. */
+ bool is_sb_gradient_cached[PLANE_TYPES];
/**@}*/
/*****************************************************************************
@@ -1195,6 +1238,8 @@ typedef struct macroblock {
* Used in REALTIME coding mode to enhance the visual quality at the boundary
* of moving color objects.
*/
+ uint8_t color_sensitivity_sb[2];
+ //! Color sensitivity flag for the coding block.
uint8_t color_sensitivity[2];
/**@}*/
diff --git a/third_party/libaom/source/libaom/av1/encoder/compound_type.c b/third_party/libaom/source/libaom/av1/encoder/compound_type.c
index aacb7fc88a..00fa3890bf 100644
--- a/third_party/libaom/source/libaom/av1/encoder/compound_type.c
+++ b/third_party/libaom/source/libaom/av1/encoder/compound_type.c
@@ -48,31 +48,31 @@ static INLINE int is_comp_rd_match(const AV1_COMP *const cpi,
if (is_global_mv_block(mi, wm->wmtype) != st->is_global[i]) return 0;
}
- // Store the stats for COMPOUND_AVERAGE and COMPOUND_DISTWTD
- for (int comp_type = COMPOUND_AVERAGE; comp_type <= COMPOUND_DISTWTD;
- comp_type++) {
- comp_rate[comp_type] = st->rate[comp_type];
- comp_dist[comp_type] = st->dist[comp_type];
- comp_model_rate[comp_type] = st->model_rate[comp_type];
- comp_model_dist[comp_type] = st->model_dist[comp_type];
- comp_rs2[comp_type] = st->comp_rs2[comp_type];
- }
-
- // For compound wedge/segment, reuse data only if NEWMV is not present in
- // either of the directions
+ int reuse_data[COMPOUND_TYPES] = { 1, 1, 0, 0 };
+ // For compound wedge, reuse data if newmv search is disabled when NEWMV is
+ // present or if NEWMV is not present in either of the directions
if ((!have_newmv_in_inter_mode(mi->mode) &&
!have_newmv_in_inter_mode(st->mode)) ||
- (cpi->sf.inter_sf.disable_interinter_wedge_newmv_search)) {
- memcpy(&comp_rate[COMPOUND_WEDGE], &st->rate[COMPOUND_WEDGE],
- sizeof(comp_rate[COMPOUND_WEDGE]) * 2);
- memcpy(&comp_dist[COMPOUND_WEDGE], &st->dist[COMPOUND_WEDGE],
- sizeof(comp_dist[COMPOUND_WEDGE]) * 2);
- memcpy(&comp_model_rate[COMPOUND_WEDGE], &st->model_rate[COMPOUND_WEDGE],
- sizeof(comp_model_rate[COMPOUND_WEDGE]) * 2);
- memcpy(&comp_model_dist[COMPOUND_WEDGE], &st->model_dist[COMPOUND_WEDGE],
- sizeof(comp_model_dist[COMPOUND_WEDGE]) * 2);
- memcpy(&comp_rs2[COMPOUND_WEDGE], &st->comp_rs2[COMPOUND_WEDGE],
- sizeof(comp_rs2[COMPOUND_WEDGE]) * 2);
+ (cpi->sf.inter_sf.disable_interinter_wedge_newmv_search))
+ reuse_data[COMPOUND_WEDGE] = 1;
+ // For compound diffwtd, reuse data if fast search is enabled (no newmv search
+ // when NEWMV is present) or if NEWMV is not present in either of the
+ // directions
+ if (cpi->sf.inter_sf.enable_fast_compound_mode_search ||
+ (!have_newmv_in_inter_mode(mi->mode) &&
+ !have_newmv_in_inter_mode(st->mode)))
+ reuse_data[COMPOUND_DIFFWTD] = 1;
+
+ // Store the stats for the different compound types
+ for (int comp_type = COMPOUND_AVERAGE; comp_type < COMPOUND_TYPES;
+ comp_type++) {
+ if (reuse_data[comp_type]) {
+ comp_rate[comp_type] = st->rate[comp_type];
+ comp_dist[comp_type] = st->dist[comp_type];
+ comp_model_rate[comp_type] = st->model_rate[comp_type];
+ comp_model_dist[comp_type] = st->model_dist[comp_type];
+ comp_rs2[comp_type] = st->comp_rs2[comp_type];
+ }
}
return 1;
}
@@ -166,14 +166,14 @@ static int8_t estimate_wedge_sign(const AV1_COMP *cpi, const MACROBLOCK *x,
// TODO(nithya): Sign estimation assumes 45 degrees (1st and 4th quadrants)
// for all codebooks; experiment with other quadrant combinations for
// 0, 90 and 135 degrees also.
- cpi->fn_ptr[f_index].vf(src, src_stride, pred0, stride0, &esq[0][0]);
- cpi->fn_ptr[f_index].vf(src + bh_by2 * src_stride + bw_by2, src_stride,
- pred0 + bh_by2 * stride0 + bw_by2, stride0,
- &esq[0][1]);
- cpi->fn_ptr[f_index].vf(src, src_stride, pred1, stride1, &esq[1][0]);
- cpi->fn_ptr[f_index].vf(src + bh_by2 * src_stride + bw_by2, src_stride,
- pred1 + bh_by2 * stride1 + bw_by2, stride0,
- &esq[1][1]);
+ cpi->ppi->fn_ptr[f_index].vf(src, src_stride, pred0, stride0, &esq[0][0]);
+ cpi->ppi->fn_ptr[f_index].vf(src + bh_by2 * src_stride + bw_by2, src_stride,
+ pred0 + bh_by2 * stride0 + bw_by2, stride0,
+ &esq[0][1]);
+ cpi->ppi->fn_ptr[f_index].vf(src, src_stride, pred1, stride1, &esq[1][0]);
+ cpi->ppi->fn_ptr[f_index].vf(src + bh_by2 * src_stride + bw_by2, src_stride,
+ pred1 + bh_by2 * stride1 + bw_by2, stride0,
+ &esq[1][1]);
tl = ((int64_t)esq[0][0]) - ((int64_t)esq[1][0]);
br = ((int64_t)esq[1][1]) - ((int64_t)esq[0][1]);
@@ -314,7 +314,7 @@ static int64_t pick_interinter_wedge(
int8_t wedge_sign = 0;
assert(is_interinter_compound_used(COMPOUND_WEDGE, bsize));
- assert(cpi->common.seq_params.enable_masked_compound);
+ assert(cpi->common.seq_params->enable_masked_compound);
if (cpi->sf.inter_sf.fast_wedge_sign_estimate) {
wedge_sign = estimate_wedge_sign(cpi, x, bsize, p0, bw, p1, bw);
@@ -392,7 +392,7 @@ static int64_t pick_interintra_wedge(const AV1_COMP *const cpi,
const MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = xd->mi[0];
assert(av1_is_wedge_used(bsize));
- assert(cpi->common.seq_params.enable_interintra_compound);
+ assert(cpi->common.seq_params->enable_interintra_compound);
const struct buf_2d *const src = &x->plane[0].src;
const int bw = block_size_wide[bsize];
@@ -836,7 +836,7 @@ static INLINE int compute_valid_comp_types(MACROBLOCK *x,
const int try_average_comp = (mode_search_mask & (1 << COMPOUND_AVERAGE));
const int try_distwtd_comp =
((mode_search_mask & (1 << COMPOUND_DISTWTD)) &&
- cm->seq_params.order_hint_info.enable_dist_wtd_comp == 1 &&
+ cm->seq_params->order_hint_info.enable_dist_wtd_comp == 1 &&
cpi->sf.inter_sf.use_dist_wtd_comp_flag != DIST_WTD_COMP_DISABLED);
// Check if COMPOUND_AVERAGE and COMPOUND_DISTWTD are valid cases
@@ -1058,10 +1058,12 @@ static int64_t masked_compound_type_rd(
if (compound_type == COMPOUND_WEDGE) {
unsigned int sse;
if (is_cur_buf_hbd(xd))
- (void)cpi->fn_ptr[bsize].vf(CONVERT_TO_BYTEPTR(*preds0), *strides,
- CONVERT_TO_BYTEPTR(*preds1), *strides, &sse);
+ (void)cpi->ppi->fn_ptr[bsize].vf(CONVERT_TO_BYTEPTR(*preds0), *strides,
+ CONVERT_TO_BYTEPTR(*preds1), *strides,
+ &sse);
else
- (void)cpi->fn_ptr[bsize].vf(*preds0, *strides, *preds1, *strides, &sse);
+ (void)cpi->ppi->fn_ptr[bsize].vf(*preds0, *strides, *preds1, *strides,
+ &sse);
const unsigned int mse =
ROUND_POWER_OF_TWO(sse, num_pels_log2_lookup[bsize]);
// If two predictors are very similar, skip wedge compound mode search
@@ -1164,7 +1166,8 @@ static int64_t masked_compound_type_rd(
assert(comp_dist[compound_type] != INT64_MAX);
// When disable_interinter_wedge_newmv_search is set, motion refinement is
// disabled. Hence rate and distortion can be reused in this case as well
- assert(IMPLIES(have_newmv_in_inter_mode(this_mode),
+ assert(IMPLIES((have_newmv_in_inter_mode(this_mode) &&
+ (compound_type == COMPOUND_WEDGE)),
cpi->sf.inter_sf.disable_interinter_wedge_newmv_search));
assert(mbmi->mv[0].as_int == cur_mv[0].as_int);
assert(mbmi->mv[1].as_int == cur_mv[1].as_int);
@@ -1338,11 +1341,12 @@ int av1_compound_type_rd(const AV1_COMP *const cpi, MACROBLOCK *x,
if (have_newmv_in_inter_mode(this_mode)) {
InterPredParams inter_pred_params;
av1_dist_wtd_comp_weight_assign(
- &cpi->common, mbmi, 0, &inter_pred_params.conv_params.fwd_offset,
+ &cpi->common, mbmi, &inter_pred_params.conv_params.fwd_offset,
&inter_pred_params.conv_params.bck_offset,
&inter_pred_params.conv_params.use_dist_wtd_comp_avg, 1);
int mask_value = inter_pred_params.conv_params.fwd_offset * 4;
- memset(xd->seg_mask, mask_value, sizeof(xd->seg_mask));
+ memset(xd->seg_mask, mask_value,
+ sizeof(xd->seg_mask[0]) * 2 * MAX_SB_SQUARE);
tmp_rate_mv = av1_interinter_compound_motion_search(cpi, x, cur_mv,
bsize, this_mode);
}
@@ -1369,7 +1373,7 @@ int av1_compound_type_rd(const AV1_COMP *const cpi, MACROBLOCK *x,
int_mv tmp_mv[2] = { mbmi->mv[0], mbmi->mv[1] };
int best_rs2 = 0;
int best_rate_mv = *rate_mv;
- const int wedge_mask_size = get_wedge_types_lookup(bsize);
+ int wedge_mask_size = get_wedge_types_lookup(bsize);
int need_mask_search = args->wedge_index == -1;
if (need_mask_search && !have_newmv_in_inter_mode(this_mode)) {
@@ -1392,7 +1396,8 @@ int av1_compound_type_rd(const AV1_COMP *const cpi, MACROBLOCK *x,
mode_rd = RDCOST(x->rdmult, rs2 + rd_stats->rate, 0);
if (mode_rd >= ref_best_rd / 2) continue;
- if (have_newmv_in_inter_mode(this_mode)) {
+ if (have_newmv_in_inter_mode(this_mode) &&
+ !cpi->sf.inter_sf.disable_interinter_wedge_newmv_search) {
tmp_rate_mv = av1_interinter_compound_motion_search(
cpi, x, cur_mv, bsize, this_mode);
av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, orig_dst,
@@ -1425,6 +1430,33 @@ int av1_compound_type_rd(const AV1_COMP *const cpi, MACROBLOCK *x,
best_rs2 = rs2;
}
}
+ // Consider the asymmetric partitions for oblique angle only if the
+ // corresponding symmetric partition is the best so far.
+ // Note: For horizontal and vertical types, both symmetric and
+ // asymmetric partitions are always considered.
+ if (cpi->sf.inter_sf.enable_fast_wedge_mask_search) {
+ // The first 4 entries in wedge_codebook_16_heqw/hltw/hgtw[16]
+ // correspond to symmetric partitions of the 4 oblique angles, the
+ // next 4 entries correspond to the vertical/horizontal
+ // symmetric/asymmetric partitions and the last 8 entries correspond
+ // to the asymmetric partitions of oblique types.
+ const int idx_before_asym_oblique = 7;
+ const int last_oblique_sym_idx = 3;
+ if (wedge_mask == idx_before_asym_oblique) {
+ if (best_mask_index > last_oblique_sym_idx) {
+ break;
+ } else {
+ // Asymmetric (Index-1) map for the corresponding oblique masks.
+ // WEDGE_OBLIQUE27: sym - 0, asym - 8, 9
+ // WEDGE_OBLIQUE63: sym - 1, asym - 12, 13
+ // WEDGE_OBLIQUE117: sym - 2, asym - 14, 15
+ // WEDGE_OBLIQUE153: sym - 3, asym - 10, 11
+ const int asym_mask_idx[4] = { 7, 11, 13, 9 };
+ wedge_mask = asym_mask_idx[best_mask_index];
+ wedge_mask_size = wedge_mask + 3;
+ }
+ }
+ }
}
if (need_mask_search) {
@@ -1439,7 +1471,8 @@ int av1_compound_type_rd(const AV1_COMP *const cpi, MACROBLOCK *x,
rs2 = masked_type_cost[cur_type];
rs2 += get_interinter_compound_mask_rate(&x->mode_costs, mbmi);
- if (have_newmv_in_inter_mode(this_mode)) {
+ if (have_newmv_in_inter_mode(this_mode) &&
+ !cpi->sf.inter_sf.disable_interinter_wedge_newmv_search) {
tmp_rate_mv = av1_interinter_compound_motion_search(cpi, x, cur_mv,
bsize, this_mode);
}
@@ -1485,7 +1518,8 @@ int av1_compound_type_rd(const AV1_COMP *const cpi, MACROBLOCK *x,
if (have_newmv_in_inter_mode(this_mode)) {
// hard coded number for diff wtd
int mask_value = mask_index == 0 ? 38 : 26;
- memset(xd->seg_mask, mask_value, sizeof(xd->seg_mask));
+ memset(xd->seg_mask, mask_value,
+ sizeof(xd->seg_mask[0]) * 2 * MAX_SB_SQUARE);
tmp_rate_mv = av1_interinter_compound_motion_search(cpi, x, cur_mv,
bsize, this_mode);
}
@@ -1522,7 +1556,8 @@ int av1_compound_type_rd(const AV1_COMP *const cpi, MACROBLOCK *x,
rs2 += get_interinter_compound_mask_rate(&x->mode_costs, mbmi);
int mask_value = mbmi->interinter_comp.mask_type == 0 ? 38 : 26;
- memset(xd->seg_mask, mask_value, sizeof(xd->seg_mask));
+ memset(xd->seg_mask, mask_value,
+ sizeof(xd->seg_mask[0]) * 2 * MAX_SB_SQUARE);
if (have_newmv_in_inter_mode(this_mode)) {
tmp_rate_mv = av1_interinter_compound_motion_search(cpi, x, cur_mv,
diff --git a/third_party/libaom/source/libaom/av1/encoder/context_tree.c b/third_party/libaom/source/libaom/av1/encoder/context_tree.c
index 566576e4f5..9fd9d1b1e8 100644
--- a/third_party/libaom/source/libaom/av1/encoder/context_tree.c
+++ b/third_party/libaom/source/libaom/av1/encoder/context_tree.c
@@ -230,7 +230,7 @@ static AOM_INLINE int get_pc_tree_nodes(const int is_sb_size_128,
void av1_setup_sms_tree(AV1_COMP *const cpi, ThreadData *td) {
AV1_COMMON *const cm = &cpi->common;
const int stat_generation_stage = is_stat_generation_stage(cpi);
- const int is_sb_size_128 = cm->seq_params.sb_size == BLOCK_128X128;
+ const int is_sb_size_128 = cm->seq_params->sb_size == BLOCK_128X128;
const int tree_nodes =
get_pc_tree_nodes(is_sb_size_128, stat_generation_stage);
int sms_tree_index = 0;
diff --git a/third_party/libaom/source/libaom/av1/encoder/dwt.c b/third_party/libaom/source/libaom/av1/encoder/dwt.c
index b5ed4a3446..5dfbcb677b 100644
--- a/third_party/libaom/source/libaom/av1/encoder/dwt.c
+++ b/third_party/libaom/source/libaom/av1/encoder/dwt.c
@@ -147,9 +147,23 @@ uint32_t av1_variance(uint8_t *input, int bw, int bh, int stride) {
return sse - (uint32_t)(((int64_t)sum * sum) / (bw * bh));
}
-int av1_haar_ac_sad_8x8_uint8_input(const uint8_t *input, int stride, int hbd) {
+static int haar_ac_sad_8x8_uint8_input(const uint8_t *input, int stride,
+ int hbd) {
tran_low_t output[64];
av1_fdwt8x8_uint8_input_c(input, output, stride, hbd);
return av1_haar_ac_sad(output, 8, 8, 8);
}
+
+int64_t av1_haar_ac_sad_mxn_uint8_input(const uint8_t *input, int stride,
+ int hbd, int num_8x8_rows,
+ int num_8x8_cols) {
+ int64_t wavelet_energy = 0;
+ for (int r8 = 0; r8 < num_8x8_rows; ++r8) {
+ for (int c8 = 0; c8 < num_8x8_cols; ++c8) {
+ wavelet_energy += haar_ac_sad_8x8_uint8_input(
+ input + c8 * 8 + r8 * 8 * stride, stride, hbd);
+ }
+ }
+ return wavelet_energy;
+}
diff --git a/third_party/libaom/source/libaom/av1/encoder/dwt.h b/third_party/libaom/source/libaom/av1/encoder/dwt.h
index 1bd32edb3b..443b6bc12c 100644
--- a/third_party/libaom/source/libaom/av1/encoder/dwt.h
+++ b/third_party/libaom/source/libaom/av1/encoder/dwt.h
@@ -19,6 +19,9 @@
void av1_fdwt8x8_uint8_input_c(const uint8_t *input, tran_low_t *output,
int stride, int hbd);
-int av1_haar_ac_sad_8x8_uint8_input(const uint8_t *input, int stride, int hbd);
+
+int64_t av1_haar_ac_sad_mxn_uint8_input(const uint8_t *input, int stride,
+ int hbd, int num_8x8_rows,
+ int num_8x8_cols);
#endif // AOM_AV1_ENCODER_DWT_H_
diff --git a/third_party/libaom/source/libaom/av1/encoder/enc_enums.h b/third_party/libaom/source/libaom/av1/encoder/enc_enums.h
index 319e5d02c9..20cefa16a5 100644
--- a/third_party/libaom/source/libaom/av1/encoder/enc_enums.h
+++ b/third_party/libaom/source/libaom/av1/encoder/enc_enums.h
@@ -216,6 +216,8 @@ enum {
NUM_SINGLE_REF_MODES = SINGLE_REF_MODE_END - SINGLE_REF_MODE_START,
THR_MODE_START = THR_NEARESTMV,
THR_MODE_END = MAX_MODES,
+ THR_INTER_MODE_START = THR_MODE_START,
+ THR_INTER_MODE_END = THR_DC,
THR_INVALID = 255
} UENUM1BYTE(THR_MODES);
diff --git a/third_party/libaom/source/libaom/av1/encoder/encode_strategy.c b/third_party/libaom/source/libaom/av1/encoder/encode_strategy.c
index da7ec4487d..01f2959d85 100644
--- a/third_party/libaom/source/libaom/av1/encoder/encode_strategy.c
+++ b/third_party/libaom/source/libaom/av1/encoder/encode_strategy.c
@@ -106,11 +106,19 @@ void av1_configure_buffer_updates(
}
if (ext_refresh_frame_flags->update_pending &&
- (!is_stat_generation_stage(cpi)))
+ (!is_stat_generation_stage(cpi))) {
set_refresh_frame_flags(refresh_frame_flags,
ext_refresh_frame_flags->golden_frame,
ext_refresh_frame_flags->bwd_ref_frame,
ext_refresh_frame_flags->alt_ref_frame);
+ GF_GROUP *gf_group = &cpi->ppi->gf_group;
+ if (ext_refresh_frame_flags->golden_frame)
+ gf_group->update_type[cpi->gf_frame_index] = GF_UPDATE;
+ if (ext_refresh_frame_flags->alt_ref_frame)
+ gf_group->update_type[cpi->gf_frame_index] = ARF_UPDATE;
+ if (ext_refresh_frame_flags->bwd_ref_frame)
+ gf_group->update_type[cpi->gf_frame_index] = INTNL_ARF_UPDATE;
+ }
if (force_refresh_all)
set_refresh_frame_flags(refresh_frame_flags, true, true, true);
@@ -141,7 +149,7 @@ static INLINE int is_frame_droppable(
const ExtRefreshFrameFlagsInfo *const ext_refresh_frame_flags) {
// Droppable frame is only used by external refresh flags. VoD setting won't
// trigger its use case.
- if (svc->external_ref_frame_config)
+ if (svc->set_ref_frame_config)
return svc->non_reference_frame;
else if (ext_refresh_frame_flags->update_pending)
return !(ext_refresh_frame_flags->alt_ref_frame ||
@@ -168,7 +176,7 @@ static INLINE void update_frames_till_gf_update(AV1_COMP *cpi) {
static INLINE void update_gf_group_index(AV1_COMP *cpi) {
// Increment the gf group index ready for the next frame.
- ++cpi->gf_group.index;
+ ++cpi->gf_frame_index;
}
static void update_rc_counts(AV1_COMP *cpi) {
@@ -216,7 +224,7 @@ static int get_current_frame_ref_type(
// TODO(jingning): This table should be a lot simpler with the new
// ARF system in place. Keep frame_params for the time being as we are
// still evaluating a few design options.
- switch (cpi->gf_group.layer_depth[cpi->gf_group.index]) {
+ switch (cpi->ppi->gf_group.layer_depth[cpi->gf_frame_index]) {
case 0: return 0;
case 1: return 1;
case MAX_ARF_LAYERS:
@@ -238,16 +246,16 @@ static int choose_primary_ref_frame(
// In large scale case, always use Last frame's frame contexts.
// Note(yunqing): In other cases, primary_ref_frame is chosen based on
- // cpi->gf_group.layer_depth[cpi->gf_group.index], which also controls
+ // cpi->ppi->gf_group.layer_depth[cpi->gf_frame_index], which also controls
// frame bit allocation.
if (cm->tiles.large_scale) return (LAST_FRAME - LAST_FRAME);
- if (cpi->use_svc) return av1_svc_primary_ref_frame(cpi);
+ if (cpi->ppi->use_svc) return av1_svc_primary_ref_frame(cpi);
// Find the most recent reference frame with the same reference type as the
// current frame
const int current_ref_type = get_current_frame_ref_type(cpi, frame_params);
- int wanted_fb = cpi->fb_of_context_type[current_ref_type];
+ int wanted_fb = cpi->ppi->fb_of_context_type[current_ref_type];
int primary_ref_frame = PRIMARY_REF_NONE;
for (int ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) {
@@ -303,7 +311,7 @@ static void adjust_frame_rate(AV1_COMP *cpi, int64_t ts_start, int64_t ts_end) {
// Clear down mmx registers
aom_clear_system_state();
- if (cpi->use_svc && cpi->svc.spatial_layer_id > 0) {
+ if (cpi->ppi->use_svc && cpi->svc.spatial_layer_id > 0) {
cpi->framerate = cpi->svc.base_framerate;
av1_rc_update_framerate(cpi, cpi->common.width, cpi->common.height);
return;
@@ -372,17 +380,17 @@ static struct lookahead_entry *choose_frame_source(
struct lookahead_entry **last_source,
EncodeFrameParams *const frame_params) {
AV1_COMMON *const cm = &cpi->common;
- const GF_GROUP *const gf_group = &cpi->gf_group;
+ const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
struct lookahead_entry *source = NULL;
// Source index in lookahead buffer.
- int src_index = gf_group->arf_src_offset[gf_group->index];
+ int src_index = gf_group->arf_src_offset[cpi->gf_frame_index];
// TODO(Aasaipriya): Forced key frames need to be fixed when rc_mode != AOM_Q
if (src_index &&
(is_forced_keyframe_pending(cpi->ppi->lookahead, src_index,
cpi->compressor_stage) != -1) &&
- cpi->oxcf.rc_cfg.mode != AOM_Q) {
+ cpi->oxcf.rc_cfg.mode != AOM_Q && !is_stat_generation_stage(cpi)) {
src_index = 0;
*flush = 1;
}
@@ -395,7 +403,7 @@ static struct lookahead_entry *choose_frame_source(
// If this is a key frame and keyframe filtering is enabled with overlay,
// then do not pop.
if (*pop_lookahead && cpi->oxcf.kf_cfg.enable_keyframe_filtering > 1 &&
- gf_group->update_type[gf_group->index] == ARF_UPDATE &&
+ gf_group->update_type[cpi->gf_frame_index] == ARF_UPDATE &&
!is_stat_generation_stage(cpi) && cpi->ppi->lookahead) {
if (cpi->ppi->lookahead->read_ctxs[cpi->compressor_stage].sz &&
(*flush ||
@@ -404,16 +412,37 @@ static struct lookahead_entry *choose_frame_source(
*pop_lookahead = 0;
}
}
+
+ // LAP stage does not have ARFs or forward key-frames,
+ // hence, always pop_lookahead here.
+ if (is_stat_generation_stage(cpi)) {
+ *pop_lookahead = 1;
+ src_index = 0;
+ }
+
frame_params->show_frame = *pop_lookahead;
- if (*pop_lookahead) {
+
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ // Future frame in parallel encode set
+ if (gf_group->src_offset[cpi->gf_frame_index] != 0 &&
+ !is_stat_generation_stage(cpi) &&
+ 0 /*will be turned on along with frame parallel encode*/) {
+ src_index = gf_group->src_offset[cpi->gf_frame_index];
+ // Don't remove future frames from lookahead_ctx. They will be
+ // removed in their actual encode call.
+ *pop_lookahead = 0;
+ }
+#endif
+ if (frame_params->show_frame) {
// show frame, pop from buffer
// Get last frame source.
if (cm->current_frame.frame_number > 0) {
- *last_source =
- av1_lookahead_peek(cpi->ppi->lookahead, -1, cpi->compressor_stage);
+ *last_source = av1_lookahead_peek(cpi->ppi->lookahead, src_index - 1,
+ cpi->compressor_stage);
}
// Read in the source frame.
- source = av1_lookahead_peek(cpi->ppi->lookahead, 0, cpi->compressor_stage);
+ source = av1_lookahead_peek(cpi->ppi->lookahead, src_index,
+ cpi->compressor_stage);
} else {
// no show frames are arf frames
source = av1_lookahead_peek(cpi->ppi->lookahead, src_index,
@@ -677,7 +706,17 @@ void av1_update_ref_frame_map(AV1_COMP *cpi,
return;
}
-static int get_free_ref_map_index(const RefBufferStack *ref_buffer_stack) {
+static int get_free_ref_map_index(
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ RefFrameMapPair ref_map_pairs[REF_FRAMES],
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
+ const RefBufferStack *ref_buffer_stack) {
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ (void)ref_buffer_stack;
+ for (int idx = 0; idx < REF_FRAMES; ++idx)
+ if (ref_map_pairs[idx].disp_order == -1) return idx;
+ return INVALID_IDX;
+#else
for (int idx = 0; idx < REF_FRAMES; ++idx) {
int is_free = 1;
for (int i = 0; i < ref_buffer_stack->arf_stack_size; ++i) {
@@ -704,11 +743,61 @@ static int get_free_ref_map_index(const RefBufferStack *ref_buffer_stack) {
if (is_free) return idx;
}
return INVALID_IDX;
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
}
+#if CONFIG_FRAME_PARALLEL_ENCODE
+static int get_refresh_idx(RefFrameMapPair ref_frame_map_pairs[REF_FRAMES],
+ int update_arf, int cur_frame_disp) {
+ int arf_count = 0;
+ int oldest_arf_order = INT32_MAX;
+ int oldest_arf_idx = -1;
+
+ int oldest_frame_order = INT32_MAX;
+ int oldest_idx = -1;
+
+ for (int map_idx = 0; map_idx < REF_FRAMES; map_idx++) {
+ RefFrameMapPair ref_pair = ref_frame_map_pairs[map_idx];
+ if (ref_pair.disp_order == -1) continue;
+ const int frame_order = ref_pair.disp_order;
+ const int reference_frame_level = ref_pair.pyr_level;
+ // Do not refresh a future frame.
+ if (frame_order > cur_frame_disp) continue;
+
+ // Keep track of the oldest level 1 frame if the current frame is also level
+ // 1.
+ if (reference_frame_level == 1) {
+ // If there are more than 2 level 1 frames in the reference list,
+ // discard the oldest.
+ if (frame_order < oldest_arf_order) {
+ oldest_arf_order = frame_order;
+ oldest_arf_idx = map_idx;
+ }
+ arf_count++;
+ continue;
+ }
+
+ // Update the overall oldest reference frame.
+ if (frame_order < oldest_frame_order) {
+ oldest_frame_order = frame_order;
+ oldest_idx = map_idx;
+ }
+ }
+ if (update_arf && arf_count > 2) return oldest_arf_idx;
+ if (oldest_idx >= 0) return oldest_idx;
+ if (oldest_arf_idx >= 0) return oldest_arf_idx;
+ assert(0 && "No valid refresh index found");
+ return -1;
+}
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
+
int av1_get_refresh_frame_flags(const AV1_COMP *const cpi,
const EncodeFrameParams *const frame_params,
FRAME_UPDATE_TYPE frame_update_type,
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ int cur_disp_order,
+ RefFrameMapPair ref_frame_map_pairs[REF_FRAMES],
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
const RefBufferStack *const ref_buffer_stack) {
const AV1_COMMON *const cm = &cpi->common;
const ExtRefreshFrameFlagsInfo *const ext_refresh_frame_flags =
@@ -733,7 +822,7 @@ int av1_get_refresh_frame_flags(const AV1_COMP *const cpi,
int refresh_mask = 0;
if (ext_refresh_frame_flags->update_pending) {
- if (svc->external_ref_frame_config) {
+ if (svc->set_ref_frame_config) {
for (unsigned int i = 0; i < INTER_REFS_PER_FRAME; i++) {
int ref_frame_map_idx = svc->ref_idx[i];
refresh_mask |= svc->refresh[ref_frame_map_idx] << ref_frame_map_idx;
@@ -777,7 +866,30 @@ int av1_get_refresh_frame_flags(const AV1_COMP *const cpi,
}
// Search for the open slot to store the current frame.
- int free_fb_index = get_free_ref_map_index(ref_buffer_stack);
+ int free_fb_index = get_free_ref_map_index(
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ ref_frame_map_pairs,
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
+ ref_buffer_stack);
+
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ // No refresh necessary for these frame types.
+ if (frame_update_type == OVERLAY_UPDATE ||
+ frame_update_type == INTNL_OVERLAY_UPDATE)
+ return refresh_mask;
+
+ // If there is an open slot, refresh that one instead of replacing a
+ // reference.
+ if (free_fb_index != INVALID_IDX) {
+ refresh_mask = 1 << free_fb_index;
+ return refresh_mask;
+ }
+
+ const int update_arf = frame_update_type == ARF_UPDATE;
+ const int refresh_idx =
+ get_refresh_idx(ref_frame_map_pairs, update_arf, cur_disp_order);
+ return 1 << refresh_idx;
+#else
switch (frame_update_type) {
case KF_UPDATE:
case GF_UPDATE:
@@ -843,6 +955,7 @@ int av1_get_refresh_frame_flags(const AV1_COMP *const cpi,
}
return refresh_mask;
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
}
#if !CONFIG_REALTIME_ONLY
@@ -852,10 +965,10 @@ void setup_mi(AV1_COMP *const cpi, YV12_BUFFER_CONFIG *src) {
MACROBLOCK *const x = &cpi->td.mb;
MACROBLOCKD *const xd = &x->e_mbd;
- av1_setup_src_planes(x, src, 0, 0, num_planes, cm->seq_params.sb_size);
+ av1_setup_src_planes(x, src, 0, 0, num_planes, cm->seq_params->sb_size);
- av1_setup_block_planes(xd, cm->seq_params.subsampling_x,
- cm->seq_params.subsampling_y, num_planes);
+ av1_setup_block_planes(xd, cm->seq_params->subsampling_x,
+ cm->seq_params->subsampling_y, num_planes);
set_mi_offsets(&cm->mi_params, xd, 0, 0);
}
@@ -872,8 +985,9 @@ static int denoise_and_encode(AV1_COMP *const cpi, uint8_t *const dest,
#endif
const AV1EncoderConfig *const oxcf = &cpi->oxcf;
AV1_COMMON *const cm = &cpi->common;
- const GF_GROUP *const gf_group = &cpi->gf_group;
- FRAME_UPDATE_TYPE update_type = get_frame_update_type(&cpi->gf_group);
+ const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
+ FRAME_UPDATE_TYPE update_type =
+ get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
// Decide whether to apply temporal filtering to the source frame.
int apply_filtering = 0;
@@ -887,7 +1001,7 @@ static int denoise_and_encode(AV1_COMP *const cpi, uint8_t *const dest,
oxcf->algo_cfg.arnr_max_frames > 0 && oxcf->gf_cfg.lag_in_frames > 1;
if (allow_kf_filtering) {
const double y_noise_level = av1_estimate_noise_from_single_plane(
- frame_input->source, 0, cm->seq_params.bit_depth);
+ frame_input->source, 0, cm->seq_params->bit_depth);
apply_filtering = y_noise_level > 0;
} else {
apply_filtering = 0;
@@ -900,6 +1014,9 @@ static int denoise_and_encode(AV1_COMP *const cpi, uint8_t *const dest,
// ARF
apply_filtering = oxcf->algo_cfg.arnr_max_frames > 0;
}
+ if (is_stat_generation_stage(cpi)) {
+ apply_filtering = 0;
+ }
#if CONFIG_COLLECT_COMPONENT_TIMING
if (cpi->oxcf.pass == 2) start_timing(cpi, apply_filtering_time);
@@ -911,7 +1028,7 @@ static int denoise_and_encode(AV1_COMP *const cpi, uint8_t *const dest,
int show_existing_alt_ref = 0;
// TODO(bohanli): figure out why we need frame_type in cm here.
cm->current_frame.frame_type = frame_params->frame_type;
- int arf_src_index = gf_group->arf_src_offset[gf_group->index];
+ int arf_src_index = gf_group->arf_src_offset[cpi->gf_frame_index];
int is_forward_keyframe = 0;
if (!frame_params->show_frame && cpi->no_show_fwd_kf) {
// TODO(angiebird): Figure out why this condition yields forward keyframe.
@@ -922,8 +1039,8 @@ static int denoise_and_encode(AV1_COMP *const cpi, uint8_t *const dest,
av1_temporal_filter(cpi, arf_src_index, update_type,
is_forward_keyframe, &show_existing_alt_ref);
if (code_arf) {
- aom_extend_frame_borders(&cpi->alt_ref_buffer, av1_num_planes(cm));
- frame_input->source = &cpi->alt_ref_buffer;
+ aom_extend_frame_borders(&cpi->ppi->alt_ref_buffer, av1_num_planes(cm));
+ frame_input->source = &cpi->ppi->alt_ref_buffer;
aom_copy_metadata_to_frame_buffer(frame_input->source,
source_buffer->metadata);
}
@@ -944,12 +1061,12 @@ static int denoise_and_encode(AV1_COMP *const cpi, uint8_t *const dest,
// Don't do tpl for fwd key frames or fwd key frame overlays
allow_tpl = allow_tpl && !cpi->sf.tpl_sf.disable_filtered_key_tpl &&
!cpi->no_show_fwd_kf &&
- gf_group->update_type[gf_group->index] != OVERLAY_UPDATE;
+ gf_group->update_type[cpi->gf_frame_index] != OVERLAY_UPDATE;
} else {
// Do tpl after ARF is filtered, or if no ARF, at the second frame of GF
// group.
// TODO(bohanli): if no ARF, just do it at the first frame.
- int gf_index = gf_group->index;
+ int gf_index = cpi->gf_frame_index;
allow_tpl = allow_tpl && (gf_group->update_type[gf_index] == ARF_UPDATE ||
gf_group->update_type[gf_index] == GF_UPDATE);
if (allow_tpl) {
@@ -962,10 +1079,13 @@ static int denoise_and_encode(AV1_COMP *const cpi, uint8_t *const dest,
if (allow_tpl == 0) {
// Avoid the use of unintended TPL stats from previous GOP's results.
- if (gf_group->index == 0) av1_init_tpl_stats(&cpi->tpl_data);
+ if (cpi->gf_frame_index == 0 && !is_stat_generation_stage(cpi))
+ av1_init_tpl_stats(&cpi->ppi->tpl_data);
} else {
- if (!cpi->tpl_data.skip_tpl_setup_stats)
+ if (!cpi->skip_tpl_setup_stats) {
+ av1_tpl_preload_rc_estimate(cpi, frame_params);
av1_tpl_setup_stats(cpi, 0, frame_params, frame_input);
+ }
}
if (av1_encode(cpi, dest, frame_input, frame_params, frame_results) !=
@@ -1003,12 +1123,262 @@ static INLINE int find_unused_ref_frame(const int *used_ref_frames,
return INVALID_IDX;
}
-void av1_get_ref_frames(AV1_COMP *const cpi, RefBufferStack *ref_buffer_stack) {
+#if CONFIG_FRAME_PARALLEL_ENCODE
+/*!\cond */
+// Struct to keep track of relevant reference frame data.
+typedef struct {
+ int map_idx;
+ int disp_order;
+ int pyr_level;
+ int used;
+} RefBufMapData;
+/*!\endcond */
+
+// Comparison function to sort reference frames in ascending display order.
+static int compare_map_idx_pair_asc(const void *a, const void *b) {
+ if (((RefBufMapData *)a)->disp_order == ((RefBufMapData *)b)->disp_order) {
+ return 0;
+ } else if (((const RefBufMapData *)a)->disp_order >
+ ((const RefBufMapData *)b)->disp_order) {
+ return 1;
+ } else {
+ return -1;
+ }
+}
+
+// Checks to see if a particular reference frame is already in the reference
+// frame map.
+static int is_in_ref_map(RefBufMapData *map, int disp_order, int n_frames) {
+ for (int i = 0; i < n_frames; i++) {
+ if (disp_order == map[i].disp_order) return 1;
+ }
+ return 0;
+}
+
+// Add a reference buffer index to a named reference slot.
+static void add_ref_to_slot(RefBufMapData *ref, int *const remapped_ref_idx,
+ int frame) {
+ remapped_ref_idx[frame - LAST_FRAME] = ref->map_idx;
+ ref->used = 1;
+}
+
+// Threshold dictating when we are allowed to start considering
+// leaving lowest level frames unmapped.
+#define LOW_LEVEL_FRAMES_TR 5
+
+// Find which reference buffer should be left out of the named mapping.
+// This is because there are 8 reference buffers and only 7 named slots.
+static void set_unmapped_ref(RefBufMapData *buffer_map, int n_bufs,
+ int n_min_level_refs, int min_level,
+ int cur_frame_disp) {
+ int max_dist = 0;
+ int unmapped_idx = -1;
+ if (n_bufs <= ALTREF_FRAME) return;
+ for (int i = 0; i < n_bufs; i++) {
+ if (buffer_map[i].used) continue;
+ if (buffer_map[i].pyr_level != min_level ||
+ n_min_level_refs >= LOW_LEVEL_FRAMES_TR) {
+ int dist = abs(cur_frame_disp - buffer_map[i].disp_order);
+ if (dist > max_dist) {
+ max_dist = dist;
+ unmapped_idx = i;
+ }
+ }
+ }
+ assert(unmapped_idx >= 0 && "Unmapped reference not found");
+ buffer_map[unmapped_idx].used = 1;
+}
+
+static void get_ref_frames(AV1_COMP *const cpi,
+ RefFrameMapPair ref_frame_map_pairs[REF_FRAMES],
+ int cur_frame_disp) {
AV1_COMMON *cm = &cpi->common;
int *const remapped_ref_idx = cm->remapped_ref_idx;
- int *const arf_stack = ref_buffer_stack->arf_stack;
- int *const lst_stack = ref_buffer_stack->lst_stack;
- int *const gld_stack = ref_buffer_stack->gld_stack;
+
+ int buf_map_idx = 0;
+
+ // Initialize reference frame mappings.
+ for (int i = 0; i < REF_FRAMES; ++i) remapped_ref_idx[i] = INVALID_IDX;
+
+ RefBufMapData buffer_map[REF_FRAMES];
+ int n_bufs = 0;
+ memset(buffer_map, 0, REF_FRAMES * sizeof(buffer_map[0]));
+ int min_level = MAX_ARF_LAYERS;
+ int max_level = 0;
+
+ // Go through current reference buffers and store display order, pyr level,
+ // and map index.
+ for (int map_idx = 0; map_idx < REF_FRAMES; map_idx++) {
+ // Get reference frame buffer.
+ RefFrameMapPair ref_pair = ref_frame_map_pairs[map_idx];
+ if (ref_pair.disp_order == -1) continue;
+ const int frame_order = ref_pair.disp_order;
+ // Avoid duplicates.
+ if (is_in_ref_map(buffer_map, frame_order, n_bufs)) continue;
+ const int reference_frame_level = ref_pair.pyr_level;
+
+ // Keep track of the lowest and highest levels that currently exist.
+ if (reference_frame_level < min_level) min_level = reference_frame_level;
+ if (reference_frame_level > max_level) max_level = reference_frame_level;
+
+ buffer_map[n_bufs].map_idx = map_idx;
+ buffer_map[n_bufs].disp_order = frame_order;
+ buffer_map[n_bufs].pyr_level = reference_frame_level;
+ buffer_map[n_bufs].used = 0;
+ n_bufs++;
+ }
+
+ // Sort frames in ascending display order.
+ qsort(buffer_map, n_bufs, sizeof(buffer_map[0]), compare_map_idx_pair_asc);
+
+ int n_min_level_refs = 0;
+ int n_past_high_level = 0;
+ int closest_past_ref = -1;
+ int golden_idx = -1;
+ int altref_idx = -1;
+
+ // Find the GOLDEN_FRAME and BWDREF_FRAME.
+ // Also collect various stats about the reference frames for the remaining
+ // mappings.
+ for (int i = n_bufs - 1; i >= 0; i--) {
+ if (buffer_map[i].pyr_level == min_level) {
+ // Keep track of the number of lowest level frames.
+ n_min_level_refs++;
+ if (buffer_map[i].disp_order < cur_frame_disp && golden_idx == -1 &&
+ remapped_ref_idx[GOLDEN_FRAME - LAST_FRAME] == INVALID_IDX) {
+ // Save index for GOLDEN.
+ golden_idx = i;
+ } else if (buffer_map[i].disp_order > cur_frame_disp &&
+ altref_idx == -1 &&
+ remapped_ref_idx[ALTREF_FRAME - LAST_FRAME] == INVALID_IDX) {
+ // Save index for ALTREF.
+ altref_idx = i;
+ }
+ } else if (buffer_map[i].disp_order == cur_frame_disp) {
+ // Map the BWDREF_FRAME if this is the show_existing_frame.
+ add_ref_to_slot(&buffer_map[i], remapped_ref_idx, BWDREF_FRAME);
+ }
+
+ // Keep track of the number of past frames that are not at the lowest level.
+ if (buffer_map[i].disp_order < cur_frame_disp &&
+ buffer_map[i].pyr_level != min_level)
+ n_past_high_level++;
+
+ // Keep track of where the frames change from being past frames to future
+ // frames.
+ if (buffer_map[i].disp_order < cur_frame_disp && closest_past_ref < 0)
+ closest_past_ref = i;
+ }
+
+ // Do not map GOLDEN and ALTREF based on their pyramid level if all reference
+ // frames have the same level.
+ if (n_min_level_refs <= n_bufs) {
+ // Map the GOLDEN_FRAME.
+ if (golden_idx > -1)
+ add_ref_to_slot(&buffer_map[golden_idx], remapped_ref_idx, GOLDEN_FRAME);
+ // Map the ALTREF_FRAME.
+ if (altref_idx > -1)
+ add_ref_to_slot(&buffer_map[altref_idx], remapped_ref_idx, ALTREF_FRAME);
+ }
+
+ // Find the buffer to be excluded from the mapping.
+ set_unmapped_ref(buffer_map, n_bufs, n_min_level_refs, min_level,
+ cur_frame_disp);
+
+ // Place past frames in LAST_FRAME, LAST2_FRAME, and LAST3_FRAME.
+ for (int frame = LAST_FRAME; frame < GOLDEN_FRAME; frame++) {
+ // Continue if the current ref slot is already full.
+ if (remapped_ref_idx[frame - LAST_FRAME] != INVALID_IDX) continue;
+ // Find the next unmapped reference buffer
+ // in decreasing output order relative to current picture.
+ int next_buf_max = 0;
+ int next_disp_order = INT_MIN;
+ for (buf_map_idx = n_bufs - 1; buf_map_idx >= 0; buf_map_idx--) {
+ if (!buffer_map[buf_map_idx].used &&
+ buffer_map[buf_map_idx].disp_order < cur_frame_disp &&
+ buffer_map[buf_map_idx].disp_order > next_disp_order) {
+ next_disp_order = buffer_map[buf_map_idx].disp_order;
+ next_buf_max = buf_map_idx;
+ }
+ }
+ buf_map_idx = next_buf_max;
+ if (buf_map_idx < 0) break;
+ if (buffer_map[buf_map_idx].used) break;
+ add_ref_to_slot(&buffer_map[buf_map_idx], remapped_ref_idx, frame);
+ }
+
+ // Place future frames (if there are any) in BWDREF_FRAME and ALTREF2_FRAME.
+ for (int frame = BWDREF_FRAME; frame < REF_FRAMES; frame++) {
+ // Continue if the current ref slot is already full.
+ if (remapped_ref_idx[frame - LAST_FRAME] != INVALID_IDX) continue;
+ // Find the next unmapped reference buffer
+ // in increasing output order relative to current picture.
+ int next_buf_max = 0;
+ int next_disp_order = INT_MAX;
+ for (buf_map_idx = n_bufs - 1; buf_map_idx >= 0; buf_map_idx--) {
+ if (!buffer_map[buf_map_idx].used &&
+ buffer_map[buf_map_idx].disp_order > cur_frame_disp &&
+ buffer_map[buf_map_idx].disp_order < next_disp_order) {
+ next_disp_order = buffer_map[buf_map_idx].disp_order;
+ next_buf_max = buf_map_idx;
+ }
+ }
+ buf_map_idx = next_buf_max;
+ if (buf_map_idx < 0) break;
+ if (buffer_map[buf_map_idx].used) break;
+ add_ref_to_slot(&buffer_map[buf_map_idx], remapped_ref_idx, frame);
+ }
+
+ // Place remaining past frames.
+ buf_map_idx = closest_past_ref;
+ for (int frame = LAST_FRAME; frame < REF_FRAMES; frame++) {
+ // Continue if the current ref slot is already full.
+ if (remapped_ref_idx[frame - LAST_FRAME] != INVALID_IDX) continue;
+ // Find the next unmapped reference buffer.
+ for (; buf_map_idx >= 0; buf_map_idx--) {
+ if (!buffer_map[buf_map_idx].used) break;
+ }
+ if (buf_map_idx < 0) break;
+ if (buffer_map[buf_map_idx].used) break;
+ add_ref_to_slot(&buffer_map[buf_map_idx], remapped_ref_idx, frame);
+ }
+
+ // Place remaining future frames.
+ buf_map_idx = n_bufs - 1;
+ for (int frame = ALTREF_FRAME; frame >= LAST_FRAME; frame--) {
+ // Continue if the current ref slot is already full.
+ if (remapped_ref_idx[frame - LAST_FRAME] != INVALID_IDX) continue;
+ // Find the next unmapped reference buffer.
+ for (; buf_map_idx > closest_past_ref; buf_map_idx--) {
+ if (!buffer_map[buf_map_idx].used) break;
+ }
+ if (buf_map_idx < 0) break;
+ if (buffer_map[buf_map_idx].used) break;
+ add_ref_to_slot(&buffer_map[buf_map_idx], remapped_ref_idx, frame);
+ }
+
+ // Fill any slots that are empty (should only happen for the first 7 frames).
+ for (int i = 0; i < REF_FRAMES; ++i)
+ if (remapped_ref_idx[i] == INVALID_IDX) remapped_ref_idx[i] = 0;
+}
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
+
+void av1_get_ref_frames(const RefBufferStack *ref_buffer_stack,
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ AV1_COMP *cpi,
+ RefFrameMapPair ref_frame_map_pairs[REF_FRAMES],
+ int cur_frame_disp,
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
+ int remapped_ref_idx[REF_FRAMES]) {
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ (void)ref_buffer_stack;
+ (void)remapped_ref_idx;
+ get_ref_frames(cpi, ref_frame_map_pairs, cur_frame_disp);
+ return;
+#else
+ const int *const arf_stack = ref_buffer_stack->arf_stack;
+ const int *const lst_stack = ref_buffer_stack->lst_stack;
+ const int *const gld_stack = ref_buffer_stack->gld_stack;
const int arf_stack_size = ref_buffer_stack->arf_stack_size;
const int lst_stack_size = ref_buffer_stack->lst_stack_size;
const int gld_stack_size = ref_buffer_stack->gld_stack_size;
@@ -1079,6 +1449,7 @@ void av1_get_ref_frames(AV1_COMP *const cpi, RefBufferStack *ref_buffer_stack) {
remapped_ref_idx[idx] = ref_buffer_stack->gld_stack[0];
}
}
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
}
int av1_encode_strategy(AV1_COMP *const cpi, size_t *const size,
@@ -1088,7 +1459,7 @@ int av1_encode_strategy(AV1_COMP *const cpi, size_t *const size,
int flush) {
AV1EncoderConfig *const oxcf = &cpi->oxcf;
AV1_COMMON *const cm = &cpi->common;
- GF_GROUP *gf_group = &cpi->gf_group;
+ GF_GROUP *gf_group = &cpi->ppi->gf_group;
ExternalFlags *const ext_flags = &cpi->ext_flags;
GFConfig *const gf_cfg = &oxcf->gf_cfg;
@@ -1112,9 +1483,9 @@ int av1_encode_strategy(AV1_COMP *const cpi, size_t *const size,
if (!av1_lookahead_peek(cpi->ppi->lookahead, 0, cpi->compressor_stage)) {
#if !CONFIG_REALTIME_ONLY
- if (flush && oxcf->pass == 1 && !cpi->twopass.first_pass_done) {
+ if (flush && oxcf->pass == 1 && !cpi->ppi->twopass.first_pass_done) {
av1_end_first_pass(cpi); /* get last stats packet */
- cpi->twopass.first_pass_done = 1;
+ cpi->ppi->twopass.first_pass_done = 1;
}
#endif
return -1;
@@ -1128,11 +1499,9 @@ int av1_encode_strategy(AV1_COMP *const cpi, size_t *const size,
AOMMIN(gf_cfg->gf_min_pyr_height, gf_cfg->gf_max_pyr_height);
}
- cpi->tpl_data.skip_tpl_setup_stats = 0;
+ cpi->skip_tpl_setup_stats = 0;
#if !CONFIG_REALTIME_ONLY
- const int use_one_pass_rt_params = has_no_stats_stage(cpi) &&
- oxcf->mode == REALTIME &&
- gf_cfg->lag_in_frames == 0;
+ const int use_one_pass_rt_params = is_one_pass_rt_params(cpi);
if (!use_one_pass_rt_params && !is_stat_generation_stage(cpi)) {
#if CONFIG_COLLECT_COMPONENT_TIMING
start_timing(cpi, av1_get_second_pass_params_time);
@@ -1148,19 +1517,19 @@ int av1_encode_strategy(AV1_COMP *const cpi, size_t *const size,
// If this is a forward keyframe, mark as a show_existing_frame
// TODO(bohanli): find a consistent condition for fwd keyframes
if (oxcf->kf_cfg.fwd_kf_enabled &&
- gf_group->update_type[gf_group->index] == OVERLAY_UPDATE &&
+ gf_group->update_type[cpi->gf_frame_index] == OVERLAY_UPDATE &&
cpi->rc.frames_to_key == 0) {
frame_params.show_existing_frame = 1;
} else {
frame_params.show_existing_frame =
(cpi->show_existing_alt_ref &&
- gf_group->update_type[gf_group->index] == OVERLAY_UPDATE) ||
- gf_group->update_type[gf_group->index] == INTNL_OVERLAY_UPDATE;
+ gf_group->update_type[cpi->gf_frame_index] == OVERLAY_UPDATE) ||
+ gf_group->update_type[cpi->gf_frame_index] == INTNL_OVERLAY_UPDATE;
}
frame_params.show_existing_frame &= allow_show_existing(cpi, *frame_flags);
// Reset show_existing_alt_ref decision to 0 after it is used.
- if (gf_group->update_type[gf_group->index] == OVERLAY_UPDATE) {
+ if (gf_group->update_type[cpi->gf_frame_index] == OVERLAY_UPDATE) {
cpi->show_existing_alt_ref = 0;
}
} else {
@@ -1181,13 +1550,20 @@ int av1_encode_strategy(AV1_COMP *const cpi, size_t *const size,
if (source == NULL) { // If no source was found, we can't encode a frame.
#if !CONFIG_REALTIME_ONLY
- if (flush && oxcf->pass == 1 && !cpi->twopass.first_pass_done) {
+ if (flush && oxcf->pass == 1 && !cpi->ppi->twopass.first_pass_done) {
av1_end_first_pass(cpi); /* get last stats packet */
- cpi->twopass.first_pass_done = 1;
+ cpi->ppi->twopass.first_pass_done = 1;
}
#endif
return -1;
}
+
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ // reset src_offset to allow actual encode call for this frame to get its
+ // source.
+ gf_group->src_offset[cpi->gf_frame_index] = 0;
+#endif
+
// Source may be changed if temporal filtered later.
frame_input.source = &source->img;
frame_input.last_source = last_source != NULL ? &last_source->img : NULL;
@@ -1216,7 +1592,7 @@ int av1_encode_strategy(AV1_COMP *const cpi, size_t *const size,
&cm->film_grain_params);
} else {
cm->cur_frame->film_grain_params_present =
- cm->seq_params.film_grain_params_present;
+ cm->seq_params->film_grain_params_present;
}
// only one operating point supported now
const int64_t pts64 = ticks_to_timebase_units(timestamp_ratio, *time_stamp);
@@ -1226,19 +1602,20 @@ int av1_encode_strategy(AV1_COMP *const cpi, size_t *const size,
#if CONFIG_REALTIME_ONLY
av1_get_one_pass_rt_params(cpi, &frame_params, *frame_flags);
- if (cpi->oxcf.speed >= 5 && cm->number_spatial_layers == 1 &&
- cm->number_temporal_layers == 1)
- av1_set_reference_structure_one_pass_rt(cpi, gf_group->index == 0);
+ if (cpi->oxcf.speed >= 5 && cpi->ppi->number_spatial_layers == 1 &&
+ cpi->ppi->number_temporal_layers == 1)
+ av1_set_reference_structure_one_pass_rt(cpi, cpi->gf_frame_index == 0);
#else
if (use_one_pass_rt_params) {
av1_get_one_pass_rt_params(cpi, &frame_params, *frame_flags);
- if (cpi->oxcf.speed >= 5 && cm->number_spatial_layers == 1 &&
- cm->number_temporal_layers == 1)
- av1_set_reference_structure_one_pass_rt(cpi, gf_group->index == 0);
+ if (cpi->oxcf.speed >= 5 && cpi->ppi->number_spatial_layers == 1 &&
+ cpi->ppi->number_temporal_layers == 1)
+ av1_set_reference_structure_one_pass_rt(cpi, cpi->gf_frame_index == 0);
}
#endif
- FRAME_UPDATE_TYPE frame_update_type = get_frame_update_type(gf_group);
+ FRAME_UPDATE_TYPE frame_update_type =
+ get_frame_update_type(gf_group, cpi->gf_frame_index);
if (frame_params.show_existing_frame &&
frame_params.frame_type != KEY_FRAME) {
@@ -1302,9 +1679,21 @@ int av1_encode_strategy(AV1_COMP *const cpi, size_t *const size,
const RefCntBuffer *ref_frames[INTER_REFS_PER_FRAME];
const YV12_BUFFER_CONFIG *ref_frame_buf[INTER_REFS_PER_FRAME];
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ RefFrameMapPair ref_frame_map_pairs[REF_FRAMES];
+ init_ref_map_pair(cpi, ref_frame_map_pairs);
+ const int order_offset = gf_group->arf_src_offset[cpi->gf_frame_index];
+ const int cur_frame_disp =
+ cpi->common.current_frame.frame_number + order_offset;
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
+
if (!ext_flags->refresh_frame.update_pending) {
- av1_get_ref_frames(cpi, &cpi->ref_buffer_stack);
- } else if (cpi->svc.external_ref_frame_config) {
+ av1_get_ref_frames(&cpi->ref_buffer_stack,
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ cpi, ref_frame_map_pairs, cur_frame_disp,
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
+ cm->remapped_ref_idx);
+ } else if (cpi->svc.set_ref_frame_config) {
for (unsigned int i = 0; i < INTER_REFS_PER_FRAME; i++)
cm->remapped_ref_idx[i] = cpi->svc.ref_idx[i];
}
@@ -1319,19 +1708,54 @@ int av1_encode_strategy(AV1_COMP *const cpi, size_t *const size,
frame_params.ref_frame_flags = get_ref_frame_flags(
&cpi->sf, ref_frame_buf, ext_flags->ref_frame_flags);
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ // Set primary_ref_frame of non-reference frames as PRIMARY_REF_NONE.
+ if (cpi->ppi->gf_group.is_frame_non_ref[cpi->gf_frame_index]) {
+ frame_params.primary_ref_frame = PRIMARY_REF_NONE;
+ } else {
+ frame_params.primary_ref_frame =
+ choose_primary_ref_frame(cpi, &frame_params);
+ }
+#else
frame_params.primary_ref_frame =
choose_primary_ref_frame(cpi, &frame_params);
- frame_params.order_offset = gf_group->arf_src_offset[gf_group->index];
-
- frame_params.refresh_frame_flags = av1_get_refresh_frame_flags(
- cpi, &frame_params, frame_update_type, &cpi->ref_buffer_stack);
-
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
+
+ frame_params.order_offset = gf_group->arf_src_offset[cpi->gf_frame_index];
+
+ frame_params.refresh_frame_flags =
+ av1_get_refresh_frame_flags(cpi, &frame_params, frame_update_type,
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ cur_frame_disp, ref_frame_map_pairs,
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
+ &cpi->ref_buffer_stack);
+
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ // Make the frames marked as is_frame_non_ref to non-reference frames.
+ if (gf_group->is_frame_non_ref[cpi->gf_frame_index])
+ frame_params.refresh_frame_flags = 0;
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
+
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ frame_params.existing_fb_idx_to_show = INVALID_IDX;
+ // Find the frame buffer to show based on display order.
+ if (frame_params.show_existing_frame) {
+ for (int frame = 0; frame < REF_FRAMES; frame++) {
+ const RefCntBuffer *const buf = cm->ref_frame_map[frame];
+ if (buf == NULL) continue;
+ const int frame_order = (int)buf->display_order_hint;
+ if (frame_order == cur_frame_disp)
+ frame_params.existing_fb_idx_to_show = frame;
+ }
+ }
+#else
frame_params.existing_fb_idx_to_show =
frame_params.show_existing_frame
? (frame_update_type == INTNL_OVERLAY_UPDATE
? get_ref_frame_map_idx(cm, BWDREF_FRAME)
: get_ref_frame_map_idx(cm, ALTREF_FRAME))
: INVALID_IDX;
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
}
// The way frame_params->remapped_ref_idx is setup is a placeholder.
@@ -1351,6 +1775,12 @@ int av1_encode_strategy(AV1_COMP *const cpi, size_t *const size,
cm->quant_params.using_qmatrix = oxcf->q_cfg.using_qm;
}
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ // Copy previous frame's largest MV component from ppi to cpi.
+ if (!is_stat_generation_stage(cpi) && cpi->do_frame_data_update)
+ cpi->mv_search_params.max_mv_magnitude = cpi->ppi->max_mv_magnitude;
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
+
#if CONFIG_REALTIME_ONLY
if (av1_encode(cpi, dest, &frame_input, &frame_params, &frame_results) !=
AOM_CODEC_OK) {
@@ -1369,10 +1799,17 @@ int av1_encode_strategy(AV1_COMP *const cpi, size_t *const size,
}
#endif // CONFIG_REALTIME_ONLY
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ // Store current frame's largest MV component in ppi.
+ if (!is_stat_generation_stage(cpi) && cpi->do_frame_data_update)
+ cpi->ppi->max_mv_magnitude = cpi->mv_search_params.max_mv_magnitude;
+#endif
+
if (!is_stat_generation_stage(cpi)) {
// First pass doesn't modify reference buffer assignment or produce frame
// flags
update_frame_flags(&cpi->common, &cpi->refresh_frame, frame_flags);
+#if !CONFIG_FRAME_PARALLEL_ENCODE
if (!ext_flags->refresh_frame.update_pending) {
int ref_map_index =
av1_get_refresh_ref_frame_map(cm->current_frame.refresh_frame_flags);
@@ -1380,6 +1817,7 @@ int av1_encode_strategy(AV1_COMP *const cpi, size_t *const size,
cm->show_existing_frame, ref_map_index,
&cpi->ref_buffer_stack);
}
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
}
#if !CONFIG_REALTIME_ONLY
@@ -1408,7 +1846,7 @@ int av1_encode_strategy(AV1_COMP *const cpi, size_t *const size,
}
if (!is_stat_generation_stage(cpi)) {
- update_fb_of_context_type(cpi, &frame_params, cpi->fb_of_context_type);
+ update_fb_of_context_type(cpi, &frame_params, cpi->ppi->fb_of_context_type);
set_additional_frame_flags(cm, frame_flags);
update_rc_counts(cpi);
}
@@ -1421,7 +1859,7 @@ int av1_encode_strategy(AV1_COMP *const cpi, size_t *const size,
cpi->droppable = is_frame_droppable(&cpi->svc, &ext_flags->refresh_frame);
}
- if (cpi->use_svc) av1_save_layer_context(cpi);
+ if (cpi->ppi->use_svc) av1_save_layer_context(cpi);
return AOM_CODEC_OK;
}
diff --git a/third_party/libaom/source/libaom/av1/encoder/encode_strategy.h b/third_party/libaom/source/libaom/av1/encoder/encode_strategy.h
index 351e8a1328..c7b75c8430 100644
--- a/third_party/libaom/source/libaom/av1/encoder/encode_strategy.h
+++ b/third_party/libaom/source/libaom/av1/encoder/encode_strategy.h
@@ -69,6 +69,10 @@ void av1_configure_buffer_updates(
int av1_get_refresh_frame_flags(const AV1_COMP *const cpi,
const EncodeFrameParams *const frame_params,
FRAME_UPDATE_TYPE frame_update_type,
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ int cur_disp_order,
+ RefFrameMapPair ref_frame_map_pairs[REF_FRAMES],
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
const RefBufferStack *const ref_buffer_stack);
int av1_get_refresh_ref_frame_map(int refresh_frame_flags);
@@ -79,7 +83,25 @@ void av1_update_ref_frame_map(AV1_COMP *cpi,
int ref_map_index,
RefBufferStack *ref_buffer_stack);
-void av1_get_ref_frames(AV1_COMP *const cpi, RefBufferStack *ref_buffer_stack);
+/*!\brief Obtain indices of reference frames from reference frame buffer stacks
+ *
+ * \callgraph
+ * \callergraph
+ *
+ * \param[in] ref_buffer_stack Data structure for reference frame buffer
+ * stacks.
+ * \param[out] remapped_ref_idx An array for storing indices of reference
+ * frames. The index is used to retrieve a
+ * reference frame buffer from ref_frame_map
+ * in AV1Common.
+ */
+void av1_get_ref_frames(const RefBufferStack *ref_buffer_stack,
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ AV1_COMP *cpi,
+ RefFrameMapPair ref_frame_map_pairs[REF_FRAMES],
+ int cur_frame_disp,
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
+ int remapped_ref_idx[REF_FRAMES]);
int is_forced_keyframe_pending(struct lookahead_ctx *lookahead,
const int up_to_index,
diff --git a/third_party/libaom/source/libaom/av1/encoder/encodeframe.c b/third_party/libaom/source/libaom/av1/encoder/encodeframe.c
index 24d3488245..b3f836b481 100644
--- a/third_party/libaom/source/libaom/av1/encoder/encodeframe.c
+++ b/third_party/libaom/source/libaom/av1/encoder/encodeframe.c
@@ -55,6 +55,7 @@
#include "av1/encoder/encodetxb.h"
#include "av1/encoder/ethread.h"
#include "av1/encoder/extend.h"
+#include "av1/encoder/intra_mode_search_utils.h"
#include "av1/encoder/ml.h"
#include "av1/encoder/motion_search_facade.h"
#include "av1/encoder/partition_strategy.h"
@@ -150,7 +151,7 @@ unsigned int av1_get_sby_perpixel_variance(const AV1_COMP *cpi,
BLOCK_SIZE bs) {
unsigned int sse;
const unsigned int var =
- cpi->fn_ptr[bs].vf(ref->buf, ref->stride, AV1_VAR_OFFS, 0, &sse);
+ cpi->ppi->fn_ptr[bs].vf(ref->buf, ref->stride, AV1_VAR_OFFS, 0, &sse);
return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
}
@@ -163,9 +164,9 @@ unsigned int av1_high_get_sby_perpixel_variance(const AV1_COMP *cpi,
const uint16_t *high_var_offs[3] = { AV1_HIGH_VAR_OFFS_8,
AV1_HIGH_VAR_OFFS_10,
AV1_HIGH_VAR_OFFS_12 };
- var =
- cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
- CONVERT_TO_BYTEPTR(high_var_offs[off_index]), 0, &sse);
+ var = cpi->ppi->fn_ptr[bs].vf(ref->buf, ref->stride,
+ CONVERT_TO_BYTEPTR(high_var_offs[off_index]), 0,
+ &sse);
return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
}
@@ -181,7 +182,8 @@ static unsigned int get_sby_perpixel_diff_variance(const AV1_COMP *const cpi,
assert(last != NULL);
last_y =
&last->y_buffer[mi_row * MI_SIZE * last->y_stride + mi_col * MI_SIZE];
- var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, last_y, last->y_stride, &sse);
+ var = cpi->ppi->fn_ptr[bs].vf(ref->buf, ref->stride, last_y, last->y_stride,
+ &sse);
return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
}
@@ -242,7 +244,7 @@ static AOM_INLINE void setup_delta_q(AV1_COMP *const cpi, ThreadData *td,
const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
assert(delta_q_info->delta_q_present_flag);
- const BLOCK_SIZE sb_size = cm->seq_params.sb_size;
+ const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
// Delta-q modulation based on variance
av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, sb_size);
@@ -307,7 +309,7 @@ static AOM_INLINE void setup_delta_q(AV1_COMP *const cpi, ThreadData *td,
(int8_t)clamp(delta_lf_from_base, -MAX_LOOP_FILTER, MAX_LOOP_FILTER);
const int frame_lf_count =
av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
- const int mib_size = cm->seq_params.mib_size;
+ const int mib_size = cm->seq_params->mib_size;
// pre-set the delta lf for loop filter. Note that this value is set
// before mi is assigned for each block in current superblock
@@ -326,22 +328,23 @@ static AOM_INLINE void setup_delta_q(AV1_COMP *const cpi, ThreadData *td,
static void init_ref_frame_space(AV1_COMP *cpi, ThreadData *td, int mi_row,
int mi_col) {
const AV1_COMMON *cm = &cpi->common;
- const GF_GROUP *const gf_group = &cpi->gf_group;
+ const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
const CommonModeInfoParams *const mi_params = &cm->mi_params;
MACROBLOCK *x = &td->mb;
- const int frame_idx = cpi->gf_group.index;
- TplParams *const tpl_data = &cpi->tpl_data;
- TplDepFrame *tpl_frame = &tpl_data->tpl_frame[frame_idx];
+ const int frame_idx = cpi->gf_frame_index;
+ TplParams *const tpl_data = &cpi->ppi->tpl_data;
const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2;
av1_zero(x->tpl_keep_ref_frame);
- if (tpl_frame->is_valid == 0) return;
- if (!is_frame_tpl_eligible(gf_group, gf_group->index)) return;
if (frame_idx >= MAX_TPL_FRAME_IDX) return;
+ TplDepFrame *tpl_frame = &tpl_data->tpl_frame[frame_idx];
+ if (!tpl_frame->is_valid) return;
+ if (!is_frame_tpl_eligible(gf_group, cpi->gf_frame_index)) return;
if (cpi->oxcf.q_cfg.aq_mode != NO_AQ) return;
- const int is_overlay = cpi->gf_group.update_type[frame_idx] == OVERLAY_UPDATE;
+ const int is_overlay =
+ cpi->ppi->gf_group.update_type[frame_idx] == OVERLAY_UPDATE;
if (is_overlay) {
memset(x->tpl_keep_ref_frame, 1, sizeof(x->tpl_keep_ref_frame));
return;
@@ -351,7 +354,7 @@ static void init_ref_frame_space(AV1_COMP *cpi, ThreadData *td, int mi_row,
const int tpl_stride = tpl_frame->stride;
int64_t inter_cost[INTER_REFS_PER_FRAME] = { 0 };
const int step = 1 << block_mis_log2;
- const BLOCK_SIZE sb_size = cm->seq_params.sb_size;
+ const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
const int mi_row_end =
AOMMIN(mi_size_high[sb_size] + mi_row, mi_params->mi_rows);
@@ -426,15 +429,15 @@ static void init_ref_frame_space(AV1_COMP *cpi, ThreadData *td, int mi_row,
static AOM_INLINE void adjust_rdmult_tpl_model(AV1_COMP *cpi, MACROBLOCK *x,
int mi_row, int mi_col) {
- const BLOCK_SIZE sb_size = cpi->common.seq_params.sb_size;
+ const BLOCK_SIZE sb_size = cpi->common.seq_params->sb_size;
const int orig_rdmult = cpi->rd.RDMULT;
- assert(IMPLIES(cpi->gf_group.size > 0,
- cpi->gf_group.index < cpi->gf_group.size));
- const int gf_group_index = cpi->gf_group.index;
+ assert(IMPLIES(cpi->ppi->gf_group.size > 0,
+ cpi->gf_frame_index < cpi->ppi->gf_group.size));
+ const int gf_group_index = cpi->gf_frame_index;
if (cpi->oxcf.algo_cfg.enable_tpl_model && cpi->oxcf.q_cfg.aq_mode == NO_AQ &&
cpi->oxcf.q_cfg.deltaq_mode == NO_DELTA_Q && gf_group_index > 0 &&
- cpi->gf_group.update_type[gf_group_index] == ARF_UPDATE) {
+ cpi->ppi->gf_group.update_type[gf_group_index] == ARF_UPDATE) {
const int dr =
av1_get_rdmult_delta(cpi, sb_size, mi_row, mi_col, orig_rdmult);
x->rdmult = dr;
@@ -451,7 +454,7 @@ static void get_estimated_pred(AV1_COMP *cpi, const TileInfo *const tile,
MACROBLOCKD *xd = &x->e_mbd;
// TODO(kyslov) Extend to 128x128
- assert(cm->seq_params.sb_size == BLOCK_64X64);
+ assert(cm->seq_params->sb_size == BLOCK_64X64);
av1_set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64);
@@ -512,7 +515,7 @@ static AOM_INLINE void encode_nonrd_sb(AV1_COMP *cpi, ThreadData *td,
const TileInfo *const tile_info = &tile_data->tile_info;
MB_MODE_INFO **mi = cm->mi_params.mi_grid_base +
get_mi_grid_idx(&cm->mi_params, mi_row, mi_col);
- const BLOCK_SIZE sb_size = cm->seq_params.sb_size;
+ const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
// Grade the temporal variation of the sb, the grade will be used to decide
// fast mode search strategy for coding blocks
@@ -557,6 +560,20 @@ static AOM_INLINE void encode_nonrd_sb(AV1_COMP *cpi, ThreadData *td,
sf->part_sf.partition_search_type == VAR_BASED_PARTITION);
set_cb_offsets(td->mb.cb_offset, 0, 0);
+ // Initialize the flag to skip cdef for 64x64 blocks: if color sensitivy is
+ // on, set to 0 (don't skip).
+ if (sf->rt_sf.skip_cdef_sb) {
+ const int block64_in_sb = (sb_size == BLOCK_128X128) ? 2 : 1;
+ for (int r = 0; r < block64_in_sb; ++r) {
+ for (int c = 0; c < block64_in_sb; ++c) {
+ const int idx_in_sb =
+ r * MI_SIZE_64X64 * cm->mi_params.mi_stride + c * MI_SIZE_64X64;
+ if (mi[idx_in_sb])
+ mi[idx_in_sb]->skip_cdef_curr_sb =
+ !(x->color_sensitivity_sb[0] || x->color_sensitivity_sb[1]);
+ }
+ }
+ }
// Adjust and encode the superblock
PC_TREE *const pc_root = av1_alloc_pc_tree_node(sb_size);
av1_nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
@@ -599,7 +616,7 @@ static INLINE void init_encode_rd_sb(AV1_COMP *cpi, ThreadData *td,
if (gather_tpl_data) {
if (cm->delta_q_info.delta_q_present_flag) {
const int num_planes = av1_num_planes(cm);
- const BLOCK_SIZE sb_size = cm->seq_params.sb_size;
+ const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
setup_delta_q(cpi, td, x, tile_info, mi_row, mi_col, num_planes);
av1_tpl_rdmult_setup_sb(cpi, x, sb_size, mi_row, mi_col);
}
@@ -637,7 +654,7 @@ static AOM_INLINE void encode_rd_sb(AV1_COMP *cpi, ThreadData *td,
const TileInfo *const tile_info = &tile_data->tile_info;
MB_MODE_INFO **mi = cm->mi_params.mi_grid_base +
get_mi_grid_idx(&cm->mi_params, mi_row, mi_col);
- const BLOCK_SIZE sb_size = cm->seq_params.sb_size;
+ const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
const int num_planes = av1_num_planes(cm);
int dummy_rate;
int64_t dummy_dist;
@@ -708,10 +725,17 @@ static AOM_INLINE void encode_rd_sb(AV1_COMP *cpi, ThreadData *td,
cpi->oxcf.unit_test_cfg.sb_multipass_unit_test ? 2 : 1;
if (num_passes == 1) {
+#if CONFIG_PARTITION_SEARCH_ORDER
+ av1_reset_part_sf(&cpi->sf.part_sf);
+ RD_STATS this_rdc;
+ av1_rd_partition_search(cpi, td, tile_data, tp, sms_root, mi_row, mi_col,
+ sb_size, &this_rdc);
+#else
PC_TREE *const pc_root = av1_alloc_pc_tree_node(sb_size);
av1_rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
&dummy_rdc, dummy_rdc, pc_root, sms_root, NULL,
SB_SINGLE_PASS, NULL);
+#endif // CONFIG_PARTITION_SEARCH_ORDER
} else {
// First pass
SB_FIRST_PASS_STATS sb_fp_stats;
@@ -753,7 +777,8 @@ static AOM_INLINE void encode_rd_sb(AV1_COMP *cpi, ThreadData *td,
static AOM_INLINE int is_rtc_mode(const CostUpdateFreq *cost_upd_freq,
int use_non_rd_mode) {
return (use_non_rd_mode && cost_upd_freq->coeff >= 2 &&
- cost_upd_freq->mode >= 2 && cost_upd_freq->mv >= 2);
+ cost_upd_freq->mode >= 2 && cost_upd_freq->mv >= 2 &&
+ cost_upd_freq->dv >= 2);
}
/*!\brief Encode a superblock row by breaking it into superblocks
@@ -776,9 +801,9 @@ static AOM_INLINE void encode_sb_row(AV1_COMP *cpi, ThreadData *td,
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
const int sb_cols_in_tile = av1_get_sb_cols_in_tile(cm, tile_data->tile_info);
- const BLOCK_SIZE sb_size = cm->seq_params.sb_size;
- const int mib_size = cm->seq_params.mib_size;
- const int mib_size_log2 = cm->seq_params.mib_size_log2;
+ const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
+ const int mib_size = cm->seq_params->mib_size;
+ const int mib_size_log2 = cm->seq_params->mib_size_log2;
const int sb_row = (mi_row - tile_info->mi_row_start) >> mib_size_log2;
const int use_nonrd_mode = cpi->sf.rt_sf.use_nonrd_pick_mode;
const CostUpdateFreq *const cost_upd_freq = &cpi->oxcf.cost_upd_freq;
@@ -833,6 +858,8 @@ static AOM_INLINE void encode_sb_row(AV1_COMP *cpi, ThreadData *td,
av1_set_cost_upd_freq(cpi, td, tile_info, mi_row, mi_col);
// Reset color coding related parameters
+ x->color_sensitivity_sb[0] = 0;
+ x->color_sensitivity_sb[1] = 0;
x->color_sensitivity[0] = 0;
x->color_sensitivity[1] = 0;
x->content_state_sb.source_sad = kMedSad;
@@ -855,6 +882,12 @@ static AOM_INLINE void encode_sb_row(AV1_COMP *cpi, ThreadData *td,
seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
}
+ // Produce the gradient data at superblock level, when intra mode pruning
+ // based on hog is enabled.
+ if (cpi->sf.intra_sf.intra_pruning_with_hog ||
+ cpi->sf.intra_sf.chroma_intra_pruning_with_hog)
+ produce_gradients_for_sb(cpi, x, sb_size, mi_row, mi_col);
+
// encode the superblock
if (use_nonrd_mode) {
encode_nonrd_sb(cpi, td, tile_data, tp, mi_row, mi_col, seg_skip);
@@ -886,10 +919,10 @@ static AOM_INLINE void init_encode_frame_mb_context(AV1_COMP *cpi) {
// Copy data over into macro block data structures.
av1_setup_src_planes(x, cpi->source, 0, 0, num_planes,
- cm->seq_params.sb_size);
+ cm->seq_params->sb_size);
- av1_setup_block_planes(xd, cm->seq_params.subsampling_x,
- cm->seq_params.subsampling_y, num_planes);
+ av1_setup_block_planes(xd, cm->seq_params->subsampling_x,
+ cm->seq_params->subsampling_y, num_planes);
}
void av1_alloc_tile_data(AV1_COMP *cpi) {
@@ -927,13 +960,14 @@ void av1_init_tile_data(AV1_COMP *cpi) {
TileInfo *const tile_info = &tile_data->tile_info;
av1_tile_init(tile_info, cm, tile_row, tile_col);
tile_data->firstpass_top_mv = kZeroMv;
+ tile_data->abs_sum_level = 0;
if (pre_tok != NULL && tplist != NULL) {
token_info->tile_tok[tile_row][tile_col] = pre_tok + tile_tok;
pre_tok = token_info->tile_tok[tile_row][tile_col];
- tile_tok = allocated_tokens(*tile_info,
- cm->seq_params.mib_size_log2 + MI_SIZE_LOG2,
- num_planes);
+ tile_tok = allocated_tokens(
+ *tile_info, cm->seq_params->mib_size_log2 + MI_SIZE_LOG2,
+ num_planes);
token_info->tplist[tile_row][tile_col] = tplist + tplist_count;
tplist = token_info->tplist[tile_row][tile_col];
tplist_count = av1_get_sb_rows_in_tile(cm, tile_data->tile_info);
@@ -961,14 +995,14 @@ void av1_encode_sb_row(AV1_COMP *cpi, ThreadData *td, int tile_row,
TokenExtra *tok = NULL;
TokenList *const tplist = cpi->token_info.tplist[tile_row][tile_col];
const int sb_row_in_tile =
- (mi_row - tile_info->mi_row_start) >> cm->seq_params.mib_size_log2;
+ (mi_row - tile_info->mi_row_start) >> cm->seq_params->mib_size_log2;
const int tile_mb_cols =
(tile_info->mi_col_end - tile_info->mi_col_start + 2) >> 2;
const int num_mb_rows_in_sb =
- ((1 << (cm->seq_params.mib_size_log2 + MI_SIZE_LOG2)) + 8) >> 4;
+ ((1 << (cm->seq_params->mib_size_log2 + MI_SIZE_LOG2)) + 8) >> 4;
get_start_tok(cpi, tile_row, tile_col, mi_row, &tok,
- cm->seq_params.mib_size_log2 + MI_SIZE_LOG2, num_planes);
+ cm->seq_params->mib_size_log2 + MI_SIZE_LOG2, num_planes);
assert(tplist != NULL);
tplist[sb_row_in_tile].start = tok;
@@ -979,7 +1013,7 @@ void av1_encode_sb_row(AV1_COMP *cpi, ThreadData *td, int tile_row,
assert((unsigned int)(tok - tplist[sb_row_in_tile].start) <=
get_token_alloc(num_mb_rows_in_sb, tile_mb_cols,
- cm->seq_params.mib_size_log2 + MI_SIZE_LOG2,
+ cm->seq_params->mib_size_log2 + MI_SIZE_LOG2,
num_planes));
(void)tile_mb_cols;
@@ -1005,7 +1039,7 @@ void av1_encode_tile(AV1_COMP *cpi, ThreadData *td, int tile_row,
&td->mb.e_mbd);
if (cpi->oxcf.intra_mode_cfg.enable_cfl_intra)
- cfl_init(&td->mb.e_mbd.cfl, &cm->seq_params);
+ cfl_init(&td->mb.e_mbd.cfl, cm->seq_params);
if (td->mb.txfm_search_info.txb_rd_records != NULL) {
av1_crc32c_calculator_init(
@@ -1013,9 +1047,10 @@ void av1_encode_tile(AV1_COMP *cpi, ThreadData *td, int tile_row,
}
for (int mi_row = tile_info->mi_row_start; mi_row < tile_info->mi_row_end;
- mi_row += cm->seq_params.mib_size) {
+ mi_row += cm->seq_params->mib_size) {
av1_encode_sb_row(cpi, td, tile_row, tile_col, mi_row);
}
+ this_tile->abs_sum_level = td->abs_sum_level;
}
/*!\brief Break one frame into tiles and encode the tiles
@@ -1030,15 +1065,13 @@ static AOM_INLINE void encode_tiles(AV1_COMP *cpi) {
const int tile_rows = cm->tiles.rows;
int tile_col, tile_row;
+ MACROBLOCK *const mb = &cpi->td.mb;
assert(IMPLIES(cpi->tile_data == NULL,
cpi->allocated_tiles < tile_cols * tile_rows));
if (cpi->allocated_tiles < tile_cols * tile_rows) av1_alloc_tile_data(cpi);
av1_init_tile_data(cpi);
- if (!cpi->sf.rt_sf.use_nonrd_pick_mode) {
- cpi->td.mb.txfm_search_info.txb_rd_records =
- (TxbRdRecords *)aom_malloc(sizeof(TxbRdRecords));
- }
+ av1_alloc_mb_data(cm, mb, cpi->sf.rt_sf.use_nonrd_pick_mode);
for (tile_row = 0; tile_row < tile_rows; ++tile_row) {
for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
@@ -1046,6 +1079,7 @@ static AOM_INLINE void encode_tiles(AV1_COMP *cpi) {
&cpi->tile_data[tile_row * cm->tiles.cols + tile_col];
cpi->td.intrabc_used = 0;
cpi->td.deltaq_used = 0;
+ cpi->td.abs_sum_level = 0;
cpi->td.mb.e_mbd.tile_ctx = &this_tile->tctx;
cpi->td.mb.tile_pb_ctx = &this_tile->tctx;
// Reset cyclic refresh counters.
@@ -1062,10 +1096,7 @@ static AOM_INLINE void encode_tiles(AV1_COMP *cpi) {
}
}
- if (cpi->td.mb.txfm_search_info.txb_rd_records) {
- aom_free(cpi->td.mb.txfm_search_info.txb_rd_records);
- cpi->td.mb.txfm_search_info.txb_rd_records = NULL;
- }
+ av1_dealloc_mb_data(cm, mb);
}
// Set the relative distance of a reference frame w.r.t. current frame
@@ -1141,10 +1172,10 @@ static int check_skip_mode_enabled(AV1_COMP *const cpi) {
const int cur_offset = (int)cm->current_frame.order_hint;
int ref_offset[2];
get_skip_mode_ref_offsets(cm, ref_offset);
- const int cur_to_ref0 = get_relative_dist(&cm->seq_params.order_hint_info,
+ const int cur_to_ref0 = get_relative_dist(&cm->seq_params->order_hint_info,
cur_offset, ref_offset[0]);
- const int cur_to_ref1 = abs(get_relative_dist(&cm->seq_params.order_hint_info,
- cur_offset, ref_offset[1]));
+ const int cur_to_ref1 = abs(get_relative_dist(
+ &cm->seq_params->order_hint_info, cur_offset, ref_offset[1]));
if (abs(cur_to_ref0 - cur_to_ref1) > 1) return 0;
// High Latency: Turn off skip mode if all refs are fwd.
@@ -1248,6 +1279,9 @@ static AOM_INLINE void encode_frame_internal(AV1_COMP *cpi) {
MACROBLOCKD *const xd = &x->e_mbd;
RD_COUNTS *const rdc = &cpi->td.rd_counts;
FrameProbInfo *const frame_probs = &cpi->frame_probs;
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ FrameProbInfo *const temp_frame_probs = &cpi->ppi->temp_frame_probs;
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
IntraBCHashInfo *const intrabc_hash_info = &x->intrabc_hash_info;
MultiThreadInfo *const mt_info = &cpi->mt_info;
AV1EncRowMultiThreadInfo *const enc_row_mt = &mt_info->enc_row_mt;
@@ -1278,9 +1312,15 @@ static AOM_INLINE void encode_frame_internal(AV1_COMP *cpi) {
if (features->allow_warped_motion &&
cpi->sf.inter_sf.prune_warped_prob_thresh > 0) {
- const FRAME_UPDATE_TYPE update_type = get_frame_update_type(&cpi->gf_group);
- if (frame_probs->warped_probs[update_type] <
- cpi->sf.inter_sf.prune_warped_prob_thresh)
+ const FRAME_UPDATE_TYPE update_type =
+ get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
+ int warped_probability;
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ warped_probability = temp_frame_probs->warped_probs[update_type];
+#else
+ warped_probability = frame_probs->warped_probs[update_type];
+#endif
+ if (warped_probability < cpi->sf.inter_sf.prune_warped_prob_thresh)
features->allow_warped_motion = 0;
}
@@ -1316,7 +1356,7 @@ static AOM_INLINE void encode_frame_internal(AV1_COMP *cpi) {
// Hash data generated for screen contents is used for intraBC ME
const int min_alloc_size = block_size_wide[mi_params->mi_alloc_bsize];
const int max_sb_size =
- (1 << (cm->seq_params.mib_size_log2 + MI_SIZE_LOG2));
+ (1 << (cm->seq_params->mib_size_log2 + MI_SIZE_LOG2));
int src_idx = 0;
for (int size = 4; size <= max_sb_size; size *= 2, src_idx = !src_idx) {
const int dst_idx = !src_idx;
@@ -1377,10 +1417,10 @@ static AOM_INLINE void encode_frame_internal(AV1_COMP *cpi) {
// is used for ineligible frames. That effectively will turn off row_mt
// usage. Note objective delta_q and tpl eligible frames are only altref
// frames currently.
- const GF_GROUP *gf_group = &cpi->gf_group;
+ const GF_GROUP *gf_group = &cpi->ppi->gf_group;
if (cm->delta_q_info.delta_q_present_flag) {
if (deltaq_mode == DELTA_Q_OBJECTIVE &&
- !is_frame_tpl_eligible(gf_group, gf_group->index))
+ !is_frame_tpl_eligible(gf_group, cpi->gf_frame_index))
cm->delta_q_info.delta_q_present_flag = 0;
}
@@ -1500,8 +1540,8 @@ static AOM_INLINE void encode_frame_internal(AV1_COMP *cpi) {
features->tx_mode = select_tx_mode(cm, tx_search_type);
if (cpi->sf.tx_sf.tx_type_search.prune_tx_type_using_stats) {
- const FRAME_UPDATE_TYPE update_type = get_frame_update_type(&cpi->gf_group);
-
+ const FRAME_UPDATE_TYPE update_type =
+ get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
for (i = 0; i < TX_SIZES_ALL; i++) {
int sum = 0;
int j;
@@ -1519,13 +1559,33 @@ static AOM_INLINE void encode_frame_internal(AV1_COMP *cpi) {
left -= prob;
if (j == 0) prob += left;
frame_probs->tx_type_probs[update_type][i][j] = prob;
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ /* TODO(FPMT): The current update is happening in cpi->frame_probs,
+ * this need to be taken care appropriately in final FPMT implementation
+ * to carry these values to subsequent frames. The frame_probs update is
+ * accumulated across frames, so the values from all individual parallel
+ * frames need to be taken into account after all the parallel frames
+ * are encoded.
+ *
+ * Only for quality simulation purpose - Update the accumulated frame
+ * probabilities in ppi->temp_variable based on the update flag.
+ */
+ if (cpi->do_frame_data_update) {
+ for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
+ update_type_idx++) {
+ temp_frame_probs->tx_type_probs[update_type_idx][i][j] =
+ frame_probs->tx_type_probs[update_type_idx][i][j];
+ }
+ }
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
}
}
}
if (cpi->sf.inter_sf.prune_obmc_prob_thresh > 0 &&
cpi->sf.inter_sf.prune_obmc_prob_thresh < INT_MAX) {
- const FRAME_UPDATE_TYPE update_type = get_frame_update_type(&cpi->gf_group);
+ const FRAME_UPDATE_TYPE update_type =
+ get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
for (i = 0; i < BLOCK_SIZES_ALL; i++) {
int sum = 0;
@@ -1535,23 +1595,63 @@ static AOM_INLINE void encode_frame_internal(AV1_COMP *cpi) {
sum ? 128 * cpi->td.rd_counts.obmc_used[i][1] / sum : 0;
frame_probs->obmc_probs[update_type][i] =
(frame_probs->obmc_probs[update_type][i] + new_prob) >> 1;
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ /* TODO(FPMT): The current update is happening in cpi->frame_probs,
+ * this need to be taken care appropriately in final FPMT
+ * implementation to carry these values to subsequent frames.
+ * The frame_probs update is accumulated across frames, so the
+ * values from all individual parallel frames need to be taken
+ * into account after all the parallel frames are encoded.
+ *
+ * Only for quality simulation purpose - Update the accumulated frame
+ * probabilities in ppi->temp_variable based on the update flag.
+ */
+ if (cpi->do_frame_data_update) {
+ for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
+ update_type_idx++) {
+ temp_frame_probs->obmc_probs[update_type_idx][i] =
+ frame_probs->obmc_probs[update_type_idx][i];
+ }
+ }
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
}
}
if (features->allow_warped_motion &&
cpi->sf.inter_sf.prune_warped_prob_thresh > 0) {
- const FRAME_UPDATE_TYPE update_type = get_frame_update_type(&cpi->gf_group);
+ const FRAME_UPDATE_TYPE update_type =
+ get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
int sum = 0;
for (i = 0; i < 2; i++) sum += cpi->td.rd_counts.warped_used[i];
const int new_prob = sum ? 128 * cpi->td.rd_counts.warped_used[1] / sum : 0;
frame_probs->warped_probs[update_type] =
(frame_probs->warped_probs[update_type] + new_prob) >> 1;
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ /* TODO(FPMT): The current update is happening in cpi->frame_probs,
+ * this need to be taken care appropriately in final FPMT
+ * implementation to carry these values to subsequent frames.
+ * The frame_probs update is accumulated across frames, so the
+ * values from all individual parallel frames need to be taken
+ * into account after all the parallel frames are encoded.
+ *
+ * Only for quality simulation purpose - Update the accumulated frame
+ * probabilities in ppi->temp_variable based on the update flag.
+ */
+ if (cpi->do_frame_data_update) {
+ for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
+ update_type_idx++) {
+ temp_frame_probs->warped_probs[update_type_idx] =
+ frame_probs->warped_probs[update_type_idx];
+ }
+ }
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
}
if (cm->current_frame.frame_type != KEY_FRAME &&
cpi->sf.interp_sf.adaptive_interp_filter_search == 2 &&
features->interp_filter == SWITCHABLE) {
- const FRAME_UPDATE_TYPE update_type = get_frame_update_type(&cpi->gf_group);
+ const FRAME_UPDATE_TYPE update_type =
+ get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) {
int sum = 0;
@@ -1572,6 +1672,25 @@ static AOM_INLINE void encode_frame_internal(AV1_COMP *cpi) {
left -= prob;
if (j == 0) prob += left;
frame_probs->switchable_interp_probs[update_type][i][j] = prob;
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ /* TODO(FPMT): The current update is happening in cpi->frame_probs,
+ * this need to be taken care appropriately in final FPMT
+ * implementation to carry these values to subsequent frames.
+ * The frame_probs update is accumulated across frames, so the
+ * values from all individual parallel frames need to be taken
+ * into account after all the parallel frames are encoded.
+ *
+ * Only for quality simulation purpose - Update the accumulated frame
+ * probabilities in ppi->temp_variable based on the update flag.
+ */
+ if (cpi->do_frame_data_update) {
+ for (int update_type_idx = 0; update_type_idx < FRAME_UPDATE_TYPES;
+ update_type_idx++) {
+ temp_frame_probs->switchable_interp_probs[update_type_idx][i][j] =
+ frame_probs->switchable_interp_probs[update_type_idx][i][j];
+ }
+ }
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
}
}
}
diff --git a/third_party/libaom/source/libaom/av1/encoder/encodeframe_utils.c b/third_party/libaom/source/libaom/av1/encoder/encodeframe_utils.c
index c10b2ffe6c..d3fa50292b 100644
--- a/third_party/libaom/source/libaom/av1/encoder/encodeframe_utils.c
+++ b/third_party/libaom/source/libaom/av1/encoder/encodeframe_utils.c
@@ -44,7 +44,6 @@ void av1_set_ssim_rdmult(const AV1_COMP *const cpi, int *errorperbit,
assert(cpi->oxcf.tune_cfg.tuning == AOM_TUNE_SSIM);
- aom_clear_system_state();
for (row = mi_row / num_mi_w;
row < num_rows && row < mi_row / num_mi_w + num_brows; ++row) {
for (col = mi_col / num_mi_h;
@@ -59,20 +58,19 @@ void av1_set_ssim_rdmult(const AV1_COMP *const cpi, int *errorperbit,
*rdmult = (int)((double)(*rdmult) * geom_mean_of_scale + 0.5);
*rdmult = AOMMAX(*rdmult, 0);
av1_set_error_per_bit(errorperbit, *rdmult);
- aom_clear_system_state();
}
// Return the end column for the current superblock, in unit of TPL blocks.
static int get_superblock_tpl_column_end(const AV1_COMMON *const cm, int mi_col,
int num_mi_w) {
// Find the start column of this superblock.
- const int sb_mi_col_start = (mi_col >> cm->seq_params.mib_size_log2)
- << cm->seq_params.mib_size_log2;
+ const int sb_mi_col_start = (mi_col >> cm->seq_params->mib_size_log2)
+ << cm->seq_params->mib_size_log2;
// Same but in superres upscaled dimension.
const int sb_mi_col_start_sr =
coded_to_superres_mi(sb_mi_col_start, cm->superres_scale_denominator);
// Width of this superblock in mi units.
- const int sb_mi_width = mi_size_wide[cm->seq_params.sb_size];
+ const int sb_mi_width = mi_size_wide[cm->seq_params->sb_size];
// Same but in superres upscaled dimension.
const int sb_mi_width_sr =
coded_to_superres_mi(sb_mi_width, cm->superres_scale_denominator);
@@ -86,15 +84,16 @@ int av1_get_hier_tpl_rdmult(const AV1_COMP *const cpi, MACROBLOCK *const x,
const BLOCK_SIZE bsize, const int mi_row,
const int mi_col, int orig_rdmult) {
const AV1_COMMON *const cm = &cpi->common;
- const GF_GROUP *const gf_group = &cpi->gf_group;
- assert(IMPLIES(cpi->gf_group.size > 0,
- cpi->gf_group.index < cpi->gf_group.size));
- const int tpl_idx = cpi->gf_group.index;
- const TplDepFrame *tpl_frame = &cpi->tpl_data.tpl_frame[tpl_idx];
+ const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
+ assert(IMPLIES(cpi->ppi->gf_group.size > 0,
+ cpi->gf_frame_index < cpi->ppi->gf_group.size));
+ const int tpl_idx = cpi->gf_frame_index;
const int deltaq_rdmult = set_deltaq_rdmult(cpi, x);
- if (tpl_frame->is_valid == 0) return deltaq_rdmult;
- if (!is_frame_tpl_eligible(gf_group, gf_group->index)) return deltaq_rdmult;
if (tpl_idx >= MAX_TPL_FRAME_IDX) return deltaq_rdmult;
+ const TplDepFrame *tpl_frame = &cpi->ppi->tpl_data.tpl_frame[tpl_idx];
+ if (!tpl_frame->is_valid) return deltaq_rdmult;
+ if (!is_frame_tpl_eligible(gf_group, cpi->gf_frame_index))
+ return deltaq_rdmult;
if (cpi->oxcf.q_cfg.aq_mode != NO_AQ) return deltaq_rdmult;
const int mi_col_sr =
@@ -116,7 +115,6 @@ int av1_get_hier_tpl_rdmult(const AV1_COMP *const cpi, MACROBLOCK *const x,
int row, col;
double base_block_count = 0.0;
double geom_mean_of_scale = 0.0;
- aom_clear_system_state();
for (row = mi_row / num_mi_w;
row < num_rows && row < mi_row / num_mi_w + num_brows; ++row) {
for (col = mi_col_sr / num_mi_h;
@@ -124,7 +122,7 @@ int av1_get_hier_tpl_rdmult(const AV1_COMP *const cpi, MACROBLOCK *const x,
col < sb_bcol_end;
++col) {
const int index = row * num_cols + col;
- geom_mean_of_scale += log(cpi->tpl_sb_rdmult_scaling_factors[index]);
+ geom_mean_of_scale += log(cpi->ppi->tpl_sb_rdmult_scaling_factors[index]);
base_block_count += 1.0;
}
}
@@ -132,8 +130,7 @@ int av1_get_hier_tpl_rdmult(const AV1_COMP *const cpi, MACROBLOCK *const x,
int rdmult = (int)((double)orig_rdmult * geom_mean_of_scale + 0.5);
rdmult = AOMMAX(rdmult, 0);
av1_set_error_per_bit(&x->errorperbit, rdmult);
- aom_clear_system_state();
- if (bsize == cm->seq_params.sb_size) {
+ if (bsize == cm->seq_params->sb_size) {
const int rdmult_sb = set_deltaq_rdmult(cpi, x);
assert(rdmult_sb == rdmult);
(void)rdmult_sb;
@@ -341,7 +338,7 @@ void av1_update_state(const AV1_COMP *const cpi, ThreadData *td,
const int x_mis = AOMMIN(bw, mi_params->mi_cols - mi_col);
const int y_mis = AOMMIN(bh, mi_params->mi_rows - mi_row);
- if (cm->seq_params.order_hint_info.enable_ref_frame_mvs)
+ if (cm->seq_params->order_hint_info.enable_ref_frame_mvs)
av1_copy_frame_mvs(cm, mi, mi_row, mi_col, x_mis, y_mis);
}
@@ -604,9 +601,9 @@ static void set_partial_sb_partition(const AV1_COMMON *const cm,
MB_MODE_INFO **mib) {
int bh = bh_in;
int r, c;
- for (r = 0; r < cm->seq_params.mib_size; r += bh) {
+ for (r = 0; r < cm->seq_params->mib_size; r += bh) {
int bw = bw_in;
- for (c = 0; c < cm->seq_params.mib_size; c += bw) {
+ for (c = 0; c < cm->seq_params->mib_size; c += bw) {
const int grid_index = get_mi_grid_idx(&cm->mi_params, r, c);
const int mi_index = get_alloc_mi_idx(&cm->mi_params, r, c);
mib[grid_index] = mi + mi_index;
@@ -638,11 +635,11 @@ void av1_set_fixed_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
assert((mi_rows_remaining > 0) && (mi_cols_remaining > 0));
// Apply the requested partition size to the SB if it is all "in image"
- if ((mi_cols_remaining >= cm->seq_params.mib_size) &&
- (mi_rows_remaining >= cm->seq_params.mib_size)) {
- for (int block_row = 0; block_row < cm->seq_params.mib_size;
+ if ((mi_cols_remaining >= cm->seq_params->mib_size) &&
+ (mi_rows_remaining >= cm->seq_params->mib_size)) {
+ for (int block_row = 0; block_row < cm->seq_params->mib_size;
block_row += bh) {
- for (int block_col = 0; block_col < cm->seq_params.mib_size;
+ for (int block_col = 0; block_col < cm->seq_params->mib_size;
block_col += bw) {
const int grid_index = get_mi_grid_idx(mi_params, block_row, block_col);
const int mi_index = get_alloc_mi_idx(mi_params, block_row, block_col);
@@ -682,25 +679,25 @@ int av1_is_leaf_split_partition(AV1_COMMON *cm, int mi_row, int mi_col,
int av1_get_rdmult_delta(AV1_COMP *cpi, BLOCK_SIZE bsize, int mi_row,
int mi_col, int orig_rdmult) {
AV1_COMMON *const cm = &cpi->common;
- const GF_GROUP *const gf_group = &cpi->gf_group;
- assert(IMPLIES(cpi->gf_group.size > 0,
- cpi->gf_group.index < cpi->gf_group.size));
- const int tpl_idx = cpi->gf_group.index;
- TplParams *const tpl_data = &cpi->tpl_data;
- TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx];
- TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
+ const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
+ assert(IMPLIES(cpi->ppi->gf_group.size > 0,
+ cpi->gf_frame_index < cpi->ppi->gf_group.size));
+ const int tpl_idx = cpi->gf_frame_index;
+ TplParams *const tpl_data = &cpi->ppi->tpl_data;
const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2;
- int tpl_stride = tpl_frame->stride;
int64_t intra_cost = 0;
int64_t mc_dep_cost = 0;
const int mi_wide = mi_size_wide[bsize];
const int mi_high = mi_size_high[bsize];
- if (tpl_frame->is_valid == 0) return orig_rdmult;
+ if (tpl_idx >= MAX_TPL_FRAME_IDX) return orig_rdmult;
- if (!is_frame_tpl_eligible(gf_group, gf_group->index)) return orig_rdmult;
+ TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx];
+ TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
+ int tpl_stride = tpl_frame->stride;
+ if (!tpl_frame->is_valid) return orig_rdmult;
- if (cpi->gf_group.index >= MAX_TPL_FRAME_IDX) return orig_rdmult;
+ if (!is_frame_tpl_eligible(gf_group, cpi->gf_frame_index)) return orig_rdmult;
int mi_count = 0;
const int mi_col_sr =
@@ -727,8 +724,6 @@ int av1_get_rdmult_delta(AV1_COMP *cpi, BLOCK_SIZE bsize, int mi_row,
}
assert(mi_count <= MAX_TPL_BLK_IN_SB * MAX_TPL_BLK_IN_SB);
- aom_clear_system_state();
-
double beta = 1.0;
if (mc_dep_cost > 0 && intra_cost > 0) {
const double r0 = cpi->rd.r0;
@@ -738,8 +733,6 @@ int av1_get_rdmult_delta(AV1_COMP *cpi, BLOCK_SIZE bsize, int mi_row,
int rdmult = av1_get_adaptive_rdmult(cpi, beta);
- aom_clear_system_state();
-
rdmult = AOMMIN(rdmult, orig_rdmult * 3 / 2);
rdmult = AOMMAX(rdmult, orig_rdmult * 1 / 2);
@@ -760,7 +753,7 @@ int av1_active_h_edge(const AV1_COMP *cpi, int mi_row, int mi_step) {
if (is_stat_consumption_stage_twopass(cpi)) {
const AV1_COMMON *const cm = &cpi->common;
const FIRSTPASS_STATS *const this_frame_stats = read_one_frame_stats(
- &cpi->twopass, cm->current_frame.display_order_hint);
+ &cpi->ppi->twopass, cm->current_frame.display_order_hint);
if (this_frame_stats == NULL) return AOM_CODEC_ERROR;
// The inactive region is specified in MBs not mi units.
@@ -790,7 +783,7 @@ int av1_active_v_edge(const AV1_COMP *cpi, int mi_col, int mi_step) {
if (is_stat_consumption_stage_twopass(cpi)) {
const AV1_COMMON *const cm = &cpi->common;
const FIRSTPASS_STATS *const this_frame_stats = read_one_frame_stats(
- &cpi->twopass, cm->current_frame.display_order_hint);
+ &cpi->ppi->twopass, cm->current_frame.display_order_hint);
if (this_frame_stats == NULL) return AOM_CODEC_ERROR;
// The inactive region is specified in MBs not mi units.
@@ -814,24 +807,26 @@ void av1_get_tpl_stats_sb(AV1_COMP *cpi, BLOCK_SIZE bsize, int mi_row,
if (!cpi->oxcf.algo_cfg.enable_tpl_model) return;
if (cpi->common.current_frame.frame_type == KEY_FRAME) return;
- const FRAME_UPDATE_TYPE update_type = get_frame_update_type(&cpi->gf_group);
+ const FRAME_UPDATE_TYPE update_type =
+ get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
if (update_type == INTNL_OVERLAY_UPDATE || update_type == OVERLAY_UPDATE)
return;
- assert(IMPLIES(cpi->gf_group.size > 0,
- cpi->gf_group.index < cpi->gf_group.size));
+ assert(IMPLIES(cpi->ppi->gf_group.size > 0,
+ cpi->gf_frame_index < cpi->ppi->gf_group.size));
AV1_COMMON *const cm = &cpi->common;
- const int gf_group_index = cpi->gf_group.index;
- TplParams *const tpl_data = &cpi->tpl_data;
- TplDepFrame *tpl_frame = &tpl_data->tpl_frame[gf_group_index];
- TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
- int tpl_stride = tpl_frame->stride;
+ const int gf_group_index = cpi->gf_frame_index;
+ TplParams *const tpl_data = &cpi->ppi->tpl_data;
const int mi_wide = mi_size_wide[bsize];
const int mi_high = mi_size_high[bsize];
- if (tpl_frame->is_valid == 0) return;
if (gf_group_index >= MAX_TPL_FRAME_IDX) return;
+ TplDepFrame *tpl_frame = &tpl_data->tpl_frame[gf_group_index];
+ TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
+ int tpl_stride = tpl_frame->stride;
+ if (!tpl_frame->is_valid) return;
+
int mi_count = 0;
int count = 0;
const int mi_col_sr =
@@ -889,26 +884,26 @@ void av1_get_tpl_stats_sb(AV1_COMP *cpi, BLOCK_SIZE bsize, int mi_row,
int av1_get_q_for_deltaq_objective(AV1_COMP *const cpi, BLOCK_SIZE bsize,
int mi_row, int mi_col) {
AV1_COMMON *const cm = &cpi->common;
- const GF_GROUP *const gf_group = &cpi->gf_group;
- assert(IMPLIES(cpi->gf_group.size > 0,
- cpi->gf_group.index < cpi->gf_group.size));
- const int tpl_idx = cpi->gf_group.index;
- TplParams *const tpl_data = &cpi->tpl_data;
- TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx];
- TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
+ const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
+ assert(IMPLIES(cpi->ppi->gf_group.size > 0,
+ cpi->gf_frame_index < cpi->ppi->gf_group.size));
+ const int tpl_idx = cpi->gf_frame_index;
+ TplParams *const tpl_data = &cpi->ppi->tpl_data;
const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2;
- int tpl_stride = tpl_frame->stride;
int64_t intra_cost = 0;
int64_t mc_dep_cost = 0;
const int mi_wide = mi_size_wide[bsize];
const int mi_high = mi_size_high[bsize];
const int base_qindex = cm->quant_params.base_qindex;
- if (tpl_frame->is_valid == 0) return base_qindex;
+ if (tpl_idx >= MAX_TPL_FRAME_IDX) return base_qindex;
- if (!is_frame_tpl_eligible(gf_group, gf_group->index)) return base_qindex;
+ TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx];
+ TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
+ int tpl_stride = tpl_frame->stride;
+ if (!tpl_frame->is_valid) return base_qindex;
- if (cpi->gf_group.index >= MAX_TPL_FRAME_IDX) return base_qindex;
+ if (!is_frame_tpl_eligible(gf_group, cpi->gf_frame_index)) return base_qindex;
int mi_count = 0;
const int mi_col_sr =
@@ -935,8 +930,6 @@ int av1_get_q_for_deltaq_objective(AV1_COMP *const cpi, BLOCK_SIZE bsize,
}
assert(mi_count <= MAX_TPL_BLK_IN_SB * MAX_TPL_BLK_IN_SB);
- aom_clear_system_state();
-
int offset = 0;
double beta = 1.0;
if (mc_dep_cost > 0 && intra_cost > 0) {
@@ -945,8 +938,7 @@ int av1_get_q_for_deltaq_objective(AV1_COMP *const cpi, BLOCK_SIZE bsize,
beta = (r0 / rk);
assert(beta > 0.0);
}
- offset = av1_get_deltaq_offset(cpi, base_qindex, beta);
- aom_clear_system_state();
+ offset = av1_get_deltaq_offset(cm->seq_params->bit_depth, base_qindex, beta);
const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
offset = AOMMIN(offset, delta_q_info->delta_q_res * 9 - 1);
@@ -1164,7 +1156,7 @@ void av1_avg_cdf_symbols(FRAME_CONTEXT *ctx_left, FRAME_CONTEXT *ctx_tr,
void av1_source_content_sb(AV1_COMP *cpi, MACROBLOCK *x, int offset) {
unsigned int tmp_sse;
unsigned int tmp_variance;
- const BLOCK_SIZE bsize = cpi->common.seq_params.sb_size;
+ const BLOCK_SIZE bsize = cpi->common.seq_params->sb_size;
uint8_t *src_y = cpi->source->y_buffer;
int src_ystride = cpi->source->y_stride;
uint8_t *last_src_y = cpi->last_source->y_buffer;
@@ -1178,8 +1170,8 @@ void av1_source_content_sb(AV1_COMP *cpi, MACROBLOCK *x, int offset) {
#endif
src_y += offset;
last_src_y += offset;
- tmp_variance = cpi->fn_ptr[bsize].vf(src_y, src_ystride, last_src_y,
- last_src_ystride, &tmp_sse);
+ tmp_variance = cpi->ppi->fn_ptr[bsize].vf(src_y, src_ystride, last_src_y,
+ last_src_ystride, &tmp_sse);
if (tmp_sse < avg_source_sse_threshold)
x->content_state_sb.source_sad = kLowSad;
else if (tmp_sse > avg_source_sse_threshold_high)
@@ -1233,7 +1225,7 @@ void av1_backup_sb_state(SB_FIRST_PASS_STATS *sb_fp_stats, const AV1_COMP *cpi,
const AV1_COMMON *cm = &cpi->common;
const int num_planes = av1_num_planes(cm);
- const BLOCK_SIZE sb_size = cm->seq_params.sb_size;
+ const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
xd->above_txfm_context =
cm->above_contexts.txfm[tile_info->tile_row] + mi_col;
@@ -1269,7 +1261,7 @@ void av1_restore_sb_state(const SB_FIRST_PASS_STATS *sb_fp_stats, AV1_COMP *cpi,
const AV1_COMMON *cm = &cpi->common;
const int num_planes = av1_num_planes(cm);
- const BLOCK_SIZE sb_size = cm->seq_params.sb_size;
+ const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
av1_restore_context(x, &sb_fp_stats->x_ctx, mi_row, mi_col, sb_size,
num_planes);
@@ -1294,33 +1286,32 @@ void av1_restore_sb_state(const SB_FIRST_PASS_STATS *sb_fp_stats, AV1_COMP *cpi,
#endif // CONFIG_INTERNAL_STATS
}
-// Checks for skip status of mv cost update.
-static int skip_mv_cost_update(AV1_COMP *cpi, const TileInfo *const tile_info,
- const int mi_row, const int mi_col) {
- // For intra frames, mv cdfs are not updated during the encode. Hence, the mv
- // cost calculation is skipped in this case.
- if (frame_is_intra_only(&cpi->common)) return 1;
- // mv_cost_upd_level=0: update happens at each sb,
- // so return skip status as 0.
- // mv_cost_upd_level=1: update happens once for each sb row,
- // so return skip status as 1 for
- // mi_col != tile_info->mi_col_start.
- // mv_cost_upd_level=2: update happens once for a set of rows,
- // so return skip status as 1 appropriately.
- if (!cpi->sf.inter_sf.mv_cost_upd_level) return 0;
+/*! Checks whether to skip updating the entropy cost based on tile info.
+ *
+ * This function contains codes common to both \ref skip_mv_cost_update and
+ * \ref skip_dv_cost_update.
+ */
+static int skip_cost_update(const SequenceHeader *seq_params,
+ const TileInfo *const tile_info, const int mi_row,
+ const int mi_col,
+ INTERNAL_COST_UPDATE_TYPE upd_level) {
+ if (upd_level == INTERNAL_COST_UPD_SB) return 0;
+ if (upd_level == INTERNAL_COST_UPD_OFF) return 1;
+
+ // upd_level is at most as frequent as each sb_row in a tile.
if (mi_col != tile_info->mi_col_start) return 1;
- if (cpi->sf.inter_sf.mv_cost_upd_level == 2) {
- AV1_COMMON *const cm = &cpi->common;
- const int mib_size_log2 = cm->seq_params.mib_size_log2;
+
+ if (upd_level == INTERNAL_COST_UPD_SBROW_SET) {
+ const int mib_size_log2 = seq_params->mib_size_log2;
const int sb_row = (mi_row - tile_info->mi_row_start) >> mib_size_log2;
- const int sb_size = cm->seq_params.mib_size * MI_SIZE;
+ const int sb_size = seq_params->mib_size * MI_SIZE;
const int tile_height =
(tile_info->mi_row_end - tile_info->mi_row_start) * MI_SIZE;
- // When mv_cost_upd_level = 2, the cost update happens once for 2, 4 sb
- // rows for sb size 128, sb size 64 respectively. However, as the update
- // will not be equally spaced in smaller resolutions making it equally
- // spaced by calculating (mv_num_rows_cost_update) the number of rows
- // after which the cost update should happen.
+ // When upd_level = INTERNAL_COST_UPD_SBROW_SET, the cost update happens
+ // once for 2, 4 sb rows for sb size 128, sb size 64 respectively. However,
+ // as the update will not be equally spaced in smaller resolutions making
+ // it equally spaced by calculating (mv_num_rows_cost_update) the number of
+ // rows after which the cost update should happen.
const int sb_size_update_freq_map[2] = { 2, 4 };
const int update_freq_sb_rows =
sb_size_update_freq_map[sb_size != MAX_SB_SIZE];
@@ -1337,6 +1328,32 @@ static int skip_mv_cost_update(AV1_COMP *cpi, const TileInfo *const tile_info,
return 0;
}
+// Checks for skip status of mv cost update.
+static int skip_mv_cost_update(AV1_COMP *cpi, const TileInfo *const tile_info,
+ const int mi_row, const int mi_col) {
+ const AV1_COMMON *cm = &cpi->common;
+ // For intra frames, mv cdfs are not updated during the encode. Hence, the mv
+ // cost calculation is skipped in this case.
+ if (frame_is_intra_only(cm)) return 1;
+
+ return skip_cost_update(cm->seq_params, tile_info, mi_row, mi_col,
+ cpi->sf.inter_sf.mv_cost_upd_level);
+}
+
+// Checks for skip status of dv cost update.
+static int skip_dv_cost_update(AV1_COMP *cpi, const TileInfo *const tile_info,
+ const int mi_row, const int mi_col) {
+ const AV1_COMMON *cm = &cpi->common;
+ // Intrabc is only applicable to intra frames. So skip if intrabc is not
+ // allowed.
+ if (!av1_allow_intrabc(cm) || is_stat_generation_stage(cpi)) {
+ return 1;
+ }
+
+ return skip_cost_update(cm->seq_params, tile_info, mi_row, mi_col,
+ cpi->sf.intra_sf.dv_cost_upd_level);
+}
+
// Update the rate costs of some symbols according to the frequency directed
// by speed features
void av1_set_cost_upd_freq(AV1_COMP *cpi, ThreadData *td,
@@ -1355,6 +1372,9 @@ void av1_set_cost_upd_freq(AV1_COMP *cpi, ThreadData *td,
if (mi_col != tile_info->mi_col_start) break;
AOM_FALLTHROUGH_INTENDED;
case COST_UPD_SB: // SB level
+ if (cpi->sf.inter_sf.coeff_cost_upd_level == INTERNAL_COST_UPD_SBROW &&
+ mi_col != tile_info->mi_col_start)
+ break;
av1_fill_coeff_costs(&x->coeff_costs, xd->tile_ctx, num_planes);
break;
default: assert(0);
@@ -1368,6 +1388,9 @@ void av1_set_cost_upd_freq(AV1_COMP *cpi, ThreadData *td,
if (mi_col != tile_info->mi_col_start) break;
AOM_FALLTHROUGH_INTENDED;
case COST_UPD_SB: // SB level
+ if (cpi->sf.inter_sf.mode_cost_upd_level == INTERNAL_COST_UPD_SBROW &&
+ mi_col != tile_info->mi_col_start)
+ break;
av1_fill_mode_rates(cm, &x->mode_costs, xd->tile_ctx);
break;
default: assert(0);
@@ -1388,4 +1411,19 @@ void av1_set_cost_upd_freq(AV1_COMP *cpi, ThreadData *td,
break;
default: assert(0);
}
+
+ switch (cpi->oxcf.cost_upd_freq.dv) {
+ case COST_UPD_OFF:
+ case COST_UPD_TILE: // Tile level
+ break;
+ case COST_UPD_SBROW: // SB row level in tile
+ if (mi_col != tile_info->mi_col_start) break;
+ AOM_FALLTHROUGH_INTENDED;
+ case COST_UPD_SB: // SB level
+ // Checks for skip status of dv cost update.
+ if (skip_dv_cost_update(cpi, tile_info, mi_row, mi_col)) break;
+ av1_fill_dv_costs(&xd->tile_ctx->ndvc, x->dv_costs);
+ break;
+ default: assert(0);
+ }
}
diff --git a/third_party/libaom/source/libaom/av1/encoder/encodeframe_utils.h b/third_party/libaom/source/libaom/av1/encoder/encodeframe_utils.h
index 7bdfad5cba..3096181885 100644
--- a/third_party/libaom/source/libaom/av1/encoder/encodeframe_utils.h
+++ b/third_party/libaom/source/libaom/av1/encoder/encodeframe_utils.h
@@ -13,17 +13,68 @@
#define AOM_AV1_ENCODER_ENCODEFRAME_UTILS_H_
#include "aom_ports/aom_timer.h"
+#include "aom_ports/system_state.h"
#include "av1/common/reconinter.h"
#include "av1/encoder/encoder.h"
-#include "av1/encoder/partition_strategy.h"
#include "av1/encoder/rdopt.h"
#ifdef __cplusplus
extern "C" {
#endif
+#define WRITE_FEATURE_TO_FILE 0
+
+#define FEATURE_SIZE_SMS_SPLIT_FAST 6
+#define FEATURE_SIZE_SMS_SPLIT 17
+#define FEATURE_SIZE_SMS_PRUNE_PART 25
+#define FEATURE_SIZE_SMS_TERM_NONE 28
+#define FEATURE_SIZE_FP_SMS_TERM_NONE 20
+#define FEATURE_SIZE_MAX_MIN_PART_PRED 13
+#define MAX_NUM_CLASSES_MAX_MIN_PART_PRED 4
+
+#define FEATURE_SMS_NONE_FLAG 1
+#define FEATURE_SMS_SPLIT_FLAG (1 << 1)
+#define FEATURE_SMS_RECT_FLAG (1 << 2)
+
+#define FEATURE_SMS_PRUNE_PART_FLAG \
+ (FEATURE_SMS_NONE_FLAG | FEATURE_SMS_SPLIT_FLAG | FEATURE_SMS_RECT_FLAG)
+#define FEATURE_SMS_SPLIT_MODEL_FLAG \
+ (FEATURE_SMS_NONE_FLAG | FEATURE_SMS_SPLIT_FLAG)
+
+// Number of sub-partitions in rectangular partition types.
+#define SUB_PARTITIONS_RECT 2
+
+// Number of sub-partitions in split partition type.
+#define SUB_PARTITIONS_SPLIT 4
+
+// Number of sub-partitions in AB partition types.
+#define SUB_PARTITIONS_AB 3
+
+// Number of sub-partitions in 4-way partition types.
+#define SUB_PARTITIONS_PART4 4
+
+// 4part parition types.
+enum { HORZ4 = 0, VERT4, NUM_PART4_TYPES } UENUM1BYTE(PART4_TYPES);
+
+// AB parition types.
+enum {
+ HORZ_A = 0,
+ HORZ_B,
+ VERT_A,
+ VERT_B,
+ NUM_AB_PARTS
+} UENUM1BYTE(AB_PART_TYPE);
+
+// Rectangular parition types.
+enum { HORZ = 0, VERT, NUM_RECT_PARTS } UENUM1BYTE(RECT_PART_TYPE);
+
+// Structure to keep win flags for HORZ and VERT partition evaluations.
+typedef struct {
+ int rect_part_win[NUM_RECT_PARTS];
+} RD_RECT_PART_WIN_INFO;
+
enum { PICK_MODE_RD = 0, PICK_MODE_NONRD };
enum {
@@ -218,47 +269,6 @@ static AOM_INLINE const FIRSTPASS_STATS *read_one_frame_stats(const TWO_PASS *p,
return &p->stats_buf_ctx->stats_in_start[frm];
}
-static BLOCK_SIZE dim_to_size(int dim) {
- switch (dim) {
- case 4: return BLOCK_4X4;
- case 8: return BLOCK_8X8;
- case 16: return BLOCK_16X16;
- case 32: return BLOCK_32X32;
- case 64: return BLOCK_64X64;
- case 128: return BLOCK_128X128;
- default: assert(0); return 0;
- }
-}
-
-static AOM_INLINE void set_max_min_partition_size(SuperBlockEnc *sb_enc,
- AV1_COMP *cpi, MACROBLOCK *x,
- const SPEED_FEATURES *sf,
- BLOCK_SIZE sb_size,
- int mi_row, int mi_col) {
- const AV1_COMMON *cm = &cpi->common;
-
- sb_enc->max_partition_size =
- AOMMIN(sf->part_sf.default_max_partition_size,
- dim_to_size(cpi->oxcf.part_cfg.max_partition_size));
- sb_enc->min_partition_size =
- AOMMAX(sf->part_sf.default_min_partition_size,
- dim_to_size(cpi->oxcf.part_cfg.min_partition_size));
- sb_enc->max_partition_size =
- AOMMIN(sb_enc->max_partition_size, cm->seq_params.sb_size);
- sb_enc->min_partition_size =
- AOMMIN(sb_enc->min_partition_size, cm->seq_params.sb_size);
-
- if (use_auto_max_partition(cpi, sb_size, mi_row, mi_col)) {
- float features[FEATURE_SIZE_MAX_MIN_PART_PRED] = { 0.0f };
-
- av1_get_max_min_partition_features(cpi, x, mi_row, mi_col, features);
- sb_enc->max_partition_size =
- AOMMAX(AOMMIN(av1_predict_max_partition(cpi, x, features),
- sb_enc->max_partition_size),
- sb_enc->min_partition_size);
- }
-}
-
int av1_get_rdmult_delta(AV1_COMP *cpi, BLOCK_SIZE bsize, int mi_row,
int mi_col, int orig_rdmult);
@@ -335,6 +345,57 @@ void av1_set_cost_upd_freq(AV1_COMP *cpi, ThreadData *td,
const TileInfo *const tile_info, const int mi_row,
const int mi_col);
+static AOM_INLINE void av1_dealloc_mb_data(struct AV1Common *cm,
+ struct macroblock *mb) {
+ if (mb->txfm_search_info.txb_rd_records) {
+ aom_free(mb->txfm_search_info.txb_rd_records);
+ mb->txfm_search_info.txb_rd_records = NULL;
+ }
+ const int num_planes = av1_num_planes(cm);
+ for (int plane = 0; plane < num_planes; plane++) {
+ if (mb->plane[plane].src_diff) {
+ aom_free(mb->plane[plane].src_diff);
+ mb->plane[plane].src_diff = NULL;
+ }
+ }
+ if (mb->e_mbd.seg_mask) {
+ aom_free(mb->e_mbd.seg_mask);
+ mb->e_mbd.seg_mask = NULL;
+ }
+ if (mb->winner_mode_stats) {
+ aom_free(mb->winner_mode_stats);
+ mb->winner_mode_stats = NULL;
+ }
+}
+
+static AOM_INLINE void av1_alloc_mb_data(struct AV1Common *cm,
+ struct macroblock *mb,
+ int use_nonrd_pick_mode) {
+ if (!use_nonrd_pick_mode) {
+ mb->txfm_search_info.txb_rd_records =
+ (TxbRdRecords *)aom_malloc(sizeof(TxbRdRecords));
+ }
+ const int num_planes = av1_num_planes(cm);
+ for (int plane = 0; plane < num_planes; plane++) {
+ const int subsampling_xy =
+ plane ? cm->seq_params->subsampling_x + cm->seq_params->subsampling_y
+ : 0;
+ const int sb_size = MAX_SB_SQUARE >> subsampling_xy;
+ CHECK_MEM_ERROR(cm, mb->plane[plane].src_diff,
+ (int16_t *)aom_memalign(
+ 32, sizeof(*mb->plane[plane].src_diff) * sb_size));
+ }
+ CHECK_MEM_ERROR(cm, mb->e_mbd.seg_mask,
+ (uint8_t *)aom_memalign(
+ 16, 2 * MAX_SB_SQUARE * sizeof(mb->e_mbd.seg_mask[0])));
+ const int winner_mode_count = frame_is_intra_only(cm)
+ ? MAX_WINNER_MODE_COUNT_INTRA
+ : MAX_WINNER_MODE_COUNT_INTER;
+ CHECK_MEM_ERROR(cm, mb->winner_mode_stats,
+ (WinnerModeStats *)aom_malloc(
+ winner_mode_count * sizeof(mb->winner_mode_stats[0])));
+}
+
// This function will compute the number of reference frames to be disabled
// based on selective_ref_frame speed feature.
static AOM_INLINE unsigned int get_num_refs_to_disable(
@@ -359,7 +420,7 @@ static AOM_INLINE unsigned int get_num_refs_to_disable(
#if !CONFIG_REALTIME_ONLY
else if (is_stat_consumption_stage_twopass(cpi)) {
const FIRSTPASS_STATS *const this_frame_stats =
- read_one_frame_stats(&cpi->twopass, cur_frame_display_index);
+ read_one_frame_stats(&cpi->ppi->twopass, cur_frame_display_index);
aom_clear_system_state();
const double coded_error_per_mb =
this_frame_stats->coded_error / cpi->frame_info.num_mbs;
diff --git a/third_party/libaom/source/libaom/av1/encoder/encodemb.c b/third_party/libaom/source/libaom/av1/encoder/encodemb.c
index c9ee22034b..2a875e1223 100644
--- a/third_party/libaom/source/libaom/av1/encoder/encodemb.c
+++ b/third_party/libaom/source/libaom/av1/encoder/encodemb.c
@@ -35,19 +35,19 @@
#include "av1/encoder/rd.h"
#include "av1/encoder/rdopt.h"
-void av1_subtract_block(const MACROBLOCKD *xd, int rows, int cols,
- int16_t *diff, ptrdiff_t diff_stride,
- const uint8_t *src8, ptrdiff_t src_stride,
- const uint8_t *pred8, ptrdiff_t pred_stride) {
+void av1_subtract_block(BitDepthInfo bd_info, int rows, int cols, int16_t *diff,
+ ptrdiff_t diff_stride, const uint8_t *src8,
+ ptrdiff_t src_stride, const uint8_t *pred8,
+ ptrdiff_t pred_stride) {
assert(rows >= 4 && cols >= 4);
#if CONFIG_AV1_HIGHBITDEPTH
- if (is_cur_buf_hbd(xd)) {
+ if (bd_info.use_highbitdepth_buf) {
aom_highbd_subtract_block(rows, cols, diff, diff_stride, src8, src_stride,
- pred8, pred_stride, xd->bd);
+ pred8, pred_stride, bd_info.bit_depth);
return;
}
#endif
- (void)xd;
+ (void)bd_info;
aom_subtract_block(rows, cols, diff, diff_stride, src8, src_stride, pred8,
pred_stride);
}
@@ -55,6 +55,7 @@ void av1_subtract_block(const MACROBLOCKD *xd, int rows, int cols,
void av1_subtract_txb(MACROBLOCK *x, int plane, BLOCK_SIZE plane_bsize,
int blk_col, int blk_row, TX_SIZE tx_size) {
MACROBLOCKD *const xd = &x->e_mbd;
+ const BitDepthInfo bd_info = get_bit_depth_info(xd);
struct macroblock_plane *const p = &x->plane[plane];
const struct macroblockd_plane *const pd = &x->e_mbd.plane[plane];
const int diff_stride = block_size_wide[plane_bsize];
@@ -66,8 +67,8 @@ void av1_subtract_txb(MACROBLOCK *x, int plane, BLOCK_SIZE plane_bsize,
uint8_t *src = &p->src.buf[(blk_row * src_stride + blk_col) << MI_SIZE_LOG2];
int16_t *src_diff =
&p->src_diff[(blk_row * diff_stride + blk_col) << MI_SIZE_LOG2];
- av1_subtract_block(xd, tx1d_height, tx1d_width, src_diff, diff_stride, src,
- src_stride, dst, dst_stride);
+ av1_subtract_block(bd_info, tx1d_height, tx1d_width, src_diff, diff_stride,
+ src, src_stride, dst, dst_stride);
}
void av1_subtract_plane(MACROBLOCK *x, BLOCK_SIZE plane_bsize, int plane) {
@@ -77,9 +78,10 @@ void av1_subtract_plane(MACROBLOCK *x, BLOCK_SIZE plane_bsize, int plane) {
const int bw = block_size_wide[plane_bsize];
const int bh = block_size_high[plane_bsize];
const MACROBLOCKD *xd = &x->e_mbd;
+ const BitDepthInfo bd_info = get_bit_depth_info(xd);
- av1_subtract_block(xd, bh, bw, p->src_diff, bw, p->src.buf, p->src.stride,
- pd->dst.buf, pd->dst.stride);
+ av1_subtract_block(bd_info, bh, bw, p->src_diff, bw, p->src.buf,
+ p->src.stride, pd->dst.buf, pd->dst.stride);
}
int av1_optimize_b(const struct AV1_COMP *cpi, MACROBLOCK *x, int plane,
@@ -132,13 +134,8 @@ const int DROPOUT_MULTIPLIER_Q_BASE = 32; // Base Q to compute multiplier.
void av1_dropout_qcoeff(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
TX_TYPE tx_type, int qindex) {
- const struct macroblock_plane *const p = &mb->plane[plane];
- tran_low_t *const qcoeff = p->qcoeff + BLOCK_OFFSET(block);
- tran_low_t *const dqcoeff = p->dqcoeff + BLOCK_OFFSET(block);
const int tx_width = tx_size_wide[tx_size];
const int tx_height = tx_size_high[tx_size];
- const int max_eob = av1_get_max_eob(tx_size);
- const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type);
// Early return if `qindex` is out of range.
if (qindex > DROPOUT_Q_MAX || qindex < DROPOUT_Q_MIN) {
@@ -156,6 +153,19 @@ void av1_dropout_qcoeff(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
multiplier *
CLIP(base_size, DROPOUT_AFTER_BASE_MIN, DROPOUT_AFTER_BASE_MAX);
+ av1_dropout_qcoeff_num(mb, plane, block, tx_size, tx_type, dropout_num_before,
+ dropout_num_after);
+}
+
+void av1_dropout_qcoeff_num(MACROBLOCK *mb, int plane, int block,
+ TX_SIZE tx_size, TX_TYPE tx_type,
+ int dropout_num_before, int dropout_num_after) {
+ const struct macroblock_plane *const p = &mb->plane[plane];
+ tran_low_t *const qcoeff = p->qcoeff + BLOCK_OFFSET(block);
+ tran_low_t *const dqcoeff = p->dqcoeff + BLOCK_OFFSET(block);
+ const int max_eob = av1_get_max_eob(tx_size);
+ const SCAN_ORDER *const scan_order = get_scan(tx_size, tx_type);
+
// Early return if there are not enough non-zero coefficients.
if (p->eobs[block] == 0 || p->eobs[block] <= dropout_num_before) {
return;
@@ -172,7 +182,8 @@ void av1_dropout_qcoeff(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
for (int i = 0; i < p->eobs[block]; ++i) {
const int scan_idx = scan_order->scan[i];
- if (qcoeff[scan_idx] > DROPOUT_COEFF_MAX) { // Keep large coefficients.
+ if (abs(qcoeff[scan_idx]) > DROPOUT_COEFF_MAX) {
+ // Keep large coefficients.
count_zeros_before = 0;
count_zeros_after = 0;
idx = -1;
@@ -197,6 +208,7 @@ void av1_dropout_qcoeff(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
if (count_nonzeros > DROPOUT_CONTINUITY_MAX) {
count_zeros_before = 0;
count_zeros_after = 0;
+ count_nonzeros = 0;
idx = -1;
eob = i + 1;
}
@@ -513,15 +525,17 @@ static void encode_block_inter(int plane, int block, int blk_row, int blk_col,
const int bsw = tx_size_wide_unit[sub_txs];
const int bsh = tx_size_high_unit[sub_txs];
const int step = bsh * bsw;
+ const int row_end =
+ AOMMIN(tx_size_high_unit[tx_size], max_blocks_high - blk_row);
+ const int col_end =
+ AOMMIN(tx_size_wide_unit[tx_size], max_blocks_wide - blk_col);
assert(bsw > 0 && bsh > 0);
- for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh) {
- for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) {
- const int offsetr = blk_row + row;
+ for (int row = 0; row < row_end; row += bsh) {
+ const int offsetr = blk_row + row;
+ for (int col = 0; col < col_end; col += bsw) {
const int offsetc = blk_col + col;
- if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
-
encode_block_inter(plane, block, offsetr, offsetc, plane_bsize, sub_txs,
arg, dry_run);
block += step;
diff --git a/third_party/libaom/source/libaom/av1/encoder/encodemb.h b/third_party/libaom/source/libaom/av1/encoder/encodemb.h
index fcd34a3908..f2dc956a65 100644
--- a/third_party/libaom/source/libaom/av1/encoder/encodemb.h
+++ b/third_party/libaom/source/libaom/av1/encoder/encodemb.h
@@ -123,11 +123,16 @@ int av1_optimize_b(const struct AV1_COMP *cpi, MACROBLOCK *mb, int plane,
// `txb_entropy_ctx`, which `mb` points to, may be modified by this function.
void av1_dropout_qcoeff(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size,
TX_TYPE tx_type, int qindex);
-
-void av1_subtract_block(const MACROBLOCKD *xd, int rows, int cols,
- int16_t *diff, ptrdiff_t diff_stride,
- const uint8_t *src8, ptrdiff_t src_stride,
- const uint8_t *pred8, ptrdiff_t pred_stride);
+// Same as above, with the number of zeroes needed before/after a coeff to drop
+// it explicitly passed in, instead of being derived from qindex.
+void av1_dropout_qcoeff_num(MACROBLOCK *mb, int plane, int block,
+ TX_SIZE tx_size, TX_TYPE tx_type,
+ int dropout_num_before, int dropout_num_after);
+
+void av1_subtract_block(BitDepthInfo bd_info, int rows, int cols, int16_t *diff,
+ ptrdiff_t diff_stride, const uint8_t *src8,
+ ptrdiff_t src_stride, const uint8_t *pred8,
+ ptrdiff_t pred_stride);
void av1_subtract_txb(MACROBLOCK *x, int plane, BLOCK_SIZE plane_bsize,
int blk_col, int blk_row, TX_SIZE tx_size);
diff --git a/third_party/libaom/source/libaom/av1/encoder/encodemv.c b/third_party/libaom/source/libaom/av1/encoder/encodemv.c
index 86c6156d8f..4a7d87408c 100644
--- a/third_party/libaom/source/libaom/av1/encoder/encodemv.c
+++ b/third_party/libaom/source/libaom/av1/encoder/encodemv.c
@@ -173,8 +173,8 @@ static void build_nmv_component_cost_table(int *mvcost,
}
}
-void av1_encode_mv(AV1_COMP *cpi, aom_writer *w, const MV *mv, const MV *ref,
- nmv_context *mvctx, int usehp) {
+void av1_encode_mv(AV1_COMP *cpi, aom_writer *w, ThreadData *td, const MV *mv,
+ const MV *ref, nmv_context *mvctx, int usehp) {
const MV diff = { mv->row - ref->row, mv->col - ref->col };
const MV_JOINT_TYPE j = av1_get_mv_joint(&diff);
// If the mv_diff is zero, then we should have used near or nearest instead.
@@ -193,8 +193,7 @@ void av1_encode_mv(AV1_COMP *cpi, aom_writer *w, const MV *mv, const MV *ref,
// motion vector component used.
if (cpi->sf.mv_sf.auto_mv_step_size) {
int maxv = AOMMAX(abs(mv->row), abs(mv->col)) >> 3;
- cpi->mv_search_params.max_mv_magnitude =
- AOMMAX(maxv, cpi->mv_search_params.max_mv_magnitude);
+ td->max_mv_magnitude = AOMMAX(maxv, td->max_mv_magnitude);
}
}
diff --git a/third_party/libaom/source/libaom/av1/encoder/encodemv.h b/third_party/libaom/source/libaom/av1/encoder/encodemv.h
index 9f0d607295..962844bc79 100644
--- a/third_party/libaom/source/libaom/av1/encoder/encodemv.h
+++ b/third_party/libaom/source/libaom/av1/encoder/encodemv.h
@@ -18,8 +18,8 @@
extern "C" {
#endif
-void av1_encode_mv(AV1_COMP *cpi, aom_writer *w, const MV *mv, const MV *ref,
- nmv_context *mvctx, int usehp);
+void av1_encode_mv(AV1_COMP *cpi, aom_writer *w, ThreadData *td, const MV *mv,
+ const MV *ref, nmv_context *mvctx, int usehp);
void av1_update_mv_stats(const MV *mv, const MV *ref, nmv_context *mvctx,
MvSubpelPrecision precision);
diff --git a/third_party/libaom/source/libaom/av1/encoder/encoder.c b/third_party/libaom/source/libaom/av1/encoder/encoder.c
index 955d15631c..41122ef45b 100644
--- a/third_party/libaom/source/libaom/av1/encoder/encoder.c
+++ b/third_party/libaom/source/libaom/av1/encoder/encoder.c
@@ -51,6 +51,7 @@
#include "av1/encoder/aq_variance.h"
#include "av1/encoder/bitstream.h"
#include "av1/encoder/context_tree.h"
+#include "av1/encoder/dwt.h"
#include "av1/encoder/encodeframe.h"
#include "av1/encoder/encodemv.h"
#include "av1/encoder/encode_strategy.h"
@@ -81,10 +82,6 @@
#define DEFAULT_EXPLICIT_ORDER_HINT_BITS 7
-#if CONFIG_ENTROPY_STATS
-FRAME_COUNTS aggregate_fc;
-#endif // CONFIG_ENTROPY_STATS
-
// #define OUTPUT_YUV_REC
#ifdef OUTPUT_YUV_REC
FILE *yuv_rec_file;
@@ -228,7 +225,7 @@ double av1_get_compression_ratio(const AV1_COMMON *const cm,
const int upscaled_width = cm->superres_upscaled_width;
const int height = cm->height;
const int luma_pic_size = upscaled_width * height;
- const SequenceHeader *const seq_params = &cm->seq_params;
+ const SequenceHeader *const seq_params = cm->seq_params;
const BITSTREAM_PROFILE profile = seq_params->profile;
const int pic_size_profile_factor =
profile == PROFILE_0 ? 15 : (profile == PROFILE_1 ? 30 : 36);
@@ -242,7 +239,7 @@ double av1_get_compression_ratio(const AV1_COMMON *const cm,
static void set_tile_info(AV1_COMMON *const cm,
const TileConfig *const tile_cfg) {
const CommonModeInfoParams *const mi_params = &cm->mi_params;
- const SequenceHeader *const seq_params = &cm->seq_params;
+ const SequenceHeader *const seq_params = cm->seq_params;
CommonTileParams *const tiles = &cm->tiles;
int i, start_sb;
@@ -298,7 +295,7 @@ void av1_update_frame_size(AV1_COMP *cpi) {
// We need to reallocate the context buffers here in case we need more mis.
if (av1_alloc_context_buffers(cm, cm->width, cm->height)) {
- aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
+ aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate context buffers");
}
av1_init_mi_buffers(&cm->mi_params);
@@ -308,8 +305,10 @@ void av1_update_frame_size(AV1_COMP *cpi) {
if (!is_stat_generation_stage(cpi))
alloc_context_buffers_ext(cm, &cpi->mbmi_ext_info);
- if (!cpi->seq_params_locked)
- set_sb_size(&cm->seq_params, av1_select_sb_size(cpi));
+ if (!cpi->ppi->seq_params_locked)
+ set_sb_size(cm->seq_params,
+ av1_select_sb_size(&cpi->oxcf, cm->width, cm->height,
+ cpi->svc.number_spatial_layers));
set_tile_info(cm, &cpi->oxcf.tile_cfg);
}
@@ -327,9 +326,9 @@ static INLINE int does_level_match(int width, int height, double fps,
height <= lvl_height * lvl_dim_mult;
}
-static void set_bitstream_level_tier(SequenceHeader *seq, AV1_COMMON *cm,
- int width, int height,
- double init_framerate) {
+static void set_bitstream_level_tier(AV1_PRIMARY *const ppi, int width,
+ int height, double init_framerate) {
+ SequenceHeader *const seq_params = &ppi->seq_params;
// TODO(any): This is a placeholder function that only addresses dimensions
// and max display sample rates.
// Need to add checks for max bit rate, max decoded luma sample rate, header
@@ -372,26 +371,26 @@ static void set_bitstream_level_tier(SequenceHeader *seq, AV1_COMMON *cm,
level = SEQ_LEVEL_6_2;
}
- SequenceHeader *const seq_params = &cm->seq_params;
for (int i = 0; i < MAX_NUM_OPERATING_POINTS; ++i) {
- seq->seq_level_idx[i] = level;
+ seq_params->seq_level_idx[i] = level;
// Set the maximum parameters for bitrate and buffer size for this profile,
// level, and tier
seq_params->op_params[i].bitrate = av1_max_level_bitrate(
- cm->seq_params.profile, seq->seq_level_idx[i], seq->tier[i]);
+ seq_params->profile, seq_params->seq_level_idx[i], seq_params->tier[i]);
// Level with seq_level_idx = 31 returns a high "dummy" bitrate to pass the
// check
if (seq_params->op_params[i].bitrate == 0)
aom_internal_error(
- &cm->error, AOM_CODEC_UNSUP_BITSTREAM,
+ &ppi->error, AOM_CODEC_UNSUP_BITSTREAM,
"AV1 does not support this combination of profile, level, and tier.");
// Buffer size in bits/s is bitrate in bits/s * 1 s
seq_params->op_params[i].buffer_size = seq_params->op_params[i].bitrate;
}
}
-void av1_init_seq_coding_tools(SequenceHeader *seq, AV1_COMMON *cm,
+void av1_init_seq_coding_tools(AV1_PRIMARY *const ppi,
const AV1EncoderConfig *oxcf, int use_svc) {
+ SequenceHeader *const seq = &ppi->seq_params;
const FrameDimensionCfg *const frm_dim_cfg = &oxcf->frm_dim_cfg;
const ToolCfg *const tool_cfg = &oxcf->tool_cfg;
@@ -449,7 +448,7 @@ void av1_init_seq_coding_tools(SequenceHeader *seq, AV1_COMMON *cm,
seq->enable_intra_edge_filter = oxcf->intra_mode_cfg.enable_intra_edge_filter;
seq->enable_filter_intra = oxcf->intra_mode_cfg.enable_filter_intra;
- set_bitstream_level_tier(seq, cm, frm_dim_cfg->width, frm_dim_cfg->height,
+ set_bitstream_level_tier(ppi, frm_dim_cfg->width, frm_dim_cfg->height,
oxcf->input_cfg.init_framerate);
if (seq->operating_points_cnt_minus_1 == 0) {
@@ -461,26 +460,27 @@ void av1_init_seq_coding_tools(SequenceHeader *seq, AV1_COMMON *cm,
// skip decoding enhancement layers (temporal first).
int i = 0;
assert(seq->operating_points_cnt_minus_1 ==
- (int)(cm->number_spatial_layers * cm->number_temporal_layers - 1));
- for (unsigned int sl = 0; sl < cm->number_spatial_layers; sl++) {
- for (unsigned int tl = 0; tl < cm->number_temporal_layers; tl++) {
+ (int)(ppi->number_spatial_layers * ppi->number_temporal_layers - 1));
+ for (unsigned int sl = 0; sl < ppi->number_spatial_layers; sl++) {
+ for (unsigned int tl = 0; tl < ppi->number_temporal_layers; tl++) {
seq->operating_point_idc[i] =
- (~(~0u << (cm->number_spatial_layers - sl)) << 8) |
- ~(~0u << (cm->number_temporal_layers - tl));
+ (~(~0u << (ppi->number_spatial_layers - sl)) << 8) |
+ ~(~0u << (ppi->number_temporal_layers - tl));
i++;
}
}
}
}
-static void init_config(struct AV1_COMP *cpi, AV1EncoderConfig *oxcf) {
- AV1_COMMON *const cm = &cpi->common;
- SequenceHeader *const seq_params = &cm->seq_params;
- ResizePendingParams *resize_pending_params = &cpi->resize_pending_params;
+static void init_config_sequence(struct AV1_PRIMARY *ppi,
+ AV1EncoderConfig *oxcf) {
+ SequenceHeader *const seq_params = &ppi->seq_params;
const DecoderModelCfg *const dec_model_cfg = &oxcf->dec_model_cfg;
const ColorCfg *const color_cfg = &oxcf->color_cfg;
- cpi->oxcf = *oxcf;
- cpi->framerate = oxcf->input_cfg.init_framerate;
+
+ ppi->use_svc = 0;
+ ppi->number_spatial_layers = 1;
+ ppi->number_temporal_layers = 1;
seq_params->profile = oxcf->profile;
seq_params->bit_depth = oxcf->tool_cfg.bit_depth;
@@ -508,7 +508,7 @@ static void init_config(struct AV1_COMP *cpi, AV1EncoderConfig *oxcf) {
// set the decoder model parameters in schedule mode
seq_params->decoder_model_info.num_units_in_decoding_tick =
dec_model_cfg->num_units_in_decoding_tick;
- cm->buffer_removal_time_present = 1;
+ ppi->buffer_removal_time_present = 1;
av1_set_aom_dec_model_info(&seq_params->decoder_model_info);
av1_set_dec_model_op_parameters(&seq_params->op_params[0]);
} else if (seq_params->timing_info_present &&
@@ -546,11 +546,19 @@ static void init_config(struct AV1_COMP *cpi, AV1EncoderConfig *oxcf) {
}
}
}
+ av1_change_config_seq(ppi, oxcf, NULL);
+}
+
+static void init_config(struct AV1_COMP *cpi, AV1EncoderConfig *oxcf) {
+ AV1_COMMON *const cm = &cpi->common;
+ ResizePendingParams *resize_pending_params = &cpi->resize_pending_params;
+
+ cpi->oxcf = *oxcf;
+ cpi->framerate = oxcf->input_cfg.init_framerate;
cm->width = oxcf->frm_dim_cfg.width;
cm->height = oxcf->frm_dim_cfg.height;
- set_sb_size(seq_params,
- av1_select_sb_size(cpi)); // set sb size before allocations
+
alloc_compressor_data(cpi);
av1_update_film_grain_parameters(cpi, oxcf);
@@ -559,18 +567,15 @@ static void init_config(struct AV1_COMP *cpi, AV1EncoderConfig *oxcf) {
cpi->td.counts = &cpi->counts;
// Set init SVC parameters.
- cpi->use_svc = 0;
- cpi->svc.external_ref_frame_config = 0;
+ cpi->svc.set_ref_frame_config = 0;
cpi->svc.non_reference_frame = 0;
cpi->svc.number_spatial_layers = 1;
cpi->svc.number_temporal_layers = 1;
- cm->number_spatial_layers = 1;
- cm->number_temporal_layers = 1;
cm->spatial_layer_id = 0;
cm->temporal_layer_id = 0;
// change includes all joint functionality
- av1_change_config(cpi, oxcf);
+ av1_change_config(cpi, oxcf, true);
cpi->ref_frame_flags = 0;
@@ -583,25 +588,13 @@ static void init_config(struct AV1_COMP *cpi, AV1EncoderConfig *oxcf) {
av1_noise_estimate_init(&cpi->noise_estimate, cm->width, cm->height);
}
-void av1_change_config(struct AV1_COMP *cpi, const AV1EncoderConfig *oxcf) {
- AV1_COMMON *const cm = &cpi->common;
- SequenceHeader *const seq_params = &cm->seq_params;
- RATE_CONTROL *const rc = &cpi->rc;
- MACROBLOCK *const x = &cpi->td.mb;
- AV1LevelParams *const level_params = &cpi->level_params;
- InitialDimensions *const initial_dimensions = &cpi->initial_dimensions;
- RefreshFrameFlagsInfo *const refresh_frame_flags = &cpi->refresh_frame;
- const FrameDimensionCfg *const frm_dim_cfg = &cpi->oxcf.frm_dim_cfg;
+void av1_change_config_seq(struct AV1_PRIMARY *ppi,
+ const AV1EncoderConfig *oxcf,
+ bool *is_sb_size_changed) {
+ SequenceHeader *const seq_params = &ppi->seq_params;
+ const FrameDimensionCfg *const frm_dim_cfg = &oxcf->frm_dim_cfg;
const DecoderModelCfg *const dec_model_cfg = &oxcf->dec_model_cfg;
const ColorCfg *const color_cfg = &oxcf->color_cfg;
- const RateControlCfg *const rc_cfg = &oxcf->rc_cfg;
- // in case of LAP, lag in frames is set according to number of lap buffers
- // calculated at init time. This stores and restores LAP's lag in frames to
- // prevent override by new cfg.
- int lap_lag_in_frames = -1;
- if (cpi->lap_enabled && cpi->compressor_stage == LAP_STAGE) {
- lap_lag_in_frames = cpi->oxcf.gf_cfg.lag_in_frames;
- }
if (seq_params->profile != oxcf->profile) seq_params->profile = oxcf->profile;
seq_params->bit_depth = oxcf->tool_cfg.bit_depth;
@@ -632,7 +625,7 @@ void av1_change_config(struct AV1_COMP *cpi, const AV1EncoderConfig *oxcf) {
// set the decoder model parameters in schedule mode
seq_params->decoder_model_info.num_units_in_decoding_tick =
dec_model_cfg->num_units_in_decoding_tick;
- cm->buffer_removal_time_present = 1;
+ ppi->buffer_removal_time_present = 1;
av1_set_aom_dec_model_info(&seq_params->decoder_model_info);
av1_set_dec_model_op_parameters(&seq_params->op_params[0]);
} else if (seq_params->timing_info_present &&
@@ -645,6 +638,56 @@ void av1_change_config(struct AV1_COMP *cpi, const AV1EncoderConfig *oxcf) {
10; // Default value (not signaled)
}
+ av1_update_film_grain_parameters_seq(ppi, oxcf);
+
+ int sb_size = seq_params->sb_size;
+ // Superblock size should not be updated after the first key frame.
+ if (!ppi->seq_params_locked) {
+ set_sb_size(seq_params, av1_select_sb_size(oxcf, frm_dim_cfg->width,
+ frm_dim_cfg->height,
+ ppi->number_spatial_layers));
+ for (int i = 0; i < MAX_NUM_OPERATING_POINTS; ++i)
+ seq_params->tier[i] = (oxcf->tier_mask >> i) & 1;
+ }
+ if (is_sb_size_changed != NULL && sb_size != seq_params->sb_size)
+ *is_sb_size_changed = true;
+
+ // Init sequence level coding tools
+ // This should not be called after the first key frame.
+ if (!ppi->seq_params_locked) {
+ seq_params->operating_points_cnt_minus_1 =
+ (ppi->number_spatial_layers > 1 || ppi->number_temporal_layers > 1)
+ ? ppi->number_spatial_layers * ppi->number_temporal_layers - 1
+ : 0;
+ av1_init_seq_coding_tools(ppi, oxcf, ppi->use_svc);
+ }
+
+#if CONFIG_AV1_HIGHBITDEPTH
+ highbd_set_var_fns(ppi);
+#endif
+}
+
+void av1_change_config(struct AV1_COMP *cpi, const AV1EncoderConfig *oxcf,
+ bool is_sb_size_changed) {
+ AV1_COMMON *const cm = &cpi->common;
+ SequenceHeader *const seq_params = cm->seq_params;
+ RATE_CONTROL *const rc = &cpi->rc;
+ PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
+ MACROBLOCK *const x = &cpi->td.mb;
+ AV1LevelParams *const level_params = &cpi->ppi->level_params;
+ InitialDimensions *const initial_dimensions = &cpi->initial_dimensions;
+ RefreshFrameFlagsInfo *const refresh_frame_flags = &cpi->refresh_frame;
+ const FrameDimensionCfg *const frm_dim_cfg = &cpi->oxcf.frm_dim_cfg;
+ const RateControlCfg *const rc_cfg = &oxcf->rc_cfg;
+
+ // in case of LAP, lag in frames is set according to number of lap buffers
+ // calculated at init time. This stores and restores LAP's lag in frames to
+ // prevent override by new cfg.
+ int lap_lag_in_frames = -1;
+ if (cpi->ppi->lap_enabled && cpi->compressor_stage == LAP_STAGE) {
+ lap_lag_in_frames = cpi->oxcf.gf_cfg.lag_in_frames;
+ }
+
av1_update_film_grain_parameters(cpi, oxcf);
cpi->oxcf = *oxcf;
@@ -680,10 +723,10 @@ void av1_change_config(struct AV1_COMP *cpi, const AV1EncoderConfig *oxcf) {
seq_params->tier[0]);
}
- if ((has_no_stats_stage(cpi)) && (rc_cfg->mode == AOM_Q)) {
- rc->baseline_gf_interval = FIXED_GF_INTERVAL;
+ if (has_no_stats_stage(cpi) && (rc_cfg->mode == AOM_Q)) {
+ p_rc->baseline_gf_interval = FIXED_GF_INTERVAL;
} else {
- rc->baseline_gf_interval = (MIN_GF_INTERVAL + MAX_GF_INTERVAL) / 2;
+ p_rc->baseline_gf_interval = (MIN_GF_INTERVAL + MAX_GF_INTERVAL) / 2;
}
refresh_frame_flags->golden_frame = false;
@@ -720,16 +763,23 @@ void av1_change_config(struct AV1_COMP *cpi, const AV1EncoderConfig *oxcf) {
}
}
+ if (x->pixel_gradient_info == NULL) {
+ const int plane_types = PLANE_TYPES >> cm->seq_params->monochrome;
+ CHECK_MEM_ERROR(cm, x->pixel_gradient_info,
+ aom_malloc(sizeof(*x->pixel_gradient_info) * plane_types *
+ MAX_SB_SQUARE));
+ }
+
av1_reset_segment_features(cm);
av1_set_high_precision_mv(cpi, 1, 0);
- set_rc_buffer_sizes(rc, rc_cfg);
+ set_rc_buffer_sizes(cpi);
// Under a configuration change, where maximum_buffer_size may change,
// keep buffer level clipped to the maximum allowed buffer size.
- rc->bits_off_target = AOMMIN(rc->bits_off_target, rc->maximum_buffer_size);
- rc->buffer_level = AOMMIN(rc->buffer_level, rc->maximum_buffer_size);
+ rc->bits_off_target = AOMMIN(rc->bits_off_target, p_rc->maximum_buffer_size);
+ rc->buffer_level = AOMMIN(rc->buffer_level, p_rc->maximum_buffer_size);
// Set up frame rate and related parameters rate control values.
av1_new_framerate(cpi, cpi->framerate);
@@ -752,18 +802,9 @@ void av1_change_config(struct AV1_COMP *cpi, const AV1EncoderConfig *oxcf) {
cm->width = frm_dim_cfg->width;
cm->height = frm_dim_cfg->height;
- int sb_size = seq_params->sb_size;
- // Superblock size should not be updated after the first key frame.
- if (!cpi->seq_params_locked) {
- set_sb_size(&cm->seq_params, av1_select_sb_size(cpi));
- for (int i = 0; i < MAX_NUM_OPERATING_POINTS; ++i)
- seq_params->tier[i] = (oxcf->tier_mask >> i) & 1;
- }
-
- if (initial_dimensions->width || sb_size != seq_params->sb_size) {
+ if (initial_dimensions->width || is_sb_size_changed) {
if (cm->width > initial_dimensions->width ||
- cm->height > initial_dimensions->height ||
- seq_params->sb_size != sb_size) {
+ cm->height > initial_dimensions->height || is_sb_size_changed) {
av1_free_context_buffers(cm);
av1_free_shared_coeff_buffer(&cpi->td.shared_coeff_buf);
av1_free_sms_tree(&cpi->td);
@@ -780,27 +821,15 @@ void av1_change_config(struct AV1_COMP *cpi, const AV1EncoderConfig *oxcf) {
set_tile_info(cm, &cpi->oxcf.tile_cfg);
- if (!cpi->svc.external_ref_frame_config)
+ if (!cpi->svc.set_ref_frame_config)
cpi->ext_flags.refresh_frame.update_pending = 0;
cpi->ext_flags.refresh_frame_context_pending = 0;
-#if CONFIG_AV1_HIGHBITDEPTH
- highbd_set_var_fns(cpi);
-#endif
-
- // Init sequence level coding tools
- // This should not be called after the first key frame.
- if (!cpi->seq_params_locked) {
- seq_params->operating_points_cnt_minus_1 =
- (cm->number_spatial_layers > 1 || cm->number_temporal_layers > 1)
- ? cm->number_spatial_layers * cm->number_temporal_layers - 1
- : 0;
- av1_init_seq_coding_tools(&cm->seq_params, cm, oxcf, cpi->use_svc);
- }
-
- if (cpi->use_svc)
+ if (cpi->ppi->use_svc)
av1_update_layer_context_change_config(cpi, rc_cfg->target_bandwidth);
+ check_reset_rc_flag(cpi);
+
// restore the value of lag_in_frame for LAP stage.
if (lap_lag_in_frames != -1) {
cpi->oxcf.gf_cfg.lag_in_frames = lap_lag_in_frames;
@@ -810,7 +839,7 @@ void av1_change_config(struct AV1_COMP *cpi, const AV1EncoderConfig *oxcf) {
static INLINE void init_frame_info(FRAME_INFO *frame_info,
const AV1_COMMON *const cm) {
const CommonModeInfoParams *const mi_params = &cm->mi_params;
- const SequenceHeader *const seq_params = &cm->seq_params;
+ const SequenceHeader *const seq_params = cm->seq_params;
frame_info->frame_width = cm->width;
frame_info->frame_height = cm->height;
frame_info->mi_cols = mi_params->mi_cols;
@@ -834,73 +863,44 @@ static INLINE void update_frame_index_set(FRAME_INDEX_SET *frame_index_set,
}
}
-AV1_PRIMARY *av1_create_primary_compressor() {
+AV1_PRIMARY *av1_create_primary_compressor(
+ struct aom_codec_pkt_list *pkt_list_head, int num_lap_buffers,
+ AV1EncoderConfig *oxcf) {
AV1_PRIMARY *volatile const ppi = aom_memalign(32, sizeof(AV1_PRIMARY));
if (!ppi) return NULL;
av1_zero(*ppi);
- return ppi;
-}
-
-AV1_COMP *av1_create_compressor(AV1_PRIMARY *ppi, AV1EncoderConfig *oxcf,
- BufferPool *const pool,
- FIRSTPASS_STATS *frame_stats_buf,
- COMPRESSOR_STAGE stage, int num_lap_buffers,
- int lap_lag_in_frames,
- STATS_BUFFER_CTX *stats_buf_context) {
- AV1_COMP *volatile const cpi = aom_memalign(32, sizeof(AV1_COMP));
- AV1_COMMON *volatile const cm = cpi != NULL ? &cpi->common : NULL;
-
- if (!cm) return NULL;
-
- av1_zero(*cpi);
-
- cpi->ppi = ppi;
-
// The jmp_buf is valid only for the duration of the function that calls
// setjmp(). Therefore, this function must reset the 'setjmp' field to 0
// before it returns.
- if (setjmp(cm->error.jmp)) {
- cm->error.setjmp = 0;
- av1_remove_compressor(cpi);
+ if (setjmp(ppi->error.jmp)) {
+ ppi->error.setjmp = 0;
+ av1_remove_primary_compressor(ppi);
return 0;
}
+ ppi->error.setjmp = 1;
- cm->error.setjmp = 1;
- cpi->lap_enabled = num_lap_buffers > 0;
- cpi->compressor_stage = stage;
-
- CommonModeInfoParams *const mi_params = &cm->mi_params;
- mi_params->free_mi = enc_free_mi;
- mi_params->setup_mi = enc_setup_mi;
- mi_params->set_mb_mi = (oxcf->pass == 1 || cpi->compressor_stage == LAP_STAGE)
- ? stat_stage_set_mb_mi
- : enc_set_mb_mi;
-
- mi_params->mi_alloc_bsize = BLOCK_4X4;
-
- CHECK_MEM_ERROR(cm, cm->fc,
- (FRAME_CONTEXT *)aom_memalign(32, sizeof(*cm->fc)));
- CHECK_MEM_ERROR(
- cm, cm->default_frame_context,
- (FRAME_CONTEXT *)aom_memalign(32, sizeof(*cm->default_frame_context)));
- memset(cm->fc, 0, sizeof(*cm->fc));
- memset(cm->default_frame_context, 0, sizeof(*cm->default_frame_context));
-
- cpi->common.buffer_pool = pool;
+ ppi->seq_params_locked = 0;
+ ppi->lap_enabled = num_lap_buffers > 0;
+ ppi->output_pkt_list = pkt_list_head;
+ ppi->b_calculate_psnr = CONFIG_INTERNAL_STATS;
+ ppi->frames_left = oxcf->input_cfg.limit;
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ ppi->max_mv_magnitude = 0;
+ ppi->num_fp_contexts = 1;
+#endif
- init_config(cpi, oxcf);
- if (cpi->compressor_stage == LAP_STAGE) {
- cpi->oxcf.gf_cfg.lag_in_frames = lap_lag_in_frames;
- }
+ init_config_sequence(ppi, oxcf);
- cpi->frames_left = cpi->oxcf.input_cfg.limit;
+#if CONFIG_ENTROPY_STATS
+ av1_zero(ppi->aggregate_fc);
+#endif // CONFIG_ENTROPY_STATS
- av1_rc_init(&cpi->oxcf, oxcf->pass, &cpi->rc);
+ av1_primary_rc_init(oxcf, &ppi->p_rc);
// For two pass and lag_in_frames > 33 in LAP.
- cpi->rc.enable_scenecut_detection = ENABLE_SCENECUT_MODE_2;
- if (cpi->lap_enabled) {
+ ppi->p_rc.enable_scenecut_detection = ENABLE_SCENECUT_MODE_2;
+ if (ppi->lap_enabled) {
if ((num_lap_buffers <
(MAX_GF_LENGTH_LAP + SCENE_CUT_KEY_TEST_INTERVAL + 1)) &&
num_lap_buffers >= (MAX_GF_LENGTH_LAP + 3)) {
@@ -908,219 +908,22 @@ AV1_COMP *av1_create_compressor(AV1_PRIMARY *ppi, AV1EncoderConfig *oxcf,
* For lag in frames >= 19 and <33, enable scenecut
* with limited future frame prediction.
*/
- cpi->rc.enable_scenecut_detection = ENABLE_SCENECUT_MODE_1;
+ ppi->p_rc.enable_scenecut_detection = ENABLE_SCENECUT_MODE_1;
} else if (num_lap_buffers < (MAX_GF_LENGTH_LAP + 3)) {
// Disable scenecut when lag_in_frames < 19.
- cpi->rc.enable_scenecut_detection = DISABLE_SCENECUT;
+ ppi->p_rc.enable_scenecut_detection = DISABLE_SCENECUT;
}
}
- init_frame_info(&cpi->frame_info, cm);
- init_frame_index_set(&cpi->frame_index_set);
-
- cm->current_frame.frame_number = 0;
- cm->current_frame_id = -1;
- cpi->seq_params_locked = 0;
- cpi->partition_search_skippable_frame = 0;
- cpi->tile_data = NULL;
- cpi->last_show_frame_buf = NULL;
- realloc_segmentation_maps(cpi);
-
- cpi->refresh_frame.alt_ref_frame = false;
-
- cpi->b_calculate_psnr = CONFIG_INTERNAL_STATS;
-#if CONFIG_INTERNAL_STATS
- cpi->b_calculate_blockiness = 1;
- cpi->b_calculate_consistency = 1;
- cpi->total_inconsistency = 0;
- cpi->psnr[0].worst = 100.0;
- cpi->psnr[1].worst = 100.0;
- cpi->worst_ssim = 100.0;
- cpi->worst_ssim_hbd = 100.0;
-
- cpi->count[0] = 0;
- cpi->count[1] = 0;
- cpi->bytes = 0;
-#if CONFIG_SPEED_STATS
- cpi->tx_search_count = 0;
-#endif // CONFIG_SPEED_STATS
-
- if (cpi->b_calculate_psnr) {
- cpi->total_sq_error[0] = 0;
- cpi->total_samples[0] = 0;
- cpi->total_sq_error[1] = 0;
- cpi->total_samples[1] = 0;
- cpi->tot_recode_hits = 0;
- cpi->summed_quality = 0;
- cpi->summed_weights = 0;
- cpi->summed_quality_hbd = 0;
- cpi->summed_weights_hbd = 0;
- }
-
- cpi->fastssim.worst = 100.0;
- cpi->psnrhvs.worst = 100.0;
-
- if (cpi->b_calculate_blockiness) {
- cpi->total_blockiness = 0;
- cpi->worst_blockiness = 0.0;
- }
-
- if (cpi->b_calculate_consistency) {
- CHECK_MEM_ERROR(
- cm, cpi->ssim_vars,
- aom_malloc(sizeof(*cpi->ssim_vars) * 4 * cpi->common.mi_params.mi_rows *
- cpi->common.mi_params.mi_cols));
- cpi->worst_consistency = 100.0;
- }
-#endif
-#if CONFIG_ENTROPY_STATS
- av1_zero(aggregate_fc);
-#endif // CONFIG_ENTROPY_STATS
-
- cpi->time_stamps.first_ts_start = INT64_MAX;
-
-#ifdef OUTPUT_YUV_REC
- yuv_rec_file = fopen("rec.yuv", "wb");
-#endif
-#ifdef OUTPUT_YUV_DENOISED
- yuv_denoised_file = fopen("denoised.yuv", "wb");
-#endif
-
- assert(MAX_LAP_BUFFERS >= MAX_LAG_BUFFERS);
- int size = get_stats_buf_size(num_lap_buffers, MAX_LAG_BUFFERS);
- for (int i = 0; i < size; i++)
- cpi->twopass.frame_stats_arr[i] = &frame_stats_buf[i];
-
- cpi->twopass.stats_buf_ctx = stats_buf_context;
- cpi->twopass.stats_in = cpi->twopass.stats_buf_ctx->stats_in_start;
-
-#if !CONFIG_REALTIME_ONLY
- if (is_stat_consumption_stage(cpi)) {
- const size_t packet_sz = sizeof(FIRSTPASS_STATS);
- const int packets = (int)(oxcf->twopass_stats_in.sz / packet_sz);
-
- if (!cpi->lap_enabled) {
- /*Re-initialize to stats buffer, populated by application in the case of
- * two pass*/
- cpi->twopass.stats_buf_ctx->stats_in_start = oxcf->twopass_stats_in.buf;
- cpi->twopass.stats_in = cpi->twopass.stats_buf_ctx->stats_in_start;
- cpi->twopass.stats_buf_ctx->stats_in_end =
- &cpi->twopass.stats_buf_ctx->stats_in_start[packets - 1];
-
- av1_init_second_pass(cpi);
- } else {
- av1_init_single_pass_lap(cpi);
- }
- }
-#endif
-
- alloc_obmc_buffers(&cpi->td.mb.obmc_buffer, cm);
-
- CHECK_MEM_ERROR(
- cm, cpi->td.mb.inter_modes_info,
- (InterModesInfo *)aom_malloc(sizeof(*cpi->td.mb.inter_modes_info)));
-
- for (int x = 0; x < 2; x++)
- for (int y = 0; y < 2; y++)
- CHECK_MEM_ERROR(
- cm, cpi->td.mb.intrabc_hash_info.hash_value_buffer[x][y],
- (uint32_t *)aom_malloc(
- AOM_BUFFER_SIZE_FOR_BLOCK_HASH *
- sizeof(*cpi->td.mb.intrabc_hash_info.hash_value_buffer[0][0])));
-
- cpi->td.mb.intrabc_hash_info.g_crc_initialized = 0;
-
- av1_set_speed_features_framesize_independent(cpi, oxcf->speed);
- av1_set_speed_features_framesize_dependent(cpi, oxcf->speed);
-
- CHECK_MEM_ERROR(cm, cpi->consec_zero_mv,
- aom_calloc((mi_params->mi_rows * mi_params->mi_cols) >> 2,
- sizeof(*cpi->consec_zero_mv)));
-
- {
- const int bsize = BLOCK_16X16;
- const int w = mi_size_wide[bsize];
- const int h = mi_size_high[bsize];
- const int num_cols = (mi_params->mi_cols + w - 1) / w;
- const int num_rows = (mi_params->mi_rows + h - 1) / h;
- CHECK_MEM_ERROR(cm, cpi->tpl_rdmult_scaling_factors,
- aom_calloc(num_rows * num_cols,
- sizeof(*cpi->tpl_rdmult_scaling_factors)));
- CHECK_MEM_ERROR(cm, cpi->tpl_sb_rdmult_scaling_factors,
- aom_calloc(num_rows * num_cols,
- sizeof(*cpi->tpl_sb_rdmult_scaling_factors)));
- }
-
- {
- const int bsize = BLOCK_16X16;
- const int w = mi_size_wide[bsize];
- const int h = mi_size_high[bsize];
- const int num_cols = (mi_params->mi_cols + w - 1) / w;
- const int num_rows = (mi_params->mi_rows + h - 1) / h;
- CHECK_MEM_ERROR(cm, cpi->ssim_rdmult_scaling_factors,
- aom_calloc(num_rows * num_cols,
- sizeof(*cpi->ssim_rdmult_scaling_factors)));
- }
-
-#if CONFIG_TUNE_VMAF
- {
- const int bsize = BLOCK_64X64;
- const int w = mi_size_wide[bsize];
- const int h = mi_size_high[bsize];
- const int num_cols = (mi_params->mi_cols + w - 1) / w;
- const int num_rows = (mi_params->mi_rows + h - 1) / h;
- CHECK_MEM_ERROR(cm, cpi->vmaf_info.rdmult_scaling_factors,
- aom_calloc(num_rows * num_cols,
- sizeof(*cpi->vmaf_info.rdmult_scaling_factors)));
- for (int i = 0; i < MAX_ARF_LAYERS; i++) {
- cpi->vmaf_info.last_frame_unsharp_amount[i] = -1.0;
- cpi->vmaf_info.last_frame_ysse[i] = -1.0;
- cpi->vmaf_info.last_frame_vmaf[i] = -1.0;
- }
- cpi->vmaf_info.original_qindex = -1;
-
-#if CONFIG_USE_VMAF_RC
- cpi->vmaf_info.vmaf_model = NULL;
-#endif
- }
-#endif
-
-#if CONFIG_TUNE_BUTTERAUGLI
- {
- const int w = mi_size_wide[butteraugli_rdo_bsize];
- const int h = mi_size_high[butteraugli_rdo_bsize];
- const int num_cols = (mi_params->mi_cols + w - 1) / w;
- const int num_rows = (mi_params->mi_rows + h - 1) / h;
- CHECK_MEM_ERROR(
- cm, cpi->butteraugli_info.rdmult_scaling_factors,
- aom_malloc(num_rows * num_cols *
- sizeof(*cpi->butteraugli_info.rdmult_scaling_factors)));
- memset(&cpi->butteraugli_info.source, 0,
- sizeof(cpi->butteraugli_info.source));
- memset(&cpi->butteraugli_info.resized_source, 0,
- sizeof(cpi->butteraugli_info.resized_source));
- cpi->butteraugli_info.recon_set = false;
- }
-#endif
-
-#if !CONFIG_REALTIME_ONLY
- if (!is_stat_generation_stage(cpi)) {
- av1_setup_tpl_buffers(cm, &cpi->tpl_data, cpi->oxcf.gf_cfg.lag_in_frames);
- }
-#endif
-
-#if CONFIG_COLLECT_PARTITION_STATS
- av1_zero(cpi->partition_stats);
-#endif // CONFIG_COLLECT_PARTITION_STATS
#define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF, JSDAF, JSVAF) \
- cpi->fn_ptr[BT].sdf = SDF; \
- cpi->fn_ptr[BT].sdaf = SDAF; \
- cpi->fn_ptr[BT].vf = VF; \
- cpi->fn_ptr[BT].svf = SVF; \
- cpi->fn_ptr[BT].svaf = SVAF; \
- cpi->fn_ptr[BT].sdx4df = SDX4DF; \
- cpi->fn_ptr[BT].jsdaf = JSDAF; \
- cpi->fn_ptr[BT].jsvaf = JSVAF;
+ ppi->fn_ptr[BT].sdf = SDF; \
+ ppi->fn_ptr[BT].sdaf = SDAF; \
+ ppi->fn_ptr[BT].vf = VF; \
+ ppi->fn_ptr[BT].svf = SVF; \
+ ppi->fn_ptr[BT].svaf = SVAF; \
+ ppi->fn_ptr[BT].sdx4df = SDX4DF; \
+ ppi->fn_ptr[BT].jsdaf = JSDAF; \
+ ppi->fn_ptr[BT].jsvaf = JSVAF;
// Realtime mode doesn't use 4x rectangular blocks.
#if !CONFIG_REALTIME_ONLY
@@ -1233,9 +1036,9 @@ AV1_COMP *av1_create_compressor(AV1_PRIMARY *ppi, AV1EncoderConfig *oxcf,
#if !CONFIG_REALTIME_ONLY
#define OBFP(BT, OSDF, OVF, OSVF) \
- cpi->fn_ptr[BT].osdf = OSDF; \
- cpi->fn_ptr[BT].ovf = OVF; \
- cpi->fn_ptr[BT].osvf = OSVF;
+ ppi->fn_ptr[BT].osdf = OSDF; \
+ ppi->fn_ptr[BT].ovf = OVF; \
+ ppi->fn_ptr[BT].osvf = OSVF;
OBFP(BLOCK_128X128, aom_obmc_sad128x128, aom_obmc_variance128x128,
aom_obmc_sub_pixel_variance128x128)
@@ -1284,8 +1087,8 @@ AV1_COMP *av1_create_compressor(AV1_PRIMARY *ppi, AV1EncoderConfig *oxcf,
#endif // !CONFIG_REALTIME_ONLY
#define MBFP(BT, MCSDF, MCSVF) \
- cpi->fn_ptr[BT].msdf = MCSDF; \
- cpi->fn_ptr[BT].msvf = MCSVF;
+ ppi->fn_ptr[BT].msdf = MCSDF; \
+ ppi->fn_ptr[BT].msvf = MCSVF;
MBFP(BLOCK_128X128, aom_masked_sad128x128,
aom_masked_sub_pixel_variance128x128)
@@ -1315,8 +1118,8 @@ AV1_COMP *av1_create_compressor(AV1_PRIMARY *ppi, AV1EncoderConfig *oxcf,
#endif
#define SDSFP(BT, SDSF, SDSX4DF) \
- cpi->fn_ptr[BT].sdsf = SDSF; \
- cpi->fn_ptr[BT].sdsx4df = SDSX4DF;
+ ppi->fn_ptr[BT].sdsf = SDSF; \
+ ppi->fn_ptr[BT].sdsx4df = SDSX4DF;
SDSFP(BLOCK_128X128, aom_sad_skip_128x128, aom_sad_skip_128x128x4d);
SDSFP(BLOCK_128X64, aom_sad_skip_128x64, aom_sad_skip_128x64x4d);
@@ -1346,16 +1149,281 @@ AV1_COMP *av1_create_compressor(AV1_PRIMARY *ppi, AV1EncoderConfig *oxcf,
#undef SDSFP
#if CONFIG_AV1_HIGHBITDEPTH
- highbd_set_var_fns(cpi);
+ highbd_set_var_fns(ppi);
+#endif
+
+ {
+ // As cm->mi_params is a part of the frame level context (cpi), it is
+ // unavailable at this point. mi_params is created as a local temporary
+ // variable, to be passed into the functions used for allocating tpl
+ // buffers. The values in this variable are populated according to initial
+ // width and height of the frame.
+ CommonModeInfoParams mi_params;
+ enc_set_mb_mi(&mi_params, oxcf->frm_dim_cfg.width,
+ oxcf->frm_dim_cfg.height);
+
+ const int bsize = BLOCK_16X16;
+ const int w = mi_size_wide[bsize];
+ const int h = mi_size_high[bsize];
+ const int num_cols = (mi_params.mi_cols + w - 1) / w;
+ const int num_rows = (mi_params.mi_rows + h - 1) / h;
+ AOM_CHECK_MEM_ERROR(&ppi->error, ppi->tpl_rdmult_scaling_factors,
+ aom_calloc(num_rows * num_cols,
+ sizeof(*ppi->tpl_rdmult_scaling_factors)));
+ AOM_CHECK_MEM_ERROR(
+ &ppi->error, ppi->tpl_sb_rdmult_scaling_factors,
+ aom_calloc(num_rows * num_cols,
+ sizeof(*ppi->tpl_sb_rdmult_scaling_factors)));
+
+#if !CONFIG_REALTIME_ONLY
+ if (oxcf->pass != 1) {
+ av1_setup_tpl_buffers(ppi, &mi_params, oxcf->frm_dim_cfg.width,
+ oxcf->frm_dim_cfg.height, 0,
+ oxcf->gf_cfg.lag_in_frames);
+ }
+#endif
+
+#if CONFIG_INTERNAL_STATS
+ ppi->b_calculate_blockiness = 1;
+ ppi->b_calculate_consistency = 1;
+
+ for (int i = 0; i <= STAT_ALL; i++) {
+ ppi->psnr[0].stat[i] = 0;
+ ppi->psnr[1].stat[i] = 0;
+
+ ppi->fastssim.stat[i] = 0;
+ ppi->psnrhvs.stat[i] = 0;
+ }
+
+ ppi->psnr[0].worst = 100.0;
+ ppi->psnr[1].worst = 100.0;
+ ppi->worst_ssim = 100.0;
+ ppi->worst_ssim_hbd = 100.0;
+
+ ppi->count[0] = 0;
+ ppi->count[1] = 0;
+ ppi->total_bytes = 0;
+
+ if (ppi->b_calculate_psnr) {
+ ppi->total_sq_error[0] = 0;
+ ppi->total_samples[0] = 0;
+ ppi->total_sq_error[1] = 0;
+ ppi->total_samples[1] = 0;
+ ppi->total_recode_hits = 0;
+ ppi->summed_quality = 0;
+ ppi->summed_weights = 0;
+ ppi->summed_quality_hbd = 0;
+ ppi->summed_weights_hbd = 0;
+ }
+
+ ppi->fastssim.worst = 100.0;
+ ppi->psnrhvs.worst = 100.0;
+
+ if (ppi->b_calculate_blockiness) {
+ ppi->total_blockiness = 0;
+ ppi->worst_blockiness = 0.0;
+ }
+
+ ppi->total_inconsistency = 0;
+ ppi->worst_consistency = 100.0;
+ if (ppi->b_calculate_consistency) {
+ AOM_CHECK_MEM_ERROR(&ppi->error, ppi->ssim_vars,
+ aom_malloc(sizeof(*ppi->ssim_vars) * 4 *
+ mi_params.mi_rows * mi_params.mi_cols));
+ }
+#endif
+ }
+
+ ppi->error.setjmp = 0;
+ return ppi;
+}
+
+AV1_COMP *av1_create_compressor(AV1_PRIMARY *ppi, AV1EncoderConfig *oxcf,
+ BufferPool *const pool, COMPRESSOR_STAGE stage,
+ int lap_lag_in_frames) {
+ AV1_COMP *volatile const cpi = aom_memalign(32, sizeof(AV1_COMP));
+ AV1_COMMON *volatile const cm = cpi != NULL ? &cpi->common : NULL;
+
+ if (!cm) return NULL;
+
+ av1_zero(*cpi);
+
+ cpi->ppi = ppi;
+ cm->seq_params = &ppi->seq_params;
+ cm->error = &ppi->error;
+
+ // The jmp_buf is valid only for the duration of the function that calls
+ // setjmp(). Therefore, this function must reset the 'setjmp' field to 0
+ // before it returns.
+ if (setjmp(cm->error->jmp)) {
+ cm->error->setjmp = 0;
+ av1_remove_compressor(cpi);
+ return 0;
+ }
+
+ cm->error->setjmp = 1;
+ cpi->compressor_stage = stage;
+
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ cpi->do_frame_data_update = true;
+#endif
+
+ CommonModeInfoParams *const mi_params = &cm->mi_params;
+ mi_params->free_mi = enc_free_mi;
+ mi_params->setup_mi = enc_setup_mi;
+ mi_params->set_mb_mi = (oxcf->pass == 1 || cpi->compressor_stage == LAP_STAGE)
+ ? stat_stage_set_mb_mi
+ : enc_set_mb_mi;
+
+ mi_params->mi_alloc_bsize = BLOCK_4X4;
+
+ CHECK_MEM_ERROR(cm, cm->fc,
+ (FRAME_CONTEXT *)aom_memalign(32, sizeof(*cm->fc)));
+ CHECK_MEM_ERROR(
+ cm, cm->default_frame_context,
+ (FRAME_CONTEXT *)aom_memalign(32, sizeof(*cm->default_frame_context)));
+ memset(cm->fc, 0, sizeof(*cm->fc));
+ memset(cm->default_frame_context, 0, sizeof(*cm->default_frame_context));
+
+ cpi->common.buffer_pool = pool;
+
+ init_config(cpi, oxcf);
+ if (cpi->compressor_stage == LAP_STAGE) {
+ cpi->oxcf.gf_cfg.lag_in_frames = lap_lag_in_frames;
+ }
+
+ av1_rc_init(&cpi->oxcf, oxcf->pass, &cpi->rc, &cpi->ppi->p_rc);
+
+ init_frame_info(&cpi->frame_info, cm);
+ init_frame_index_set(&cpi->frame_index_set);
+
+ cm->current_frame.frame_number = 0;
+ cm->current_frame_id = -1;
+ cpi->partition_search_skippable_frame = 0;
+ cpi->tile_data = NULL;
+ cpi->last_show_frame_buf = NULL;
+ realloc_segmentation_maps(cpi);
+
+ cpi->refresh_frame.alt_ref_frame = false;
+
+#if CONFIG_SPEED_STATS
+ cpi->tx_search_count = 0;
+#endif // CONFIG_SPEED_STATS
+
+ cpi->time_stamps.first_ts_start = INT64_MAX;
+
+#ifdef OUTPUT_YUV_REC
+ yuv_rec_file = fopen("rec.yuv", "wb");
+#endif
+#ifdef OUTPUT_YUV_DENOISED
+ yuv_denoised_file = fopen("denoised.yuv", "wb");
+#endif
+
+#if !CONFIG_REALTIME_ONLY
+ if (is_stat_consumption_stage(cpi)) {
+ const size_t packet_sz = sizeof(FIRSTPASS_STATS);
+ const int packets = (int)(oxcf->twopass_stats_in.sz / packet_sz);
+
+ if (!cpi->ppi->lap_enabled) {
+ /*Re-initialize to stats buffer, populated by application in the case of
+ * two pass*/
+ cpi->ppi->twopass.stats_buf_ctx->stats_in_start =
+ oxcf->twopass_stats_in.buf;
+ cpi->ppi->twopass.stats_in =
+ cpi->ppi->twopass.stats_buf_ctx->stats_in_start;
+ cpi->ppi->twopass.stats_buf_ctx->stats_in_end =
+ &cpi->ppi->twopass.stats_buf_ctx->stats_in_start[packets - 1];
+
+ av1_init_second_pass(cpi);
+ } else {
+ av1_init_single_pass_lap(cpi);
+ }
+ }
+#endif
+
+ alloc_obmc_buffers(&cpi->td.mb.obmc_buffer, cm);
+
+ CHECK_MEM_ERROR(
+ cm, cpi->td.mb.inter_modes_info,
+ (InterModesInfo *)aom_malloc(sizeof(*cpi->td.mb.inter_modes_info)));
+
+ for (int x = 0; x < 2; x++)
+ for (int y = 0; y < 2; y++)
+ CHECK_MEM_ERROR(
+ cm, cpi->td.mb.intrabc_hash_info.hash_value_buffer[x][y],
+ (uint32_t *)aom_malloc(
+ AOM_BUFFER_SIZE_FOR_BLOCK_HASH *
+ sizeof(*cpi->td.mb.intrabc_hash_info.hash_value_buffer[0][0])));
+
+ cpi->td.mb.intrabc_hash_info.g_crc_initialized = 0;
+
+ av1_set_speed_features_framesize_independent(cpi, oxcf->speed);
+ av1_set_speed_features_framesize_dependent(cpi, oxcf->speed);
+
+ CHECK_MEM_ERROR(cm, cpi->consec_zero_mv,
+ aom_calloc((mi_params->mi_rows * mi_params->mi_cols) >> 2,
+ sizeof(*cpi->consec_zero_mv)));
+
+ {
+ const int bsize = BLOCK_16X16;
+ const int w = mi_size_wide[bsize];
+ const int h = mi_size_high[bsize];
+ const int num_cols = (mi_params->mi_cols + w - 1) / w;
+ const int num_rows = (mi_params->mi_rows + h - 1) / h;
+ CHECK_MEM_ERROR(cm, cpi->ssim_rdmult_scaling_factors,
+ aom_calloc(num_rows * num_cols,
+ sizeof(*cpi->ssim_rdmult_scaling_factors)));
+ }
+
+#if CONFIG_TUNE_VMAF
+ {
+ const int bsize = BLOCK_64X64;
+ const int w = mi_size_wide[bsize];
+ const int h = mi_size_high[bsize];
+ const int num_cols = (mi_params->mi_cols + w - 1) / w;
+ const int num_rows = (mi_params->mi_rows + h - 1) / h;
+ CHECK_MEM_ERROR(cm, cpi->vmaf_info.rdmult_scaling_factors,
+ aom_calloc(num_rows * num_cols,
+ sizeof(*cpi->vmaf_info.rdmult_scaling_factors)));
+ for (int i = 0; i < MAX_ARF_LAYERS; i++) {
+ cpi->vmaf_info.last_frame_unsharp_amount[i] = -1.0;
+ cpi->vmaf_info.last_frame_ysse[i] = -1.0;
+ cpi->vmaf_info.last_frame_vmaf[i] = -1.0;
+ }
+ cpi->vmaf_info.original_qindex = -1;
+ cpi->vmaf_info.vmaf_model = NULL;
+ }
+#endif
+
+#if CONFIG_TUNE_BUTTERAUGLI
+ {
+ const int w = mi_size_wide[butteraugli_rdo_bsize];
+ const int h = mi_size_high[butteraugli_rdo_bsize];
+ const int num_cols = (mi_params->mi_cols + w - 1) / w;
+ const int num_rows = (mi_params->mi_rows + h - 1) / h;
+ CHECK_MEM_ERROR(
+ cm, cpi->butteraugli_info.rdmult_scaling_factors,
+ aom_malloc(num_rows * num_cols *
+ sizeof(*cpi->butteraugli_info.rdmult_scaling_factors)));
+ memset(&cpi->butteraugli_info.source, 0,
+ sizeof(cpi->butteraugli_info.source));
+ memset(&cpi->butteraugli_info.resized_source, 0,
+ sizeof(cpi->butteraugli_info.resized_source));
+ cpi->butteraugli_info.recon_set = false;
+ }
#endif
+#if CONFIG_COLLECT_PARTITION_STATS
+ av1_zero(cpi->partition_stats);
+#endif // CONFIG_COLLECT_PARTITION_STATS
+
/* av1_init_quantizer() is first called here. Add check in
* av1_frame_init_quantizer() so that av1_init_quantizer is only
* called later when needed. This will avoid unnecessary calls of
* av1_init_quantizer() for every frame.
*/
av1_init_quantizer(&cpi->enc_quant_dequant_params, &cm->quant_params,
- cm->seq_params.bit_depth);
+ cm->seq_params->bit_depth);
av1_qm_init(&cm->quant_params, av1_num_planes(cm));
av1_loop_filter_init(cm);
@@ -1365,7 +1433,7 @@ AV1_COMP *av1_create_compressor(AV1_PRIMARY *ppi, AV1EncoderConfig *oxcf,
#if !CONFIG_REALTIME_ONLY
av1_loop_restoration_precal();
#endif
- cm->error.setjmp = 0;
+ cm->error->setjmp = 0;
return cpi;
}
@@ -1402,6 +1470,7 @@ static AOM_INLINE void free_thread_data(AV1_COMP *cpi) {
for (int j = 0; j < 2; ++j) {
aom_free(thread_data->td->tmp_pred_bufs[j]);
}
+ aom_free(thread_data->td->pixel_gradient_info);
release_obmc_buffers(&thread_data->td->obmc_buffer);
aom_free(thread_data->td->vt64x64);
@@ -1423,7 +1492,27 @@ static AOM_INLINE void free_thread_data(AV1_COMP *cpi) {
void av1_remove_primary_compressor(AV1_PRIMARY *ppi) {
if (!ppi) return;
+ aom_free_frame_buffer(&ppi->alt_ref_buffer);
+ for (int i = 0; i < MAX_NUM_OPERATING_POINTS; ++i) {
+ aom_free(ppi->level_params.level_info[i]);
+ }
av1_lookahead_destroy(ppi->lookahead);
+
+ aom_free(ppi->tpl_rdmult_scaling_factors);
+ ppi->tpl_rdmult_scaling_factors = NULL;
+ aom_free(ppi->tpl_sb_rdmult_scaling_factors);
+ ppi->tpl_sb_rdmult_scaling_factors = NULL;
+
+ TplParams *const tpl_data = &ppi->tpl_data;
+ for (int frame = 0; frame < MAX_LAG_BUFFERS; ++frame) {
+ aom_free(tpl_data->tpl_stats_pool[frame]);
+ aom_free_frame_buffer(&tpl_data->tpl_rec_pool[frame]);
+ }
+
+#if !CONFIG_REALTIME_ONLY
+ av1_tpl_dealloc(&tpl_data->tpl_mt_sync);
+#endif
+
aom_free(ppi);
}
@@ -1432,127 +1521,6 @@ void av1_remove_compressor(AV1_COMP *cpi) {
AV1_COMMON *cm = &cpi->common;
if (cm->current_frame.frame_number > 0) {
-#if CONFIG_ENTROPY_STATS
- if (!is_stat_generation_stage(cpi)) {
- fprintf(stderr, "Writing counts.stt\n");
- FILE *f = fopen("counts.stt", "wb");
- fwrite(&aggregate_fc, sizeof(aggregate_fc), 1, f);
- fclose(f);
- }
-#endif // CONFIG_ENTROPY_STATS
-#if CONFIG_INTERNAL_STATS
- aom_clear_system_state();
-
- if (!is_stat_generation_stage(cpi)) {
- char headings[512] = { 0 };
- char results[512] = { 0 };
- FILE *f = fopen("opsnr.stt", "a");
- double time_encoded =
- (cpi->time_stamps.prev_ts_end - cpi->time_stamps.first_ts_start) /
- 10000000.000;
- double total_encode_time =
- (cpi->time_receive_data + cpi->time_compress_data) / 1000.000;
- const double dr =
- (double)cpi->bytes * (double)8 / (double)1000 / time_encoded;
- const double peak =
- (double)((1 << cpi->oxcf.input_cfg.input_bit_depth) - 1);
- const double target_rate =
- (double)cpi->oxcf.rc_cfg.target_bandwidth / 1000;
- const double rate_err = ((100.0 * (dr - target_rate)) / target_rate);
-
- if (cpi->b_calculate_psnr) {
- const double total_psnr =
- aom_sse_to_psnr((double)cpi->total_samples[0], peak,
- (double)cpi->total_sq_error[0]);
- const double total_ssim =
- 100 * pow(cpi->summed_quality / cpi->summed_weights, 8.0);
- snprintf(headings, sizeof(headings),
- "Bitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\tGLPsnrP\t"
- "AOMSSIM\tVPSSIMP\tFASTSIM\tPSNRHVS\t"
- "WstPsnr\tWstSsim\tWstFast\tWstHVS\t"
- "AVPsrnY\tAPsnrCb\tAPsnrCr");
- snprintf(results, sizeof(results),
- "%7.2f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t"
- "%7.3f\t%7.3f\t%7.3f\t%7.3f\t"
- "%7.3f\t%7.3f\t%7.3f\t%7.3f\t"
- "%7.3f\t%7.3f\t%7.3f",
- dr, cpi->psnr[0].stat[STAT_ALL] / cpi->count[0], total_psnr,
- cpi->psnr[0].stat[STAT_ALL] / cpi->count[0], total_psnr,
- total_ssim, total_ssim,
- cpi->fastssim.stat[STAT_ALL] / cpi->count[0],
- cpi->psnrhvs.stat[STAT_ALL] / cpi->count[0],
- cpi->psnr[0].worst, cpi->worst_ssim, cpi->fastssim.worst,
- cpi->psnrhvs.worst, cpi->psnr[0].stat[STAT_Y] / cpi->count[0],
- cpi->psnr[0].stat[STAT_U] / cpi->count[0],
- cpi->psnr[0].stat[STAT_V] / cpi->count[0]);
-
- if (cpi->b_calculate_blockiness) {
- SNPRINT(headings, "\t Block\tWstBlck");
- SNPRINT2(results, "\t%7.3f", cpi->total_blockiness / cpi->count[0]);
- SNPRINT2(results, "\t%7.3f", cpi->worst_blockiness);
- }
-
- if (cpi->b_calculate_consistency) {
- double consistency =
- aom_sse_to_psnr((double)cpi->total_samples[0], peak,
- (double)cpi->total_inconsistency);
-
- SNPRINT(headings, "\tConsist\tWstCons");
- SNPRINT2(results, "\t%7.3f", consistency);
- SNPRINT2(results, "\t%7.3f", cpi->worst_consistency);
- }
-
- SNPRINT(headings, "\t Time\tRcErr\tAbsErr");
- SNPRINT2(results, "\t%8.0f", total_encode_time);
- SNPRINT2(results, " %7.2f", rate_err);
- SNPRINT2(results, " %7.2f", fabs(rate_err));
-
- SNPRINT(headings, "\tAPsnr611");
- SNPRINT2(results, " %7.3f",
- (6 * cpi->psnr[0].stat[STAT_Y] + cpi->psnr[0].stat[STAT_U] +
- cpi->psnr[0].stat[STAT_V]) /
- (cpi->count[0] * 8));
-
-#if CONFIG_AV1_HIGHBITDEPTH
- const uint32_t in_bit_depth = cpi->oxcf.input_cfg.input_bit_depth;
- const uint32_t bit_depth = cpi->td.mb.e_mbd.bd;
- if ((cpi->source->flags & YV12_FLAG_HIGHBITDEPTH) &&
- (in_bit_depth < bit_depth)) {
- const double peak_hbd = (double)((1 << bit_depth) - 1);
- const double total_psnr_hbd =
- aom_sse_to_psnr((double)cpi->total_samples[1], peak_hbd,
- (double)cpi->total_sq_error[1]);
- const double total_ssim_hbd =
- 100 * pow(cpi->summed_quality_hbd / cpi->summed_weights_hbd, 8.0);
- SNPRINT(headings,
- "\t AVGPsnrH GLBPsnrH AVPsnrPH GLPsnrPH"
- " AVPsnrYH APsnrCbH APsnrCrH WstPsnrH"
- " AOMSSIMH VPSSIMPH WstSsimH");
- SNPRINT2(results, "\t%7.3f",
- cpi->psnr[1].stat[STAT_ALL] / cpi->count[1]);
- SNPRINT2(results, " %7.3f", total_psnr_hbd);
- SNPRINT2(results, " %7.3f",
- cpi->psnr[1].stat[STAT_ALL] / cpi->count[1]);
- SNPRINT2(results, " %7.3f", total_psnr_hbd);
- SNPRINT2(results, " %7.3f",
- cpi->psnr[1].stat[STAT_Y] / cpi->count[1]);
- SNPRINT2(results, " %7.3f",
- cpi->psnr[1].stat[STAT_U] / cpi->count[1]);
- SNPRINT2(results, " %7.3f",
- cpi->psnr[1].stat[STAT_V] / cpi->count[1]);
- SNPRINT2(results, " %7.3f", cpi->psnr[1].worst);
- SNPRINT2(results, " %7.3f", total_ssim_hbd);
- SNPRINT2(results, " %7.3f", total_ssim_hbd);
- SNPRINT2(results, " %7.3f", cpi->worst_ssim_hbd);
- }
-#endif
- fprintf(f, "%s\n", headings);
- fprintf(f, "%s\n", results);
- }
-
- fclose(f);
- }
-#endif // CONFIG_INTERNAL_STATS
#if CONFIG_SPEED_STATS
if (!is_stat_generation_stage(cpi)) {
fprintf(stdout, "tx_search_count = %d\n", cpi->tx_search_count);
@@ -1571,12 +1539,6 @@ void av1_remove_compressor(AV1_COMP *cpi) {
av1_denoiser_free(&(cpi->denoiser));
#endif
- TplParams *const tpl_data = &cpi->tpl_data;
- for (int frame = 0; frame < MAX_LAG_BUFFERS; ++frame) {
- aom_free(tpl_data->tpl_stats_pool[frame]);
- aom_free_frame_buffer(&tpl_data->tpl_rec_pool[frame]);
- }
-
if (cpi->compressor_stage != LAP_STAGE) {
terminate_worker_data(cpi);
free_thread_data(cpi);
@@ -1586,6 +1548,7 @@ void av1_remove_compressor(AV1_COMP *cpi) {
#if CONFIG_MULTITHREAD
pthread_mutex_t *const enc_row_mt_mutex_ = mt_info->enc_row_mt.mutex_;
pthread_mutex_t *const gm_mt_mutex_ = mt_info->gm_sync.mutex_;
+ pthread_mutex_t *const pack_bs_mt_mutex_ = mt_info->pack_bs_sync.mutex_;
if (enc_row_mt_mutex_ != NULL) {
pthread_mutex_destroy(enc_row_mt_mutex_);
aom_free(enc_row_mt_mutex_);
@@ -1594,6 +1557,10 @@ void av1_remove_compressor(AV1_COMP *cpi) {
pthread_mutex_destroy(gm_mt_mutex_);
aom_free(gm_mt_mutex_);
}
+ if (pack_bs_mt_mutex_ != NULL) {
+ pthread_mutex_destroy(pack_bs_mt_mutex_);
+ aom_free(pack_bs_mt_mutex_);
+ }
#endif
av1_row_mt_mem_dealloc(cpi);
if (cpi->compressor_stage != LAP_STAGE) {
@@ -1601,9 +1568,6 @@ void av1_remove_compressor(AV1_COMP *cpi) {
aom_free(mt_info->workers);
}
-#if !CONFIG_REALTIME_ONLY
- av1_tpl_dealloc(&tpl_data->tpl_mt_sync);
-#endif
if (mt_info->num_workers > 1) {
av1_loop_filter_dealloc(&mt_info->lf_row_sync);
av1_cdef_mt_dealloc(&mt_info->cdef_sync);
@@ -1617,13 +1581,9 @@ void av1_remove_compressor(AV1_COMP *cpi) {
dealloc_compressor_data(cpi);
-#if CONFIG_INTERNAL_STATS
- aom_free(cpi->ssim_vars);
- cpi->ssim_vars = NULL;
-#endif // CONFIG_INTERNAL_STATS
+ av1_ext_part_delete(&cpi->ext_part_controller);
av1_remove_common(cm);
- av1_free_ref_frame_buffers(cm->buffer_pool);
aom_free(cpi);
@@ -1667,7 +1627,7 @@ static void generate_psnr_packet(AV1_COMP *cpi) {
#endif
pkt.kind = AOM_CODEC_PSNR_PKT;
- aom_codec_pkt_list_add(cpi->output_pkt_list, &pkt);
+ aom_codec_pkt_list_add(cpi->ppi->output_pkt_list, &pkt);
}
int av1_use_as_reference(int *ext_ref_frame_flags, int ref_frame_flags) {
@@ -1781,7 +1741,12 @@ static void set_mv_search_params(AV1_COMP *cpi) {
mv_search_params->mv_step_param = av1_init_search_range(
AOMMIN(max_mv_def, 2 * mv_search_params->max_mv_magnitude));
}
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ // Reset max_mv_magnitude for parallel frames based on update flag.
+ if (cpi->do_frame_data_update) mv_search_params->max_mv_magnitude = -1;
+#else
mv_search_params->max_mv_magnitude = -1;
+#endif
}
}
}
@@ -1789,14 +1754,14 @@ static void set_mv_search_params(AV1_COMP *cpi) {
void av1_set_screen_content_options(AV1_COMP *cpi, FeatureFlags *features) {
const AV1_COMMON *const cm = &cpi->common;
- if (cm->seq_params.force_screen_content_tools != 2) {
+ if (cm->seq_params->force_screen_content_tools != 2) {
features->allow_screen_content_tools = features->allow_intrabc =
- cm->seq_params.force_screen_content_tools;
+ cm->seq_params->force_screen_content_tools;
return;
}
if (cpi->oxcf.mode == REALTIME) {
- assert(cm->seq_params.reduced_still_picture_hdr);
+ assert(cm->seq_params->reduced_still_picture_hdr);
features->allow_screen_content_tools = features->allow_intrabc = 0;
return;
}
@@ -1814,7 +1779,7 @@ void av1_set_screen_content_options(AV1_COMP *cpi, FeatureFlags *features) {
const int stride = cpi->unfiltered_source->y_stride;
const int width = cpi->unfiltered_source->y_width;
const int height = cpi->unfiltered_source->y_height;
- const int bd = cm->seq_params.bit_depth;
+ const int bd = cm->seq_params->bit_depth;
const int blk_w = 16;
const int blk_h = 16;
// These threshold values are selected experimentally.
@@ -1960,7 +1925,7 @@ static void init_ref_frame_bufs(AV1_COMP *cpi) {
void av1_check_initial_width(AV1_COMP *cpi, int use_highbitdepth,
int subsampling_x, int subsampling_y) {
AV1_COMMON *const cm = &cpi->common;
- SequenceHeader *const seq_params = &cm->seq_params;
+ SequenceHeader *const seq_params = cm->seq_params;
InitialDimensions *const initial_dimensions = &cpi->initial_dimensions;
if (!initial_dimensions->width ||
@@ -1994,11 +1959,11 @@ static void setup_denoiser_buffer(AV1_COMP *cpi) {
if (cpi->oxcf.noise_sensitivity > 0 &&
!cpi->denoiser.frame_buffer_initialized) {
if (av1_denoiser_alloc(
- cm, &cpi->svc, &cpi->denoiser, cpi->use_svc,
+ cm, &cpi->svc, &cpi->denoiser, cpi->ppi->use_svc,
cpi->oxcf.noise_sensitivity, cm->width, cm->height,
- cm->seq_params.subsampling_x, cm->seq_params.subsampling_y,
- cm->seq_params.use_highbitdepth, AOM_BORDER_IN_PIXELS))
- aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
+ cm->seq_params->subsampling_x, cm->seq_params->subsampling_y,
+ cm->seq_params->use_highbitdepth, AOM_BORDER_IN_PIXELS))
+ aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate denoiser");
}
}
@@ -2008,9 +1973,9 @@ static void setup_denoiser_buffer(AV1_COMP *cpi) {
int av1_set_size_literal(AV1_COMP *cpi, int width, int height) {
AV1_COMMON *cm = &cpi->common;
InitialDimensions *const initial_dimensions = &cpi->initial_dimensions;
- av1_check_initial_width(cpi, cm->seq_params.use_highbitdepth,
- cm->seq_params.subsampling_x,
- cm->seq_params.subsampling_y);
+ av1_check_initial_width(cpi, cm->seq_params->use_highbitdepth,
+ cm->seq_params->subsampling_x,
+ cm->seq_params->subsampling_y);
if (width <= 0 || height <= 0) return 1;
@@ -2040,7 +2005,7 @@ int av1_set_size_literal(AV1_COMP *cpi, int width, int height) {
void av1_set_frame_size(AV1_COMP *cpi, int width, int height) {
AV1_COMMON *const cm = &cpi->common;
- const SequenceHeader *const seq_params = &cm->seq_params;
+ const SequenceHeader *const seq_params = cm->seq_params;
const int num_planes = av1_num_planes(cm);
MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
int ref_frame;
@@ -2078,7 +2043,7 @@ void av1_set_frame_size(AV1_COMP *cpi, int width, int height) {
if (av1_alloc_above_context_buffers(above_contexts, cm->tiles.rows,
cm->mi_params.mi_cols,
av1_num_planes(cm)))
- aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
+ aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate context buffers");
}
@@ -2088,11 +2053,16 @@ void av1_set_frame_size(AV1_COMP *cpi, int width, int height) {
seq_params->subsampling_y, seq_params->use_highbitdepth,
cpi->oxcf.border_in_pixels, cm->features.byte_alignment, NULL, NULL,
NULL, cpi->oxcf.tool_cfg.enable_global_motion))
- aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
+ aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate frame buffer");
+ if (!is_stat_generation_stage(cpi))
+ av1_alloc_cdef_buffers(cm, &cpi->mt_info.cdef_worker,
+ &cpi->mt_info.cdef_sync,
+ cpi->mt_info.num_mod_workers[MOD_CDEF]);
+
#if !CONFIG_REALTIME_ONLY
- const int use_restoration = cm->seq_params.enable_restoration &&
+ const int use_restoration = cm->seq_params->enable_restoration &&
!cm->features.all_lossless &&
!cm->tiles.large_scale;
if (use_restoration) {
@@ -2107,6 +2077,7 @@ void av1_set_frame_size(AV1_COMP *cpi, int width, int height) {
av1_alloc_restoration_buffers(cm);
}
#endif
+
if (!is_stat_generation_stage(cpi)) alloc_util_frame_buffers(cpi);
init_motion_estimation(cpi);
@@ -2145,13 +2116,22 @@ static void cdef_restoration_frame(AV1_COMP *cpi, AV1_COMMON *cm,
#if CONFIG_COLLECT_COMPONENT_TIMING
start_timing(cpi, cdef_time);
#endif
+ const int num_workers = cpi->mt_info.num_mod_workers[MOD_CDEF];
// Find CDEF parameters
av1_cdef_search(&cpi->mt_info, &cm->cur_frame->buf, cpi->source, cm, xd,
- cpi->sf.lpf_sf.cdef_pick_method, cpi->td.mb.rdmult);
+ cpi->sf.lpf_sf.cdef_pick_method, cpi->td.mb.rdmult,
+ cpi->sf.rt_sf.skip_cdef_sb, cpi->rc.frames_since_key);
// Apply the filter
- if (!cpi->sf.rt_sf.skip_loopfilter_non_reference)
- av1_cdef_frame(&cm->cur_frame->buf, cm, xd);
+ if (!cpi->sf.rt_sf.skip_loopfilter_non_reference) {
+ if (num_workers > 1) {
+ av1_cdef_frame_mt(cm, xd, cpi->mt_info.cdef_worker,
+ cpi->mt_info.workers, &cpi->mt_info.cdef_sync,
+ num_workers, av1_cdef_init_fb_row_mt);
+ } else {
+ av1_cdef_frame(&cm->cur_frame->buf, cm, xd, av1_cdef_init_fb_row);
+ }
+ }
#if CONFIG_COLLECT_COMPONENT_TIMING
end_timing(cpi, cdef_time);
#endif
@@ -2211,11 +2191,19 @@ static void loopfilter_frame(AV1_COMP *cpi, AV1_COMMON *cm) {
const int use_loopfilter =
!cm->features.coded_lossless && !cm->tiles.large_scale;
- const int use_cdef = cm->seq_params.enable_cdef &&
+ const int use_cdef = cm->seq_params->enable_cdef &&
!cm->features.coded_lossless && !cm->tiles.large_scale;
- const int use_restoration = cm->seq_params.enable_restoration &&
+ const int use_restoration = cm->seq_params->enable_restoration &&
!cm->features.all_lossless &&
!cm->tiles.large_scale;
+ const int cur_width = cm->cur_frame->width;
+ const int cur_height = cm->cur_frame->height;
+ const int cur_width_mib = cm->mi_params.mi_cols * MI_SIZE;
+ const int cur_height_mib = cm->mi_params.mi_rows * MI_SIZE;
+ const int is_realtime =
+ cpi->sf.rt_sf.use_nonrd_pick_mode && !(cm->mi_params.mi_cols % 2) &&
+ !(cm->mi_params.mi_rows % 2) && (cur_width_mib - cur_width < MI_SIZE) &&
+ (cur_height_mib - cur_height < MI_SIZE);
struct loopfilter *lf = &cm->lf;
@@ -2238,13 +2226,13 @@ static void loopfilter_frame(AV1_COMP *cpi, AV1_COMMON *cm) {
0,
#endif
mt_info->workers, num_workers,
- &mt_info->lf_row_sync);
+ &mt_info->lf_row_sync, is_realtime);
else
av1_loop_filter_frame(&cm->cur_frame->buf, cm, xd,
#if CONFIG_LPF_MASK
0,
#endif
- 0, num_planes, 0);
+ 0, num_planes, 0, is_realtime);
}
#if CONFIG_COLLECT_COMPONENT_TIMING
end_timing(cpi, loop_filter_time);
@@ -2278,16 +2266,17 @@ static int encode_without_recode(AV1_COMP *cpi) {
int top_index = 0, bottom_index = 0, q = 0;
YV12_BUFFER_CONFIG *unscaled = cpi->unscaled_source;
InterpFilter filter_scaler =
- cpi->use_svc ? svc->downsample_filter_type[svc->spatial_layer_id]
- : EIGHTTAP_SMOOTH;
- int phase_scaler =
- cpi->use_svc ? svc->downsample_filter_phase[svc->spatial_layer_id] : 0;
+ cpi->ppi->use_svc ? svc->downsample_filter_type[svc->spatial_layer_id]
+ : EIGHTTAP_SMOOTH;
+ int phase_scaler = cpi->ppi->use_svc
+ ? svc->downsample_filter_phase[svc->spatial_layer_id]
+ : 0;
set_size_independent_vars(cpi);
av1_setup_frame_size(cpi);
av1_set_size_dependent_vars(cpi, &q, &bottom_index, &top_index);
- if (!cpi->use_svc) {
+ if (!cpi->ppi->use_svc) {
phase_scaler = 8;
// 2:1 scaling.
if ((cm->width << 1) == unscaled->y_crop_width &&
@@ -2315,6 +2304,12 @@ static int encode_without_recode(AV1_COMP *cpi) {
printf("\n Encoding a frame:");
#endif
+#if CONFIG_TUNE_BUTTERAUGLI
+ if (cpi->oxcf.tune_cfg.tuning == AOM_TUNE_BUTTERAUGLI) {
+ av1_setup_butteraugli_rdmult(cpi);
+ }
+#endif
+
aom_clear_system_state();
cpi->source = av1_scale_if_required(cm, unscaled, &cpi->scaled_source,
@@ -2336,7 +2331,7 @@ static int encode_without_recode(AV1_COMP *cpi) {
}
#if CONFIG_AV1_TEMPORAL_DENOISING
- if (cpi->oxcf.noise_sensitivity > 0 && cpi->use_svc)
+ if (cpi->oxcf.noise_sensitivity > 0 && cpi->ppi->use_svc)
av1_denoiser_reset_on_first_frame(cpi);
#endif
@@ -2365,7 +2360,7 @@ static int encode_without_recode(AV1_COMP *cpi) {
// (zero_mode is forced), and since the scaled references are only
// use for newmv search, we can avoid scaling here.
if (!frame_is_intra_only(cm) &&
- !(cpi->use_svc && cpi->svc.force_zero_mode_spatial_ref))
+ !(cpi->ppi->use_svc && cpi->svc.force_zero_mode_spatial_ref))
av1_scale_references(cpi, filter_scaler, phase_scaler, 1);
av1_set_quantizer(cm, q_cfg->qm_minlevel, q_cfg->qm_maxlevel, q,
@@ -2373,7 +2368,7 @@ static int encode_without_recode(AV1_COMP *cpi) {
av1_set_speed_features_qindex_dependent(cpi, cpi->oxcf.speed);
if ((q_cfg->deltaq_mode != NO_DELTA_Q) || q_cfg->enable_chroma_deltaq)
av1_init_quantizer(&cpi->enc_quant_dequant_params, &cm->quant_params,
- cm->seq_params.bit_depth);
+ cm->seq_params->bit_depth);
av1_set_variance_partition_thresholds(cpi, q, 0);
av1_setup_frame(cpi);
@@ -2388,7 +2383,7 @@ static int encode_without_recode(AV1_COMP *cpi) {
av1_set_speed_features_qindex_dependent(cpi, cpi->oxcf.speed);
if (q_cfg->deltaq_mode != NO_DELTA_Q || q_cfg->enable_chroma_deltaq)
av1_init_quantizer(&cpi->enc_quant_dequant_params, &cm->quant_params,
- cm->seq_params.bit_depth);
+ cm->seq_params->bit_depth);
av1_set_variance_partition_thresholds(cpi, q, 0);
if (frame_is_intra_only(cm) || cm->features.error_resilient_mode)
av1_setup_frame(cpi);
@@ -2432,7 +2427,7 @@ static int encode_without_recode(AV1_COMP *cpi) {
end_timing(cpi, av1_encode_frame_time);
#endif
#if CONFIG_INTERNAL_STATS
- ++cpi->tot_recode_hits;
+ ++cpi->frame_recode_hits;
#endif
aom_clear_system_state();
@@ -2504,7 +2499,7 @@ static int encode_with_recode_loop(AV1_COMP *cpi, size_t *size, uint8_t *dest) {
if (!cpi->sf.hl_sf.disable_extra_sc_testing)
av1_determine_sc_tools_with_encoding(cpi, q);
-#if CONFIG_USE_VMAF_RC
+#if CONFIG_TUNE_VMAF
if (oxcf->tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN) {
av1_vmaf_neg_preprocessing(cpi, cpi->unscaled_source);
}
@@ -2525,6 +2520,7 @@ static int encode_with_recode_loop(AV1_COMP *cpi, size_t *size, uint8_t *dest) {
do {
loop = 0;
+ int do_mv_stats_collection = 1;
aom_clear_system_state();
// if frame was scaled calculate global_motion_search again if already
@@ -2580,7 +2576,7 @@ static int encode_with_recode_loop(AV1_COMP *cpi, size_t *size, uint8_t *dest) {
if (q_cfg->deltaq_mode != NO_DELTA_Q || q_cfg->enable_chroma_deltaq)
av1_init_quantizer(&cpi->enc_quant_dequant_params, &cm->quant_params,
- cm->seq_params.bit_depth);
+ cm->seq_params->bit_depth);
av1_set_variance_partition_thresholds(cpi, q, 0);
@@ -2636,14 +2632,19 @@ static int encode_with_recode_loop(AV1_COMP *cpi, size_t *size, uint8_t *dest) {
// transform / motion compensation build reconstruction frame
av1_encode_frame(cpi);
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ // Disable mv_stats collection for parallel frames based on update flag.
+ if (!cpi->do_frame_data_update) do_mv_stats_collection = 0;
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
+
// Reset the mv_stats in case we are interrupted by an intraframe or an
// overlay frame.
- if (cpi->mv_stats.valid) {
- av1_zero(cpi->mv_stats);
+ if (cpi->ppi->mv_stats.valid && do_mv_stats_collection) {
+ av1_zero(cpi->ppi->mv_stats);
}
// Gather the mv_stats for the next frame
if (cpi->sf.hl_sf.high_precision_mv_usage == LAST_MV_DATA &&
- av1_frame_allows_smart_mv(cpi)) {
+ av1_frame_allows_smart_mv(cpi) && do_mv_stats_collection) {
av1_collect_mv_stats(cpi, q);
}
@@ -2653,6 +2654,9 @@ static int encode_with_recode_loop(AV1_COMP *cpi, size_t *size, uint8_t *dest) {
aom_clear_system_state();
+#if CONFIG_BITRATE_ACCURACY
+ const int do_dummy_pack = 1;
+#else // CONFIG_BITRATE_ACCURACY
// Dummy pack of the bitstream using up to date stats to get an
// accurate estimate of output frame size to determine if we need
// to recode.
@@ -2660,6 +2664,7 @@ static int encode_with_recode_loop(AV1_COMP *cpi, size_t *size, uint8_t *dest) {
(cpi->sf.hl_sf.recode_loop >= ALLOW_RECODE_KFARFGF &&
oxcf->rc_cfg.mode != AOM_Q) ||
oxcf->rc_cfg.min_cr > 0;
+#endif // CONFIG_BITRATE_ACCURACY
if (do_dummy_pack) {
av1_finalize_encoded_frame(cpi);
int largest_tile_id = 0; // Output from bitstream: unused here
@@ -2669,7 +2674,15 @@ static int encode_with_recode_loop(AV1_COMP *cpi, size_t *size, uint8_t *dest) {
return AOM_CODEC_ERROR;
}
+ // bits used for this frame
rc->projected_frame_size = (int)(*size) << 3;
+
+#if CONFIG_BITRATE_ACCURACY
+ cpi->ppi->tpl_data.actual_gop_bitrate += rc->projected_frame_size;
+ printf("\nframe: %d, projected frame size: %d, total: %f\n",
+ cpi->gf_frame_index, rc->projected_frame_size,
+ cpi->ppi->tpl_data.actual_gop_bitrate);
+#endif
}
#if CONFIG_TUNE_VMAF
@@ -2688,15 +2701,19 @@ static int encode_with_recode_loop(AV1_COMP *cpi, size_t *size, uint8_t *dest) {
#if CONFIG_TUNE_BUTTERAUGLI
if (loop_count == 0 && oxcf->tune_cfg.tuning == AOM_TUNE_BUTTERAUGLI) {
loop = 1;
- av1_restore_butteraugli_source(cpi);
+ av1_setup_butteraugli_rdmult_and_restore_source(cpi, 0.4);
}
#endif
+#if CONFIG_BITRATE_ACCURACY
+ loop = 0; // turn off recode loop when CONFIG_BITRATE_ACCURACY is on
+#endif // CONFIG_BITRATE_ACCURACY
+
if (loop) {
++loop_count;
#if CONFIG_INTERNAL_STATS
- ++cpi->tot_recode_hits;
+ ++cpi->frame_recode_hits;
#endif
}
#if CONFIG_COLLECT_COMPONENT_TIMING
@@ -2796,12 +2813,12 @@ static int encode_with_recode_loop_and_filter(AV1_COMP *cpi, size_t *size,
#endif
AV1_COMMON *const cm = &cpi->common;
- SequenceHeader *const seq_params = &cm->seq_params;
+ SequenceHeader *const seq_params = cm->seq_params;
// Special case code to reduce pulsing when key frames are forced at a
// fixed interval. Note the reconstruction error if it is the frame before
// the force key frame
- if (cpi->rc.next_key_frame_forced && cpi->rc.frames_to_key == 1) {
+ if (cpi->ppi->p_rc.next_key_frame_forced && cpi->rc.frames_to_key == 1) {
#if CONFIG_AV1_HIGHBITDEPTH
if (seq_params->use_highbitdepth) {
cpi->ambient_err = aom_highbd_get_y_sse(cpi->source, &cm->cur_frame->buf);
@@ -2884,7 +2901,7 @@ static int encode_with_and_without_superres(AV1_COMP *cpi, size_t *size,
uint8_t *dest,
int *largest_tile_id) {
const AV1_COMMON *const cm = &cpi->common;
- assert(cm->seq_params.enable_superres);
+ assert(cm->seq_params->enable_superres);
assert(av1_superres_in_recode_allowed(cpi));
aom_codec_err_t err = AOM_CODEC_OK;
av1_save_all_coding_context(cpi);
@@ -2904,9 +2921,9 @@ static int encode_with_and_without_superres(AV1_COMP *cpi, size_t *size,
int64_t superres_rates[SCALE_NUMERATOR];
int superres_largest_tile_ids[SCALE_NUMERATOR];
// Use superres for Key-frames and Alt-ref frames only.
- const GF_GROUP *const gf_group = &cpi->gf_group;
- if (gf_group->update_type[gf_group->index] != OVERLAY_UPDATE &&
- gf_group->update_type[gf_group->index] != INTNL_OVERLAY_UPDATE) {
+ const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
+ if (gf_group->update_type[cpi->gf_frame_index] != OVERLAY_UPDATE &&
+ gf_group->update_type[cpi->gf_frame_index] != INTNL_OVERLAY_UPDATE) {
for (int denom = SCALE_NUMERATOR + 1; denom <= 2 * SCALE_NUMERATOR;
++denom) {
superres_cfg->superres_scale_denominator = denom;
@@ -2952,7 +2969,7 @@ static int encode_with_and_without_superres(AV1_COMP *cpi, size_t *size,
const int64_t this_rate = superres_rates[this_index];
const int this_largest_tile_id = superres_largest_tile_ids[this_index];
const double this_rdcost = RDCOST_DBL_WITH_NATIVE_BD_DIST(
- rdmult, this_rate, this_sse, cm->seq_params.bit_depth);
+ rdmult, this_rate, this_sse, cm->seq_params->bit_depth);
if (this_rdcost < proj_rdcost1) {
sse1 = this_sse;
rate1 = this_rate;
@@ -2962,7 +2979,7 @@ static int encode_with_and_without_superres(AV1_COMP *cpi, size_t *size,
}
}
const double proj_rdcost2 = RDCOST_DBL_WITH_NATIVE_BD_DIST(
- rdmult, rate2, sse2, cm->seq_params.bit_depth);
+ rdmult, rate2, sse2, cm->seq_params->bit_depth);
// Re-encode with superres if it's better.
if (proj_rdcost1 < proj_rdcost2) {
restore_all_coding_context(cpi);
@@ -3007,9 +3024,9 @@ static int encode_with_and_without_superres(AV1_COMP *cpi, size_t *size,
const int64_t rdmult =
av1_compute_rd_mult_based_on_qindex(cpi, cm->quant_params.base_qindex);
proj_rdcost1 = RDCOST_DBL_WITH_NATIVE_BD_DIST(rdmult, rate1, sse1,
- cm->seq_params.bit_depth);
+ cm->seq_params->bit_depth);
const double proj_rdcost2 = RDCOST_DBL_WITH_NATIVE_BD_DIST(
- rdmult, rate2, sse2, cm->seq_params.bit_depth);
+ rdmult, rate2, sse2, cm->seq_params->bit_depth);
// Re-encode with superres if it's better.
if (proj_rdcost1 < proj_rdcost2) {
restore_all_coding_context(cpi);
@@ -3034,6 +3051,42 @@ static int encode_with_and_without_superres(AV1_COMP *cpi, size_t *size,
return err;
}
+#if !CONFIG_REALTIME_ONLY
+static void calculate_frame_avg_haar_energy(AV1_COMP *cpi) {
+ TWO_PASS *const twopass = &cpi->ppi->twopass;
+ const FIRSTPASS_STATS *const total_stats =
+ twopass->stats_buf_ctx->total_stats;
+
+ if (is_one_pass_rt_params(cpi) ||
+ (cpi->oxcf.q_cfg.deltaq_mode != DELTA_Q_PERCEPTUAL) ||
+ (is_fp_wavelet_energy_invalid(total_stats) == 0))
+ return;
+
+ const int num_mbs = (cpi->oxcf.resize_cfg.resize_mode != RESIZE_NONE)
+ ? cpi->initial_mbs
+ : cpi->common.mi_params.MBs;
+ const YV12_BUFFER_CONFIG *const unfiltered_source = cpi->unfiltered_source;
+ const uint8_t *const src = unfiltered_source->y_buffer;
+ const int hbd = unfiltered_source->flags & YV12_FLAG_HIGHBITDEPTH;
+ const int stride = unfiltered_source->y_stride;
+ const BLOCK_SIZE fp_block_size =
+ get_fp_block_size(cpi->is_screen_content_type);
+ const int fp_block_size_width = block_size_wide[fp_block_size];
+ const int fp_block_size_height = block_size_high[fp_block_size];
+ const int num_unit_cols =
+ get_num_blocks(unfiltered_source->y_crop_width, fp_block_size_width);
+ const int num_unit_rows =
+ get_num_blocks(unfiltered_source->y_crop_height, fp_block_size_height);
+ const int num_8x8_cols = num_unit_cols * (fp_block_size_width / 8);
+ const int num_8x8_rows = num_unit_rows * (fp_block_size_height / 8);
+ int64_t frame_avg_wavelet_energy = av1_haar_ac_sad_mxn_uint8_input(
+ src, stride, hbd, num_8x8_rows, num_8x8_cols);
+
+ twopass->frame_avg_haar_energy =
+ log(((double)frame_avg_wavelet_energy / num_mbs) + 1.0);
+}
+#endif
+
extern void av1_print_frame_contexts(const FRAME_CONTEXT *fc,
const char *filename);
@@ -3055,7 +3108,7 @@ extern void av1_print_frame_contexts(const FRAME_CONTEXT *fc,
static int encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
uint8_t *dest) {
AV1_COMMON *const cm = &cpi->common;
- SequenceHeader *const seq_params = &cm->seq_params;
+ SequenceHeader *const seq_params = cm->seq_params;
CurrentFrame *const current_frame = &cm->current_frame;
const AV1EncoderConfig *const oxcf = &cpi->oxcf;
struct segmentation *const seg = &cm->seg;
@@ -3070,6 +3123,10 @@ static int encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
av1_set_screen_content_options(cpi, features);
}
+#if !CONFIG_REALTIME_ONLY
+ calculate_frame_avg_haar_energy(cpi);
+#endif
+
// frame type has been decided outside of this function call
cm->cur_frame->frame_type = current_frame->frame_type;
@@ -3088,7 +3145,7 @@ static int encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
cpi->last_frame_type = current_frame->frame_type;
if (frame_is_sframe(cm)) {
- GF_GROUP *gf_group = &cpi->gf_group;
+ GF_GROUP *gf_group = &cpi->ppi->gf_group;
// S frame will wipe out any previously encoded altref so we cannot place
// an overlay frame
gf_group->update_type[gf_group->size] = GF_UPDATE;
@@ -3110,7 +3167,7 @@ static int encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
cm->ref_frame_id[i] = display_frame_id;
}
- cpi->seq_params_locked = 1;
+ cpi->ppi->seq_params_locked = 1;
#if DUMP_RECON_FRAMES == 1
// NOTE(zoeliu): For debug - Output the filtered reconstructed video.
@@ -3147,7 +3204,7 @@ static int encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
if (!is_stat_generation_stage(cpi) &&
cpi->common.features.allow_screen_content_tools &&
!frame_is_intra_only(cm)) {
- if (cpi->common.seq_params.force_integer_mv == 2) {
+ if (cpi->common.seq_params->force_integer_mv == 2) {
// Adaptive mode: see what previous frame encoded did
if (cpi->unscaled_last_source != NULL) {
features->cur_frame_force_integer_mv = av1_is_integer_mv(
@@ -3157,7 +3214,7 @@ static int encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
}
} else {
cpi->common.features.cur_frame_force_integer_mv =
- cpi->common.seq_params.force_integer_mv;
+ cpi->common.seq_params->force_integer_mv;
}
} else {
cpi->common.features.cur_frame_force_integer_mv = 0;
@@ -3290,7 +3347,7 @@ static int encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
cpi->superres_mode = orig_superres_mode; // restore
}
- cpi->seq_params_locked = 1;
+ cpi->ppi->seq_params_locked = 1;
// Update reference frame ids for reference frames this frame will overwrite
if (seq_params->frame_id_numbers_present_flag) {
@@ -3332,10 +3389,6 @@ static int encode_frame_to_data_rate(AV1_COMP *cpi, size_t *size,
refresh_reference_frames(cpi);
-#if CONFIG_ENTROPY_STATS
- av1_accumulate_frame_counts(&aggregate_fc, &cpi->counts);
-#endif // CONFIG_ENTROPY_STATS
-
if (features->refresh_frame_context == REFRESH_FRAME_CONTEXT_BACKWARD) {
*cm->fc = cpi->tile_data[largest_tile_id].tctx;
av1_reset_cdf_symbol_counters(cm->fc);
@@ -3417,7 +3470,13 @@ int av1_encode(AV1_COMP *const cpi, uint8_t *const dest,
current_frame->display_order_hint = current_frame->order_hint;
current_frame->order_hint %=
- (1 << (cm->seq_params.order_hint_info.order_hint_bits_minus_1 + 1));
+ (1 << (cm->seq_params->order_hint_info.order_hint_bits_minus_1 + 1));
+
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ current_frame->pyramid_level = get_true_pyr_level(
+ cpi->ppi->gf_group.layer_depth[cpi->gf_frame_index],
+ current_frame->display_order_hint, cpi->ppi->gf_group.max_layer_depth);
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
if (is_stat_generation_stage(cpi)) {
#if !CONFIG_REALTIME_ONLY
@@ -3442,9 +3501,9 @@ static int apply_denoise_2d(AV1_COMP *cpi, YV12_BUFFER_CONFIG *sd,
AV1_COMMON *const cm = &cpi->common;
if (!cpi->denoise_and_model) {
cpi->denoise_and_model = aom_denoise_and_model_alloc(
- cm->seq_params.bit_depth, block_size, noise_level);
+ cm->seq_params->bit_depth, block_size, noise_level);
if (!cpi->denoise_and_model) {
- aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
+ aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
"Error allocating denoise and model");
return -1;
}
@@ -3452,7 +3511,7 @@ static int apply_denoise_2d(AV1_COMP *cpi, YV12_BUFFER_CONFIG *sd,
if (!cpi->film_grain_table) {
cpi->film_grain_table = aom_malloc(sizeof(*cpi->film_grain_table));
if (!cpi->film_grain_table) {
- aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
+ aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
"Error allocating grain table");
return -1;
}
@@ -3474,7 +3533,7 @@ int av1_receive_raw_frame(AV1_COMP *cpi, aom_enc_frame_flags_t frame_flags,
YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
int64_t end_time) {
AV1_COMMON *const cm = &cpi->common;
- const SequenceHeader *const seq_params = &cm->seq_params;
+ const SequenceHeader *const seq_params = cm->seq_params;
int res = 0;
const int subsampling_x = sd->subsampling_x;
const int subsampling_y = sd->subsampling_y;
@@ -3516,7 +3575,7 @@ int av1_receive_raw_frame(AV1_COMP *cpi, aom_enc_frame_flags_t frame_flags,
res = -1;
#if CONFIG_INTERNAL_STATS
aom_usec_timer_mark(&timer);
- cpi->time_receive_data += aom_usec_timer_elapsed(&timer);
+ cpi->ppi->total_time_receive_data += aom_usec_timer_elapsed(&timer);
#endif
// Note: Regarding profile setting, the following checks are added to help
@@ -3528,20 +3587,20 @@ int av1_receive_raw_frame(AV1_COMP *cpi, aom_enc_frame_flags_t frame_flags,
// header.
if ((seq_params->profile == PROFILE_0) && !seq_params->monochrome &&
(subsampling_x != 1 || subsampling_y != 1)) {
- aom_internal_error(&cm->error, AOM_CODEC_INVALID_PARAM,
+ aom_internal_error(cm->error, AOM_CODEC_INVALID_PARAM,
"Non-4:2:0 color format requires profile 1 or 2");
res = -1;
}
if ((seq_params->profile == PROFILE_1) &&
!(subsampling_x == 0 && subsampling_y == 0)) {
- aom_internal_error(&cm->error, AOM_CODEC_INVALID_PARAM,
+ aom_internal_error(cm->error, AOM_CODEC_INVALID_PARAM,
"Profile 1 requires 4:4:4 color format");
res = -1;
}
if ((seq_params->profile == PROFILE_2) &&
(seq_params->bit_depth <= AOM_BITS_10) &&
!(subsampling_x == 1 && subsampling_y == 0)) {
- aom_internal_error(&cm->error, AOM_CODEC_INVALID_PARAM,
+ aom_internal_error(cm->error, AOM_CODEC_INVALID_PARAM,
"Profile 2 bit-depth <= 10 requires 4:2:2 color format");
res = -1;
}
@@ -3549,6 +3608,20 @@ int av1_receive_raw_frame(AV1_COMP *cpi, aom_enc_frame_flags_t frame_flags,
return res;
}
+#if CONFIG_ENTROPY_STATS
+void print_entropy_stats(AV1_PRIMARY *const ppi) {
+ if (!ppi->cpi) return;
+
+ if (ppi->cpi->oxcf.pass != 1 &&
+ ppi->cpi->common.current_frame.frame_number > 0) {
+ fprintf(stderr, "Writing counts.stt\n");
+ FILE *f = fopen("counts.stt", "wb");
+ fwrite(&ppi->aggregate_fc, sizeof(ppi->aggregate_fc), 1, f);
+ fclose(f);
+ }
+}
+#endif // CONFIG_ENTROPY_STATS
+
#if CONFIG_INTERNAL_STATS
extern double av1_get_blockiness(const unsigned char *img1, int img1_pitch,
const unsigned char *img2, int img2_pitch,
@@ -3564,11 +3637,16 @@ static void adjust_image_stat(double y, double u, double v, double all,
}
static void compute_internal_stats(AV1_COMP *cpi, int frame_bytes) {
+ AV1_PRIMARY *const ppi = cpi->ppi;
AV1_COMMON *const cm = &cpi->common;
double samples = 0.0;
const uint32_t in_bit_depth = cpi->oxcf.input_cfg.input_bit_depth;
const uint32_t bit_depth = cpi->td.mb.e_mbd.bd;
+ if (cpi->ppi->use_svc &&
+ cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1)
+ return;
+
#if CONFIG_INTER_STATS_ONLY
if (cm->current_frame.frame_type == KEY_FRAME) return; // skip key frame
#endif
@@ -3578,9 +3656,9 @@ static void compute_internal_stats(AV1_COMP *cpi, int frame_bytes) {
const YV12_BUFFER_CONFIG *recon = &cpi->common.cur_frame->buf;
double y, u, v, frame_all;
- cpi->count[0]++;
- cpi->count[1]++;
- if (cpi->b_calculate_psnr) {
+ ppi->count[0]++;
+ ppi->count[1]++;
+ if (cpi->ppi->b_calculate_psnr) {
PSNR_STATS psnr;
double weight[2] = { 0.0, 0.0 };
double frame_ssim2[2] = { 0.0, 0.0 };
@@ -3591,34 +3669,30 @@ static void compute_internal_stats(AV1_COMP *cpi, int frame_bytes) {
aom_calc_psnr(orig, recon, &psnr);
#endif
adjust_image_stat(psnr.psnr[1], psnr.psnr[2], psnr.psnr[3], psnr.psnr[0],
- &(cpi->psnr[0]));
- cpi->total_sq_error[0] += psnr.sse[0];
- cpi->total_samples[0] += psnr.samples[0];
+ &(ppi->psnr[0]));
+ ppi->total_sq_error[0] += psnr.sse[0];
+ ppi->total_samples[0] += psnr.samples[0];
samples = psnr.samples[0];
- // TODO(yaowu): unify these two versions into one.
- if (cm->seq_params.use_highbitdepth)
- aom_highbd_calc_ssim(orig, recon, weight, bit_depth, in_bit_depth,
- frame_ssim2);
- else
- aom_calc_ssim(orig, recon, &weight[0], &frame_ssim2[0]);
+ aom_calc_ssim(orig, recon, bit_depth, in_bit_depth,
+ cm->seq_params->use_highbitdepth, weight, frame_ssim2);
- cpi->worst_ssim = AOMMIN(cpi->worst_ssim, frame_ssim2[0]);
- cpi->summed_quality += frame_ssim2[0] * weight[0];
- cpi->summed_weights += weight[0];
+ ppi->worst_ssim = AOMMIN(ppi->worst_ssim, frame_ssim2[0]);
+ ppi->summed_quality += frame_ssim2[0] * weight[0];
+ ppi->summed_weights += weight[0];
#if CONFIG_AV1_HIGHBITDEPTH
// Compute PSNR based on stream bit depth
if ((cpi->source->flags & YV12_FLAG_HIGHBITDEPTH) &&
(in_bit_depth < bit_depth)) {
adjust_image_stat(psnr.psnr_hbd[1], psnr.psnr_hbd[2], psnr.psnr_hbd[3],
- psnr.psnr_hbd[0], &cpi->psnr[1]);
- cpi->total_sq_error[1] += psnr.sse_hbd[0];
- cpi->total_samples[1] += psnr.samples_hbd[0];
+ psnr.psnr_hbd[0], &ppi->psnr[1]);
+ ppi->total_sq_error[1] += psnr.sse_hbd[0];
+ ppi->total_samples[1] += psnr.samples_hbd[0];
- cpi->worst_ssim_hbd = AOMMIN(cpi->worst_ssim_hbd, frame_ssim2[1]);
- cpi->summed_quality_hbd += frame_ssim2[1] * weight[1];
- cpi->summed_weights_hbd += weight[1];
+ ppi->worst_ssim_hbd = AOMMIN(ppi->worst_ssim_hbd, frame_ssim2[1]);
+ ppi->summed_quality_hbd += frame_ssim2[1] * weight[1];
+ ppi->summed_weights_hbd += weight[1];
}
#endif
@@ -3636,48 +3710,207 @@ static void compute_internal_stats(AV1_COMP *cpi, int frame_bytes) {
}
#endif
}
- if (cpi->b_calculate_blockiness) {
- if (!cm->seq_params.use_highbitdepth) {
+ if (ppi->b_calculate_blockiness) {
+ if (!cm->seq_params->use_highbitdepth) {
const double frame_blockiness =
av1_get_blockiness(orig->y_buffer, orig->y_stride, recon->y_buffer,
recon->y_stride, orig->y_width, orig->y_height);
- cpi->worst_blockiness = AOMMAX(cpi->worst_blockiness, frame_blockiness);
- cpi->total_blockiness += frame_blockiness;
+ ppi->worst_blockiness = AOMMAX(ppi->worst_blockiness, frame_blockiness);
+ ppi->total_blockiness += frame_blockiness;
}
- if (cpi->b_calculate_consistency) {
- if (!cm->seq_params.use_highbitdepth) {
+ if (ppi->b_calculate_consistency) {
+ if (!cm->seq_params->use_highbitdepth) {
const double this_inconsistency = aom_get_ssim_metrics(
orig->y_buffer, orig->y_stride, recon->y_buffer, recon->y_stride,
- orig->y_width, orig->y_height, cpi->ssim_vars, &cpi->metrics, 1);
+ orig->y_width, orig->y_height, ppi->ssim_vars, &ppi->metrics, 1);
const double peak = (double)((1 << in_bit_depth) - 1);
const double consistency =
- aom_sse_to_psnr(samples, peak, cpi->total_inconsistency);
+ aom_sse_to_psnr(samples, peak, ppi->total_inconsistency);
if (consistency > 0.0)
- cpi->worst_consistency =
- AOMMIN(cpi->worst_consistency, consistency);
- cpi->total_inconsistency += this_inconsistency;
+ ppi->worst_consistency =
+ AOMMIN(ppi->worst_consistency, consistency);
+ ppi->total_inconsistency += this_inconsistency;
}
}
}
frame_all =
aom_calc_fastssim(orig, recon, &y, &u, &v, bit_depth, in_bit_depth);
- adjust_image_stat(y, u, v, frame_all, &cpi->fastssim);
+ adjust_image_stat(y, u, v, frame_all, &ppi->fastssim);
frame_all = aom_psnrhvs(orig, recon, &y, &u, &v, bit_depth, in_bit_depth);
- adjust_image_stat(y, u, v, frame_all, &cpi->psnrhvs);
+ adjust_image_stat(y, u, v, frame_all, &ppi->psnrhvs);
+ }
+}
+
+void print_internal_stats(AV1_PRIMARY *const ppi) {
+ if (!ppi->cpi) return;
+ AV1_COMP *const cpi = ppi->cpi;
+
+ if (ppi->cpi->oxcf.pass != 1 &&
+ ppi->cpi->common.current_frame.frame_number > 0) {
+ aom_clear_system_state();
+ char headings[512] = { 0 };
+ char results[512] = { 0 };
+ FILE *f = fopen("opsnr.stt", "a");
+ double time_encoded =
+ (cpi->time_stamps.prev_ts_end - cpi->time_stamps.first_ts_start) /
+ 10000000.000;
+ double total_encode_time =
+ (ppi->total_time_receive_data + ppi->total_time_compress_data) /
+ 1000.000;
+ const double dr =
+ (double)ppi->total_bytes * (double)8 / (double)1000 / time_encoded;
+ const double peak =
+ (double)((1 << ppi->cpi->oxcf.input_cfg.input_bit_depth) - 1);
+ const double target_rate =
+ (double)ppi->cpi->oxcf.rc_cfg.target_bandwidth / 1000;
+ const double rate_err = ((100.0 * (dr - target_rate)) / target_rate);
+
+ if (ppi->b_calculate_psnr) {
+ const double total_psnr = aom_sse_to_psnr(
+ (double)ppi->total_samples[0], peak, (double)ppi->total_sq_error[0]);
+ const double total_ssim =
+ 100 * pow(ppi->summed_quality / ppi->summed_weights, 8.0);
+ snprintf(headings, sizeof(headings),
+ "Bitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\tGLPsnrP\t"
+ "AOMSSIM\tVPSSIMP\tFASTSIM\tPSNRHVS\t"
+ "WstPsnr\tWstSsim\tWstFast\tWstHVS\t"
+ "AVPsrnY\tAPsnrCb\tAPsnrCr");
+ snprintf(results, sizeof(results),
+ "%7.2f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t"
+ "%7.3f\t%7.3f\t%7.3f\t%7.3f\t"
+ "%7.3f\t%7.3f\t%7.3f\t%7.3f\t"
+ "%7.3f\t%7.3f\t%7.3f",
+ dr, ppi->psnr[0].stat[STAT_ALL] / ppi->count[0], total_psnr,
+ ppi->psnr[0].stat[STAT_ALL] / ppi->count[0], total_psnr,
+ total_ssim, total_ssim,
+ ppi->fastssim.stat[STAT_ALL] / ppi->count[0],
+ ppi->psnrhvs.stat[STAT_ALL] / ppi->count[0], ppi->psnr[0].worst,
+ ppi->worst_ssim, ppi->fastssim.worst, ppi->psnrhvs.worst,
+ ppi->psnr[0].stat[STAT_Y] / ppi->count[0],
+ ppi->psnr[0].stat[STAT_U] / ppi->count[0],
+ ppi->psnr[0].stat[STAT_V] / ppi->count[0]);
+
+ if (ppi->b_calculate_blockiness) {
+ SNPRINT(headings, "\t Block\tWstBlck");
+ SNPRINT2(results, "\t%7.3f", ppi->total_blockiness / ppi->count[0]);
+ SNPRINT2(results, "\t%7.3f", ppi->worst_blockiness);
+ }
+
+ if (ppi->b_calculate_consistency) {
+ double consistency =
+ aom_sse_to_psnr((double)ppi->total_samples[0], peak,
+ (double)ppi->total_inconsistency);
+
+ SNPRINT(headings, "\tConsist\tWstCons");
+ SNPRINT2(results, "\t%7.3f", consistency);
+ SNPRINT2(results, "\t%7.3f", ppi->worst_consistency);
+ }
+
+ SNPRINT(headings, "\t Time\tRcErr\tAbsErr");
+ SNPRINT2(results, "\t%8.0f", total_encode_time);
+ SNPRINT2(results, " %7.2f", rate_err);
+ SNPRINT2(results, " %7.2f", fabs(rate_err));
+
+ SNPRINT(headings, "\tAPsnr611");
+ SNPRINT2(results, " %7.3f",
+ (6 * ppi->psnr[0].stat[STAT_Y] + ppi->psnr[0].stat[STAT_U] +
+ ppi->psnr[0].stat[STAT_V]) /
+ (ppi->count[0] * 8));
+
+#if CONFIG_AV1_HIGHBITDEPTH
+ const uint32_t in_bit_depth = ppi->cpi->oxcf.input_cfg.input_bit_depth;
+ const uint32_t bit_depth = ppi->seq_params.bit_depth;
+ // Since cpi->source->flags is not available here, but total_samples[1]
+ // will be non-zero if cpi->source->flags & YV12_FLAG_HIGHBITDEPTH was
+ // true in compute_internal_stats
+ if ((ppi->total_samples[1] > 0) && (in_bit_depth < bit_depth)) {
+ const double peak_hbd = (double)((1 << bit_depth) - 1);
+ const double total_psnr_hbd =
+ aom_sse_to_psnr((double)ppi->total_samples[1], peak_hbd,
+ (double)ppi->total_sq_error[1]);
+ const double total_ssim_hbd =
+ 100 * pow(ppi->summed_quality_hbd / ppi->summed_weights_hbd, 8.0);
+ SNPRINT(headings,
+ "\t AVGPsnrH GLBPsnrH AVPsnrPH GLPsnrPH"
+ " AVPsnrYH APsnrCbH APsnrCrH WstPsnrH"
+ " AOMSSIMH VPSSIMPH WstSsimH");
+ SNPRINT2(results, "\t%7.3f",
+ ppi->psnr[1].stat[STAT_ALL] / ppi->count[1]);
+ SNPRINT2(results, " %7.3f", total_psnr_hbd);
+ SNPRINT2(results, " %7.3f",
+ ppi->psnr[1].stat[STAT_ALL] / ppi->count[1]);
+ SNPRINT2(results, " %7.3f", total_psnr_hbd);
+ SNPRINT2(results, " %7.3f", ppi->psnr[1].stat[STAT_Y] / ppi->count[1]);
+ SNPRINT2(results, " %7.3f", ppi->psnr[1].stat[STAT_U] / ppi->count[1]);
+ SNPRINT2(results, " %7.3f", ppi->psnr[1].stat[STAT_V] / ppi->count[1]);
+ SNPRINT2(results, " %7.3f", ppi->psnr[1].worst);
+ SNPRINT2(results, " %7.3f", total_ssim_hbd);
+ SNPRINT2(results, " %7.3f", total_ssim_hbd);
+ SNPRINT2(results, " %7.3f", ppi->worst_ssim_hbd);
+ }
+#endif
+ fprintf(f, "%s\n", headings);
+ fprintf(f, "%s\n", results);
+ }
+
+ fclose(f);
+
+ if (ppi->ssim_vars != NULL) {
+ aom_free(ppi->ssim_vars);
+ ppi->ssim_vars = NULL;
+ }
}
}
#endif // CONFIG_INTERNAL_STATS
+void av1_post_encode_updates(AV1_COMP *const cpi, size_t size,
+ int64_t time_stamp, int64_t time_end) {
+ AV1_PRIMARY *const ppi = cpi->ppi;
+ AV1_COMMON *const cm = &cpi->common;
+ // Note *size = 0 indicates a dropped frame for which psnr is not calculated
+ if (ppi->b_calculate_psnr && size > 0) {
+ if (cm->show_existing_frame ||
+ (!is_stat_generation_stage(cpi) && cm->show_frame)) {
+ generate_psnr_packet(cpi);
+ }
+ }
+
+ if (ppi->level_params.keep_level_stats && !is_stat_generation_stage(cpi)) {
+ // Initialize level info. at the beginning of each sequence.
+ if (cm->current_frame.frame_type == KEY_FRAME && !cpi->no_show_fwd_kf) {
+ av1_init_level_info(cpi);
+ }
+ av1_update_level_info(cpi, size, time_stamp, time_end);
+ }
+
+#if CONFIG_INTERNAL_STATS
+ if (!is_stat_generation_stage(cpi)) {
+ compute_internal_stats(cpi, (int)size);
+ }
+#endif // CONFIG_INTERNAL_STATS
+}
+
int av1_get_compressed_data(AV1_COMP *cpi, unsigned int *frame_flags,
- size_t *size, uint8_t *dest, int64_t *time_stamp,
- int64_t *time_end, int flush,
+ size_t *size, size_t avail_size, uint8_t *dest,
+ int64_t *time_stamp, int64_t *time_end, int flush,
const aom_rational64_t *timestamp_ratio) {
const AV1EncoderConfig *const oxcf = &cpi->oxcf;
AV1_COMMON *const cm = &cpi->common;
+#if CONFIG_INTERNAL_STATS
+ cpi->frame_recode_hits = 0;
+ cpi->time_compress_data = 0;
+ cpi->bytes = 0;
+#endif
+#if CONFIG_ENTROPY_STATS
+ if (cpi->compressor_stage == ENCODE_STAGE) {
+ av1_zero(cpi->counts);
+ }
+#endif
+
#if CONFIG_BITSTREAM_DEBUG
assert(cpi->oxcf.max_threads <= 1 &&
"bitstream debug tool does not support multithreading");
@@ -3685,12 +3918,13 @@ int av1_get_compressed_data(AV1_COMP *cpi, unsigned int *frame_flags,
aom_bitstream_queue_set_frame_write(cm->current_frame.order_hint * 2 +
cm->show_frame);
#endif
- if (cpi->use_svc && cm->number_spatial_layers > 1) {
+ if (cpi->ppi->use_svc && cpi->ppi->number_spatial_layers > 1) {
av1_one_pass_cbr_svc_start_layer(cpi);
}
cm->showable_frame = 0;
*size = 0;
+ cpi->available_bs_size = avail_size;
#if CONFIG_INTERNAL_STATS
struct aom_usec_timer cmptimer;
aom_usec_timer_start(&cmptimer);
@@ -3763,27 +3997,9 @@ int av1_get_compressed_data(AV1_COMP *cpi, unsigned int *frame_flags,
aom_usec_timer_mark(&cmptimer);
cpi->time_compress_data += aom_usec_timer_elapsed(&cmptimer);
#endif // CONFIG_INTERNAL_STATS
- // Note *size = 0 indicates a dropped frame for which psnr is not calculated
- if (cpi->b_calculate_psnr && *size > 0) {
- if (cm->show_existing_frame ||
- (!is_stat_generation_stage(cpi) && cm->show_frame)) {
- generate_psnr_packet(cpi);
- }
- }
- if (cpi->level_params.keep_level_stats && !is_stat_generation_stage(cpi)) {
- // Initialize level info. at the beginning of each sequence.
- if (cm->current_frame.frame_type == KEY_FRAME && !cpi->no_show_fwd_kf) {
- av1_init_level_info(cpi);
- }
- av1_update_level_info(cpi, *size, *time_stamp, *time_end);
- }
+ av1_post_encode_updates(cpi, *size, *time_stamp, *time_end);
-#if CONFIG_INTERNAL_STATS
- if (!is_stat_generation_stage(cpi)) {
- compute_internal_stats(cpi, (int)(*size));
- }
-#endif // CONFIG_INTERNAL_STATS
#if CONFIG_SPEED_STATS
if (!is_stat_generation_stage(cpi) && !cm->show_existing_frame) {
cpi->tx_search_count += cpi->td.mb.txfm_search_info.tx_search_count;
@@ -3806,8 +4022,8 @@ int av1_get_preview_raw_frame(AV1_COMP *cpi, YV12_BUFFER_CONFIG *dest) {
*dest = cm->cur_frame->buf;
dest->y_width = cm->width;
dest->y_height = cm->height;
- dest->uv_width = cm->width >> cm->seq_params.subsampling_x;
- dest->uv_height = cm->height >> cm->seq_params.subsampling_y;
+ dest->uv_width = cm->width >> cm->seq_params->subsampling_x;
+ dest->uv_height = cm->height >> cm->seq_params->subsampling_y;
ret = 0;
} else {
ret = -1;
@@ -3829,12 +4045,12 @@ aom_codec_err_t av1_copy_new_frame_enc(AV1_COMMON *cm,
YV12_BUFFER_CONFIG *sd) {
const int num_planes = av1_num_planes(cm);
if (!equal_dimensions_and_border(new_frame, sd))
- aom_internal_error(&cm->error, AOM_CODEC_ERROR,
+ aom_internal_error(cm->error, AOM_CODEC_ERROR,
"Incorrect buffer dimensions");
else
aom_yv12_copy_frame(new_frame, sd, num_planes);
- return cm->error.error_code;
+ return cm->error->error_code;
}
int av1_set_internal_size(AV1EncoderConfig *const oxcf,
@@ -3919,7 +4135,7 @@ int av1_convert_sect5obus_to_annexb(uint8_t *buffer, size_t *frame_size) {
return AOM_CODEC_OK;
}
-static void svc_set_updates_external_ref_frame_config(
+static void svc_set_updates_ref_frame_config(
ExtRefreshFrameFlagsInfo *const ext_refresh_frame_flags, SVC *const svc) {
ext_refresh_frame_flags->update_pending = 1;
ext_refresh_frame_flags->last_frame = svc->refresh[svc->ref_idx[0]];
@@ -3980,7 +4196,7 @@ void av1_apply_encoding_flags(AV1_COMP *cpi, aom_enc_frame_flags_t flags) {
av1_use_as_reference(&ext_flags->ref_frame_flags, ref);
} else {
- if (cpi->svc.external_ref_frame_config) {
+ if (cpi->svc.set_ref_frame_config) {
int ref = svc_set_references_external_ref_frame_config(cpi);
av1_use_as_reference(&ext_flags->ref_frame_flags, ref);
}
@@ -4008,9 +4224,8 @@ void av1_apply_encoding_flags(AV1_COMP *cpi, aom_enc_frame_flags_t flags) {
ext_refresh_frame_flags->alt2_ref_frame = (upd & AOM_ALT2_FLAG) != 0;
ext_refresh_frame_flags->update_pending = 1;
} else {
- if (cpi->svc.external_ref_frame_config)
- svc_set_updates_external_ref_frame_config(ext_refresh_frame_flags,
- &cpi->svc);
+ if (cpi->svc.set_ref_frame_config)
+ svc_set_updates_ref_frame_config(ext_refresh_frame_flags, &cpi->svc);
else
ext_refresh_frame_flags->update_pending = 0;
}
@@ -4030,12 +4245,12 @@ void av1_apply_encoding_flags(AV1_COMP *cpi, aom_enc_frame_flags_t flags) {
}
}
-aom_fixed_buf_t *av1_get_global_headers(AV1_COMP *cpi) {
- if (!cpi) return NULL;
+aom_fixed_buf_t *av1_get_global_headers(AV1_PRIMARY *ppi) {
+ if (!ppi) return NULL;
uint8_t header_buf[512] = { 0 };
const uint32_t sequence_header_size =
- av1_write_sequence_header_obu(&cpi->common.seq_params, &header_buf[0]);
+ av1_write_sequence_header_obu(&ppi->seq_params, &header_buf[0]);
assert(sequence_header_size <= sizeof(header_buf));
if (sequence_header_size == 0) return NULL;
@@ -4046,7 +4261,8 @@ aom_fixed_buf_t *av1_get_global_headers(AV1_COMP *cpi) {
if (payload_offset + sequence_header_size > sizeof(header_buf)) return NULL;
memmove(&header_buf[payload_offset], &header_buf[0], sequence_header_size);
- if (av1_write_obu_header(&cpi->level_params, OBU_SEQUENCE_HEADER, 0,
+ if (av1_write_obu_header(&ppi->level_params, &ppi->cpi->frame_header_count,
+ OBU_SEQUENCE_HEADER, 0,
&header_buf[0]) != obu_header_size) {
return NULL;
}
diff --git a/third_party/libaom/source/libaom/av1/encoder/encoder.h b/third_party/libaom/source/libaom/av1/encoder/encoder.h
index 905470f437..fe6e76f498 100644
--- a/third_party/libaom/source/libaom/av1/encoder/encoder.h
+++ b/third_party/libaom/source/libaom/av1/encoder/encoder.h
@@ -35,6 +35,7 @@
#include "av1/encoder/block.h"
#include "av1/encoder/context_tree.h"
#include "av1/encoder/encodemb.h"
+#include "av1/encoder/external_partition.h"
#include "av1/encoder/firstpass.h"
#include "av1/encoder/global_motion.h"
#include "av1/encoder/level.h"
@@ -49,6 +50,7 @@
#include "av1/encoder/tokenize.h"
#include "av1/encoder/tpl_model.h"
#include "av1/encoder/av1_noise_estimate.h"
+#include "av1/encoder/bitstream.h"
#if CONFIG_INTERNAL_STATS
#include "aom_dsp/ssim.h"
@@ -119,6 +121,26 @@ enum {
FRAMEFLAGS_ERROR_RESILIENT = 1 << 6,
} UENUM1BYTE(FRAMETYPE_FLAGS);
+#if CONFIG_FRAME_PARALLEL_ENCODE
+// 0 level frames are sometimes used for rate control purposes, but for
+// reference mapping purposes, the minimum level should be 1.
+#define MIN_PYR_LEVEL 1
+static INLINE int get_true_pyr_level(int frame_level, int frame_order,
+ int max_layer_depth) {
+ if (frame_order == 0) {
+ // Keyframe case
+ return MIN_PYR_LEVEL;
+ } else if (frame_level == MAX_ARF_LAYERS) {
+ // Leaves
+ return max_layer_depth;
+ } else if (frame_level == (MAX_ARF_LAYERS + 1)) {
+ // Altrefs
+ return MIN_PYR_LEVEL;
+ }
+ return AOMMAX(MIN_PYR_LEVEL, frame_level);
+}
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
+
enum {
NO_AQ = 0,
VARIANCE_AQ = 1,
@@ -159,13 +181,6 @@ enum {
/*!\cond */
typedef enum {
- COST_UPD_SB,
- COST_UPD_SBROW,
- COST_UPD_TILE,
- COST_UPD_OFF,
-} COST_UPDATE_TYPE;
-
-typedef enum {
MOD_FP, // First pass
MOD_TF, // Temporal filtering
MOD_TPL, // TPL
@@ -173,12 +188,24 @@ typedef enum {
MOD_ENC, // Encode stage
MOD_LPF, // Deblocking loop filter
MOD_CDEF_SEARCH, // CDEF search
+ MOD_CDEF, // CDEF frame
MOD_LR, // Loop restoration filtering
+ MOD_PACK_BS, // Pack bitstream
NUM_MT_MODULES
} MULTI_THREADED_MODULES;
/*!\endcond */
+/*!\enum COST_UPDATE_TYPE
+ * \brief This enum controls how often the entropy costs should be updated.
+ */
+typedef enum {
+ COST_UPD_SB, /*!< Update every sb. */
+ COST_UPD_SBROW, /*!< Update every sb rows inside a tile. */
+ COST_UPD_TILE, /*!< Update every tile. */
+ COST_UPD_OFF, /*!< Turn off cost updates. */
+} COST_UPDATE_TYPE;
+
/*!
* \brief Encoder config related to resize.
*/
@@ -623,6 +650,8 @@ typedef struct {
COST_UPDATE_TYPE mode;
// Indicates the update frequency for mv costs.
COST_UPDATE_TYPE mv;
+ // Indicates the update frequency for dv costs.
+ COST_UPDATE_TYPE dv;
} CostUpdateFreq;
typedef struct {
@@ -711,7 +740,10 @@ typedef struct {
*/
typedef struct {
/*!
- * Indicates the loop filter sharpness.
+ * Controls the level at which rate-distortion optimization of transform
+ * coefficients favours sharpness in the block. Has no impact on RD when set
+ * to zero (default). For values 1-7, eob and skip block optimization are
+ * avoided and rdmult is adjusted in favour of block sharpness.
*/
int sharpness;
@@ -940,6 +972,10 @@ typedef struct AV1EncoderConfig {
// format.
bool save_as_annexb;
+ // The path for partition stats reading and writing, used in the experiment
+ // CONFIG_PARTITION_SEARCH_ORDER.
+ const char *partition_info_path;
+
/*!\endcond */
} AV1EncoderConfig;
@@ -1267,6 +1303,7 @@ typedef struct TileDataEnc {
TileInfo tile_info;
DECLARE_ALIGNED(16, FRAME_CONTEXT, tctx);
FRAME_CONTEXT *row_ctx;
+ uint64_t abs_sum_level;
uint8_t allow_update_cdf;
InterModeRdModel inter_mode_rd_models[BLOCK_SIZES_ALL];
AV1EncRowMultiThreadSync row_mt_sync;
@@ -1295,14 +1332,23 @@ typedef struct ThreadData {
PALETTE_BUFFER *palette_buffer;
CompoundTypeRdBuffers comp_rd_buffer;
CONV_BUF_TYPE *tmp_conv_dst;
+ uint64_t abs_sum_level;
uint8_t *tmp_pred_bufs[2];
int intrabc_used;
int deltaq_used;
+ int coefficient_size;
+ int max_mv_magnitude;
+ int interp_filter_selected[SWITCHABLE];
FRAME_CONTEXT *tctx;
VP64x64 *vt64x64;
int32_t num_64x64_blocks;
PICK_MODE_CONTEXT *firstpass_ctx;
TemporalFilterData tf_data;
+ TplTxfmStats tpl_txfm_stats;
+ // Pointer to the array of structures to store gradient information of each
+ // pixel in a superblock. The buffer constitutes of MAX_SB_SQUARE pixel level
+ // structures for each of the plane types (PLANE_TYPE_Y and PLANE_TYPE_UV).
+ PixelLevelGradientInfo *pixel_gradient_info;
} ThreadData;
struct EncWorkerData;
@@ -1427,6 +1473,11 @@ typedef struct MultiThreadInfo {
AV1LrSync lr_row_sync;
/*!
+ * Pack bitstream multi-threading object.
+ */
+ AV1EncPackBSSync pack_bs_sync;
+
+ /*!
* Global Motion multi-threading object.
*/
AV1GlobalMotionSync gm_sync;
@@ -1440,6 +1491,11 @@ typedef struct MultiThreadInfo {
* CDEF search multi-threading object.
*/
AV1CdefSync cdef_sync;
+
+ /*!
+ * CDEF row multi-threading data.
+ */
+ AV1CdefWorkerData *cdef_worker;
} MultiThreadInfo;
/*!\cond */
@@ -1561,10 +1617,13 @@ enum {
rd_pick_sb_modes_time,
av1_rd_pick_intra_mode_sb_time,
av1_rd_pick_inter_mode_sb_time,
+ set_params_rd_pick_inter_mode_time,
+ skip_inter_mode_time,
handle_inter_mode_time,
evaluate_motion_mode_for_winner_candidates_time,
- handle_intra_mode_time,
do_tx_search_time,
+ handle_intra_mode_time,
+ refine_winner_mode_tx_time,
av1_search_palette_mode_time,
handle_newmv_time,
compound_type_rd_time,
@@ -1609,11 +1668,15 @@ static INLINE char const *get_component_name(int index) {
return "av1_rd_pick_intra_mode_sb_time";
case av1_rd_pick_inter_mode_sb_time:
return "av1_rd_pick_inter_mode_sb_time";
+ case set_params_rd_pick_inter_mode_time:
+ return "set_params_rd_pick_inter_mode_time";
+ case skip_inter_mode_time: return "skip_inter_mode_time";
case handle_inter_mode_time: return "handle_inter_mode_time";
case evaluate_motion_mode_for_winner_candidates_time:
return "evaluate_motion_mode_for_winner_candidates_time";
- case handle_intra_mode_time: return "handle_intra_mode_time";
case do_tx_search_time: return "do_tx_search_time";
+ case handle_intra_mode_time: return "handle_intra_mode_time";
+ case refine_winner_mode_tx_time: return "refine_winner_mode_tx_time";
case av1_search_palette_mode_time: return "av1_search_palette_mode_time";
case handle_newmv_time: return "handle_newmv_time";
case compound_type_rd_time: return "compound_type_rd_time";
@@ -2045,12 +2108,88 @@ typedef struct {
uint8_t *entropy_ctx;
} CoeffBufferPool;
+#if CONFIG_FRAME_PARALLEL_ENCODE
+/*!
+ * \brief Max number of frames that can be encoded in a parallel encode set.
+ */
+#define MAX_PARALLEL_FRAMES 4
+
+/*!
+ * \brief Structure to hold data of frame encoded in a given parallel encode
+ * set.
+ */
+typedef struct AV1_FP_OUT_DATA {
+ /*!
+ * Buffer to store packed bitstream data of a frame.
+ */
+ unsigned char *cx_data_frame;
+
+ /*!
+ * Allocated size of the cx_data_frame buffer.
+ */
+ size_t cx_data_sz;
+
+ /*!
+ * Size of data written in the cx_data_frame buffer.
+ */
+ size_t frame_size;
+
+ /*!
+ * Display order hint of frame whose packed data is in cx_data_frame buffer.
+ */
+ int frame_display_order_hint;
+} AV1_FP_OUT_DATA;
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
+
/*!
* \brief Top level primary encoder structure
*/
typedef struct AV1_PRIMARY {
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ /*!
+ * Array of frame level encoder stage top level structures
+ */
+ struct AV1_COMP *parallel_cpi[MAX_PARALLEL_FRAMES];
+
+ /*!
+ * Number of frame level contexts(cpis)
+ */
+ int num_fp_contexts;
+
+ /*!
+ * Array of structures to hold data of frames encoded in a given parallel
+ * encode set.
+ */
+ struct AV1_FP_OUT_DATA parallel_frames_data[MAX_PARALLEL_FRAMES - 1];
+
+ /*!
+ * Loopfilter levels of the previous encoded frame.
+ */
+ int filter_level[2];
+ int filter_level_u;
+ int filter_level_v;
+
+ /*!
+ * Largest MV component used in previous encoded frame during
+ * stats consumption stage.
+ */
+ int max_mv_magnitude;
+
+ /*!
+ * Temporary variable simulating the delayed frame_probability update.
+ */
+ FrameProbInfo temp_frame_probs;
+
+ /*!
+ * Temporary variable used in simulating the delayed update of
+ * avg_frame_qindex.
+ */
+ int temp_avg_frame_qindex[FRAME_TYPES];
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
/*!
* Encode stage top level structure
+ * When CONFIG_FRAME_PARALLEL_ENCODE is enabled this is the same as
+ * parallel_cpi[0]
*/
struct AV1_COMP *cpi;
@@ -2063,6 +2202,186 @@ typedef struct AV1_PRIMARY {
* Look-ahead context.
*/
struct lookahead_ctx *lookahead;
+
+ /*!
+ * Sequence parameters have been transmitted already and locked
+ * or not. Once locked av1_change_config cannot change the seq
+ * parameters.
+ */
+ int seq_params_locked;
+
+ /*!
+ * Pointer to internal utility functions that manipulate aom_codec_* data
+ * structures.
+ */
+ struct aom_codec_pkt_list *output_pkt_list;
+
+ /*!
+ * When set, indicates that internal ARFs are enabled.
+ */
+ int internal_altref_allowed;
+
+ /*!
+ * Information related to a gf group.
+ */
+ GF_GROUP gf_group;
+
+ /*!
+ * Track prior gf group state.
+ */
+ GF_STATE gf_state;
+
+ /*!
+ * Flag indicating whether look ahead processing (LAP) is enabled.
+ */
+ int lap_enabled;
+
+ /*!
+ * Parameters for AV1 bitstream levels.
+ */
+ AV1LevelParams level_params;
+
+ /*!
+ * Calculates PSNR on each frame when set to 1.
+ */
+ int b_calculate_psnr;
+
+ /*!
+ * Number of frames left to be encoded, is 0 if limit is not set.
+ */
+ int frames_left;
+
+ /*!
+ * Information related to two pass encoding.
+ */
+ TWO_PASS twopass;
+
+ /*!
+ * Rate control related parameters.
+ */
+ PRIMARY_RATE_CONTROL p_rc;
+
+ /*!
+ * Frame buffer holding the temporally filtered source frame. It can be KEY
+ * frame or ARF frame.
+ */
+ YV12_BUFFER_CONFIG alt_ref_buffer;
+
+ /*!
+ * Elements part of the sequence header, that are applicable for all the
+ * frames in the video.
+ */
+ SequenceHeader seq_params;
+
+ /*!
+ * Indicates whether to use SVC.
+ */
+ int use_svc;
+
+ /*!
+ * If true, buffer removal times are present.
+ */
+ bool buffer_removal_time_present;
+
+ /*!
+ * Number of temporal layers: may be > 1 for SVC (scalable vector coding).
+ */
+ unsigned int number_temporal_layers;
+
+ /*!
+ * Number of spatial layers: may be > 1 for SVC (scalable vector coding).
+ */
+ unsigned int number_spatial_layers;
+
+ /*!
+ * Code and details about current error status.
+ */
+ struct aom_internal_error_info error;
+
+ /*!
+ * Function pointers to variants of sse/sad/variance computation functions.
+ * fn_ptr[i] indicates the list of function pointers corresponding to block
+ * size i.
+ */
+ aom_variance_fn_ptr_t fn_ptr[BLOCK_SIZES_ALL];
+
+ /*!
+ * Scaling factors used in the RD multiplier modulation.
+ * TODO(sdeng): consider merge the following arrays.
+ * tpl_rdmult_scaling_factors is a temporary buffer used to store the
+ * intermediate scaling factors which are used in the calculation of
+ * tpl_sb_rdmult_scaling_factors. tpl_rdmult_scaling_factors[i] stores the
+ * intermediate scaling factor of the ith 16 x 16 block in raster scan order.
+ */
+ double *tpl_rdmult_scaling_factors;
+
+ /*!
+ * tpl_sb_rdmult_scaling_factors[i] stores the RD multiplier scaling factor of
+ * the ith 16 x 16 block in raster scan order.
+ */
+ double *tpl_sb_rdmult_scaling_factors;
+
+ /*!
+ * Parameters related to tpl.
+ */
+ TplParams tpl_data;
+
+ /*!
+ * Motion vector stats of the previous encoded frame.
+ */
+ MV_STATS mv_stats;
+
+#if CONFIG_INTERNAL_STATS
+ /*!\cond */
+ uint64_t total_time_receive_data;
+ uint64_t total_time_compress_data;
+
+ unsigned int total_mode_chosen_counts[MAX_MODES];
+
+ int count[2];
+ uint64_t total_sq_error[2];
+ uint64_t total_samples[2];
+ ImageStat psnr[2];
+
+ double total_blockiness;
+ double worst_blockiness;
+
+ int total_bytes;
+ double summed_quality;
+ double summed_weights;
+ double summed_quality_hbd;
+ double summed_weights_hbd;
+ unsigned int total_recode_hits;
+ double worst_ssim;
+ double worst_ssim_hbd;
+
+ ImageStat fastssim;
+ ImageStat psnrhvs;
+
+ int b_calculate_blockiness;
+ int b_calculate_consistency;
+
+ double total_inconsistency;
+ double worst_consistency;
+ Ssimv *ssim_vars;
+ Metrics metrics;
+ /*!\endcond */
+#endif
+
+#if CONFIG_ENTROPY_STATS
+ /*!
+ * Aggregates frame counts for the sequence.
+ */
+ FRAME_COUNTS aggregate_fc;
+#endif // CONFIG_ENTROPY_STATS
+
+ /*!
+ * For each type of reference frame, this contains the index of a reference
+ * frame buffer for a reference frame of the same type. We use this to
+ * choose our primary reference frame (which is the most recent reference
+ * frame of the same type as the current frame).
+ */
+ int fb_of_context_type[REF_FRAMES];
} AV1_PRIMARY;
/*!
@@ -2173,9 +2492,9 @@ typedef struct AV1_COMP {
YV12_BUFFER_CONFIG *unfiltered_source;
/*!
- * Parameters related to tpl.
+ * Skip tpl setup when tpl data from gop length decision can be reused.
*/
- TplParams tpl_data;
+ int skip_tpl_setup_stats;
/*!
* Temporal filter context.
@@ -2209,14 +2528,6 @@ typedef struct AV1_COMP {
RefreshFrameFlagsInfo refresh_frame;
/*!
- * For each type of reference frame, this contains the index of a reference
- * frame buffer for a reference frame of the same type. We use this to
- * choose our primary reference frame (which is the most recent reference
- * frame of the same type as the current frame).
- */
- int fb_of_context_type[REF_FRAMES];
-
- /*!
* Flags signalled by the external interface at frame level.
*/
ExternalFlags ext_flags;
@@ -2275,12 +2586,6 @@ typedef struct AV1_COMP {
double framerate;
/*!
- * Pointer to internal utility functions that manipulate aom_codec_* data
- * structures.
- */
- struct aom_codec_pkt_list *output_pkt_list;
-
- /*!
* Bitmask indicating which reference buffers may be referenced by this frame.
*/
int ref_frame_flags;
@@ -2322,26 +2627,9 @@ typedef struct AV1_COMP {
ActiveMap active_map;
/*!
- * Function pointers to variants of sse/sad/variance computation functions.
- * fn_ptr[i] indicates the list of function pointers corresponding to block
- * size i.
- */
- aom_variance_fn_ptr_t fn_ptr[BLOCK_SIZES_ALL];
-
- /*!
- * Information related to two pass encoding.
- */
- TWO_PASS twopass;
-
- /*!
- * Information related to a gf group.
- */
- GF_GROUP gf_group;
-
- /*!
- * Track prior gf group state.
+ * The frame processing order within a GOP.
*/
- GF_STATE gf_state;
+ unsigned char gf_frame_index;
/*!
* To control the reference frame buffer and selection.
@@ -2349,58 +2637,20 @@ typedef struct AV1_COMP {
RefBufferStack ref_buffer_stack;
/*!
- * Frame buffer holding the temporally filtered source frame. It can be KEY
- * frame or ARF frame.
- */
- YV12_BUFFER_CONFIG alt_ref_buffer;
-
- /*!
* Tell if OVERLAY frame shows existing alt_ref frame.
*/
int show_existing_alt_ref;
#if CONFIG_INTERNAL_STATS
/*!\cond */
- uint64_t time_receive_data;
uint64_t time_compress_data;
unsigned int mode_chosen_counts[MAX_MODES];
-
- int count[2];
- uint64_t total_sq_error[2];
- uint64_t total_samples[2];
- ImageStat psnr[2];
-
- double total_blockiness;
- double worst_blockiness;
-
int bytes;
- double summed_quality;
- double summed_weights;
- double summed_quality_hbd;
- double summed_weights_hbd;
- unsigned int tot_recode_hits;
- double worst_ssim;
- double worst_ssim_hbd;
-
- ImageStat fastssim;
- ImageStat psnrhvs;
-
- int b_calculate_blockiness;
- int b_calculate_consistency;
-
- double total_inconsistency;
- double worst_consistency;
- Ssimv *ssim_vars;
- Metrics metrics;
+ unsigned int frame_recode_hits;
/*!\endcond */
#endif
- /*!
- * Calculates PSNR on each frame when set to 1.
- */
- int b_calculate_psnr;
-
#if CONFIG_SPEED_STATS
/*!
* For debugging: number of transform searches we have performed.
@@ -2458,13 +2708,6 @@ typedef struct AV1_COMP {
TokenInfo token_info;
/*!
- * Sequence parameters have been transmitted already and locked
- * or not. Once locked av1_change_config cannot change the seq
- * parameters.
- */
- int seq_params_locked;
-
- /*!
* VARIANCE_AQ segment map refresh.
*/
int vaq_refresh;
@@ -2492,21 +2735,11 @@ typedef struct AV1_COMP {
int existing_fb_idx_to_show;
/*!
- * When set, indicates that internal ARFs are enabled.
- */
- int internal_altref_allowed;
-
- /*!
* A flag to indicate if intrabc is ever used in current frame.
*/
int intrabc_used;
/*!
- * Tables to calculate IntraBC MV cost.
- */
- IntraBCMVCosts dv_costs;
-
- /*!
* Mark which ref frames can be skipped for encoding current frame during RDO.
*/
int prune_ref_frame_mask;
@@ -2571,9 +2804,9 @@ typedef struct AV1_COMP {
#endif
/*!
- * Parameters for AV1 bitstream levels.
+ * Count the number of OBU_FRAME and OBU_FRAME_HEADER for level calculation.
*/
- AV1LevelParams level_params;
+ int frame_header_count;
/*!
* Whether any no-zero delta_q was actually used.
@@ -2586,20 +2819,6 @@ typedef struct AV1_COMP {
RefFrameDistanceInfo ref_frame_dist_info;
/*!
- * Scaling factors used in the RD multiplier modulation.
- * TODO(sdeng): consider merge the following arrays.
- * tpl_rdmult_scaling_factors is a temporary buffer used to store the
- * intermediate scaling factors which are used in the calculation of
- * tpl_sb_rdmult_scaling_factors. tpl_rdmult_scaling_factors[i] stores the
- * intermediate scaling factor of the ith 16 x 16 block in raster scan order.
- */
- double *tpl_rdmult_scaling_factors;
- /*!
- * tpl_sb_rdmult_scaling_factors[i] stores the RD multiplier scaling factor of
- * the ith 16 x 16 block in raster scan order.
- */
- double *tpl_sb_rdmult_scaling_factors;
- /*!
* ssim_rdmult_scaling_factors[i] stores the RD multiplier scaling factor of
* the ith 16 x 16 block in raster scan order. This scaling factor is used for
* RD multiplier modulation when SSIM tuning is enabled.
@@ -2621,30 +2840,16 @@ typedef struct AV1_COMP {
#endif
/*!
- * Indicates whether to use SVC.
- */
- int use_svc;
- /*!
* Parameters for scalable video coding.
*/
SVC svc;
/*!
- * Flag indicating whether look ahead processing (LAP) is enabled.
- */
- int lap_enabled;
- /*!
* Indicates whether current processing stage is encode stage or LAP stage.
*/
COMPRESSOR_STAGE compressor_stage;
/*!
- * Some motion vector stats from the last encoded frame to help us decide what
- * precision to use to encode the current frame.
- */
- MV_STATS mv_stats;
-
- /*!
* Frame type of the last frame. May be used in some heuristics for speeding
* up the encoding.
*/
@@ -2686,14 +2891,35 @@ typedef struct AV1_COMP {
uint8_t *consec_zero_mv;
/*!
- * Number of frames left to be encoded, is 0 if limit is not set.
+ * Block size of first pass encoding
*/
- int frames_left;
+ BLOCK_SIZE fp_block_size;
/*!
- * Block size of first pass encoding
+ * The counter of encoded super block, used to differentiate block names.
+ * This number starts from 0 and increases whenever a super block is encoded.
*/
- BLOCK_SIZE fp_block_size;
+ int sb_counter;
+
+ /*!
+ * Available bitstream buffer size in bytes
+ */
+ size_t available_bs_size;
+
+ /*!
+ * The controller of the external partition model.
+ * It is used to do partition type selection based on external models.
+ */
+ ExtPartController ext_part_controller;
+
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ /*!
+ * A flag to indicate frames that will update their data to the primary
+ * context at the end of the encode. It is set for non-parallel frames and the
+ * last frame in encode order in a given parallel encode set.
+ */
+ bool do_frame_data_update;
+#endif
} AV1_COMP;
/*!
@@ -2773,26 +2999,39 @@ void av1_initialize_enc(void);
struct AV1_COMP *av1_create_compressor(AV1_PRIMARY *ppi, AV1EncoderConfig *oxcf,
BufferPool *const pool,
- FIRSTPASS_STATS *frame_stats_buf,
COMPRESSOR_STAGE stage,
- int num_lap_buffers,
- int lap_lag_in_frames,
- STATS_BUFFER_CTX *stats_buf_context);
+ int lap_lag_in_frames);
-struct AV1_PRIMARY *av1_create_primary_compressor();
+struct AV1_PRIMARY *av1_create_primary_compressor(
+ struct aom_codec_pkt_list *pkt_list_head, int num_lap_buffers,
+ AV1EncoderConfig *oxcf);
void av1_remove_compressor(AV1_COMP *cpi);
void av1_remove_primary_compressor(AV1_PRIMARY *ppi);
-void av1_change_config(AV1_COMP *cpi, const AV1EncoderConfig *oxcf);
+#if CONFIG_ENTROPY_STATS
+void print_entropy_stats(AV1_PRIMARY *const ppi);
+#endif
+#if CONFIG_INTERNAL_STATS
+void print_internal_stats(AV1_PRIMARY *ppi);
+#endif
+
+void av1_change_config_seq(AV1_PRIMARY *ppi, const AV1EncoderConfig *oxcf,
+ bool *sb_size_changed);
+
+void av1_change_config(AV1_COMP *cpi, const AV1EncoderConfig *oxcf,
+ bool sb_size_changed);
void av1_check_initial_width(AV1_COMP *cpi, int use_highbitdepth,
int subsampling_x, int subsampling_y);
-void av1_init_seq_coding_tools(SequenceHeader *seq, AV1_COMMON *cm,
+void av1_init_seq_coding_tools(AV1_PRIMARY *const ppi,
const AV1EncoderConfig *oxcf, int use_svc);
+void av1_post_encode_updates(AV1_COMP *const cpi, size_t size,
+ int64_t time_stamp, int64_t time_end);
+
/*!\endcond */
/*!\brief Obtain the raw frame data
@@ -2827,6 +3066,7 @@ int av1_receive_raw_frame(AV1_COMP *cpi, aom_enc_frame_flags_t frame_flags,
* \param[in] cpi Top-level encoder structure
* \param[in] frame_flags Flags to decide how to encoding the frame
* \param[in] size Bitstream size
+ * \param[in] avail_size Available bitstream buffer size
* \param[in] dest Bitstream output
* \param[out] time_stamp Time stamp of the frame
* \param[out] time_end Time end
@@ -2840,8 +3080,8 @@ int av1_receive_raw_frame(AV1_COMP *cpi, aom_enc_frame_flags_t frame_flags,
* \retval #AOM_CODEC_ERROR
*/
int av1_get_compressed_data(AV1_COMP *cpi, unsigned int *frame_flags,
- size_t *size, uint8_t *dest, int64_t *time_stamp,
- int64_t *time_end, int flush,
+ size_t *size, size_t avail_size, uint8_t *dest,
+ int64_t *time_stamp, int64_t *time_end, int flush,
const aom_rational64_t *timebase);
/*!\brief Run 1-pass/2-pass encoding
@@ -2902,6 +3142,71 @@ void av1_set_screen_content_options(struct AV1_COMP *cpi,
void av1_update_frame_size(AV1_COMP *cpi);
+#if CONFIG_FRAME_PARALLEL_ENCODE
+typedef struct {
+ int pyr_level;
+ int disp_order;
+} RefFrameMapPair;
+
+static INLINE void init_ref_map_pair(
+ AV1_COMP *cpi, RefFrameMapPair ref_frame_map_pairs[REF_FRAMES]) {
+ if (cpi->ppi->gf_group.update_type[cpi->gf_frame_index] == KF_UPDATE) {
+ memset(ref_frame_map_pairs, -1, sizeof(*ref_frame_map_pairs) * REF_FRAMES);
+ return;
+ }
+ memset(ref_frame_map_pairs, 0, sizeof(*ref_frame_map_pairs) * REF_FRAMES);
+ for (int map_idx = 0; map_idx < REF_FRAMES; map_idx++) {
+ // Get reference frame buffer.
+ const RefCntBuffer *const buf = cpi->common.ref_frame_map[map_idx];
+ if (ref_frame_map_pairs[map_idx].disp_order == -1) continue;
+ if (buf == NULL) {
+ ref_frame_map_pairs[map_idx].disp_order = -1;
+ ref_frame_map_pairs[map_idx].pyr_level = -1;
+ continue;
+ } else if (buf->ref_count > 1) {
+ // Once the keyframe is coded, the slots in ref_frame_map will all
+ // point to the same frame. In that case, all subsequent pointers
+ // matching the current are considered "free" slots. This will find
+ // the next occurance of the current pointer if ref_count indicates
+ // there are multiple instances of it and mark it as free.
+ for (int idx2 = map_idx + 1; idx2 < REF_FRAMES; ++idx2) {
+ const RefCntBuffer *const buf2 = cpi->common.ref_frame_map[idx2];
+ if (buf2 == buf) {
+ ref_frame_map_pairs[idx2].disp_order = -1;
+ ref_frame_map_pairs[idx2].pyr_level = -1;
+ }
+ }
+ }
+ ref_frame_map_pairs[map_idx].disp_order = (int)buf->display_order_hint;
+ ref_frame_map_pairs[map_idx].pyr_level = buf->pyramid_level;
+ }
+}
+
+static AOM_INLINE void calc_frame_data_update_flag(
+ GF_GROUP *const gf_group, int gf_frame_index,
+ bool *const do_frame_data_update) {
+ *do_frame_data_update = true;
+ // Set the flag to false for all frames in a given parallel encode set except
+ // the last frame in the set with frame_parallel_level = 2.
+ if (gf_group->frame_parallel_level[gf_frame_index] == 1) {
+ *do_frame_data_update = false;
+ } else if (gf_group->frame_parallel_level[gf_frame_index] == 2) {
+ // Check if this is the last frame in the set with frame_parallel_level = 2.
+ for (int i = gf_frame_index + 1; i < gf_group->size; i++) {
+ if ((gf_group->frame_parallel_level[i] == 0 &&
+ (gf_group->update_type[i] == ARF_UPDATE ||
+ gf_group->update_type[i] == INTNL_ARF_UPDATE)) ||
+ gf_group->frame_parallel_level[i] == 1) {
+ break;
+ } else if (gf_group->frame_parallel_level[i] == 2) {
+ *do_frame_data_update = false;
+ break;
+ }
+ }
+ }
+}
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
+
// TODO(jingning): Move these functions as primitive members for the new cpi
// class.
static INLINE void stack_push(int *stack, int *stack_size, int item) {
@@ -2949,8 +3254,9 @@ ticks_to_timebase_units(const aom_rational64_t *timestamp_ratio, int64_t n) {
}
static INLINE int frame_is_kf_gf_arf(const AV1_COMP *cpi) {
- const GF_GROUP *const gf_group = &cpi->gf_group;
- const FRAME_UPDATE_TYPE update_type = gf_group->update_type[gf_group->index];
+ const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
+ const FRAME_UPDATE_TYPE update_type =
+ gf_group->update_type[cpi->gf_frame_index];
return frame_is_intra_only(&cpi->common) || update_type == ARF_UPDATE ||
update_type == GF_UPDATE;
@@ -3009,10 +3315,25 @@ static INLINE int is_altref_enabled(int lag_in_frames, bool enable_auto_arf) {
return lag_in_frames >= ALT_MIN_LAG && enable_auto_arf;
}
+static AOM_INLINE int can_disable_altref(const GFConfig *gf_cfg) {
+ return is_altref_enabled(gf_cfg->lag_in_frames, gf_cfg->enable_auto_arf) &&
+ (gf_cfg->gf_min_pyr_height == 0);
+}
+
+static AOM_INLINE int use_ml_model_to_decide_flat_gop(
+ const RateControlCfg *rc_cfg) {
+ return (rc_cfg->mode == AOM_Q && rc_cfg->cq_level <= 200);
+}
+
+// Helper function to compute number of blocks on either side of the frame.
+static INLINE int get_num_blocks(const int frame_length, const int mb_length) {
+ return (frame_length + mb_length - 1) / mb_length;
+}
+
// Check if statistics generation stage
static INLINE int is_stat_generation_stage(const AV1_COMP *const cpi) {
assert(IMPLIES(cpi->compressor_stage == LAP_STAGE,
- cpi->oxcf.pass == 0 && cpi->lap_enabled));
+ cpi->oxcf.pass == 0 && cpi->ppi->lap_enabled));
return (cpi->oxcf.pass == 1 || (cpi->compressor_stage == LAP_STAGE));
}
// Check if statistics consumption stage
@@ -3024,7 +3345,7 @@ static INLINE int is_stat_consumption_stage_twopass(const AV1_COMP *const cpi) {
static INLINE int is_stat_consumption_stage(const AV1_COMP *const cpi) {
return (is_stat_consumption_stage_twopass(cpi) ||
(cpi->oxcf.pass == 0 && (cpi->compressor_stage == ENCODE_STAGE) &&
- cpi->lap_enabled));
+ cpi->ppi->lap_enabled));
}
/*!\endcond */
@@ -3037,11 +3358,18 @@ static INLINE int is_stat_consumption_stage(const AV1_COMP *const cpi) {
* \return 0 if no stats for current stage else 1
*/
static INLINE int has_no_stats_stage(const AV1_COMP *const cpi) {
- assert(IMPLIES(!cpi->lap_enabled, cpi->compressor_stage == ENCODE_STAGE));
- return (cpi->oxcf.pass == 0 && !cpi->lap_enabled);
+ assert(
+ IMPLIES(!cpi->ppi->lap_enabled, cpi->compressor_stage == ENCODE_STAGE));
+ return (cpi->oxcf.pass == 0 && !cpi->ppi->lap_enabled);
}
+
/*!\cond */
+static INLINE int is_one_pass_rt_params(const AV1_COMP *cpi) {
+ return has_no_stats_stage(cpi) && cpi->oxcf.mode == REALTIME &&
+ cpi->oxcf.gf_cfg.lag_in_frames == 0;
+}
+
// Function return size of frame stats buffer
static INLINE int get_stats_buf_size(int num_lap_buffer, int num_lag_buffer) {
/* if lookahead is enabled return num_lap_buffers else num_lag_buffers */
@@ -3208,7 +3536,7 @@ static INLINE int get_ref_frame_flags(const SPEED_FEATURES *const sf,
// Note: The OBU returned is in Low Overhead Bitstream Format. Specifically,
// the obu_has_size_field bit is set, and the buffer contains the obu_size
// field.
-aom_fixed_buf_t *av1_get_global_headers(AV1_COMP *cpi);
+aom_fixed_buf_t *av1_get_global_headers(AV1_PRIMARY *ppi);
#define MAX_GFUBOOST_FACTOR 10.0
#define MIN_GFUBOOST_FACTOR 4.0
@@ -3229,9 +3557,9 @@ static INLINE int is_frame_eligible_for_ref_pruning(const GF_GROUP *gf_group,
}
// Get update type of the current frame.
-static INLINE FRAME_UPDATE_TYPE
-get_frame_update_type(const GF_GROUP *gf_group) {
- return gf_group->update_type[gf_group->index];
+static INLINE FRAME_UPDATE_TYPE get_frame_update_type(const GF_GROUP *gf_group,
+ int gf_frame_index) {
+ return gf_group->update_type[gf_frame_index];
}
static INLINE int av1_pixels_to_mi(int pixels) {
@@ -3241,14 +3569,15 @@ static INLINE int av1_pixels_to_mi(int pixels) {
static AOM_INLINE int is_psnr_calc_enabled(const AV1_COMP *cpi) {
const AV1_COMMON *const cm = &cpi->common;
- return cpi->b_calculate_psnr && !is_stat_generation_stage(cpi) &&
+ return cpi->ppi->b_calculate_psnr && !is_stat_generation_stage(cpi) &&
cm->show_frame;
}
#if CONFIG_AV1_TEMPORAL_DENOISING
static INLINE int denoise_svc(const struct AV1_COMP *const cpi) {
- return (!cpi->use_svc || (cpi->use_svc && cpi->svc.spatial_layer_id >=
- cpi->svc.first_layer_denoise));
+ return (!cpi->ppi->use_svc ||
+ (cpi->ppi->use_svc &&
+ cpi->svc.spatial_layer_id >= cpi->svc.first_layer_denoise));
}
#endif
diff --git a/third_party/libaom/source/libaom/av1/encoder/encoder_alloc.h b/third_party/libaom/source/libaom/av1/encoder/encoder_alloc.h
index eae34e0fe6..6eb44e7ee1 100644
--- a/third_party/libaom/source/libaom/av1/encoder/encoder_alloc.h
+++ b/third_party/libaom/source/libaom/av1/encoder/encoder_alloc.h
@@ -56,7 +56,7 @@ static AOM_INLINE void alloc_compressor_data(AV1_COMP *cpi) {
TokenInfo *token_info = &cpi->token_info;
if (av1_alloc_context_buffers(cm, cm->width, cm->height)) {
- aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
+ aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate context buffers");
}
@@ -78,6 +78,13 @@ static AOM_INLINE void alloc_compressor_data(AV1_COMP *cpi) {
CHECK_MEM_ERROR(cm, cpi->td.mb.mv_costs,
(MvCosts *)aom_calloc(1, sizeof(MvCosts)));
+ if (cpi->td.mb.dv_costs) {
+ aom_free(cpi->td.mb.dv_costs);
+ cpi->td.mb.dv_costs = NULL;
+ }
+ CHECK_MEM_ERROR(cm, cpi->td.mb.dv_costs,
+ (IntraBCMVCosts *)aom_malloc(sizeof(*cpi->td.mb.dv_costs)));
+
av1_setup_shared_coeff_buffer(&cpi->common, &cpi->td.shared_coeff_buf);
av1_setup_sms_tree(cpi, &cpi->td);
cpi->td.firstpass_ctx =
@@ -186,19 +193,10 @@ static AOM_INLINE void dealloc_compressor_data(AV1_COMP *cpi) {
aom_free(cpi->ssim_rdmult_scaling_factors);
cpi->ssim_rdmult_scaling_factors = NULL;
- aom_free(cpi->tpl_rdmult_scaling_factors);
- cpi->tpl_rdmult_scaling_factors = NULL;
-
- aom_free(cpi->tpl_sb_rdmult_scaling_factors);
- cpi->tpl_sb_rdmult_scaling_factors = NULL;
-
#if CONFIG_TUNE_VMAF
aom_free(cpi->vmaf_info.rdmult_scaling_factors);
cpi->vmaf_info.rdmult_scaling_factors = NULL;
-
-#if CONFIG_USE_VMAF_RC
- aom_close_vmaf_model_rc(cpi->vmaf_info.vmaf_model);
-#endif
+ aom_close_vmaf_model(cpi->vmaf_info.vmaf_model);
#endif
#if CONFIG_TUNE_BUTTERAUGLI
@@ -215,6 +213,11 @@ static AOM_INLINE void dealloc_compressor_data(AV1_COMP *cpi) {
cpi->td.mb.mv_costs = NULL;
}
+ if (cpi->td.mb.dv_costs) {
+ aom_free(cpi->td.mb.dv_costs);
+ cpi->td.mb.dv_costs = NULL;
+ }
+
aom_free(cpi->td.mb.inter_modes_info);
cpi->td.mb.inter_modes_info = NULL;
@@ -235,7 +238,6 @@ static AOM_INLINE void dealloc_compressor_data(AV1_COMP *cpi) {
av1_free_pmc(cpi->td.firstpass_ctx, av1_num_planes(cm));
cpi->td.firstpass_ctx = NULL;
- av1_free_ref_frame_buffers(cm->buffer_pool);
av1_free_txb_buf(cpi);
av1_free_context_buffers(cm);
@@ -243,10 +245,15 @@ static AOM_INLINE void dealloc_compressor_data(AV1_COMP *cpi) {
#if !CONFIG_REALTIME_ONLY
av1_free_restoration_buffers(cm);
#endif
+
+ if (!is_stat_generation_stage(cpi))
+ av1_free_cdef_buffers(cm, &cpi->mt_info.cdef_worker,
+ &cpi->mt_info.cdef_sync,
+ cpi->mt_info.num_mod_workers[MOD_CDEF]);
+
aom_free_frame_buffer(&cpi->trial_frame_rst);
aom_free_frame_buffer(&cpi->scaled_source);
aom_free_frame_buffer(&cpi->scaled_last_source);
- aom_free_frame_buffer(&cpi->alt_ref_buffer);
free_token_info(token_info);
@@ -259,6 +266,7 @@ static AOM_INLINE void dealloc_compressor_data(AV1_COMP *cpi) {
for (int j = 0; j < 2; ++j) {
aom_free(cpi->td.mb.tmp_pred_bufs[j]);
}
+ aom_free(cpi->td.mb.pixel_gradient_info);
#if CONFIG_DENOISE
if (cpi->denoise_and_model) {
@@ -271,11 +279,7 @@ static AOM_INLINE void dealloc_compressor_data(AV1_COMP *cpi) {
cpi->film_grain_table = NULL;
}
- for (int i = 0; i < MAX_NUM_OPERATING_POINTS; ++i) {
- aom_free(cpi->level_params.level_info[i]);
- }
-
- if (cpi->use_svc) av1_free_svc_cyclic_refresh(cpi);
+ if (cpi->ppi->use_svc) av1_free_svc_cyclic_refresh(cpi);
if (cpi->consec_zero_mv) {
aom_free(cpi->consec_zero_mv);
@@ -285,7 +289,7 @@ static AOM_INLINE void dealloc_compressor_data(AV1_COMP *cpi) {
static AOM_INLINE void variance_partition_alloc(AV1_COMP *cpi) {
AV1_COMMON *const cm = &cpi->common;
- const int num_64x64_blocks = (cm->seq_params.sb_size == BLOCK_64X64) ? 1 : 4;
+ const int num_64x64_blocks = (cm->seq_params->sb_size == BLOCK_64X64) ? 1 : 4;
if (cpi->td.vt64x64) {
if (num_64x64_blocks != cpi->td.num_64x64_blocks) {
aom_free(cpi->td.vt64x64);
@@ -301,7 +305,7 @@ static AOM_INLINE void variance_partition_alloc(AV1_COMP *cpi) {
static AOM_INLINE void alloc_altref_frame_buffer(AV1_COMP *cpi) {
AV1_COMMON *cm = &cpi->common;
- const SequenceHeader *const seq_params = &cm->seq_params;
+ const SequenceHeader *const seq_params = cm->seq_params;
const AV1EncoderConfig *oxcf = &cpi->oxcf;
// When lag_in_frames <= 1, alt-ref frames are not enabled. In this case,
@@ -311,29 +315,29 @@ static AOM_INLINE void alloc_altref_frame_buffer(AV1_COMP *cpi) {
// TODO(agrange) Check if ARF is enabled and skip allocation if not.
if (aom_realloc_frame_buffer(
- &cpi->alt_ref_buffer, oxcf->frm_dim_cfg.width,
+ &cpi->ppi->alt_ref_buffer, oxcf->frm_dim_cfg.width,
oxcf->frm_dim_cfg.height, seq_params->subsampling_x,
seq_params->subsampling_y, seq_params->use_highbitdepth,
cpi->oxcf.border_in_pixels, cm->features.byte_alignment, NULL, NULL,
NULL, cpi->oxcf.tool_cfg.enable_global_motion))
- aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
+ aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate altref buffer");
}
static AOM_INLINE void alloc_util_frame_buffers(AV1_COMP *cpi) {
AV1_COMMON *const cm = &cpi->common;
- const SequenceHeader *const seq_params = &cm->seq_params;
+ const SequenceHeader *const seq_params = cm->seq_params;
const int byte_alignment = cm->features.byte_alignment;
if (aom_realloc_frame_buffer(
&cpi->last_frame_uf, cm->width, cm->height, seq_params->subsampling_x,
seq_params->subsampling_y, seq_params->use_highbitdepth,
cpi->oxcf.border_in_pixels, byte_alignment, NULL, NULL, NULL, 0))
- aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
+ aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate last frame buffer");
// The frame buffer trial_frame_rst is used during loop restoration filter
// search. Hence it is allocated only when loop restoration is used.
- const int use_restoration = cm->seq_params.enable_restoration &&
+ const int use_restoration = cm->seq_params->enable_restoration &&
!cm->features.all_lossless &&
!cm->tiles.large_scale;
if (use_restoration) {
@@ -342,7 +346,7 @@ static AOM_INLINE void alloc_util_frame_buffers(AV1_COMP *cpi) {
cm->superres_upscaled_height, seq_params->subsampling_x,
seq_params->subsampling_y, seq_params->use_highbitdepth,
AOM_RESTORATION_FRAME_BORDER, byte_alignment, NULL, NULL, NULL, 0))
- aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
+ aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate trial restored frame buffer");
}
@@ -351,7 +355,7 @@ static AOM_INLINE void alloc_util_frame_buffers(AV1_COMP *cpi) {
seq_params->subsampling_y, seq_params->use_highbitdepth,
cpi->oxcf.border_in_pixels, byte_alignment, NULL, NULL, NULL,
cpi->oxcf.tool_cfg.enable_global_motion))
- aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
+ aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate scaled source buffer");
// The frame buffer cpi->scaled_last_source is used to hold the previous
@@ -367,7 +371,7 @@ static AOM_INLINE void alloc_util_frame_buffers(AV1_COMP *cpi) {
seq_params->use_highbitdepth, cpi->oxcf.border_in_pixels,
byte_alignment, NULL, NULL, NULL,
cpi->oxcf.tool_cfg.enable_global_motion))
- aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
+ aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate scaled last source buffer");
}
}
@@ -384,16 +388,16 @@ static AOM_INLINE YV12_BUFFER_CONFIG *realloc_and_scale_source(
if (aom_realloc_frame_buffer(
&cpi->scaled_source, scaled_width, scaled_height,
- cm->seq_params.subsampling_x, cm->seq_params.subsampling_y,
- cm->seq_params.use_highbitdepth, AOM_BORDER_IN_PIXELS,
+ cm->seq_params->subsampling_x, cm->seq_params->subsampling_y,
+ cm->seq_params->use_highbitdepth, AOM_BORDER_IN_PIXELS,
cm->features.byte_alignment, NULL, NULL, NULL,
cpi->oxcf.tool_cfg.enable_global_motion))
- aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
+ aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
"Failed to reallocate scaled source buffer");
assert(cpi->scaled_source.y_crop_width == scaled_width);
assert(cpi->scaled_source.y_crop_height == scaled_height);
av1_resize_and_extend_frame_nonnormative(
- cpi->unscaled_source, &cpi->scaled_source, (int)cm->seq_params.bit_depth,
+ cpi->unscaled_source, &cpi->scaled_source, (int)cm->seq_params->bit_depth,
num_planes);
return &cpi->scaled_source;
}
diff --git a/third_party/libaom/source/libaom/av1/encoder/encoder_utils.c b/third_party/libaom/source/libaom/av1/encoder/encoder_utils.c
index 7a7e8505b4..557268f9d3 100644
--- a/third_party/libaom/source/libaom/av1/encoder/encoder_utils.c
+++ b/third_party/libaom/source/libaom/av1/encoder/encoder_utils.c
@@ -344,7 +344,7 @@ static void configure_static_seg_features(AV1_COMP *cpi) {
seg->update_data = 1;
qi_delta = av1_compute_qdelta(rc, rc->avg_q, rc->avg_q * 0.875,
- cm->seq_params.bit_depth);
+ cm->seq_params->bit_depth);
av1_set_segdata(seg, 1, SEG_LVL_ALT_Q, qi_delta - 2);
av1_set_segdata(seg, 1, SEG_LVL_ALT_LF_Y_H, -2);
av1_set_segdata(seg, 1, SEG_LVL_ALT_LF_Y_V, -2);
@@ -459,13 +459,13 @@ void av1_apply_active_map(AV1_COMP *cpi) {
#if !CONFIG_REALTIME_ONLY
static void process_tpl_stats_frame(AV1_COMP *cpi) {
- const GF_GROUP *const gf_group = &cpi->gf_group;
+ const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
AV1_COMMON *const cm = &cpi->common;
- assert(IMPLIES(gf_group->size > 0, gf_group->index < gf_group->size));
+ assert(IMPLIES(gf_group->size > 0, cpi->gf_frame_index < gf_group->size));
- const int tpl_idx = gf_group->index;
- TplParams *const tpl_data = &cpi->tpl_data;
+ const int tpl_idx = cpi->gf_frame_index;
+ TplParams *const tpl_data = &cpi->ppi->tpl_data;
TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx];
TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
@@ -497,22 +497,23 @@ static void process_tpl_stats_frame(AV1_COMP *cpi) {
} else {
aom_clear_system_state();
cpi->rd.r0 = (double)intra_cost_base / mc_dep_cost_base;
- if (is_frame_tpl_eligible(gf_group, gf_group->index)) {
- if (cpi->lap_enabled) {
- double min_boost_factor = sqrt(cpi->rc.baseline_gf_interval);
+ if (is_frame_tpl_eligible(gf_group, cpi->gf_frame_index)) {
+ if (cpi->ppi->lap_enabled) {
+ double min_boost_factor = sqrt(cpi->ppi->p_rc.baseline_gf_interval);
const int gfu_boost = get_gfu_boost_from_r0_lap(
min_boost_factor, MAX_GFUBOOST_FACTOR, cpi->rd.r0,
- cpi->rc.num_stats_required_for_gfu_boost);
+ cpi->ppi->p_rc.num_stats_required_for_gfu_boost);
// printf("old boost %d new boost %d\n", cpi->rc.gfu_boost,
// gfu_boost);
- cpi->rc.gfu_boost = combine_prior_with_tpl_boost(
- min_boost_factor, MAX_BOOST_COMBINE_FACTOR, cpi->rc.gfu_boost,
- gfu_boost, cpi->rc.num_stats_used_for_gfu_boost);
+ cpi->ppi->p_rc.gfu_boost = combine_prior_with_tpl_boost(
+ min_boost_factor, MAX_BOOST_COMBINE_FACTOR,
+ cpi->ppi->p_rc.gfu_boost, gfu_boost,
+ cpi->ppi->p_rc.num_stats_used_for_gfu_boost);
} else {
const int gfu_boost = (int)(200.0 / cpi->rd.r0);
- cpi->rc.gfu_boost = combine_prior_with_tpl_boost(
+ cpi->ppi->p_rc.gfu_boost = combine_prior_with_tpl_boost(
MIN_BOOST_COMBINE_FACTOR, MAX_BOOST_COMBINE_FACTOR,
- cpi->rc.gfu_boost, gfu_boost, cpi->rc.frames_to_key);
+ cpi->ppi->p_rc.gfu_boost, gfu_boost, cpi->rc.frames_to_key);
}
}
aom_clear_system_state();
@@ -529,17 +530,17 @@ void av1_set_size_dependent_vars(AV1_COMP *cpi, int *q, int *bottom_index,
av1_set_speed_features_framesize_dependent(cpi, cpi->speed);
#if !CONFIG_REALTIME_ONLY
- GF_GROUP *gf_group = &cpi->gf_group;
+ GF_GROUP *gf_group = &cpi->ppi->gf_group;
if (cpi->oxcf.algo_cfg.enable_tpl_model &&
- is_frame_tpl_eligible(gf_group, gf_group->index)) {
+ is_frame_tpl_eligible(gf_group, cpi->gf_frame_index)) {
process_tpl_stats_frame(cpi);
av1_tpl_rdmult_setup(cpi);
}
#endif
// Decide q and q bounds.
- *q = av1_rc_pick_q_and_bounds(cpi, &cpi->rc, cm->width, cm->height,
- cpi->gf_group.index, bottom_index, top_index);
+ *q = av1_rc_pick_q_and_bounds(cpi, cm->width, cm->height, cpi->gf_frame_index,
+ bottom_index, top_index);
// Configure experimental use of segmentation for enhanced coding of
// static regions if indicated.
@@ -564,6 +565,23 @@ static void reset_film_grain_chroma_params(aom_film_grain_t *pars) {
memset(pars->ar_coeffs_cb, 0, sizeof(pars->ar_coeffs_cb));
}
+void av1_update_film_grain_parameters_seq(struct AV1_PRIMARY *ppi,
+ const AV1EncoderConfig *oxcf) {
+ SequenceHeader *const seq_params = &ppi->seq_params;
+ const TuneCfg *const tune_cfg = &oxcf->tune_cfg;
+
+ if (tune_cfg->film_grain_test_vector || tune_cfg->film_grain_table_filename ||
+ tune_cfg->content == AOM_CONTENT_FILM) {
+ seq_params->film_grain_params_present = 1;
+ } else {
+#if CONFIG_DENOISE
+ seq_params->film_grain_params_present = (oxcf->noise_level > 0);
+#else
+ seq_params->film_grain_params_present = 0;
+#endif
+ }
+}
+
void av1_update_film_grain_parameters(struct AV1_COMP *cpi,
const AV1EncoderConfig *oxcf) {
AV1_COMMON *const cm = &cpi->common;
@@ -577,39 +595,30 @@ void av1_update_film_grain_parameters(struct AV1_COMP *cpi,
}
if (tune_cfg->film_grain_test_vector) {
- cm->seq_params.film_grain_params_present = 1;
if (cm->current_frame.frame_type == KEY_FRAME) {
memcpy(&cm->film_grain_params,
film_grain_test_vectors + tune_cfg->film_grain_test_vector - 1,
sizeof(cm->film_grain_params));
if (oxcf->tool_cfg.enable_monochrome)
reset_film_grain_chroma_params(&cm->film_grain_params);
- cm->film_grain_params.bit_depth = cm->seq_params.bit_depth;
- if (cm->seq_params.color_range == AOM_CR_FULL_RANGE) {
+ cm->film_grain_params.bit_depth = cm->seq_params->bit_depth;
+ if (cm->seq_params->color_range == AOM_CR_FULL_RANGE) {
cm->film_grain_params.clip_to_restricted_range = 0;
}
}
} else if (tune_cfg->film_grain_table_filename) {
- cm->seq_params.film_grain_params_present = 1;
-
cpi->film_grain_table = aom_malloc(sizeof(*cpi->film_grain_table));
memset(cpi->film_grain_table, 0, sizeof(aom_film_grain_table_t));
aom_film_grain_table_read(cpi->film_grain_table,
- tune_cfg->film_grain_table_filename, &cm->error);
+ tune_cfg->film_grain_table_filename, cm->error);
} else if (tune_cfg->content == AOM_CONTENT_FILM) {
- cm->seq_params.film_grain_params_present = 1;
- cm->film_grain_params.bit_depth = cm->seq_params.bit_depth;
+ cm->film_grain_params.bit_depth = cm->seq_params->bit_depth;
if (oxcf->tool_cfg.enable_monochrome)
reset_film_grain_chroma_params(&cm->film_grain_params);
- if (cm->seq_params.color_range == AOM_CR_FULL_RANGE)
+ if (cm->seq_params->color_range == AOM_CR_FULL_RANGE)
cm->film_grain_params.clip_to_restricted_range = 0;
} else {
-#if CONFIG_DENOISE
- cm->seq_params.film_grain_params_present = (cpi->oxcf.noise_level > 0);
-#else
- cm->seq_params.film_grain_params_present = 0;
-#endif
memset(&cm->film_grain_params, 0, sizeof(cm->film_grain_params));
}
}
@@ -643,7 +652,7 @@ void av1_scale_references(AV1_COMP *cpi, const InterpFilter filter,
if (aom_yv12_realloc_with_new_border(
&ref_fb->buf, AOM_BORDER_IN_PIXELS,
cm->features.byte_alignment, num_planes) != 0) {
- aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
+ aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate frame buffer");
}
}
@@ -652,7 +661,7 @@ void av1_scale_references(AV1_COMP *cpi, const InterpFilter filter,
if (new_fb == NULL) {
const int new_fb_idx = get_free_fb(cm);
if (new_fb_idx == INVALID_IDX) {
- aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
+ aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
"Unable to find free frame buffer");
}
force_scaling = 1;
@@ -663,30 +672,30 @@ void av1_scale_references(AV1_COMP *cpi, const InterpFilter filter,
new_fb->buf.y_crop_height != cm->height) {
if (aom_realloc_frame_buffer(
&new_fb->buf, cm->width, cm->height,
- cm->seq_params.subsampling_x, cm->seq_params.subsampling_y,
- cm->seq_params.use_highbitdepth, AOM_BORDER_IN_PIXELS,
+ cm->seq_params->subsampling_x, cm->seq_params->subsampling_y,
+ cm->seq_params->use_highbitdepth, AOM_BORDER_IN_PIXELS,
cm->features.byte_alignment, NULL, NULL, NULL, 0)) {
if (force_scaling) {
// Release the reference acquired in the get_free_fb() call above.
--new_fb->ref_count;
}
- aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
+ aom_internal_error(cm->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate frame buffer");
}
#if CONFIG_AV1_HIGHBITDEPTH
- if (use_optimized_scaler && cm->seq_params.bit_depth == AOM_BITS_8)
+ if (use_optimized_scaler && cm->seq_params->bit_depth == AOM_BITS_8)
av1_resize_and_extend_frame(ref, &new_fb->buf, filter, phase,
num_planes);
else
av1_resize_and_extend_frame_nonnormative(
- ref, &new_fb->buf, (int)cm->seq_params.bit_depth, num_planes);
+ ref, &new_fb->buf, (int)cm->seq_params->bit_depth, num_planes);
#else
if (use_optimized_scaler)
av1_resize_and_extend_frame(ref, &new_fb->buf, filter, phase,
num_planes);
else
av1_resize_and_extend_frame_nonnormative(
- ref, &new_fb->buf, (int)cm->seq_params.bit_depth, num_planes);
+ ref, &new_fb->buf, (int)cm->seq_params->bit_depth, num_planes);
#endif
cpi->scaled_ref_buf[ref_frame - 1] = new_fb;
alloc_frame_mvs(cm, new_fb);
@@ -704,10 +713,8 @@ void av1_scale_references(AV1_COMP *cpi, const InterpFilter filter,
}
}
-BLOCK_SIZE av1_select_sb_size(const AV1_COMP *const cpi) {
- const AV1_COMMON *const cm = &cpi->common;
- const AV1EncoderConfig *const oxcf = &cpi->oxcf;
-
+BLOCK_SIZE av1_select_sb_size(const AV1EncoderConfig *const oxcf, int width,
+ int height, int number_spatial_layers) {
if (oxcf->tool_cfg.superblock_size == AOM_SUPERBLOCK_SIZE_64X64)
return BLOCK_64X64;
if (oxcf->tool_cfg.superblock_size == AOM_SUPERBLOCK_SIZE_128X128)
@@ -715,7 +722,7 @@ BLOCK_SIZE av1_select_sb_size(const AV1_COMP *const cpi) {
assert(oxcf->tool_cfg.superblock_size == AOM_SUPERBLOCK_SIZE_DYNAMIC);
- if (cpi->svc.number_spatial_layers > 1 ||
+ if (number_spatial_layers > 1 ||
oxcf->resize_cfg.resize_mode != RESIZE_NONE) {
// Use the configured size (top resolution) for spatial layers or
// on resize.
@@ -732,7 +739,7 @@ BLOCK_SIZE av1_select_sb_size(const AV1_COMP *const cpi) {
// speed-feature.
if (oxcf->superres_cfg.superres_mode == AOM_SUPERRES_NONE &&
oxcf->resize_cfg.resize_mode == RESIZE_NONE && oxcf->speed >= 1) {
- return AOMMIN(cm->width, cm->height) > 480 ? BLOCK_128X128 : BLOCK_64X64;
+ return AOMMIN(width, height) > 480 ? BLOCK_128X128 : BLOCK_64X64;
}
return BLOCK_128X128;
@@ -753,8 +760,10 @@ void av1_setup_frame(AV1_COMP *cpi) {
if ((cm->current_frame.frame_type == KEY_FRAME && cm->show_frame) ||
frame_is_sframe(cm)) {
- if (!cpi->seq_params_locked) {
- set_sb_size(&cm->seq_params, av1_select_sb_size(cpi));
+ if (!cpi->ppi->seq_params_locked) {
+ set_sb_size(cm->seq_params,
+ av1_select_sb_size(&cpi->oxcf, cm->width, cm->height,
+ cpi->svc.number_spatial_layers));
}
} else {
const RefCntBuffer *const primary_ref_buf = get_primary_ref_frame_buf(cm);
@@ -959,7 +968,7 @@ void av1_determine_sc_tools_with_encoding(AV1_COMP *cpi, const int q_orig) {
av1_set_speed_features_qindex_dependent(cpi, oxcf->speed);
if (q_cfg->deltaq_mode != NO_DELTA_Q || q_cfg->enable_chroma_deltaq)
av1_init_quantizer(&cpi->enc_quant_dequant_params, &cm->quant_params,
- cm->seq_params.bit_depth);
+ cm->seq_params->bit_depth);
av1_set_variance_partition_thresholds(cpi, q_for_screen_content_quick_run,
0);
@@ -1005,13 +1014,13 @@ void av1_finalize_encoded_frame(AV1_COMP *const cpi) {
AV1_COMMON *const cm = &cpi->common;
CurrentFrame *const current_frame = &cm->current_frame;
- if (!cm->seq_params.reduced_still_picture_hdr &&
+ if (!cm->seq_params->reduced_still_picture_hdr &&
encode_show_existing_frame(cm)) {
RefCntBuffer *const frame_to_show =
cm->ref_frame_map[cpi->existing_fb_idx_to_show];
if (frame_to_show == NULL) {
- aom_internal_error(&cm->error, AOM_CODEC_UNSUP_BITSTREAM,
+ aom_internal_error(cm->error, AOM_CODEC_UNSUP_BITSTREAM,
"Buffer does not contain a reconstructed frame");
}
assert(frame_to_show->ref_count > 0);
@@ -1019,7 +1028,7 @@ void av1_finalize_encoded_frame(AV1_COMP *const cpi) {
}
if (!encode_show_existing_frame(cm) &&
- cm->seq_params.film_grain_params_present &&
+ cm->seq_params->film_grain_params_present &&
(cm->show_frame || cm->showable_frame)) {
// Copy the current frame's film grain params to the its corresponding
// RefCntBuffer slot.
@@ -1232,7 +1241,7 @@ static void save_extra_coding_context(AV1_COMP *cpi) {
cc->lf = cm->lf;
cc->cdef_info = cm->cdef_info;
cc->rc = cpi->rc;
- cc->mv_stats = cpi->mv_stats;
+ cc->mv_stats = cpi->ppi->mv_stats;
}
void av1_save_all_coding_context(AV1_COMP *cpi) {
@@ -1301,11 +1310,11 @@ void av1_dump_filtered_recon_frames(AV1_COMP *cpi) {
"show_frame=%d, show_existing_frame=%d, source_alt_ref_active=%d, "
"refresh_alt_ref_frame=%d, "
"y_stride=%4d, uv_stride=%4d, cm->width=%4d, cm->height=%4d\n\n",
- current_frame->frame_number, cpi->gf_group.index,
- cpi->gf_group.update_type[cpi->gf_group.index], current_frame->order_hint,
- cm->show_frame, cm->show_existing_frame, cpi->rc.source_alt_ref_active,
- cpi->refresh_frame.alt_ref_frame, recon_buf->y_stride,
- recon_buf->uv_stride, cm->width, cm->height);
+ current_frame->frame_number, cpi->gf_frame_index,
+ cpi->ppi->gf_group.update_type[cpi->gf_frame_index],
+ current_frame->order_hint, cm->show_frame, cm->show_existing_frame,
+ cpi->rc.source_alt_ref_active, cpi->refresh_frame.alt_ref_frame,
+ recon_buf->y_stride, recon_buf->uv_stride, cm->width, cm->height);
#if 0
int ref_frame;
printf("get_ref_frame_map_idx: [");
diff --git a/third_party/libaom/source/libaom/av1/encoder/encoder_utils.h b/third_party/libaom/source/libaom/av1/encoder/encoder_utils.h
index 40652e956c..e75bc79ba6 100644
--- a/third_party/libaom/source/libaom/av1/encoder/encoder_utils.h
+++ b/third_party/libaom/source/libaom/av1/encoder/encoder_utils.h
@@ -125,14 +125,14 @@ static AOM_INLINE void init_buffer_indices(
}
#define HIGHBD_BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF, JSDAF, JSVAF) \
- cpi->fn_ptr[BT].sdf = SDF; \
- cpi->fn_ptr[BT].sdaf = SDAF; \
- cpi->fn_ptr[BT].vf = VF; \
- cpi->fn_ptr[BT].svf = SVF; \
- cpi->fn_ptr[BT].svaf = SVAF; \
- cpi->fn_ptr[BT].sdx4df = SDX4DF; \
- cpi->fn_ptr[BT].jsdaf = JSDAF; \
- cpi->fn_ptr[BT].jsvaf = JSVAF;
+ ppi->fn_ptr[BT].sdf = SDF; \
+ ppi->fn_ptr[BT].sdaf = SDAF; \
+ ppi->fn_ptr[BT].vf = VF; \
+ ppi->fn_ptr[BT].svf = SVF; \
+ ppi->fn_ptr[BT].svaf = SVAF; \
+ ppi->fn_ptr[BT].sdx4df = SDX4DF; \
+ ppi->fn_ptr[BT].jsdaf = JSDAF; \
+ ppi->fn_ptr[BT].jsvaf = JSVAF;
#define HIGHBD_BFP_WRAPPER(WIDTH, HEIGHT, BD) \
HIGHBD_BFP( \
@@ -325,8 +325,8 @@ MAKE_BFP_JSADAVG_WRAPPER(aom_highbd_dist_wtd_sad64x16_avg)
#endif // CONFIG_AV1_HIGHBITDEPTH
#define HIGHBD_MBFP(BT, MCSDF, MCSVF) \
- cpi->fn_ptr[BT].msdf = MCSDF; \
- cpi->fn_ptr[BT].msvf = MCSVF;
+ ppi->fn_ptr[BT].msdf = MCSDF; \
+ ppi->fn_ptr[BT].msvf = MCSVF;
#define HIGHBD_MBFP_WRAPPER(WIDTH, HEIGHT, BD) \
HIGHBD_MBFP(BLOCK_##WIDTH##X##HEIGHT, \
@@ -386,8 +386,8 @@ MAKE_MBFP_COMPOUND_SAD_WRAPPER(aom_highbd_masked_sad64x16)
#endif
#define HIGHBD_SDSFP(BT, SDSF, SDSX4DF) \
- cpi->fn_ptr[BT].sdsf = SDSF; \
- cpi->fn_ptr[BT].sdsx4df = SDSX4DF;
+ ppi->fn_ptr[BT].sdsf = SDSF; \
+ ppi->fn_ptr[BT].sdsx4df = SDSX4DF;
#define HIGHBD_SDSFP_WRAPPER(WIDTH, HEIGHT, BD) \
HIGHBD_SDSFP(BLOCK_##WIDTH##X##HEIGHT, \
@@ -487,9 +487,9 @@ MAKE_SDSF_SKIP_SAD_4D_WRAPPER(aom_highbd_sad_skip_8x32x4d)
aom_highbd_obmc_sub_pixel_variance##WIDTH##x##HEIGHT)
#define HIGHBD_OBFP(BT, OSDF, OVF, OSVF) \
- cpi->fn_ptr[BT].osdf = OSDF; \
- cpi->fn_ptr[BT].ovf = OVF; \
- cpi->fn_ptr[BT].osvf = OSVF;
+ ppi->fn_ptr[BT].osdf = OSDF; \
+ ppi->fn_ptr[BT].ovf = OVF; \
+ ppi->fn_ptr[BT].osvf = OSVF;
#define HIGHBD_OBFP_WRAPPER(WIDTH, HEIGHT, BD) \
HIGHBD_OBFP(BLOCK_##WIDTH##X##HEIGHT, \
@@ -542,10 +542,10 @@ MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad16x64)
MAKE_OBFP_SAD_WRAPPER(aom_highbd_obmc_sad64x16)
#endif
-static AOM_INLINE void highbd_set_var_fns(AV1_COMP *const cpi) {
- AV1_COMMON *const cm = &cpi->common;
- if (cm->seq_params.use_highbitdepth) {
- switch (cm->seq_params.bit_depth) {
+static AOM_INLINE void highbd_set_var_fns(AV1_PRIMARY *const ppi) {
+ SequenceHeader *const seq_params = &ppi->seq_params;
+ if (seq_params->use_highbitdepth) {
+ switch (seq_params->bit_depth) {
case AOM_BITS_8:
#if !CONFIG_REALTIME_ONLY
HIGHBD_BFP_WRAPPER(64, 16, 8)
@@ -850,7 +850,7 @@ static AOM_INLINE void highbd_set_var_fns(AV1_COMP *const cpi) {
default:
assert(0 &&
- "cm->seq_params.bit_depth should be AOM_BITS_8, "
+ "cm->seq_params->bit_depth should be AOM_BITS_8, "
"AOM_BITS_10 or AOM_BITS_12");
}
}
@@ -873,6 +873,33 @@ static AOM_INLINE void copy_frame_prob_info(AV1_COMP *cpi) {
av1_copy(frame_probs->switchable_interp_probs,
default_switchable_interp_probs);
}
+
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ FrameProbInfo *const temp_frame_probs = &cpi->ppi->temp_frame_probs;
+ if (cpi->sf.tx_sf.tx_type_search.prune_tx_type_using_stats) {
+ av1_copy(temp_frame_probs->tx_type_probs, default_tx_type_probs);
+ }
+ if (cpi->sf.inter_sf.prune_obmc_prob_thresh > 0 &&
+ cpi->sf.inter_sf.prune_obmc_prob_thresh < INT_MAX) {
+ av1_copy(temp_frame_probs->obmc_probs, default_obmc_probs);
+ }
+ if (cpi->sf.inter_sf.prune_warped_prob_thresh > 0) {
+ av1_copy(temp_frame_probs->warped_probs, default_warped_probs);
+ }
+ if (cpi->sf.interp_sf.adaptive_interp_filter_search == 2) {
+ av1_copy(temp_frame_probs->switchable_interp_probs,
+ default_switchable_interp_probs);
+ }
+#endif
+}
+
+static AOM_INLINE void restore_cdef_coding_context(CdefInfo *const dst,
+ const CdefInfo *const src) {
+ dst->cdef_bits = src->cdef_bits;
+ dst->cdef_damping = src->cdef_damping;
+ av1_copy(dst->cdef_strengths, src->cdef_strengths);
+ av1_copy(dst->cdef_uv_strengths, src->cdef_uv_strengths);
+ dst->nb_cdef_strengths = src->nb_cdef_strengths;
}
// Coding context that only needs to be restored when recode loop includes
@@ -882,9 +909,9 @@ static AOM_INLINE void restore_extra_coding_context(AV1_COMP *cpi) {
CODING_CONTEXT *const cc = &cpi->coding_context;
AV1_COMMON *cm = &cpi->common;
cm->lf = cc->lf;
- cm->cdef_info = cc->cdef_info;
+ restore_cdef_coding_context(&cm->cdef_info, &cc->cdef_info);
cpi->rc = cc->rc;
- cpi->mv_stats = cc->mv_stats;
+ cpi->ppi->mv_stats = cc->mv_stats;
}
static AOM_INLINE int equal_dimensions_and_border(const YV12_BUFFER_CONFIG *a,
@@ -964,6 +991,8 @@ static AOM_INLINE void refresh_reference_frames(AV1_COMP *cpi) {
}
}
+void av1_update_film_grain_parameters_seq(struct AV1_PRIMARY *ppi,
+ const AV1EncoderConfig *oxcf);
void av1_update_film_grain_parameters(struct AV1_COMP *cpi,
const AV1EncoderConfig *oxcf);
@@ -972,7 +1001,8 @@ void av1_scale_references(AV1_COMP *cpi, const InterpFilter filter,
void av1_setup_frame(AV1_COMP *cpi);
-BLOCK_SIZE av1_select_sb_size(const AV1_COMP *const cpi);
+BLOCK_SIZE av1_select_sb_size(const AV1EncoderConfig *const oxcf, int width,
+ int height, int number_spatial_layers);
void av1_apply_active_map(AV1_COMP *cpi);
diff --git a/third_party/libaom/source/libaom/av1/encoder/encodetxb.c b/third_party/libaom/source/libaom/av1/encoder/encodetxb.c
index 7b0b281c80..0eb134890e 100644
--- a/third_party/libaom/source/libaom/av1/encoder/encodetxb.c
+++ b/third_party/libaom/source/libaom/av1/encoder/encodetxb.c
@@ -26,11 +26,11 @@
void av1_alloc_txb_buf(AV1_COMP *cpi) {
AV1_COMMON *cm = &cpi->common;
CoeffBufferPool *coeff_buf_pool = &cpi->coeff_buffer_pool;
- int size = ((cm->mi_params.mi_rows >> cm->seq_params.mib_size_log2) + 1) *
- ((cm->mi_params.mi_cols >> cm->seq_params.mib_size_log2) + 1);
+ int size = ((cm->mi_params.mi_rows >> cm->seq_params->mib_size_log2) + 1) *
+ ((cm->mi_params.mi_cols >> cm->seq_params->mib_size_log2) + 1);
const int num_planes = av1_num_planes(cm);
- const int subsampling_x = cm->seq_params.subsampling_x;
- const int subsampling_y = cm->seq_params.subsampling_y;
+ const int subsampling_x = cm->seq_params->subsampling_x;
+ const int subsampling_y = cm->seq_params->subsampling_y;
const int chroma_max_sb_square =
MAX_SB_SQUARE >> (subsampling_x + subsampling_y);
const int num_tcoeffs =
@@ -624,6 +624,7 @@ void av1_update_and_record_txb_context(int plane, int block, int blk_row,
const int coeff_ctx = coeff_contexts[pos];
const tran_low_t v = qcoeff[pos];
const tran_low_t level = abs(v);
+ td->abs_sum_level += level;
if (allow_update_cdf) {
if (c == eob - 1) {
@@ -719,7 +720,7 @@ void av1_update_intra_mb_txb_context(const AV1_COMP *cpi, ThreadData *td,
CB_COEFF_BUFFER *av1_get_cb_coeff_buffer(const struct AV1_COMP *cpi, int mi_row,
int mi_col) {
const AV1_COMMON *const cm = &cpi->common;
- const int mib_size_log2 = cm->seq_params.mib_size_log2;
+ const int mib_size_log2 = cm->seq_params->mib_size_log2;
const int stride = (cm->mi_params.mi_cols >> mib_size_log2) + 1;
const int offset =
(mi_row >> mib_size_log2) * stride + (mi_col >> mib_size_log2);
diff --git a/third_party/libaom/source/libaom/av1/encoder/ethread.c b/third_party/libaom/source/libaom/av1/encoder/ethread.c
index 3735ca3c8b..d274b6b84f 100644
--- a/third_party/libaom/source/libaom/av1/encoder/ethread.c
+++ b/third_party/libaom/source/libaom/av1/encoder/ethread.c
@@ -11,9 +11,11 @@
#include "av1/common/warped_motion.h"
+#include "av1/encoder/bitstream.h"
#include "av1/encoder/encodeframe.h"
#include "av1/encoder/encoder.h"
#include "av1/encoder/encoder_alloc.h"
+#include "av1/encoder/encodeframe_utils.h"
#include "av1/encoder/ethread.h"
#if !CONFIG_REALTIME_ONLY
#include "av1/encoder/firstpass.h"
@@ -52,7 +54,7 @@ static AOM_INLINE void accumulate_rd_opt(ThreadData *td, ThreadData *td_t) {
static AOM_INLINE void update_delta_lf_for_row_mt(AV1_COMP *cpi) {
AV1_COMMON *cm = &cpi->common;
MACROBLOCKD *xd = &cpi->td.mb.e_mbd;
- const int mib_size = cm->seq_params.mib_size;
+ const int mib_size = cm->seq_params->mib_size;
const int frame_lf_count =
av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
for (int row = 0; row < cm->tiles.rows; row++) {
@@ -68,7 +70,8 @@ static AOM_INLINE void update_delta_lf_for_row_mt(AV1_COMP *cpi) {
const int idx_str = cm->mi_params.mi_stride * mi_row + mi_col;
MB_MODE_INFO **mi = cm->mi_params.mi_grid_base + idx_str;
MB_MODE_INFO *mbmi = mi[0];
- if (mbmi->skip_txfm == 1 && (mbmi->bsize == cm->seq_params.sb_size)) {
+ if (mbmi->skip_txfm == 1 &&
+ (mbmi->bsize == cm->seq_params->sb_size)) {
for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id)
mbmi->delta_lf[lf_id] = xd->delta_lf[lf_id];
mbmi->delta_lf_from_base = xd->delta_lf_from_base;
@@ -362,7 +365,7 @@ static AOM_INLINE void switch_tile_and_get_next_job(
*cur_tile_id = tile_id;
const int unit_height = mi_size_high[fp_block_size];
get_next_job(&tile_data[tile_id], current_mi_row,
- is_firstpass ? unit_height : cm->seq_params.mib_size);
+ is_firstpass ? unit_height : cm->seq_params->mib_size);
}
}
@@ -441,13 +444,20 @@ static int enc_row_mt_worker_hook(void *arg1, void *unused) {
const BLOCK_SIZE fp_block_size = cpi->fp_block_size;
int end_of_frame = 0;
+
+ // When master thread does not have a valid job to process, xd->tile_ctx
+ // is not set and it contains NULL pointer. This can result in NULL pointer
+ // access violation if accessed beyond the encode stage. Hence, updating
+ // thread_data->td->mb.e_mbd.tile_ctx is initialized with common frame
+ // context to avoid NULL pointer access in subsequent stages.
+ thread_data->td->mb.e_mbd.tile_ctx = cm->fc;
while (1) {
int current_mi_row = -1;
#if CONFIG_MULTITHREAD
pthread_mutex_lock(enc_row_mt_mutex_);
#endif
if (!get_next_job(&cpi->tile_data[cur_tile_id], &current_mi_row,
- cm->seq_params.mib_size)) {
+ cm->seq_params->mib_size)) {
// No jobs are available for the current tile. Query for the status of
// other tiles and get the next job if available
switch_tile_and_get_next_job(cm, cpi->tile_data, &cur_tile_id,
@@ -470,6 +480,7 @@ static int enc_row_mt_worker_hook(void *arg1, void *unused) {
td->mb.e_mbd.tile_ctx = td->tctx;
td->mb.tile_pb_ctx = &this_tile->tctx;
+ td->abs_sum_level = 0;
if (this_tile->allow_update_cdf) {
td->mb.row_ctx = this_tile->row_ctx;
@@ -482,7 +493,7 @@ static int enc_row_mt_worker_hook(void *arg1, void *unused) {
av1_init_above_context(&cm->above_contexts, av1_num_planes(cm), tile_row,
&td->mb.e_mbd);
- cfl_init(&td->mb.e_mbd.cfl, &cm->seq_params);
+ cfl_init(&td->mb.e_mbd.cfl, cm->seq_params);
if (td->mb.txfm_search_info.txb_rd_records != NULL) {
av1_crc32c_calculator_init(
&td->mb.txfm_search_info.txb_rd_records->mb_rd_record.crc_calculator);
@@ -492,6 +503,7 @@ static int enc_row_mt_worker_hook(void *arg1, void *unused) {
#if CONFIG_MULTITHREAD
pthread_mutex_lock(enc_row_mt_mutex_);
#endif
+ this_tile->abs_sum_level += td->abs_sum_level;
row_mt_sync->num_threads_working--;
#if CONFIG_MULTITHREAD
pthread_mutex_unlock(enc_row_mt_mutex_);
@@ -526,16 +538,12 @@ static int enc_worker_hook(void *arg1, void *unused) {
return 1;
}
-void av1_create_second_pass_workers(AV1_COMP *cpi, int num_workers) {
+#if CONFIG_MULTITHREAD
+void av1_init_mt_sync(AV1_COMP *cpi, int is_first_pass) {
AV1_COMMON *const cm = &cpi->common;
- const AVxWorkerInterface *const winterface = aom_get_worker_interface();
MultiThreadInfo *const mt_info = &cpi->mt_info;
- assert(mt_info->workers != NULL);
- assert(mt_info->tile_thr_data != NULL);
-
-#if CONFIG_MULTITHREAD
- if (cpi->oxcf.row_mt == 1) {
+ if (is_first_pass || cpi->oxcf.row_mt == 1) {
AV1EncRowMultiThreadInfo *enc_row_mt = &mt_info->enc_row_mt;
if (enc_row_mt->mutex_ == NULL) {
CHECK_MEM_ERROR(cm, enc_row_mt->mutex_,
@@ -543,24 +551,39 @@ void av1_create_second_pass_workers(AV1_COMP *cpi, int num_workers) {
if (enc_row_mt->mutex_) pthread_mutex_init(enc_row_mt->mutex_, NULL);
}
}
- AV1GlobalMotionSync *gm_sync = &mt_info->gm_sync;
- if (gm_sync->mutex_ == NULL) {
- CHECK_MEM_ERROR(cm, gm_sync->mutex_,
- aom_malloc(sizeof(*(gm_sync->mutex_))));
- if (gm_sync->mutex_) pthread_mutex_init(gm_sync->mutex_, NULL);
- }
- AV1TemporalFilterSync *tf_sync = &mt_info->tf_sync;
- if (tf_sync->mutex_ == NULL) {
- CHECK_MEM_ERROR(cm, tf_sync->mutex_, aom_malloc(sizeof(*tf_sync->mutex_)));
- if (tf_sync->mutex_) pthread_mutex_init(tf_sync->mutex_, NULL);
- }
- AV1CdefSync *cdef_sync = &mt_info->cdef_sync;
- if (cdef_sync->mutex_ == NULL) {
- CHECK_MEM_ERROR(cm, cdef_sync->mutex_,
- aom_malloc(sizeof(*(cdef_sync->mutex_))));
- if (cdef_sync->mutex_) pthread_mutex_init(cdef_sync->mutex_, NULL);
+
+ if (!is_first_pass) {
+ AV1GlobalMotionSync *gm_sync = &mt_info->gm_sync;
+ if (gm_sync->mutex_ == NULL) {
+ CHECK_MEM_ERROR(cm, gm_sync->mutex_,
+ aom_malloc(sizeof(*(gm_sync->mutex_))));
+ if (gm_sync->mutex_) pthread_mutex_init(gm_sync->mutex_, NULL);
+ }
+#if !CONFIG_REALTIME_ONLY
+ AV1TemporalFilterSync *tf_sync = &mt_info->tf_sync;
+ if (tf_sync->mutex_ == NULL) {
+ CHECK_MEM_ERROR(cm, tf_sync->mutex_,
+ aom_malloc(sizeof(*tf_sync->mutex_)));
+ if (tf_sync->mutex_) pthread_mutex_init(tf_sync->mutex_, NULL);
+ }
+#endif // !CONFIG_REALTIME_ONLY
+ AV1CdefSync *cdef_sync = &mt_info->cdef_sync;
+ if (cdef_sync->mutex_ == NULL) {
+ CHECK_MEM_ERROR(cm, cdef_sync->mutex_,
+ aom_malloc(sizeof(*(cdef_sync->mutex_))));
+ if (cdef_sync->mutex_) pthread_mutex_init(cdef_sync->mutex_, NULL);
+ }
}
-#endif
+}
+#endif // CONFIG_MULTITHREAD
+
+void av1_create_second_pass_workers(AV1_COMP *cpi, int num_workers) {
+ AV1_COMMON *const cm = &cpi->common;
+ const AVxWorkerInterface *const winterface = aom_get_worker_interface();
+ MultiThreadInfo *const mt_info = &cpi->mt_info;
+
+ assert(mt_info->workers != NULL);
+ assert(mt_info->tile_thr_data != NULL);
for (int i = num_workers - 1; i >= 0; i--) {
AVxWorker *const worker = &mt_info->workers[i];
@@ -576,7 +599,7 @@ void av1_create_second_pass_workers(AV1_COMP *cpi, int num_workers) {
// Create threads
if (!winterface->reset(worker))
- aom_internal_error(&cm->error, AOM_CODEC_ERROR,
+ aom_internal_error(cm->error, AOM_CODEC_ERROR,
"Tile encoder thread creation failed");
} else {
// Main thread acts as a worker and uses the thread data in cpi.
@@ -625,10 +648,6 @@ static AOM_INLINE void create_enc_workers(AV1_COMP *cpi, int num_workers) {
alloc_compound_type_rd_buffers(cm, &thread_data->td->comp_rd_buffer);
- CHECK_MEM_ERROR(
- cm, thread_data->td->tmp_conv_dst,
- aom_memalign(32, MAX_SB_SIZE * MAX_SB_SIZE *
- sizeof(*thread_data->td->tmp_conv_dst)));
for (int j = 0; j < 2; ++j) {
CHECK_MEM_ERROR(
cm, thread_data->td->tmp_pred_bufs[j],
@@ -636,9 +655,14 @@ static AOM_INLINE void create_enc_workers(AV1_COMP *cpi, int num_workers) {
sizeof(*thread_data->td->tmp_pred_bufs[j])));
}
+ const int plane_types = PLANE_TYPES >> cm->seq_params->monochrome;
+ CHECK_MEM_ERROR(cm, thread_data->td->pixel_gradient_info,
+ aom_malloc(sizeof(*thread_data->td->pixel_gradient_info) *
+ plane_types * MAX_SB_SQUARE));
+
if (cpi->sf.part_sf.partition_search_type == VAR_BASED_PARTITION) {
const int num_64x64_blocks =
- (cm->seq_params.sb_size == BLOCK_64X64) ? 1 : 4;
+ (cm->seq_params->sb_size == BLOCK_64X64) ? 1 : 4;
CHECK_MEM_ERROR(
cm, thread_data->td->vt64x64,
aom_malloc(sizeof(*thread_data->td->vt64x64) * num_64x64_blocks));
@@ -680,6 +704,10 @@ void av1_create_workers(AV1_COMP *cpi, int num_workers) {
// Set up shared coeff buffers.
av1_setup_shared_coeff_buffer(cm, &thread_data->td->shared_coeff_buf);
+ CHECK_MEM_ERROR(
+ cm, thread_data->td->tmp_conv_dst,
+ aom_memalign(32, MAX_SB_SIZE * MAX_SB_SIZE *
+ sizeof(*thread_data->td->tmp_conv_dst)));
}
++mt_info->num_workers;
}
@@ -724,7 +752,7 @@ static AOM_INLINE void fp_create_enc_workers(AV1_COMP *cpi, int num_workers) {
if (create_workers) {
// Create threads
if (!winterface->reset(worker))
- aom_internal_error(&cm->error, AOM_CODEC_ERROR,
+ aom_internal_error(cm->error, AOM_CODEC_ERROR,
"Tile encoder thread creation failed");
}
} else {
@@ -764,7 +792,7 @@ static AOM_INLINE void sync_enc_workers(MultiThreadInfo *const mt_info,
}
if (had_error)
- aom_internal_error(&cm->error, AOM_CODEC_ERROR,
+ aom_internal_error(cm->error, AOM_CODEC_ERROR,
"Failed to encode tile data");
}
@@ -780,14 +808,15 @@ static AOM_INLINE void accumulate_counters_enc_workers(AV1_COMP *cpi,
!frame_is_intra_only(&cpi->common))
av1_accumulate_cyclic_refresh_counters(cpi->cyclic_refresh,
&thread_data->td->mb);
- if (thread_data->td->mb.txfm_search_info.txb_rd_records) {
- aom_free(thread_data->td->mb.txfm_search_info.txb_rd_records);
- thread_data->td->mb.txfm_search_info.txb_rd_records = NULL;
- }
- if (thread_data->td != &cpi->td &&
- cpi->oxcf.cost_upd_freq.mv < COST_UPD_OFF) {
- aom_free(thread_data->td->mb.mv_costs);
+ if (thread_data->td != &cpi->td) {
+ if (cpi->oxcf.cost_upd_freq.mv < COST_UPD_OFF) {
+ aom_free(thread_data->td->mb.mv_costs);
+ }
+ if (cpi->oxcf.cost_upd_freq.dv < COST_UPD_OFF) {
+ aom_free(thread_data->td->mb.dv_costs);
+ }
}
+ av1_dealloc_mb_data(&cpi->common, &thread_data->td->mb);
// Accumulate counters.
if (i > 0) {
@@ -822,6 +851,7 @@ static AOM_INLINE void prepare_enc_workers(AV1_COMP *cpi, AVxWorkerHook hook,
thread_data->td->intrabc_used = 0;
thread_data->td->deltaq_used = 0;
+ thread_data->td->abs_sum_level = 0;
// Before encoding a frame, copy the thread data from cpi.
if (thread_data->td != &cpi->td) {
@@ -846,15 +876,19 @@ static AOM_INLINE void prepare_enc_workers(AV1_COMP *cpi, AVxWorkerHook hook,
memcpy(thread_data->td->mb.mv_costs, cpi->td.mb.mv_costs,
sizeof(MvCosts));
}
+ if (cpi->oxcf.cost_upd_freq.dv < COST_UPD_OFF) {
+ CHECK_MEM_ERROR(cm, thread_data->td->mb.dv_costs,
+ (IntraBCMVCosts *)aom_malloc(sizeof(IntraBCMVCosts)));
+ memcpy(thread_data->td->mb.dv_costs, cpi->td.mb.dv_costs,
+ sizeof(IntraBCMVCosts));
+ }
}
+ av1_alloc_mb_data(cm, &thread_data->td->mb,
+ cpi->sf.rt_sf.use_nonrd_pick_mode);
+
// Reset cyclic refresh counters.
av1_init_cyclic_refresh_counters(&thread_data->td->mb);
- if (!cpi->sf.rt_sf.use_nonrd_pick_mode) {
- CHECK_MEM_ERROR(cm, thread_data->td->mb.txfm_search_info.txb_rd_records,
- (TxbRdRecords *)aom_malloc(sizeof(TxbRdRecords)));
- }
-
if (thread_data->td->counts != &cpi->counts) {
memcpy(thread_data->td->counts, &cpi->counts, sizeof(cpi->counts));
}
@@ -867,6 +901,8 @@ static AOM_INLINE void prepare_enc_workers(AV1_COMP *cpi, AVxWorkerHook hook,
thread_data->td->mb.tmp_pred_bufs[j] =
thread_data->td->tmp_pred_bufs[j];
}
+ thread_data->td->mb.pixel_gradient_info =
+ thread_data->td->pixel_gradient_info;
thread_data->td->mb.e_mbd.tmp_conv_dst = thread_data->td->mb.tmp_conv_dst;
for (int j = 0; j < 2; ++j) {
@@ -904,11 +940,16 @@ static AOM_INLINE void fp_prepare_enc_workers(AV1_COMP *cpi, AVxWorkerHook hook,
memcpy(thread_data->td->mb.mv_costs, cpi->td.mb.mv_costs,
sizeof(MvCosts));
}
+ if (cpi->oxcf.cost_upd_freq.dv < COST_UPD_OFF) {
+ CHECK_MEM_ERROR(cm, thread_data->td->mb.dv_costs,
+ (IntraBCMVCosts *)aom_malloc(sizeof(IntraBCMVCosts)));
+ memcpy(thread_data->td->mb.dv_costs, cpi->td.mb.dv_costs,
+ sizeof(IntraBCMVCosts));
+ }
}
- if (!cpi->sf.rt_sf.use_nonrd_pick_mode) {
- CHECK_MEM_ERROR(cm, thread_data->td->mb.txfm_search_info.txb_rd_records,
- (TxbRdRecords *)aom_malloc(sizeof(TxbRdRecords)));
- }
+
+ av1_alloc_mb_data(cm, &thread_data->td->mb,
+ cpi->sf.rt_sf.use_nonrd_pick_mode);
}
}
#endif
@@ -1191,13 +1232,15 @@ void av1_fp_encode_tiles_row_mt(AV1_COMP *cpi) {
sync_enc_workers(&cpi->mt_info, cm, num_workers);
for (int i = num_workers - 1; i >= 0; i--) {
EncWorkerData *const thread_data = &cpi->mt_info.tile_thr_data[i];
- if (thread_data->td != &cpi->td &&
- cpi->oxcf.cost_upd_freq.mv < COST_UPD_OFF) {
- aom_free(thread_data->td->mb.mv_costs);
- }
- if (thread_data->td->mb.txfm_search_info.txb_rd_records) {
- aom_free(thread_data->td->mb.txfm_search_info.txb_rd_records);
+ if (thread_data->td != &cpi->td) {
+ if (cpi->oxcf.cost_upd_freq.mv < COST_UPD_OFF) {
+ aom_free(thread_data->td->mb.mv_costs);
+ }
+ if (cpi->oxcf.cost_upd_freq.dv < COST_UPD_OFF) {
+ aom_free(thread_data->td->mb.dv_costs);
+ }
}
+ av1_dealloc_mb_data(cm, &thread_data->td->mb);
}
}
@@ -1277,11 +1320,15 @@ static int tpl_worker_hook(void *arg1, void *unused) {
AV1_COMMON *cm = &cpi->common;
MACROBLOCK *x = &thread_data->td->mb;
MACROBLOCKD *xd = &x->e_mbd;
+ TplTxfmStats *tpl_txfm_stats = &thread_data->td->tpl_txfm_stats;
CommonModeInfoParams *mi_params = &cm->mi_params;
- BLOCK_SIZE bsize = convert_length_to_bsize(cpi->tpl_data.tpl_bsize_1d);
+ BLOCK_SIZE bsize = convert_length_to_bsize(cpi->ppi->tpl_data.tpl_bsize_1d);
TX_SIZE tx_size = max_txsize_lookup[bsize];
int mi_height = mi_size_high[bsize];
- int num_active_workers = cpi->tpl_data.tpl_mt_sync.num_threads_working;
+ int num_active_workers = cpi->ppi->tpl_data.tpl_mt_sync.num_threads_working;
+
+ av1_init_tpl_txfm_stats(tpl_txfm_stats);
+
for (int mi_row = thread_data->start * mi_height; mi_row < mi_params->mi_rows;
mi_row += num_active_workers * mi_height) {
// Motion estimation row boundary
@@ -1290,7 +1337,7 @@ static int tpl_worker_hook(void *arg1, void *unused) {
xd->mb_to_top_edge = -GET_MV_SUBPEL(mi_row * MI_SIZE);
xd->mb_to_bottom_edge =
GET_MV_SUBPEL((mi_params->mi_rows - mi_height - mi_row) * MI_SIZE);
- av1_mc_flow_dispenser_row(cpi, x, mi_row, bsize, tx_size);
+ av1_mc_flow_dispenser_row(cpi, tpl_txfm_stats, x, mi_row, bsize, tx_size);
}
return 1;
}
@@ -1370,6 +1417,24 @@ static AOM_INLINE void prepare_tpl_workers(AV1_COMP *cpi, AVxWorkerHook hook,
// OBMC buffers are used only to init MS params and remain unused when
// called from tpl, hence set the buffers to defaults.
av1_init_obmc_buffer(&thread_data->td->mb.obmc_buffer);
+ thread_data->td->mb.tmp_conv_dst = thread_data->td->tmp_conv_dst;
+ thread_data->td->mb.e_mbd.tmp_conv_dst = thread_data->td->mb.tmp_conv_dst;
+ }
+ }
+}
+
+// Accumulate transform stats after tpl.
+static void tpl_accumulate_txfm_stats(ThreadData *main_td,
+ const MultiThreadInfo *mt_info,
+ int num_workers) {
+ TplTxfmStats *accumulated_stats = &main_td->tpl_txfm_stats;
+ for (int i = num_workers - 1; i >= 0; i--) {
+ AVxWorker *const worker = &mt_info->workers[i];
+ EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;
+ ThreadData *td = thread_data->td;
+ if (td != main_td) {
+ const TplTxfmStats *tpl_txfm_stats = &td->tpl_txfm_stats;
+ av1_accumulate_tpl_txfm_stats(tpl_txfm_stats, accumulated_stats);
}
}
}
@@ -1379,7 +1444,7 @@ void av1_mc_flow_dispenser_mt(AV1_COMP *cpi) {
AV1_COMMON *cm = &cpi->common;
CommonModeInfoParams *mi_params = &cm->mi_params;
MultiThreadInfo *mt_info = &cpi->mt_info;
- TplParams *tpl_data = &cpi->tpl_data;
+ TplParams *tpl_data = &cpi->ppi->tpl_data;
AV1TplRowMultiThreadSync *tpl_sync = &tpl_data->tpl_mt_sync;
int mb_rows = mi_params->mb_rows;
int num_workers =
@@ -1398,6 +1463,7 @@ void av1_mc_flow_dispenser_mt(AV1_COMP *cpi) {
prepare_tpl_workers(cpi, tpl_worker_hook, num_workers);
launch_workers(&cpi->mt_info, num_workers);
sync_enc_workers(&cpi->mt_info, cm, num_workers);
+ tpl_accumulate_txfm_stats(&cpi->td, &cpi->mt_info, num_workers);
}
// Deallocate memory for temporal filter multi-thread synchronization.
@@ -1752,6 +1818,331 @@ void av1_global_motion_estimation_mt(AV1_COMP *cpi) {
}
#endif // !CONFIG_REALTIME_ONLY
+// Compare and order tiles based on absolute sum of tx coeffs.
+static int compare_tile_order(const void *a, const void *b) {
+ const PackBSTileOrder *const tile_a = (const PackBSTileOrder *)a;
+ const PackBSTileOrder *const tile_b = (const PackBSTileOrder *)b;
+
+ if (tile_a->abs_sum_level > tile_b->abs_sum_level)
+ return -1;
+ else if (tile_a->abs_sum_level == tile_b->abs_sum_level)
+ return (tile_a->tile_idx > tile_b->tile_idx ? 1 : -1);
+ else
+ return 1;
+}
+
+// Get next tile index to be processed for pack bitstream
+static AOM_INLINE int get_next_pack_bs_tile_idx(
+ AV1EncPackBSSync *const pack_bs_sync, const int num_tiles) {
+ assert(pack_bs_sync->next_job_idx <= num_tiles);
+ if (pack_bs_sync->next_job_idx == num_tiles) return -1;
+
+ return pack_bs_sync->pack_bs_tile_order[pack_bs_sync->next_job_idx++]
+ .tile_idx;
+}
+
+// Calculates bitstream chunk size based on total buffer size and tile or tile
+// group size.
+static AOM_INLINE size_t get_bs_chunk_size(int tg_or_tile_size,
+ const int frame_or_tg_size,
+ size_t *remain_buf_size,
+ size_t max_buf_size,
+ int is_last_chunk) {
+ size_t this_chunk_size;
+ assert(*remain_buf_size > 0);
+ if (is_last_chunk) {
+ this_chunk_size = *remain_buf_size;
+ *remain_buf_size = 0;
+ } else {
+ const uint64_t size_scale = (uint64_t)max_buf_size * tg_or_tile_size;
+ this_chunk_size = (size_t)(size_scale / frame_or_tg_size);
+ *remain_buf_size -= this_chunk_size;
+ assert(*remain_buf_size > 0);
+ }
+ assert(this_chunk_size > 0);
+ return this_chunk_size;
+}
+
+// Initializes params required for pack bitstream tile.
+static void init_tile_pack_bs_params(AV1_COMP *const cpi, uint8_t *const dst,
+ struct aom_write_bit_buffer *saved_wb,
+ PackBSParams *const pack_bs_params_arr,
+ uint8_t obu_extn_header) {
+ MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
+ AV1_COMMON *const cm = &cpi->common;
+ const CommonTileParams *const tiles = &cm->tiles;
+ const int num_tiles = tiles->cols * tiles->rows;
+ // Fixed size tile groups for the moment
+ const int num_tg_hdrs = cpi->num_tg;
+ // Tile group size in terms of number of tiles.
+ const int tg_size_in_tiles = (num_tiles + num_tg_hdrs - 1) / num_tg_hdrs;
+ uint8_t *tile_dst = dst;
+ uint8_t *tile_data_curr = dst;
+ // Max tile group count can not be more than MAX_TILES.
+ int tg_size_mi[MAX_TILES] = { 0 }; // Size of tile group in mi units
+ int tile_idx;
+ int tg_idx = 0;
+ int tile_count_in_tg = 0;
+ int new_tg = 1;
+
+ // Populate pack bitstream params of all tiles.
+ for (tile_idx = 0; tile_idx < num_tiles; tile_idx++) {
+ const TileInfo *const tile_info = &cpi->tile_data[tile_idx].tile_info;
+ PackBSParams *const pack_bs_params = &pack_bs_params_arr[tile_idx];
+ // Calculate tile size in mi units.
+ const int tile_size_mi = (tile_info->mi_col_end - tile_info->mi_col_start) *
+ (tile_info->mi_row_end - tile_info->mi_row_start);
+ int is_last_tile_in_tg = 0;
+ tile_count_in_tg++;
+ if (tile_count_in_tg == tg_size_in_tiles || tile_idx == (num_tiles - 1))
+ is_last_tile_in_tg = 1;
+
+ // Populate pack bitstream params of this tile.
+ pack_bs_params->curr_tg_hdr_size = 0;
+ pack_bs_params->obu_extn_header = obu_extn_header;
+ pack_bs_params->saved_wb = saved_wb;
+ pack_bs_params->obu_header_size = 0;
+ pack_bs_params->is_last_tile_in_tg = is_last_tile_in_tg;
+ pack_bs_params->new_tg = new_tg;
+ pack_bs_params->tile_col = tile_info->tile_col;
+ pack_bs_params->tile_row = tile_info->tile_row;
+ pack_bs_params->tile_size_mi = tile_size_mi;
+ tg_size_mi[tg_idx] += tile_size_mi;
+
+ if (new_tg) new_tg = 0;
+ if (is_last_tile_in_tg) {
+ tile_count_in_tg = 0;
+ new_tg = 1;
+ tg_idx++;
+ }
+ }
+
+ assert(cpi->available_bs_size > 0);
+ size_t tg_buf_size[MAX_TILES] = { 0 };
+ size_t max_buf_size = cpi->available_bs_size;
+ size_t remain_buf_size = max_buf_size;
+ const int frame_size_mi = cm->mi_params.mi_rows * cm->mi_params.mi_cols;
+
+ tile_idx = 0;
+ // Prepare obu, tile group and frame header of each tile group.
+ for (tg_idx = 0; tg_idx < cpi->num_tg; tg_idx++) {
+ PackBSParams *const pack_bs_params = &pack_bs_params_arr[tile_idx];
+ int is_last_tg = tg_idx == cpi->num_tg - 1;
+ // Prorate bitstream buffer size based on tile group size and available
+ // buffer size. This buffer will be used to store headers and tile data.
+ tg_buf_size[tg_idx] =
+ get_bs_chunk_size(tg_size_mi[tg_idx], frame_size_mi, &remain_buf_size,
+ max_buf_size, is_last_tg);
+
+ pack_bs_params->dst = tile_dst;
+ pack_bs_params->tile_data_curr = tile_dst;
+
+ // Write obu, tile group and frame header at first tile in the tile
+ // group.
+ av1_write_obu_tg_tile_headers(cpi, xd, pack_bs_params, tile_idx);
+ tile_dst += tg_buf_size[tg_idx];
+
+ // Exclude headers from tile group buffer size.
+ tg_buf_size[tg_idx] -= pack_bs_params->curr_tg_hdr_size;
+ tile_idx += tg_size_in_tiles;
+ }
+
+ tg_idx = 0;
+ // Calculate bitstream buffer size of each tile in the tile group.
+ for (tile_idx = 0; tile_idx < num_tiles; tile_idx++) {
+ PackBSParams *const pack_bs_params = &pack_bs_params_arr[tile_idx];
+
+ if (pack_bs_params->new_tg) {
+ max_buf_size = tg_buf_size[tg_idx];
+ remain_buf_size = max_buf_size;
+ }
+
+ // Prorate bitstream buffer size of this tile based on tile size and
+ // available buffer size. For this proration, header size is not accounted.
+ const size_t tile_buf_size = get_bs_chunk_size(
+ pack_bs_params->tile_size_mi, tg_size_mi[tg_idx], &remain_buf_size,
+ max_buf_size, pack_bs_params->is_last_tile_in_tg);
+ pack_bs_params->tile_buf_size = tile_buf_size;
+
+ // Update base address of bitstream buffer for tile and tile group.
+ if (pack_bs_params->new_tg) {
+ tile_dst = pack_bs_params->dst;
+ tile_data_curr = pack_bs_params->tile_data_curr;
+ // Account header size in first tile of a tile group.
+ pack_bs_params->tile_buf_size += pack_bs_params->curr_tg_hdr_size;
+ } else {
+ pack_bs_params->dst = tile_dst;
+ pack_bs_params->tile_data_curr = tile_data_curr;
+ }
+
+ if (pack_bs_params->is_last_tile_in_tg) tg_idx++;
+ tile_dst += pack_bs_params->tile_buf_size;
+ }
+}
+
+// Worker hook function of pack bitstream multithreading.
+static int pack_bs_worker_hook(void *arg1, void *arg2) {
+ EncWorkerData *const thread_data = (EncWorkerData *)arg1;
+ PackBSParams *const pack_bs_params = (PackBSParams *)arg2;
+ AV1_COMP *const cpi = thread_data->cpi;
+ AV1_COMMON *const cm = &cpi->common;
+ AV1EncPackBSSync *const pack_bs_sync = &cpi->mt_info.pack_bs_sync;
+ const CommonTileParams *const tiles = &cm->tiles;
+ const int num_tiles = tiles->cols * tiles->rows;
+
+ while (1) {
+#if CONFIG_MULTITHREAD
+ pthread_mutex_lock(pack_bs_sync->mutex_);
+#endif
+ const int tile_idx = get_next_pack_bs_tile_idx(pack_bs_sync, num_tiles);
+#if CONFIG_MULTITHREAD
+ pthread_mutex_unlock(pack_bs_sync->mutex_);
+#endif
+ if (tile_idx == -1) break;
+ TileDataEnc *this_tile = &cpi->tile_data[tile_idx];
+ thread_data->td->mb.e_mbd.tile_ctx = &this_tile->tctx;
+
+ av1_pack_tile_info(cpi, thread_data->td, &pack_bs_params[tile_idx]);
+ }
+
+ return 1;
+}
+
+// Prepares thread data and workers of pack bitstream multithreading.
+static void prepare_pack_bs_workers(AV1_COMP *const cpi,
+ PackBSParams *const pack_bs_params,
+ AVxWorkerHook hook, const int num_workers) {
+ MultiThreadInfo *const mt_info = &cpi->mt_info;
+ for (int i = num_workers - 1; i >= 0; i--) {
+ AVxWorker *worker = &mt_info->workers[i];
+ EncWorkerData *const thread_data = &mt_info->tile_thr_data[i];
+ if (i == 0) thread_data->td = &cpi->td;
+
+ if (thread_data->td != &cpi->td) thread_data->td->mb = cpi->td.mb;
+
+ thread_data->cpi = cpi;
+ thread_data->start = i;
+ thread_data->thread_id = i;
+ av1_reset_pack_bs_thread_data(thread_data->td);
+
+ worker->hook = hook;
+ worker->data1 = thread_data;
+ worker->data2 = pack_bs_params;
+ }
+
+ AV1_COMMON *const cm = &cpi->common;
+ AV1EncPackBSSync *const pack_bs_sync = &mt_info->pack_bs_sync;
+ const uint16_t num_tiles = cm->tiles.rows * cm->tiles.cols;
+#if CONFIG_MULTITHREAD
+ if (pack_bs_sync->mutex_ == NULL) {
+ CHECK_MEM_ERROR(cm, pack_bs_sync->mutex_,
+ aom_malloc(sizeof(*pack_bs_sync->mutex_)));
+ if (pack_bs_sync->mutex_) pthread_mutex_init(pack_bs_sync->mutex_, NULL);
+ }
+#endif
+ pack_bs_sync->next_job_idx = 0;
+
+ PackBSTileOrder *const pack_bs_tile_order = pack_bs_sync->pack_bs_tile_order;
+ // Reset tile order data of pack bitstream
+ av1_zero_array(pack_bs_tile_order, num_tiles);
+
+ // Populate pack bitstream tile order structure
+ for (uint16_t tile_idx = 0; tile_idx < num_tiles; tile_idx++) {
+ pack_bs_tile_order[tile_idx].abs_sum_level =
+ cpi->tile_data[tile_idx].abs_sum_level;
+ pack_bs_tile_order[tile_idx].tile_idx = tile_idx;
+ }
+
+  // Sort tiles in descending order based on absolute sum of tx coeff levels.
+ qsort(pack_bs_tile_order, num_tiles, sizeof(*pack_bs_tile_order),
+ compare_tile_order);
+}
+
+// Accumulates data after pack bitstream processing.
+static void accumulate_pack_bs_data(
+ AV1_COMP *const cpi, const PackBSParams *const pack_bs_params_arr,
+ uint8_t *const dst, uint32_t *total_size, const FrameHeaderInfo *fh_info,
+ int *const largest_tile_id, unsigned int *max_tile_size,
+ uint32_t *const obu_header_size, uint8_t **tile_data_start,
+ const int num_workers) {
+ const AV1_COMMON *const cm = &cpi->common;
+ const CommonTileParams *const tiles = &cm->tiles;
+ const int tile_count = tiles->cols * tiles->rows;
+ // Fixed size tile groups for the moment
+ size_t curr_tg_data_size = 0;
+ int is_first_tg = 1;
+ uint8_t *curr_tg_start = dst;
+ size_t src_offset = 0;
+ size_t dst_offset = 0;
+
+ for (int tile_idx = 0; tile_idx < tile_count; tile_idx++) {
+ // PackBSParams stores all parameters required to pack tile and header
+ // info.
+ const PackBSParams *const pack_bs_params = &pack_bs_params_arr[tile_idx];
+ uint32_t tile_size = 0;
+
+ if (pack_bs_params->new_tg) {
+ curr_tg_start = dst + *total_size;
+ curr_tg_data_size = pack_bs_params->curr_tg_hdr_size;
+ *tile_data_start += pack_bs_params->curr_tg_hdr_size;
+ *obu_header_size = pack_bs_params->obu_header_size;
+ }
+ curr_tg_data_size +=
+ pack_bs_params->buf.size + (pack_bs_params->is_last_tile_in_tg ? 0 : 4);
+
+ if (pack_bs_params->buf.size > *max_tile_size) {
+ *largest_tile_id = tile_idx;
+ *max_tile_size = (unsigned int)pack_bs_params->buf.size;
+ }
+ tile_size +=
+ (uint32_t)pack_bs_params->buf.size + *pack_bs_params->total_size;
+
+ // Pack all the chunks of tile bitstreams together
+ if (tile_idx != 0) memmove(dst + dst_offset, dst + src_offset, tile_size);
+
+ if (pack_bs_params->is_last_tile_in_tg)
+ av1_write_last_tile_info(
+ cpi, fh_info, pack_bs_params->saved_wb, &curr_tg_data_size,
+ curr_tg_start, &tile_size, tile_data_start, largest_tile_id,
+ &is_first_tg, *obu_header_size, pack_bs_params->obu_extn_header);
+ src_offset += pack_bs_params->tile_buf_size;
+ dst_offset += tile_size;
+ *total_size += tile_size;
+ }
+
+ // Accumulate thread data
+ MultiThreadInfo *const mt_info = &cpi->mt_info;
+ for (int idx = num_workers - 1; idx >= 0; idx--) {
+ ThreadData const *td = mt_info->tile_thr_data[idx].td;
+ av1_accumulate_pack_bs_thread_data(cpi, td);
+ }
+}
+
+void av1_write_tile_obu_mt(
+ AV1_COMP *const cpi, uint8_t *const dst, uint32_t *total_size,
+ struct aom_write_bit_buffer *saved_wb, uint8_t obu_extn_header,
+ const FrameHeaderInfo *fh_info, int *const largest_tile_id,
+ unsigned int *max_tile_size, uint32_t *const obu_header_size,
+ uint8_t **tile_data_start) {
+ MultiThreadInfo *const mt_info = &cpi->mt_info;
+ const int num_workers = mt_info->num_mod_workers[MOD_PACK_BS];
+
+ PackBSParams pack_bs_params[MAX_TILES];
+ uint32_t tile_size[MAX_TILES] = { 0 };
+
+ for (int tile_idx = 0; tile_idx < MAX_TILES; tile_idx++)
+ pack_bs_params[tile_idx].total_size = &tile_size[tile_idx];
+
+ init_tile_pack_bs_params(cpi, dst, saved_wb, pack_bs_params, obu_extn_header);
+ prepare_pack_bs_workers(cpi, pack_bs_params, pack_bs_worker_hook,
+ num_workers);
+ launch_workers(mt_info, num_workers);
+ sync_enc_workers(mt_info, &cpi->common, num_workers);
+ accumulate_pack_bs_data(cpi, pack_bs_params, dst, total_size, fh_info,
+ largest_tile_id, max_tile_size, obu_header_size,
+ tile_data_start, num_workers);
+}
+
// Deallocate memory for CDEF search multi-thread synchronization.
void av1_cdef_mt_dealloc(AV1CdefSync *cdef_sync) {
(void)cdef_sync;
@@ -1780,6 +2171,9 @@ static void update_next_job_info(AV1CdefSync *cdef_sync, int nvfb, int nhfb) {
// Initializes cdef_sync parameters.
static AOM_INLINE void cdef_reset_job_info(AV1CdefSync *cdef_sync) {
+#if CONFIG_MULTITHREAD
+ if (cdef_sync->mutex_) pthread_mutex_init(cdef_sync->mutex_, NULL);
+#endif // CONFIG_MULTITHREAD
cdef_sync->end_of_frame = 0;
cdef_sync->fbr = 0;
cdef_sync->fbc = 0;
@@ -1896,6 +2290,12 @@ static AOM_INLINE int compute_num_lr_workers(AV1_COMP *cpi) {
return compute_num_enc_workers(cpi, cpi->oxcf.max_threads);
}
+// Computes num_workers for pack bitstream multi-threading.
+static AOM_INLINE int compute_num_pack_bs_workers(AV1_COMP *cpi) {
+ if (cpi->oxcf.max_threads <= 1) return 1;
+ return compute_num_enc_tile_mt_workers(&cpi->common, cpi->oxcf.max_threads);
+}
+
int compute_num_mod_workers(AV1_COMP *cpi, MULTI_THREADED_MODULES mod_name) {
int num_mod_workers = 0;
switch (mod_name) {
@@ -1915,7 +2315,9 @@ int compute_num_mod_workers(AV1_COMP *cpi, MULTI_THREADED_MODULES mod_name) {
case MOD_CDEF_SEARCH:
num_mod_workers = compute_num_cdef_workers(cpi);
break;
+ case MOD_CDEF: num_mod_workers = compute_num_cdef_workers(cpi); break;
case MOD_LR: num_mod_workers = compute_num_lr_workers(cpi); break;
+ case MOD_PACK_BS: num_mod_workers = compute_num_pack_bs_workers(cpi); break;
default: assert(0); break;
}
return (num_mod_workers);
diff --git a/third_party/libaom/source/libaom/av1/encoder/ethread.h b/third_party/libaom/source/libaom/av1/encoder/ethread.h
index 55e7f7be39..c2ab864690 100644
--- a/third_party/libaom/source/libaom/av1/encoder/ethread.h
+++ b/third_party/libaom/source/libaom/av1/encoder/ethread.h
@@ -80,6 +80,10 @@ int av1_get_max_num_workers(AV1_COMP *cpi);
void av1_create_workers(AV1_COMP *cpi, int num_workers);
+#if CONFIG_MULTITHREAD
+void av1_init_mt_sync(AV1_COMP *cpi, int is_first_pass);
+#endif // CONFIG_MULTITHREAD
+
void av1_create_second_pass_workers(AV1_COMP *cpi, int num_workers);
void av1_cdef_mse_calc_frame_mt(AV1_COMMON *cm, MultiThreadInfo *mt_info,
@@ -87,6 +91,13 @@ void av1_cdef_mse_calc_frame_mt(AV1_COMMON *cm, MultiThreadInfo *mt_info,
void av1_cdef_mt_dealloc(AV1CdefSync *cdef_sync);
+void av1_write_tile_obu_mt(
+ AV1_COMP *const cpi, uint8_t *const dst, uint32_t *total_size,
+ struct aom_write_bit_buffer *saved_wb, uint8_t obu_extn_header,
+ const FrameHeaderInfo *fh_info, int *const largest_tile_id,
+ unsigned int *max_tile_size, uint32_t *const obu_header_size,
+ uint8_t **tile_data_start);
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/third_party/libaom/source/libaom/av1/encoder/external_partition.c b/third_party/libaom/source/libaom/av1/encoder/external_partition.c
new file mode 100644
index 0000000000..542b2bb878
--- /dev/null
+++ b/third_party/libaom/source/libaom/av1/encoder/external_partition.c
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2021, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#include "av1/common/common.h"
+#include "av1/encoder/external_partition.h"
+
+aom_codec_err_t av1_ext_part_create(aom_ext_part_funcs_t funcs,
+ aom_ext_part_config_t config,
+ ExtPartController *ext_part_controller) {
+ if (ext_part_controller == NULL) {
+ return AOM_CODEC_INVALID_PARAM;
+ }
+ ext_part_controller->funcs = funcs;
+ ext_part_controller->config = config;
+ const aom_ext_part_status_t status = ext_part_controller->funcs.create_model(
+ ext_part_controller->funcs.priv, &ext_part_controller->config,
+ &ext_part_controller->model);
+ if (status == AOM_EXT_PART_ERROR) {
+ return AOM_CODEC_ERROR;
+ } else if (status == AOM_EXT_PART_TEST) {
+ ext_part_controller->test_mode = 1;
+ ext_part_controller->ready = 0;
+ return AOM_CODEC_OK;
+ }
+ assert(status == AOM_EXT_PART_OK);
+ ext_part_controller->ready = 1;
+ return AOM_CODEC_OK;
+}
+
+aom_codec_err_t av1_ext_part_init(ExtPartController *ext_part_controller) {
+ if (ext_part_controller == NULL) {
+ return AOM_CODEC_INVALID_PARAM;
+ }
+ av1_zero(ext_part_controller);
+ return AOM_CODEC_OK;
+}
+
+aom_codec_err_t av1_ext_part_delete(ExtPartController *ext_part_controller) {
+ if (ext_part_controller == NULL) {
+ return AOM_CODEC_INVALID_PARAM;
+ }
+ if (ext_part_controller->ready) {
+ const aom_ext_part_status_t status =
+ ext_part_controller->funcs.delete_model(ext_part_controller->model);
+ if (status != AOM_EXT_PART_OK) {
+ return AOM_CODEC_ERROR;
+ }
+ }
+ return av1_ext_part_init(ext_part_controller);
+}
+
+bool av1_ext_part_get_partition_decision(ExtPartController *ext_part_controller,
+ aom_partition_decision_t *decision) {
+ assert(ext_part_controller != NULL);
+ assert(ext_part_controller->ready);
+ assert(decision != NULL);
+ const aom_ext_part_status_t status =
+ ext_part_controller->funcs.get_partition_decision(
+ ext_part_controller->model, decision);
+ if (status != AOM_EXT_PART_OK) return false;
+ return true;
+}
+
+bool av1_ext_part_send_partition_stats(ExtPartController *ext_part_controller,
+ const aom_partition_stats_t *stats) {
+ assert(ext_part_controller != NULL);
+ assert(ext_part_controller->ready);
+ assert(stats != NULL);
+ const aom_ext_part_status_t status =
+ ext_part_controller->funcs.send_partition_stats(
+ ext_part_controller->model, stats);
+ if (status != AOM_EXT_PART_OK) return false;
+ return true;
+}
+
+bool av1_ext_part_send_features(ExtPartController *ext_part_controller,
+ const aom_partition_features_t *features) {
+ assert(ext_part_controller != NULL);
+ assert(ext_part_controller->ready);
+ assert(features != NULL);
+ const aom_ext_part_status_t status = ext_part_controller->funcs.send_features(
+ ext_part_controller->model, features);
+ if (status != AOM_EXT_PART_OK) return false;
+ return true;
+}
diff --git a/third_party/libaom/source/libaom/av1/encoder/external_partition.h b/third_party/libaom/source/libaom/av1/encoder/external_partition.h
new file mode 100644
index 0000000000..20f03ed752
--- /dev/null
+++ b/third_party/libaom/source/libaom/av1/encoder/external_partition.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2021, Alliance for Open Media. All rights reserved
+ *
+ * This source code is subject to the terms of the BSD 2 Clause License and
+ * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
+ * was not distributed with this source code in the LICENSE file, you can
+ * obtain it at www.aomedia.org/license/software. If the Alliance for Open
+ * Media Patent License 1.0 was not distributed with this source code in the
+ * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
+ */
+
+#ifndef AOM_AV1_ENCODER_EXTERNAL_PARTITION_H_
+#define AOM_AV1_ENCODER_EXTERNAL_PARTITION_H_
+
+#include <stdbool.h>
+
+#include "aom/aom_codec.h"
+#include "aom/aom_external_partition.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*!\cond */
+
+typedef struct ExtPartController {
+ int ready;
+ int test_mode;
+ aom_ext_part_config_t config;
+ aom_ext_part_model_t model;
+ aom_ext_part_funcs_t funcs;
+} ExtPartController;
+
+aom_codec_err_t av1_ext_part_create(aom_ext_part_funcs_t funcs,
+ aom_ext_part_config_t config,
+ ExtPartController *ext_part_controller);
+
+aom_codec_err_t av1_ext_part_init(ExtPartController *ext_part_controller);
+
+aom_codec_err_t av1_ext_part_delete(ExtPartController *ext_part_controller);
+
+bool av1_ext_part_get_partition_decision(ExtPartController *ext_part_controller,
+ aom_partition_decision_t *decision);
+
+bool av1_ext_part_send_partition_stats(ExtPartController *ext_part_controller,
+ const aom_partition_stats_t *stats);
+
+bool av1_ext_part_send_features(ExtPartController *ext_part_controller,
+ const aom_partition_features_t *features);
+
+/*!\endcond */
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // AOM_AV1_ENCODER_EXTERNAL_PARTITION_H_
diff --git a/third_party/libaom/source/libaom/av1/encoder/firstpass.c b/third_party/libaom/source/libaom/av1/encoder/firstpass.c
index ff6814d04c..662b42c822 100644
--- a/third_party/libaom/source/libaom/av1/encoder/firstpass.c
+++ b/third_party/libaom/source/libaom/av1/encoder/firstpass.c
@@ -27,6 +27,7 @@
#include "av1/common/entropymv.h"
#include "av1/common/quant_common.h"
#include "av1/common/reconinter.h" // av1_setup_dst_planes()
+#include "av1/common/reconintra.h"
#include "av1/common/txb_common.h"
#include "av1/encoder/aq_variance.h"
#include "av1/encoder/av1_quantize.h"
@@ -54,6 +55,8 @@
#define NCOUNT_INTRA_THRESH 8192
#define NCOUNT_INTRA_FACTOR 3
+#define INVALID_FP_STATS_TO_PREDICT_FLAT_GOP -1
+
static AOM_INLINE void output_stats(FIRSTPASS_STATS *stats,
struct aom_codec_pkt_list *pktlist) {
struct aom_codec_cx_pkt pkt;
@@ -108,6 +111,9 @@ void av1_twopass_zero_stats(FIRSTPASS_STATS *section) {
section->new_mv_count = 0.0;
section->count = 0.0;
section->duration = 1.0;
+ section->is_flash = 0;
+ section->noise_var = 0;
+ section->cor_coeff = 1.0;
}
void av1_accumulate_stats(FIRSTPASS_STATS *section,
@@ -118,9 +124,11 @@ void av1_accumulate_stats(FIRSTPASS_STATS *section,
section->frame_avg_wavelet_energy += frame->frame_avg_wavelet_energy;
section->coded_error += frame->coded_error;
section->sr_coded_error += frame->sr_coded_error;
+ section->tr_coded_error += frame->tr_coded_error;
section->pcnt_inter += frame->pcnt_inter;
section->pcnt_motion += frame->pcnt_motion;
section->pcnt_second_ref += frame->pcnt_second_ref;
+ section->pcnt_third_ref += frame->pcnt_third_ref;
section->pcnt_neutral += frame->pcnt_neutral;
section->intra_skip_pct += frame->intra_skip_pct;
section->inactive_zone_rows += frame->inactive_zone_rows;
@@ -177,8 +185,9 @@ static int get_num_mbs(const BLOCK_SIZE fp_block_size,
}
void av1_end_first_pass(AV1_COMP *cpi) {
- if (cpi->twopass.stats_buf_ctx->total_stats)
- output_stats(cpi->twopass.stats_buf_ctx->total_stats, cpi->output_pkt_list);
+ if (cpi->ppi->twopass.stats_buf_ctx->total_stats && !cpi->ppi->lap_enabled)
+ output_stats(cpi->ppi->twopass.stats_buf_ctx->total_stats,
+ cpi->ppi->output_pkt_list);
}
static aom_variance_fn_t get_block_variance_fn(BLOCK_SIZE bsize) {
@@ -261,15 +270,12 @@ static AOM_INLINE void first_pass_motion_search(AV1_COMP *cpi, MACROBLOCK *x,
const BLOCK_SIZE bsize = xd->mi[0]->bsize;
const int new_mv_mode_penalty = NEW_MV_MODE_PENALTY;
const int sr = get_search_range(&cpi->initial_dimensions);
- const int step_param = 3 + sr;
+ const int step_param = cpi->sf.fp_sf.reduce_mv_step_param + sr;
const search_site_config *first_pass_search_sites =
cpi->mv_search_params.search_site_cfg[SS_CFG_FPF];
const int fine_search_interval =
cpi->is_screen_content_type && cpi->common.features.allow_intrabc;
- if (fine_search_interval) {
- av1_set_speed_features_framesize_independent(cpi, cpi->oxcf.speed);
- }
FULLPEL_MOTION_SEARCH_PARAMS ms_params;
av1_make_default_fullpel_ms_params(&ms_params, cpi, x, bsize, ref_mv,
first_pass_search_sites,
@@ -281,7 +287,7 @@ static AOM_INLINE void first_pass_motion_search(AV1_COMP *cpi, MACROBLOCK *x,
&this_best_mv, NULL);
if (tmp_err < INT_MAX) {
- aom_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[bsize];
+ aom_variance_fn_ptr_t v_fn_ptr = cpi->ppi->fn_ptr[bsize];
const MSBuffers *ms_buffers = &ms_params.ms_buffers;
tmp_err = av1_get_mvpred_sse(&ms_params.mv_cost_params, this_best_mv,
&v_fn_ptr, ms_buffers->src, ms_buffers->ref) +
@@ -355,6 +361,86 @@ static double raw_motion_error_stdev(int *raw_motion_err_list,
return raw_err_stdev;
}
+static AOM_INLINE int do_third_ref_motion_search(const RateControlCfg *rc_cfg,
+ const GFConfig *gf_cfg) {
+ return use_ml_model_to_decide_flat_gop(rc_cfg) && can_disable_altref(gf_cfg);
+}
+
+static AOM_INLINE int calc_wavelet_energy(const AV1EncoderConfig *oxcf) {
+ return (use_ml_model_to_decide_flat_gop(&oxcf->rc_cfg) &&
+ can_disable_altref(&oxcf->gf_cfg)) ||
+ (oxcf->q_cfg.deltaq_mode == DELTA_Q_PERCEPTUAL);
+}
+typedef struct intra_pred_block_pass1_args {
+ const SequenceHeader *seq_params;
+ MACROBLOCK *x;
+} intra_pred_block_pass1_args;
+
+static INLINE void copy_rect(uint8_t *dst, int dstride, const uint8_t *src,
+ int sstride, int width, int height, int use_hbd) {
+#if CONFIG_AV1_HIGHBITDEPTH
+ if (use_hbd) {
+ aom_highbd_convolve_copy(CONVERT_TO_SHORTPTR(src), sstride,
+ CONVERT_TO_SHORTPTR(dst), dstride, width, height);
+ } else {
+ aom_convolve_copy(src, sstride, dst, dstride, width, height);
+ }
+#else
+ (void)use_hbd;
+ aom_convolve_copy(src, sstride, dst, dstride, width, height);
+#endif
+}
+
+static void first_pass_intra_pred_and_calc_diff(int plane, int block,
+ int blk_row, int blk_col,
+ BLOCK_SIZE plane_bsize,
+ TX_SIZE tx_size, void *arg) {
+ (void)block;
+ struct intra_pred_block_pass1_args *const args = arg;
+ MACROBLOCK *const x = args->x;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ MACROBLOCKD_PLANE *const pd = &xd->plane[plane];
+ MACROBLOCK_PLANE *const p = &x->plane[plane];
+ const int dst_stride = pd->dst.stride;
+ uint8_t *dst = &pd->dst.buf[(blk_row * dst_stride + blk_col) << MI_SIZE_LOG2];
+ const MB_MODE_INFO *const mbmi = xd->mi[0];
+ const SequenceHeader *seq_params = args->seq_params;
+ const int src_stride = p->src.stride;
+ uint8_t *src = &p->src.buf[(blk_row * src_stride + blk_col) << MI_SIZE_LOG2];
+
+ av1_predict_intra_block(
+ xd, seq_params->sb_size, seq_params->enable_intra_edge_filter, pd->width,
+ pd->height, tx_size, mbmi->mode, 0, 0, FILTER_INTRA_MODES, src,
+ src_stride, dst, dst_stride, blk_col, blk_row, plane);
+
+ av1_subtract_txb(x, plane, plane_bsize, blk_col, blk_row, tx_size);
+}
+
+static void first_pass_predict_intra_block_for_luma_plane(
+ const SequenceHeader *seq_params, MACROBLOCK *x, BLOCK_SIZE bsize) {
+ assert(bsize < BLOCK_SIZES_ALL);
+ const MACROBLOCKD *const xd = &x->e_mbd;
+ const int plane = AOM_PLANE_Y;
+ const MACROBLOCKD_PLANE *const pd = &xd->plane[plane];
+ const int ss_x = pd->subsampling_x;
+ const int ss_y = pd->subsampling_y;
+ const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ss_x, ss_y);
+ const int dst_stride = pd->dst.stride;
+ uint8_t *dst = pd->dst.buf;
+ const MACROBLOCK_PLANE *const p = &x->plane[plane];
+ const int src_stride = p->src.stride;
+ const uint8_t *src = p->src.buf;
+
+ intra_pred_block_pass1_args args = { seq_params, x };
+ av1_foreach_transformed_block_in_plane(
+ xd, plane_bsize, plane, first_pass_intra_pred_and_calc_diff, &args);
+
+ // copy source data to recon buffer, as the recon buffer will be used as a
+ // reference frame subsequently.
+ copy_rect(dst, dst_stride, src, src_stride, block_size_wide[bsize],
+ block_size_high[bsize], seq_params->use_highbitdepth);
+}
+
#define UL_INTRA_THRESH 50
#define INVALID_ROW -1
// Computes and returns the intra pred error of a block.
@@ -388,11 +474,10 @@ static int firstpass_intra_prediction(
const int qindex, FRAME_STATS *const stats) {
const AV1_COMMON *const cm = &cpi->common;
const CommonModeInfoParams *const mi_params = &cm->mi_params;
- const SequenceHeader *const seq_params = &cm->seq_params;
+ const SequenceHeader *const seq_params = cm->seq_params;
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *const xd = &x->e_mbd;
const int unit_scale = mi_size_wide[fp_block_size];
- const int use_dc_pred = (unit_col || unit_row) && (!unit_col || !unit_row);
const int num_planes = av1_num_planes(cm);
const BLOCK_SIZE bsize =
get_bsize(mi_params, fp_block_size, unit_row, unit_col);
@@ -412,9 +497,12 @@ static int firstpass_intra_prediction(
xd->mi[0]->segment_id = 0;
xd->lossless[xd->mi[0]->segment_id] = (qindex == 0);
xd->mi[0]->mode = DC_PRED;
- xd->mi[0]->tx_size = use_dc_pred ? max_txsize_lookup[bsize] : TX_4X4;
+ xd->mi[0]->tx_size = TX_4X4;
- av1_encode_intra_block_plane(cpi, x, bsize, 0, DRY_RUN_NORMAL, 0);
+ if (cpi->sf.fp_sf.disable_recon)
+ first_pass_predict_intra_block_for_luma_plane(seq_params, x, bsize);
+ else
+ av1_encode_intra_block_plane(cpi, x, bsize, 0, DRY_RUN_NORMAL, 0);
int this_intra_error = aom_get_mb_ss(x->plane[0].src_diff);
if (seq_params->use_highbitdepth) {
switch (seq_params->bit_depth) {
@@ -480,16 +568,22 @@ static int firstpass_intra_prediction(
// Accumulate the intra error.
stats->intra_error += (int64_t)this_intra_error;
- const int hbd = is_cur_buf_hbd(xd);
- const int stride = x->plane[0].src.stride;
- const int num_8x8_rows = block_size_high[fp_block_size] / 8;
- const int num_8x8_cols = block_size_wide[fp_block_size] / 8;
- const uint8_t *buf = x->plane[0].src.buf;
- for (int r8 = 0; r8 < num_8x8_rows; ++r8) {
- for (int c8 = 0; c8 < num_8x8_cols; ++c8) {
- stats->frame_avg_wavelet_energy += av1_haar_ac_sad_8x8_uint8_input(
- buf + c8 * 8 + r8 * 8 * stride, stride, hbd);
- }
+ // Stats based on wavelet energy is used in the following cases :
+ // 1. ML model which predicts if a flat structure (golden-frame only structure
+ // without ALT-REF and Internal-ARFs) is better. This ML model is enabled in
+ // constant quality mode under certain conditions.
+ // 2. Delta qindex mode is set as DELTA_Q_PERCEPTUAL.
+ // Thus, wavelet energy calculation is enabled for the above cases.
+ if (calc_wavelet_energy(&cpi->oxcf)) {
+ const int hbd = is_cur_buf_hbd(xd);
+ const int stride = x->plane[0].src.stride;
+ const int num_8x8_rows = block_size_high[fp_block_size] / 8;
+ const int num_8x8_cols = block_size_wide[fp_block_size] / 8;
+ const uint8_t *buf = x->plane[0].src.buf;
+ stats->frame_avg_wavelet_energy += av1_haar_ac_sad_mxn_uint8_input(
+ buf, stride, hbd, num_8x8_rows, num_8x8_cols);
+ } else {
+ stats->frame_avg_wavelet_energy = INVALID_FP_STATS_TO_PREDICT_FLAT_GOP;
}
return this_intra_error;
@@ -516,13 +610,13 @@ static int get_prediction_error_bitdepth(const int is_high_bitdepth,
static void accumulate_mv_stats(const MV best_mv, const FULLPEL_MV mv,
const int mb_row, const int mb_col,
const int mb_rows, const int mb_cols,
- MV *last_mv, FRAME_STATS *stats) {
+ MV *last_non_zero_mv, FRAME_STATS *stats) {
if (is_zero_mv(&best_mv)) return;
++stats->mv_count;
// Non-zero vector, was it different from the last non zero vector?
- if (!is_equal_mv(&best_mv, last_mv)) ++stats->new_mv_count;
- *last_mv = best_mv;
+ if (!is_equal_mv(&best_mv, last_non_zero_mv)) ++stats->new_mv_count;
+ *last_non_zero_mv = best_mv;
// Does the row vector point inwards or outwards?
if (mb_row < mb_rows / 2) {
@@ -555,7 +649,6 @@ static void accumulate_mv_stats(const MV best_mv, const FULLPEL_MV mv,
}
}
-#define LOW_MOTION_ERROR_THRESH 25
// Computes and returns the inter prediction error from the last frame.
// Computes inter prediction errors from the golden and alt ref frames and
// Updates stats accordingly.
@@ -576,8 +669,9 @@ static void accumulate_mv_stats(const MV best_mv, const FULLPEL_MV mv,
// this_intra_error: the intra prediction error of this block.
// raw_motion_err_counts: the count of raw motion vectors.
// raw_motion_err_list: the array that records the raw motion error.
-// best_ref_mv: best reference mv found so far.
-// last_mv: last mv.
+// ref_mv: the reference used to start the motion search
+// best_mv: the best mv found
+// last_non_zero_mv: the last non zero mv found in this tile row.
// stats: frame encoding stats.
// Modifies:
// raw_motion_err_list
@@ -593,8 +687,8 @@ static int firstpass_inter_prediction(
const int unit_col, const int recon_yoffset, const int recon_uvoffset,
const int src_yoffset, const int alt_ref_frame_yoffset,
const BLOCK_SIZE fp_block_size, const int this_intra_error,
- const int raw_motion_err_counts, int *raw_motion_err_list, MV *best_ref_mv,
- MV *last_mv, FRAME_STATS *stats) {
+ const int raw_motion_err_counts, int *raw_motion_err_list, const MV ref_mv,
+ MV *best_mv, MV *last_non_zero_mv, FRAME_STATS *stats) {
int this_inter_error = this_intra_error;
AV1_COMMON *const cm = &cpi->common;
const CommonModeInfoParams *const mi_params = &cm->mi_params;
@@ -612,7 +706,6 @@ static int firstpass_inter_prediction(
const int unit_cols = get_unit_cols(fp_block_size, mi_params->mb_cols);
// Assume 0,0 motion with no mv overhead.
FULLPEL_MV mv = kZeroFullMv;
- FULLPEL_MV tmp_mv = kZeroFullMv;
xd->plane[0].pre[0].buf = last_frame->y_buffer + recon_yoffset;
// Set up limit values for motion vectors to prevent them extending
// outside the UMV borders.
@@ -636,15 +729,15 @@ static int firstpass_inter_prediction(
&unscaled_last_source_buf_2d);
raw_motion_err_list[raw_motion_err_counts] = raw_motion_error;
- // TODO(pengchong): Replace the hard-coded threshold
- if (raw_motion_error > LOW_MOTION_ERROR_THRESH || cpi->oxcf.speed <= 2) {
+ if (raw_motion_error > cpi->sf.fp_sf.skip_motion_search_threshold) {
// Test last reference frame using the previous best mv as the
// starting point (best reference) for the search.
- first_pass_motion_search(cpi, x, best_ref_mv, &mv, &motion_error);
+ first_pass_motion_search(cpi, x, &ref_mv, &mv, &motion_error);
// If the current best reference mv is not centered on 0,0 then do a
// 0,0 based search as well.
- if (!is_zero_mv(best_ref_mv)) {
+ if (!is_zero_mv(&ref_mv)) {
+ FULLPEL_MV tmp_mv = kZeroFullMv;
int tmp_err = INT_MAX;
first_pass_motion_search(cpi, x, &kZeroMv, &tmp_mv, &tmp_err);
@@ -657,6 +750,7 @@ static int firstpass_inter_prediction(
// Motion search in 2nd reference frame.
int gf_motion_error = motion_error;
if ((current_frame->frame_number > 1) && golden_frame != NULL) {
+ FULLPEL_MV tmp_mv = kZeroFullMv;
// Assume 0,0 motion with no mv overhead.
xd->plane[0].pre[0].buf = golden_frame->y_buffer + recon_yoffset;
xd->plane[0].pre[0].stride = golden_frame->y_stride;
@@ -682,13 +776,22 @@ static int firstpass_inter_prediction(
// Motion search in 3rd reference frame.
int alt_motion_error = motion_error;
- if (alt_ref_frame != NULL) {
- xd->plane[0].pre[0].buf = alt_ref_frame->y_buffer + alt_ref_frame_yoffset;
- xd->plane[0].pre[0].stride = alt_ref_frame->y_stride;
- alt_motion_error =
- get_prediction_error_bitdepth(is_high_bitdepth, bitdepth, bsize,
- &x->plane[0].src, &xd->plane[0].pre[0]);
- first_pass_motion_search(cpi, x, &kZeroMv, &tmp_mv, &alt_motion_error);
+ // The ML model to predict if a flat structure (golden-frame only structure
+ // without ALT-REF and Internal-ARFs) is better requires stats based on
+ // motion search w.r.t 3rd reference frame in the first pass. As the ML
+ // model is enabled under certain conditions, motion search in 3rd reference
+ // frame is also enabled for those cases.
+ if (do_third_ref_motion_search(&cpi->oxcf.rc_cfg, &cpi->oxcf.gf_cfg)) {
+ if (alt_ref_frame != NULL) {
+ FULLPEL_MV tmp_mv = kZeroFullMv;
+ xd->plane[0].pre[0].buf =
+ alt_ref_frame->y_buffer + alt_ref_frame_yoffset;
+ xd->plane[0].pre[0].stride = alt_ref_frame->y_stride;
+ alt_motion_error = get_prediction_error_bitdepth(
+ is_high_bitdepth, bitdepth, bsize, &x->plane[0].src,
+ &xd->plane[0].pre[0]);
+ first_pass_motion_search(cpi, x, &kZeroMv, &tmp_mv, &alt_motion_error);
+ }
}
if (alt_motion_error < motion_error && alt_motion_error < gf_motion_error &&
alt_motion_error < this_intra_error) {
@@ -716,8 +819,7 @@ static int firstpass_inter_prediction(
}
// Start by assuming that intra mode is best.
- best_ref_mv->row = 0;
- best_ref_mv->col = 0;
+ *best_mv = kZeroMv;
if (motion_error <= this_intra_error) {
aom_clear_system_state();
@@ -736,28 +838,30 @@ static int firstpass_inter_prediction(
(double)motion_error / DOUBLE_DIVIDE_CHECK((double)this_intra_error);
}
- const MV best_mv = get_mv_from_fullmv(&mv);
+ *best_mv = get_mv_from_fullmv(&mv);
this_inter_error = motion_error;
xd->mi[0]->mode = NEWMV;
- xd->mi[0]->mv[0].as_mv = best_mv;
+ xd->mi[0]->mv[0].as_mv = *best_mv;
xd->mi[0]->tx_size = TX_4X4;
xd->mi[0]->ref_frame[0] = LAST_FRAME;
xd->mi[0]->ref_frame[1] = NONE_FRAME;
- av1_enc_build_inter_predictor(cm, xd, unit_row * unit_scale,
- unit_col * unit_scale, NULL, bsize,
- AOM_PLANE_Y, AOM_PLANE_Y);
- av1_encode_sby_pass1(cpi, x, bsize);
- stats->sum_mvr += best_mv.row;
- stats->sum_mvr_abs += abs(best_mv.row);
- stats->sum_mvc += best_mv.col;
- stats->sum_mvc_abs += abs(best_mv.col);
- stats->sum_mvrs += best_mv.row * best_mv.row;
- stats->sum_mvcs += best_mv.col * best_mv.col;
+
+ if (cpi->sf.fp_sf.disable_recon == 0) {
+ av1_enc_build_inter_predictor(cm, xd, unit_row * unit_scale,
+ unit_col * unit_scale, NULL, bsize,
+ AOM_PLANE_Y, AOM_PLANE_Y);
+ av1_encode_sby_pass1(cpi, x, bsize);
+ }
+ stats->sum_mvr += best_mv->row;
+ stats->sum_mvr_abs += abs(best_mv->row);
+ stats->sum_mvc += best_mv->col;
+ stats->sum_mvc_abs += abs(best_mv->col);
+ stats->sum_mvrs += best_mv->row * best_mv->row;
+ stats->sum_mvcs += best_mv->col * best_mv->col;
++stats->inter_count;
- *best_ref_mv = best_mv;
- accumulate_mv_stats(best_mv, mv, unit_row, unit_col, unit_rows, unit_cols,
- last_mv, stats);
+ accumulate_mv_stats(*best_mv, mv, unit_row, unit_col, unit_rows, unit_cols,
+ last_non_zero_mv, stats);
}
return this_inter_error;
@@ -783,7 +887,7 @@ static void update_firstpass_stats(AV1_COMP *cpi,
const int frame_number,
const int64_t ts_duration,
const BLOCK_SIZE fp_block_size) {
- TWO_PASS *twopass = &cpi->twopass;
+ TWO_PASS *twopass = &cpi->ppi->twopass;
AV1_COMMON *const cm = &cpi->common;
const CommonModeInfoParams *const mi_params = &cm->mi_params;
FIRSTPASS_STATS *this_frame_stats = twopass->stats_buf_ctx->stats_in_end;
@@ -817,6 +921,9 @@ static void update_firstpass_stats(AV1_COMP *cpi,
fps.inactive_zone_rows = (double)stats->image_data_start_row;
fps.inactive_zone_cols = (double)0; // TODO(paulwilkins): fix
fps.raw_error_stdev = raw_err_stdev;
+ fps.is_flash = 0;
+ fps.noise_var = (double)0;
+ fps.cor_coeff = (double)1.0;
if (stats->mv_count > 0) {
fps.MVr = (double)stats->sum_mvr / stats->mv_count;
@@ -849,12 +956,20 @@ static void update_firstpass_stats(AV1_COMP *cpi,
// cpi->source_time_stamp.
fps.duration = (double)ts_duration;
+ // Invalidate the stats related to third ref motion search if not valid.
+ // This helps to print a warning in second pass encoding.
+ if (do_third_ref_motion_search(&cpi->oxcf.rc_cfg, &cpi->oxcf.gf_cfg) == 0) {
+ fps.pcnt_third_ref = INVALID_FP_STATS_TO_PREDICT_FLAT_GOP;
+ fps.tr_coded_error = INVALID_FP_STATS_TO_PREDICT_FLAT_GOP;
+ }
+
// We will store the stats inside the persistent twopass struct (and NOT the
// local variable 'fps'), and then cpi->output_pkt_list will point to it.
*this_frame_stats = fps;
- output_stats(this_frame_stats, cpi->output_pkt_list);
- if (cpi->twopass.stats_buf_ctx->total_stats != NULL) {
- av1_accumulate_stats(cpi->twopass.stats_buf_ctx->total_stats, &fps);
+ if (!cpi->ppi->lap_enabled)
+ output_stats(this_frame_stats, cpi->ppi->output_pkt_list);
+ if (cpi->ppi->twopass.stats_buf_ctx->total_stats != NULL) {
+ av1_accumulate_stats(cpi->ppi->twopass.stats_buf_ctx->total_stats, &fps);
}
/*In the case of two pass, first pass uses it as a circular buffer,
* when LAP is enabled it is used as a linear buffer*/
@@ -982,6 +1097,17 @@ static void first_pass_tiles(AV1_COMP *cpi, const BLOCK_SIZE fp_block_size) {
AV1_COMMON *const cm = &cpi->common;
const int tile_cols = cm->tiles.cols;
const int tile_rows = cm->tiles.rows;
+ const int num_planes = av1_num_planes(&cpi->common);
+ for (int plane = 0; plane < num_planes; plane++) {
+ const int subsampling_xy =
+ plane ? cm->seq_params->subsampling_x + cm->seq_params->subsampling_y
+ : 0;
+ const int sb_size = MAX_SB_SQUARE >> subsampling_xy;
+ CHECK_MEM_ERROR(
+ cm, cpi->td.mb.plane[plane].src_diff,
+ (int16_t *)aom_memalign(
+ 32, sizeof(*cpi->td.mb.plane[plane].src_diff) * sb_size));
+ }
for (int tile_row = 0; tile_row < tile_rows; ++tile_row) {
for (int tile_col = 0; tile_col < tile_cols; ++tile_col) {
TileDataEnc *const tile_data =
@@ -989,6 +1115,12 @@ static void first_pass_tiles(AV1_COMP *cpi, const BLOCK_SIZE fp_block_size) {
first_pass_tile(cpi, &cpi->td, tile_data, fp_block_size);
}
}
+ for (int plane = 0; plane < num_planes; plane++) {
+ if (cpi->td.mb.plane[plane].src_diff) {
+ aom_free(cpi->td.mb.plane[plane].src_diff);
+ cpi->td.mb.plane[plane].src_diff = NULL;
+ }
+ }
}
void av1_first_pass_row(AV1_COMP *cpi, ThreadData *td, TileDataEnc *tile_data,
@@ -997,7 +1129,7 @@ void av1_first_pass_row(AV1_COMP *cpi, ThreadData *td, TileDataEnc *tile_data,
AV1_COMMON *const cm = &cpi->common;
const CommonModeInfoParams *const mi_params = &cm->mi_params;
CurrentFrame *const current_frame = &cm->current_frame;
- const SequenceHeader *const seq_params = &cm->seq_params;
+ const SequenceHeader *const seq_params = cm->seq_params;
const int num_planes = av1_num_planes(cm);
MACROBLOCKD *const xd = &x->e_mbd;
TileInfo *tile = &tile_data->tile_info;
@@ -1105,7 +1237,7 @@ void av1_first_pass_row(AV1_COMP *cpi, ThreadData *td, TileDataEnc *tile_data,
cpi, td, last_frame, golden_frame, alt_ref_frame, unit_row, unit_col,
recon_yoffset, recon_uvoffset, src_yoffset, alt_ref_frame_yoffset,
fp_block_size, this_intra_error, raw_motion_err_counts,
- raw_motion_err_list, &best_ref_mv, &last_mv, mb_stats);
+ raw_motion_err_list, best_ref_mv, &best_ref_mv, &last_mv, mb_stats);
if (unit_col_in_tile == 0) {
*first_top_mv = last_mv;
}
@@ -1138,7 +1270,7 @@ void av1_first_pass(AV1_COMP *cpi, const int64_t ts_duration) {
AV1_COMMON *const cm = &cpi->common;
const CommonModeInfoParams *const mi_params = &cm->mi_params;
CurrentFrame *const current_frame = &cm->current_frame;
- const SequenceHeader *const seq_params = &cm->seq_params;
+ const SequenceHeader *const seq_params = cm->seq_params;
const int num_planes = av1_num_planes(cm);
MACROBLOCKD *const xd = &x->e_mbd;
const int qindex = find_fp_qindex(seq_params->bit_depth);
@@ -1147,9 +1279,14 @@ void av1_first_pass(AV1_COMP *cpi, const int64_t ts_duration) {
FeatureFlags *const features = &cm->features;
av1_set_screen_content_options(cpi, features);
}
+
+ // Prepare the speed features
+ av1_set_speed_features_framesize_independent(cpi, cpi->oxcf.speed);
+
// Unit size for the first pass encoding.
const BLOCK_SIZE fp_block_size =
- cpi->is_screen_content_type ? BLOCK_8X8 : BLOCK_16X16;
+ get_fp_block_size(cpi->is_screen_content_type);
+
// Number of rows in the unit size.
// Note mi_params->mb_rows and mi_params->mb_cols are in the unit of 16x16.
const int unit_rows = get_unit_rows(fp_block_size, mi_params->mb_rows);
@@ -1250,7 +1387,7 @@ void av1_first_pass(AV1_COMP *cpi, const int64_t ts_duration) {
(stats.image_data_start_row * unit_cols * 2));
}
- TWO_PASS *twopass = &cpi->twopass;
+ TWO_PASS *twopass = &cpi->ppi->twopass;
const int num_mbs_16X16 = (cpi->oxcf.resize_cfg.resize_mode != RESIZE_NONE)
? cpi->initial_mbs
: mi_params->MBs;
diff --git a/third_party/libaom/source/libaom/av1/encoder/firstpass.h b/third_party/libaom/source/libaom/av1/encoder/firstpass.h
index 22969e885b..122912f72a 100644
--- a/third_party/libaom/source/libaom/av1/encoder/firstpass.h
+++ b/third_party/libaom/source/libaom/av1/encoder/firstpass.h
@@ -152,6 +152,18 @@ typedef struct {
* standard deviation for (0, 0) motion prediction error
*/
double raw_error_stdev;
+ /*!
+ * Whether the frame contains a flash
+ */
+ int64_t is_flash;
+ /*!
+ * Estimated noise variance
+ */
+ double noise_var;
+ /*!
+ * Correlation coefficient with the previous frame
+ */
+ double cor_coeff;
} FIRSTPASS_STATS;
/*!\cond */
@@ -170,8 +182,6 @@ enum {
*/
typedef struct {
/*!\cond */
- // The frame processing order within a GOP
- unsigned char index;
// Frame update type, e.g. ARF/GF/LF/Overlay
FRAME_UPDATE_TYPE update_type[MAX_STATIC_GF_GROUP_LENGTH];
unsigned char arf_src_offset[MAX_STATIC_GF_GROUP_LENGTH];
@@ -191,6 +201,21 @@ typedef struct {
REFBUF_STATE refbuf_state[MAX_STATIC_GF_GROUP_LENGTH];
int arf_index; // the index in the gf group of ARF, if no arf, then -1
int size; // The total length of a GOP
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ // Indicates the level of parallelism in frame parallel encodes.
+ // 0 : frame is independently encoded (not part of parallel encodes).
+ // 1 : frame is the first in encode order in a given parallel encode set.
+ // 2 : frame occurs later in encode order in a given parallel encode set.
+ int frame_parallel_level[MAX_STATIC_GF_GROUP_LENGTH];
+ // Indicates whether a frame should act as non-reference frame.
+ // 0 : frame is a reference frame.
+ // 1 : frame is a non-reference frame.
+ int is_frame_non_ref[MAX_STATIC_GF_GROUP_LENGTH];
+
+ // The offset into lookahead_ctx for choosing
+ // source of frame parallel encodes.
+ int src_offset[MAX_STATIC_GF_GROUP_LENGTH];
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
/*!\endcond */
} GF_GROUP;
/*!\cond */
@@ -327,6 +352,15 @@ struct EncodeFrameParams;
struct AV1EncoderConfig;
struct TileDataEnc;
+static INLINE int is_fp_wavelet_energy_invalid(
+ const FIRSTPASS_STATS *fp_stats) {
+ return (fp_stats->frame_avg_wavelet_energy < 0);
+}
+
+static INLINE BLOCK_SIZE get_fp_block_size(int is_screen_content_type) {
+ return (is_screen_content_type ? BLOCK_8X8 : BLOCK_16X16);
+}
+
int av1_get_unit_rows_in_tile(TileInfo tile, const BLOCK_SIZE fp_block_size);
int av1_get_unit_cols_in_tile(TileInfo tile, const BLOCK_SIZE fp_block_size);
diff --git a/third_party/libaom/source/libaom/av1/encoder/global_motion_facade.c b/third_party/libaom/source/libaom/av1/encoder/global_motion_facade.c
index 31c69da7eb..01ef7b0843 100644
--- a/third_party/libaom/source/libaom/av1/encoder/global_motion_facade.c
+++ b/third_party/libaom/source/libaom/av1/encoder/global_motion_facade.c
@@ -108,10 +108,10 @@ static AOM_INLINE void compute_global_motion_for_ref_frame(
const int do_adaptive_gm_estimation = 0;
const int ref_frame_dist = get_relative_dist(
- &cm->seq_params.order_hint_info, cm->current_frame.order_hint,
+ &cm->seq_params->order_hint_info, cm->current_frame.order_hint,
cm->cur_frame->ref_order_hints[frame - LAST_FRAME]);
const GlobalMotionEstimationType gm_estimation_type =
- cm->seq_params.order_hint_info.enable_order_hint &&
+ cm->seq_params->order_hint_info.enable_order_hint &&
abs(ref_frame_dist) <= 2 && do_adaptive_gm_estimation
? GLOBAL_MOTION_DISFLOW_BASED
: GLOBAL_MOTION_FEATURE_BASED;
@@ -126,7 +126,7 @@ static AOM_INLINE void compute_global_motion_for_ref_frame(
av1_compute_global_motion(model, src_buffer, src_width, src_height,
src_stride, src_corners, num_src_corners,
- ref_buf[frame], cpi->common.seq_params.bit_depth,
+ ref_buf[frame], cpi->common.seq_params->bit_depth,
gm_estimation_type, inliers_by_motion,
params_by_motion, RANSAC_NUM_MOTIONS);
int64_t ref_frame_error = 0;
@@ -284,9 +284,9 @@ static AOM_INLINE void update_valid_ref_frames_for_gm(
AV1_COMMON *const cm = &cpi->common;
int *num_past_ref_frames = &num_ref_frames[0];
int *num_future_ref_frames = &num_ref_frames[1];
- const GF_GROUP *gf_group = &cpi->gf_group;
+ const GF_GROUP *gf_group = &cpi->ppi->gf_group;
int ref_pruning_enabled = is_frame_eligible_for_ref_pruning(
- gf_group, cpi->sf.inter_sf.selective_ref_frame, 1, gf_group->index);
+ gf_group, cpi->sf.inter_sf.selective_ref_frame, 1, cpi->gf_frame_index);
for (int frame = ALTREF_FRAME; frame >= LAST_FRAME; --frame) {
const MV_REFERENCE_FRAME ref_frame[2] = { frame, NONE_FRAME };
@@ -368,7 +368,7 @@ static AOM_INLINE void setup_global_motion_info_params(AV1_COMP *cpi) {
// The source buffer is 16-bit, so we need to convert to 8 bits for the
// following code. We cache the result until the source frame is released.
gm_info->src_buffer =
- av1_downconvert_frame(source, cpi->common.seq_params.bit_depth);
+ av1_downconvert_frame(source, cpi->common.seq_params->bit_depth);
}
gm_info->segment_map_w =
diff --git a/third_party/libaom/source/libaom/av1/encoder/gop_structure.c b/third_party/libaom/source/libaom/av1/encoder/gop_structure.c
index 0e4968a72f..9cf72d2733 100644
--- a/third_party/libaom/source/libaom/av1/encoder/gop_structure.c
+++ b/third_party/libaom/source/libaom/av1/encoder/gop_structure.c
@@ -26,12 +26,51 @@
#include "av1/encoder/firstpass.h"
#include "av1/encoder/gop_structure.h"
+#if CONFIG_FRAME_PARALLEL_ENCODE
+// This function sets gf_group->frame_parallel_level for LF_UPDATE frames based
+// on the value of parallel_frame_count.
+static void set_frame_parallel_level(int *frame_parallel_level,
+ int *parallel_frame_count,
+ int max_parallel_frames) {
+ assert(*parallel_frame_count > 0);
+ // parallel_frame_count > 1 indicates subsequent frame(s) in the current
+ // parallel encode set.
+ *frame_parallel_level = 1 + (*parallel_frame_count > 1);
+ // Update the count of no. of parallel frames.
+ (*parallel_frame_count)++;
+ if (*parallel_frame_count > max_parallel_frames) *parallel_frame_count = 1;
+}
+
+// This function sets gf_group->src_offset based on frame_parallel_level.
+// Outputs are gf_group->src_offset and first_frame_index
+static void set_src_offset(GF_GROUP *const gf_group, int *first_frame_index,
+ int cur_frame_idx, int frame_ind) {
+ if (gf_group->frame_parallel_level[frame_ind] > 0) {
+ if (gf_group->frame_parallel_level[frame_ind] == 1) {
+ *first_frame_index = cur_frame_idx;
+ }
+
+ // Obtain the offset of the frame at frame_ind in the lookahead queue by
+ // subtracting the display order hints of the current frame from the display
+ // order hint of the first frame in parallel encoding set (at
+ // first_frame_index).
+ gf_group->src_offset[frame_ind] =
+ (cur_frame_idx + gf_group->arf_src_offset[frame_ind]) -
+ *first_frame_index;
+ }
+}
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
+
// Set parameters for frames between 'start' and 'end' (excluding both).
-static void set_multi_layer_params(const TWO_PASS *twopass,
- GF_GROUP *const gf_group, RATE_CONTROL *rc,
- FRAME_INFO *frame_info, int start, int end,
- int *cur_frame_idx, int *frame_ind,
- int layer_depth) {
+static void set_multi_layer_params(
+ const TWO_PASS *twopass, GF_GROUP *const gf_group,
+ const PRIMARY_RATE_CONTROL *p_rc, RATE_CONTROL *rc, FRAME_INFO *frame_info,
+ int start, int end, int *cur_frame_idx, int *frame_ind,
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ int *parallel_frame_count, int max_parallel_frames,
+ int do_frame_parallel_encode, int *first_frame_index,
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
+ int layer_depth) {
const int num_frames_to_process = end - start;
// Either we are at the last level of the pyramid, or we don't have enough
@@ -45,11 +84,21 @@ static void set_multi_layer_params(const TWO_PASS *twopass,
gf_group->cur_frame_idx[*frame_ind] = *cur_frame_idx;
gf_group->layer_depth[*frame_ind] = MAX_ARF_LAYERS;
gf_group->arf_boost[*frame_ind] = av1_calc_arf_boost(
- twopass, rc, frame_info, start, end - start, 0, NULL, NULL);
+ twopass, p_rc, rc, frame_info, start, end - start, 0, NULL, NULL, 0);
gf_group->frame_type[*frame_ind] = INTER_FRAME;
gf_group->refbuf_state[*frame_ind] = REFBUF_UPDATE;
gf_group->max_layer_depth =
AOMMAX(gf_group->max_layer_depth, layer_depth);
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ // Set the level of parallelism for the LF_UPDATE frame.
+ if (do_frame_parallel_encode) {
+ set_frame_parallel_level(&gf_group->frame_parallel_level[*frame_ind],
+ parallel_frame_count, max_parallel_frames);
+ // Set LF_UPDATE frames as non-reference frames.
+ gf_group->is_frame_non_ref[*frame_ind] = 1;
+ }
+ set_src_offset(gf_group, first_frame_index, *cur_frame_idx, *frame_ind);
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
++(*frame_ind);
++(*cur_frame_idx);
++start;
@@ -65,14 +114,32 @@ static void set_multi_layer_params(const TWO_PASS *twopass,
gf_group->frame_type[*frame_ind] = INTER_FRAME;
gf_group->refbuf_state[*frame_ind] = REFBUF_UPDATE;
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ if (do_frame_parallel_encode) {
+ // If max_parallel_frames is not exceeded, encode the next internal ARF
+ // frame in parallel.
+ if (*parallel_frame_count > 1 &&
+ *parallel_frame_count <= max_parallel_frames) {
+ gf_group->frame_parallel_level[*frame_ind] = 2;
+ *parallel_frame_count = 1;
+ }
+ }
+ set_src_offset(gf_group, first_frame_index, *cur_frame_idx, *frame_ind);
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
+
// Get the boost factor for intermediate ARF frames.
gf_group->arf_boost[*frame_ind] = av1_calc_arf_boost(
- twopass, rc, frame_info, m, end - m, m - start, NULL, NULL);
+ twopass, p_rc, rc, frame_info, m, end - m, m - start, NULL, NULL, 0);
++(*frame_ind);
// Frames displayed before this internal ARF.
- set_multi_layer_params(twopass, gf_group, rc, frame_info, start, m,
- cur_frame_idx, frame_ind, layer_depth + 1);
+ set_multi_layer_params(twopass, gf_group, p_rc, rc, frame_info, start, m,
+ cur_frame_idx, frame_ind,
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ parallel_frame_count, max_parallel_frames,
+ do_frame_parallel_encode, first_frame_index,
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
+ layer_depth + 1);
// Overlay for internal ARF.
gf_group->update_type[*frame_ind] = INTNL_OVERLAY_UPDATE;
@@ -82,12 +149,21 @@ static void set_multi_layer_params(const TWO_PASS *twopass,
gf_group->layer_depth[*frame_ind] = layer_depth;
gf_group->frame_type[*frame_ind] = INTER_FRAME;
gf_group->refbuf_state[*frame_ind] = REFBUF_UPDATE;
+
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ set_src_offset(gf_group, first_frame_index, *cur_frame_idx, *frame_ind);
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
++(*frame_ind);
++(*cur_frame_idx);
// Frames displayed after this internal ARF.
- set_multi_layer_params(twopass, gf_group, rc, frame_info, m + 1, end,
- cur_frame_idx, frame_ind, layer_depth + 1);
+ set_multi_layer_params(twopass, gf_group, p_rc, rc, frame_info, m + 1, end,
+ cur_frame_idx, frame_ind,
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ parallel_frame_count, max_parallel_frames,
+ do_frame_parallel_encode, first_frame_index,
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
+ layer_depth + 1);
}
}
@@ -95,6 +171,7 @@ static int construct_multi_layer_gf_structure(
AV1_COMP *cpi, TWO_PASS *twopass, GF_GROUP *const gf_group,
RATE_CONTROL *rc, FRAME_INFO *const frame_info, int gf_interval,
FRAME_UPDATE_TYPE first_frame_update_type) {
+ PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
int frame_index = 0;
int cur_frame_index = 0;
@@ -103,6 +180,18 @@ static int construct_multi_layer_gf_structure(
first_frame_update_type == OVERLAY_UPDATE ||
first_frame_update_type == GF_UPDATE);
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ // Initialize gf_group->frame_parallel_level and gf_group->is_frame_non_ref to
+ // 0.
+ memset(
+ gf_group->frame_parallel_level, 0,
+ sizeof(gf_group->frame_parallel_level[0]) * MAX_STATIC_GF_GROUP_LENGTH);
+ memset(gf_group->is_frame_non_ref, 0,
+ sizeof(gf_group->is_frame_non_ref[0]) * MAX_STATIC_GF_GROUP_LENGTH);
+ memset(gf_group->src_offset, 0,
+ sizeof(gf_group->src_offset[0]) * MAX_STATIC_GF_GROUP_LENGTH);
+#endif
+
if (first_frame_update_type == KF_UPDATE &&
cpi->oxcf.kf_cfg.enable_keyframe_filtering > 1) {
gf_group->update_type[frame_index] = ARF_UPDATE;
@@ -146,7 +235,7 @@ static int construct_multi_layer_gf_structure(
gf_group->arf_src_offset[frame_index] = gf_interval - cur_frame_index;
gf_group->cur_frame_idx[frame_index] = cur_frame_index;
gf_group->layer_depth[frame_index] = 1;
- gf_group->arf_boost[frame_index] = cpi->rc.gfu_boost;
+ gf_group->arf_boost[frame_index] = cpi->ppi->p_rc.gfu_boost;
gf_group->frame_type[frame_index] = is_fwd_kf ? KEY_FRAME : INTER_FRAME;
gf_group->refbuf_state[frame_index] = REFBUF_UPDATE;
gf_group->max_layer_depth = 1;
@@ -156,9 +245,25 @@ static int construct_multi_layer_gf_structure(
gf_group->arf_index = -1;
}
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ // Running count of no. of frames that is part of a given parallel
+ // encode set in a gf_group. Value of 1 indicates no parallel encode.
+ int parallel_frame_count = 1;
+ // Enable parallel encode of frames if gf_group has a multi-layer pyramid
+ // structure.
+ int do_frame_parallel_encode = (cpi->ppi->num_fp_contexts > 1 && use_altref);
+
+ int first_frame_index = cur_frame_index;
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
+
// Rest of the frames.
- set_multi_layer_params(twopass, gf_group, rc, frame_info, cur_frame_index,
- gf_interval, &cur_frame_index, &frame_index,
+ set_multi_layer_params(twopass, gf_group, p_rc, rc, frame_info,
+ cur_frame_index, gf_interval, &cur_frame_index,
+ &frame_index,
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ &parallel_frame_count, cpi->ppi->num_fp_contexts,
+ do_frame_parallel_encode, &first_frame_index,
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
use_altref + 1);
if (use_altref) {
@@ -181,25 +286,41 @@ static int construct_multi_layer_gf_structure(
gf_group->frame_type[frame_index] = INTER_FRAME;
gf_group->refbuf_state[frame_index] = REFBUF_UPDATE;
gf_group->max_layer_depth = AOMMAX(gf_group->max_layer_depth, 2);
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ set_src_offset(gf_group, &first_frame_index, cur_frame_index,
+ frame_index);
+#endif
++frame_index;
}
}
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ if (do_frame_parallel_encode) {
+ // If frame_parallel_level is set to 1 for the last LF_UPDATE
+ // frame in the gf_group, reset it to zero since there are no subsequent
+ // frames in the gf_group.
+ if (gf_group->frame_parallel_level[frame_index - 2] == 1) {
+ assert(gf_group->update_type[frame_index - 2] == LF_UPDATE);
+ gf_group->frame_parallel_level[frame_index - 2] = 0;
+ }
+ }
+#endif
return frame_index;
}
void av1_gop_setup_structure(AV1_COMP *cpi) {
RATE_CONTROL *const rc = &cpi->rc;
- GF_GROUP *const gf_group = &cpi->gf_group;
- TWO_PASS *const twopass = &cpi->twopass;
+ PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
+ GF_GROUP *const gf_group = &cpi->ppi->gf_group;
+ TWO_PASS *const twopass = &cpi->ppi->twopass;
FRAME_INFO *const frame_info = &cpi->frame_info;
const int key_frame = rc->frames_since_key == 0;
const FRAME_UPDATE_TYPE first_frame_update_type =
- key_frame
- ? KF_UPDATE
- : cpi->gf_state.arf_gf_boost_lst || (rc->baseline_gf_interval == 1)
- ? OVERLAY_UPDATE
- : GF_UPDATE;
+ key_frame ? KF_UPDATE
+ : cpi->ppi->gf_state.arf_gf_boost_lst ||
+ (p_rc->baseline_gf_interval == 1)
+ ? OVERLAY_UPDATE
+ : GF_UPDATE;
gf_group->size = construct_multi_layer_gf_structure(
- cpi, twopass, gf_group, rc, frame_info, rc->baseline_gf_interval - 1,
+ cpi, twopass, gf_group, rc, frame_info, p_rc->baseline_gf_interval - 1,
first_frame_update_type);
}
diff --git a/third_party/libaom/source/libaom/av1/encoder/gop_structure.h b/third_party/libaom/source/libaom/av1/encoder/gop_structure.h
index 6cfca22862..aeffb40acb 100644
--- a/third_party/libaom/source/libaom/av1/encoder/gop_structure.h
+++ b/third_party/libaom/source/libaom/av1/encoder/gop_structure.h
@@ -66,10 +66,11 @@ void av1_gop_bit_allocation(const AV1_COMP *cpi, RATE_CONTROL *const rc,
int64_t gf_group_bits);
/*!\cond */
-int av1_calc_arf_boost(const TWO_PASS *twopass, const RATE_CONTROL *rc,
+int av1_calc_arf_boost(const TWO_PASS *twopass,
+ const PRIMARY_RATE_CONTROL *p_rc, const RATE_CONTROL *rc,
FRAME_INFO *frame_info, int offset, int f_frames,
int b_frames, int *num_fpstats_used,
- int *num_fpstats_required);
+ int *num_fpstats_required, int project_gfu_boost);
/*!\endcond */
#ifdef __cplusplus
diff --git a/third_party/libaom/source/libaom/av1/encoder/hybrid_fwd_txfm.c b/third_party/libaom/source/libaom/av1/encoder/hybrid_fwd_txfm.c
index 08c167a9d6..eda5ddf78c 100644
--- a/third_party/libaom/source/libaom/av1/encoder/hybrid_fwd_txfm.c
+++ b/third_party/libaom/source/libaom/av1/encoder/hybrid_fwd_txfm.c
@@ -14,6 +14,7 @@
#include "config/aom_dsp_rtcd.h"
#include "av1/common/idct.h"
+#include "av1/common/blockd.h"
#include "av1/encoder/hybrid_fwd_txfm.h"
/* 4-point reversible, orthonormal Walsh-Hadamard in 3.5 adds, 0.5 shifts per
@@ -313,3 +314,26 @@ void av1_highbd_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff,
default: assert(0); break;
}
}
+
+void av1_quick_txfm(int use_hadamard, TX_SIZE tx_size, BitDepthInfo bd_info,
+ const int16_t *src_diff, int src_stride,
+ tran_low_t *coeff) {
+ if (use_hadamard) {
+ switch (tx_size) {
+ case TX_4X4: aom_hadamard_4x4(src_diff, src_stride, coeff); break;
+ case TX_8X8: aom_hadamard_8x8(src_diff, src_stride, coeff); break;
+ case TX_16X16: aom_hadamard_16x16(src_diff, src_stride, coeff); break;
+ case TX_32X32: aom_hadamard_32x32(src_diff, src_stride, coeff); break;
+ default: assert(0);
+ }
+ } else {
+ TxfmParam txfm_param;
+ txfm_param.tx_type = DCT_DCT;
+ txfm_param.tx_size = tx_size;
+ txfm_param.lossless = 0;
+ txfm_param.bd = bd_info.bit_depth;
+ txfm_param.is_hbd = bd_info.use_highbitdepth_buf;
+ txfm_param.tx_set_type = EXT_TX_SET_ALL16;
+ av1_fwd_txfm(src_diff, coeff, src_stride, &txfm_param);
+ }
+}
diff --git a/third_party/libaom/source/libaom/av1/encoder/hybrid_fwd_txfm.h b/third_party/libaom/source/libaom/av1/encoder/hybrid_fwd_txfm.h
index daabc7119a..30f8a2258b 100644
--- a/third_party/libaom/source/libaom/av1/encoder/hybrid_fwd_txfm.h
+++ b/third_party/libaom/source/libaom/av1/encoder/hybrid_fwd_txfm.h
@@ -24,6 +24,15 @@ void av1_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff, int diff_stride,
void av1_highbd_fwd_txfm(const int16_t *src_diff, tran_low_t *coeff,
int diff_stride, TxfmParam *txfm_param);
+/*!\brief Apply Hadamard or DCT transform
+ *
+ * \callergraph
+ * DCT and Hadamard transforms are commonly used for quick RD score estimation.
+ * The coeff buffer's size should be equal to the number of pixels
+ * corresponding to tx_size.
+ */
+void av1_quick_txfm(int use_hadamard, TX_SIZE tx_size, BitDepthInfo bd_info,
+ const int16_t *src_diff, int src_stride, tran_low_t *coeff);
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/third_party/libaom/source/libaom/av1/encoder/interp_search.c b/third_party/libaom/source/libaom/av1/encoder/interp_search.c
index 0066c35434..dd77f6a1c0 100644
--- a/third_party/libaom/source/libaom/av1/encoder/interp_search.c
+++ b/third_party/libaom/source/libaom/av1/encoder/interp_search.c
@@ -178,7 +178,7 @@ static INLINE int64_t interpolation_filter_rd(
mbmi->interp_filters = filter_sets[filter_idx];
const int tmp_rs =
get_switchable_rate(x, mbmi->interp_filters, switchable_ctx,
- cm->seq_params.enable_dual_filter);
+ cm->seq_params->enable_dual_filter);
int64_t min_rd = RDCOST(x->rdmult, tmp_rs, 0);
if (min_rd > *rd) {
@@ -449,14 +449,23 @@ static INLINE void find_best_non_dual_interp_filter(
interp_search_flags->interp_filter_search_mask;
if (cpi->sf.interp_sf.adaptive_interp_filter_search == 2) {
- const FRAME_UPDATE_TYPE update_type = get_frame_update_type(&cpi->gf_group);
+ const FRAME_UPDATE_TYPE update_type =
+ get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
const int ctx0 = av1_get_pred_context_switchable_interp(xd, 0);
const int ctx1 = av1_get_pred_context_switchable_interp(xd, 1);
- const int *switchable_interp_p0 =
- cpi->frame_probs.switchable_interp_probs[update_type][ctx0];
- const int *switchable_interp_p1 =
- cpi->frame_probs.switchable_interp_probs[update_type][ctx1];
-
+ int *switchable_interp_p0;
+ int *switchable_interp_p1;
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ switchable_interp_p0 = (int *)cpi->ppi->temp_frame_probs
+ .switchable_interp_probs[update_type][ctx0];
+ switchable_interp_p1 = (int *)cpi->ppi->temp_frame_probs
+ .switchable_interp_probs[update_type][ctx1];
+#else
+ switchable_interp_p0 =
+ (int *)cpi->frame_probs.switchable_interp_probs[update_type][ctx0];
+ switchable_interp_p1 =
+ (int *)cpi->frame_probs.switchable_interp_probs[update_type][ctx1];
+#endif
static const int thr[7] = { 0, 8, 8, 8, 8, 0, 8 };
const int thresh = thr[update_type];
for (i = 0; i < SWITCHABLE_FILTERS; i++) {
@@ -683,7 +692,7 @@ int64_t av1_interpolation_filter_search(
switchable_ctx[1] = av1_get_pred_context_switchable_interp(xd, 1);
*switchable_rate =
get_switchable_rate(x, mbmi->interp_filters, switchable_ctx,
- cm->seq_params.enable_dual_filter);
+ cm->seq_params->enable_dual_filter);
// Do MC evaluation for default filter_type.
// Luma MC
@@ -747,7 +756,7 @@ int64_t av1_interpolation_filter_search(
restore_dst_buf(xd, *tmp_dst, num_planes);
const BUFFER_SET *dst_bufs[2] = { tmp_dst, orig_dst };
// Evaluate dual interp filters
- if (cm->seq_params.enable_dual_filter) {
+ if (cm->seq_params->enable_dual_filter) {
if (cpi->sf.interp_sf.use_fast_interpolation_filter_search) {
fast_dual_interp_filter_rd(x, cpi, tile_data, bsize, orig_dst, rd,
&rd_stats_luma, &rd_stats, switchable_rate,
diff --git a/third_party/libaom/source/libaom/av1/encoder/interp_search.h b/third_party/libaom/source/libaom/av1/encoder/interp_search.h
index 1ee26d11ba..902b69960a 100644
--- a/third_party/libaom/source/libaom/av1/encoder/interp_search.h
+++ b/third_party/libaom/source/libaom/av1/encoder/interp_search.h
@@ -37,7 +37,7 @@ typedef struct {
/*!\brief Miscellaneous arguments for inter mode search.
*/
-typedef struct {
+typedef struct HandleInterModeArgs {
/*!
* Buffer for the above predictor in OBMC
*/
@@ -139,6 +139,16 @@ typedef struct {
* Estimated cmp mode.
*/
int cmp_mode[MODE_CTX_REF_FRAMES];
+ /*!
+ * The best sse during single new_mv search. Note that the sse here comes from
+ * single_motion_search, and not from interpolation_filter_search. This has
+ * two implications:
+ * 1. The mv used to calculate the sse here does not have to be the best sse
+ * found in handle_inter_mode.
+ * 2. Even if the mvs agree, the sse here can differ from the sse in \ref
+ * MACROBLOCK::pred_sse due to different interpolation filter used.
+ */
+ unsigned int best_single_sse_in_refs[REF_FRAMES];
} HandleInterModeArgs;
/*!\cond */
diff --git a/third_party/libaom/source/libaom/av1/encoder/intra_mode_search.c b/third_party/libaom/source/libaom/av1/encoder/intra_mode_search.c
index 9cb0f4a118..50e53fdde1 100644
--- a/third_party/libaom/source/libaom/av1/encoder/intra_mode_search.c
+++ b/third_party/libaom/source/libaom/av1/encoder/intra_mode_search.c
@@ -32,6 +32,31 @@ static const UV_PREDICTION_MODE uv_rd_search_mode_order[UV_INTRA_MODES] = {
UV_D113_PRED, UV_D45_PRED,
};
+// The bitmask corresponds to the filter intra modes as defined in enums.h
+// FILTER_INTRA_MODE enumeration type. Setting a bit to 0 in the mask means to
+// disable the evaluation of corresponding filter intra mode. The table
+// av1_derived_filter_intra_mode_used_flag is used when speed feature
+// prune_filter_intra_level is 1. The evaluated filter intra modes are union
+// of the following:
+// 1) FILTER_DC_PRED
+// 2) mode that corresponds to best mode so far of DC_PRED, V_PRED, H_PRED,
+// D157_PRED and PAETH_PRED. (Eg: FILTER_V_PRED if best mode so far is V_PRED).
+static const uint8_t av1_derived_filter_intra_mode_used_flag[INTRA_MODES] = {
+ 0x01, // DC_PRED: 0000 0001
+ 0x03, // V_PRED: 0000 0011
+ 0x05, // H_PRED: 0000 0101
+ 0x01, // D45_PRED: 0000 0001
+ 0x01, // D135_PRED: 0000 0001
+ 0x01, // D113_PRED: 0000 0001
+ 0x09, // D157_PRED: 0000 1001
+ 0x01, // D203_PRED: 0000 0001
+ 0x01, // D67_PRED: 0000 0001
+ 0x01, // SMOOTH_PRED: 0000 0001
+ 0x01, // SMOOTH_V_PRED: 0000 0001
+ 0x01, // SMOOTH_H_PRED: 0000 0001
+ 0x11 // PAETH_PRED: 0001 0001
+};
+
// The bitmask corresponds to the chroma intra modes as defined in enums.h
// UV_PREDICTION_MODE enumeration type. Setting a bit to 0 in the mask means to
// disable the evaluation of corresponding chroma intra mode. The table
@@ -60,59 +85,6 @@ static const uint16_t av1_derived_chroma_intra_mode_used_flag[INTRA_MODES] = {
};
/*!\endcond */
-/*!\brief Calculate the rdcost of a given luma intra angle
- *
- * \ingroup intra_mode_search
- * \callergraph
- * This function runs rd calculation for a given luma intra prediction angle.
- * This is used to select the best angle delta.
- *
- * \return Returns the rdcost of the angle and updates the mbmi if the
- * new rdcost is better.
- */
-static int64_t calc_rd_given_intra_angle(
- const AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mode_cost,
- int64_t best_rd_in, int8_t angle_delta, int max_angle_delta, int *rate,
- RD_STATS *rd_stats, int *best_angle_delta, TX_SIZE *best_tx_size,
- int64_t *best_rd, int64_t *best_model_rd, uint8_t *best_tx_type_map,
- uint8_t *best_blk_skip, int skip_model_rd) {
- RD_STATS tokenonly_rd_stats;
- int64_t this_rd;
- MACROBLOCKD *xd = &x->e_mbd;
- MB_MODE_INFO *mbmi = xd->mi[0];
- const int n4 = bsize_to_num_blk(bsize);
- assert(!is_inter_block(mbmi));
- mbmi->angle_delta[PLANE_TYPE_Y] = angle_delta;
- if (!skip_model_rd) {
- if (model_intra_yrd_and_prune(cpi, x, bsize, best_model_rd)) {
- return INT64_MAX;
- }
- }
- av1_pick_uniform_tx_size_type_yrd(cpi, x, &tokenonly_rd_stats, bsize,
- best_rd_in);
- if (tokenonly_rd_stats.rate == INT_MAX) return INT64_MAX;
-
- int this_rate =
- mode_cost + tokenonly_rd_stats.rate +
- x->mode_costs
- .angle_delta_cost[mbmi->mode - V_PRED][max_angle_delta + angle_delta];
- this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
-
- if (this_rd < *best_rd) {
- memcpy(best_blk_skip, x->txfm_search_info.blk_skip,
- sizeof(best_blk_skip[0]) * n4);
- av1_copy_array(best_tx_type_map, xd->tx_type_map, n4);
- *best_rd = this_rd;
- *best_angle_delta = mbmi->angle_delta[PLANE_TYPE_Y];
- *best_tx_size = mbmi->tx_size;
- *rate = this_rate;
- rd_stats->rate = tokenonly_rd_stats.rate;
- rd_stats->dist = tokenonly_rd_stats.dist;
- rd_stats->skip_txfm = tokenonly_rd_stats.skip_txfm;
- }
- return this_rd;
-}
-
/*!\brief Search for the best filter_intra mode when coding intra frame.
*
* \ingroup intra_mode_search
@@ -125,8 +97,12 @@ static int rd_pick_filter_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
int *rate, int *rate_tokenonly,
int64_t *distortion, int *skippable,
BLOCK_SIZE bsize, int mode_cost,
+ PREDICTION_MODE best_mode_so_far,
int64_t *best_rd, int64_t *best_model_rd,
PICK_MODE_CONTEXT *ctx) {
+ // Skip the evaluation of filter intra modes.
+ if (cpi->sf.intra_sf.prune_filter_intra_level == 2) return 0;
+
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *mbmi = xd->mi[0];
int filter_intra_selected_flag = 0;
@@ -134,17 +110,33 @@ static int rd_pick_filter_intra_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
TX_SIZE best_tx_size = TX_8X8;
FILTER_INTRA_MODE_INFO filter_intra_mode_info;
uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
- (void)ctx;
av1_zero(filter_intra_mode_info);
mbmi->filter_intra_mode_info.use_filter_intra = 1;
mbmi->mode = DC_PRED;
mbmi->palette_mode_info.palette_size[0] = 0;
+ // Skip the evaluation of filter-intra if cached MB_MODE_INFO does not have
+ // filter-intra as winner.
+ if (x->use_mb_mode_cache &&
+ !x->mb_mode_cache->filter_intra_mode_info.use_filter_intra)
+ return 0;
+
for (mode = 0; mode < FILTER_INTRA_MODES; ++mode) {
int64_t this_rd;
RD_STATS tokenonly_rd_stats;
mbmi->filter_intra_mode_info.filter_intra_mode = mode;
+ if ((cpi->sf.intra_sf.prune_filter_intra_level == 1) &&
+ !(av1_derived_filter_intra_mode_used_flag[best_mode_so_far] &
+ (1 << mode)))
+ continue;
+
+ // Skip the evaluation of modes that do not match with the winner mode in
+ // x->mb_mode_cache.
+ if (x->use_mb_mode_cache &&
+ mode != x->mb_mode_cache->filter_intra_mode_info.filter_intra_mode)
+ continue;
+
if (model_intra_yrd_and_prune(cpi, x, bsize, best_model_rd)) {
continue;
}
@@ -248,6 +240,42 @@ void av1_count_colors_highbd(const uint8_t *src8, int stride, int rows,
}
}
+void set_y_mode_and_delta_angle(const int mode_idx, MB_MODE_INFO *const mbmi) {
+ if (mode_idx < INTRA_MODE_END) {
+ mbmi->mode = intra_rd_search_mode_order[mode_idx];
+ mbmi->angle_delta[PLANE_TYPE_Y] = 0;
+ } else {
+ mbmi->mode = (mode_idx - INTRA_MODE_END) / (MAX_ANGLE_DELTA * 2) + V_PRED;
+ int angle_delta = (mode_idx - INTRA_MODE_END) % (MAX_ANGLE_DELTA * 2);
+ mbmi->angle_delta[PLANE_TYPE_Y] =
+ (angle_delta < 3 ? (angle_delta - 3) : (angle_delta - 2));
+ }
+}
+
+int prune_intra_y_mode(int64_t this_model_rd, int64_t *best_model_rd,
+ int64_t top_intra_model_rd[], int model_cnt_allowed) {
+ const double thresh_best = 1.50;
+ const double thresh_top = 1.00;
+ for (int i = 0; i < model_cnt_allowed; i++) {
+ if (this_model_rd < top_intra_model_rd[i]) {
+ for (int j = model_cnt_allowed - 1; j > i; j--) {
+ top_intra_model_rd[j] = top_intra_model_rd[j - 1];
+ }
+ top_intra_model_rd[i] = this_model_rd;
+ break;
+ }
+ }
+ if (top_intra_model_rd[model_cnt_allowed - 1] != INT64_MAX &&
+ this_model_rd > thresh_top * top_intra_model_rd[model_cnt_allowed - 1])
+ return 1;
+
+ if (this_model_rd != INT64_MAX &&
+ this_model_rd > thresh_best * (*best_model_rd))
+ return 1;
+ if (this_model_rd < *best_model_rd) *best_model_rd = this_model_rd;
+ return 0;
+}
+
// Run RD calculation with given chroma intra prediction angle., and return
// the RD cost. Update the best mode info. if the RD cost is the best so far.
static int64_t pick_intra_angle_routine_sbuv(
@@ -342,125 +370,199 @@ static int rd_pick_intra_angle_sbuv(const AV1_COMP *const cpi, MACROBLOCK *x,
#define PLANE_SIGN_TO_JOINT_SIGN(plane, a, b) \
(plane == CFL_PRED_U ? a * CFL_SIGNS + b - 1 : b * CFL_SIGNS + a - 1)
-static int cfl_rd_pick_alpha(MACROBLOCK *const x, const AV1_COMP *const cpi,
- TX_SIZE tx_size, int64_t best_rd) {
+
+static void cfl_idx_to_sign_and_alpha(int cfl_idx, CFL_SIGN_TYPE *cfl_sign,
+ int *cfl_alpha) {
+ int cfl_linear_idx = cfl_idx - CFL_INDEX_ZERO;
+ if (cfl_linear_idx == 0) {
+ *cfl_sign = CFL_SIGN_ZERO;
+ *cfl_alpha = 0;
+ } else {
+ *cfl_sign = cfl_linear_idx > 0 ? CFL_SIGN_POS : CFL_SIGN_NEG;
+ *cfl_alpha = abs(cfl_linear_idx) - 1;
+ }
+}
+
+static int64_t cfl_compute_rd(const AV1_COMP *const cpi, MACROBLOCK *x,
+ int plane, TX_SIZE tx_size,
+ BLOCK_SIZE plane_bsize, int cfl_idx,
+ int fast_mode, RD_STATS *rd_stats) {
+ assert(IMPLIES(fast_mode, rd_stats == NULL));
+ const AV1_COMMON *const cm = &cpi->common;
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = xd->mi[0];
- const MACROBLOCKD_PLANE *pd = &xd->plane[AOM_PLANE_U];
- const ModeCosts *mode_costs = &x->mode_costs;
- const BLOCK_SIZE plane_bsize =
- get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);
-
- assert(is_cfl_allowed(xd) && cpi->oxcf.intra_mode_cfg.enable_cfl_intra);
- assert(plane_bsize < BLOCK_SIZES_ALL);
- if (!xd->lossless[mbmi->segment_id]) {
- assert(block_size_wide[plane_bsize] == tx_size_wide[tx_size]);
- assert(block_size_high[plane_bsize] == tx_size_high[tx_size]);
+ int cfl_plane = get_cfl_pred_type(plane);
+ CFL_SIGN_TYPE cfl_sign;
+ int cfl_alpha;
+ cfl_idx_to_sign_and_alpha(cfl_idx, &cfl_sign, &cfl_alpha);
+ // We conly build CFL for a given plane, the other plane's sign is dummy
+ int dummy_sign = CFL_SIGN_NEG;
+ const int8_t orig_cfl_alpha_signs = mbmi->cfl_alpha_signs;
+ const uint8_t orig_cfl_alpha_idx = mbmi->cfl_alpha_idx;
+ mbmi->cfl_alpha_signs =
+ PLANE_SIGN_TO_JOINT_SIGN(cfl_plane, cfl_sign, dummy_sign);
+ mbmi->cfl_alpha_idx = (cfl_alpha << CFL_ALPHABET_SIZE_LOG2) + cfl_alpha;
+ int64_t cfl_cost;
+ if (fast_mode) {
+ cfl_cost =
+ intra_model_rd(cm, x, plane, plane_bsize, tx_size, /*use_hadamard=*/0);
+ } else {
+ av1_init_rd_stats(rd_stats);
+ av1_txfm_rd_in_plane(x, cpi, rd_stats, INT64_MAX, 0, plane, plane_bsize,
+ tx_size, FTXS_NONE, 0);
+ av1_rd_cost_update(x->rdmult, rd_stats);
+ cfl_cost = rd_stats->rdcost;
}
+ mbmi->cfl_alpha_signs = orig_cfl_alpha_signs;
+ mbmi->cfl_alpha_idx = orig_cfl_alpha_idx;
+ return cfl_cost;
+}
+
+static void cfl_pick_plane_parameter(const AV1_COMP *const cpi, MACROBLOCK *x,
+ int plane, TX_SIZE tx_size,
+ int cfl_search_range,
+ RD_STATS cfl_rd_arr[CFL_MAGS_SIZE]) {
+ assert(cfl_search_range >= 1 && cfl_search_range <= CFL_MAGS_SIZE);
+ MACROBLOCKD *const xd = &x->e_mbd;
xd->cfl.use_dc_pred_cache = 1;
- const int64_t mode_rd = RDCOST(
- x->rdmult,
- mode_costs->intra_uv_mode_cost[CFL_ALLOWED][mbmi->mode][UV_CFL_PRED], 0);
- int64_t best_rd_uv[CFL_JOINT_SIGNS][CFL_PRED_PLANES];
- int best_c[CFL_JOINT_SIGNS][CFL_PRED_PLANES];
-#if CONFIG_DEBUG
- int best_rate_uv[CFL_JOINT_SIGNS][CFL_PRED_PLANES];
-#endif // CONFIG_DEBUG
-
- const int skip_trellis = 0;
- for (int plane = 0; plane < CFL_PRED_PLANES; plane++) {
- RD_STATS rd_stats;
- av1_init_rd_stats(&rd_stats);
- for (int joint_sign = 0; joint_sign < CFL_JOINT_SIGNS; joint_sign++) {
- best_rd_uv[joint_sign][plane] = INT64_MAX;
- best_c[joint_sign][plane] = 0;
- }
- // Collect RD stats for an alpha value of zero in this plane.
- // Skip i == CFL_SIGN_ZERO as (0, 0) is invalid.
- for (int i = CFL_SIGN_NEG; i < CFL_SIGNS; i++) {
- const int8_t joint_sign =
- PLANE_SIGN_TO_JOINT_SIGN(plane, CFL_SIGN_ZERO, i);
- if (i == CFL_SIGN_NEG) {
- mbmi->cfl_alpha_idx = 0;
- mbmi->cfl_alpha_signs = joint_sign;
- av1_txfm_rd_in_plane(x, cpi, &rd_stats, best_rd, 0, plane + 1,
- plane_bsize, tx_size, FTXS_NONE, skip_trellis);
- if (rd_stats.rate == INT_MAX) break;
- }
- const int alpha_rate = mode_costs->cfl_cost[joint_sign][plane][0];
- best_rd_uv[joint_sign][plane] =
- RDCOST(x->rdmult, rd_stats.rate + alpha_rate, rd_stats.dist);
-#if CONFIG_DEBUG
- best_rate_uv[joint_sign][plane] = rd_stats.rate;
-#endif // CONFIG_DEBUG
- }
- }
- int8_t best_joint_sign = -1;
-
- for (int plane = 0; plane < CFL_PRED_PLANES; plane++) {
- for (int pn_sign = CFL_SIGN_NEG; pn_sign < CFL_SIGNS; pn_sign++) {
- int progress = 0;
- for (int c = 0; c < CFL_ALPHABET_SIZE; c++) {
- int flag = 0;
- RD_STATS rd_stats;
- if (c > 2 && progress < c) break;
- av1_init_rd_stats(&rd_stats);
- for (int i = 0; i < CFL_SIGNS; i++) {
- const int8_t joint_sign = PLANE_SIGN_TO_JOINT_SIGN(plane, pn_sign, i);
- if (i == 0) {
- mbmi->cfl_alpha_idx = (c << CFL_ALPHABET_SIZE_LOG2) + c;
- mbmi->cfl_alpha_signs = joint_sign;
- av1_txfm_rd_in_plane(x, cpi, &rd_stats, best_rd, 0, plane + 1,
- plane_bsize, tx_size, FTXS_NONE, skip_trellis);
- if (rd_stats.rate == INT_MAX) break;
- }
- const int alpha_rate = mode_costs->cfl_cost[joint_sign][plane][c];
- int64_t this_rd =
- RDCOST(x->rdmult, rd_stats.rate + alpha_rate, rd_stats.dist);
- if (this_rd >= best_rd_uv[joint_sign][plane]) continue;
- best_rd_uv[joint_sign][plane] = this_rd;
- best_c[joint_sign][plane] = c;
-#if CONFIG_DEBUG
- best_rate_uv[joint_sign][plane] = rd_stats.rate;
-#endif // CONFIG_DEBUG
- flag = 2;
- if (best_rd_uv[joint_sign][!plane] == INT64_MAX) continue;
- this_rd += mode_rd + best_rd_uv[joint_sign][!plane];
- if (this_rd >= best_rd) continue;
- best_rd = this_rd;
- best_joint_sign = joint_sign;
+ MB_MODE_INFO *const mbmi = xd->mi[0];
+ assert(mbmi->uv_mode == UV_CFL_PRED);
+ const MACROBLOCKD_PLANE *pd = &xd->plane[plane];
+ const BLOCK_SIZE plane_bsize =
+ get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);
+
+ const int dir_ls[2] = { 1, -1 };
+
+ int est_best_cfl_idx = CFL_INDEX_ZERO;
+ if (cfl_search_range < CFL_MAGS_SIZE) {
+ int fast_mode = 1;
+ int start_cfl_idx = CFL_INDEX_ZERO;
+ int64_t best_cfl_cost = cfl_compute_rd(cpi, x, plane, tx_size, plane_bsize,
+ start_cfl_idx, fast_mode, NULL);
+ for (int si = 0; si < 2; ++si) {
+ const int dir = dir_ls[si];
+ for (int i = 1; i < CFL_MAGS_SIZE; ++i) {
+ int cfl_idx = start_cfl_idx + dir * i;
+ if (cfl_idx < 0 || cfl_idx >= CFL_MAGS_SIZE) break;
+ int64_t cfl_cost = cfl_compute_rd(cpi, x, plane, tx_size, plane_bsize,
+ cfl_idx, fast_mode, NULL);
+ if (cfl_cost < best_cfl_cost) {
+ best_cfl_cost = cfl_cost;
+ est_best_cfl_idx = cfl_idx;
+ } else {
+ break;
}
- progress += flag;
}
}
}
- int best_rate_overhead = INT_MAX;
- uint8_t ind = 0;
- if (best_joint_sign >= 0) {
- const int u = best_c[best_joint_sign][CFL_PRED_U];
- const int v = best_c[best_joint_sign][CFL_PRED_V];
- ind = (u << CFL_ALPHABET_SIZE_LOG2) + v;
- best_rate_overhead = mode_costs->cfl_cost[best_joint_sign][CFL_PRED_U][u] +
- mode_costs->cfl_cost[best_joint_sign][CFL_PRED_V][v];
-#if CONFIG_DEBUG
- xd->cfl.rate =
- mode_costs->intra_uv_mode_cost[CFL_ALLOWED][mbmi->mode][UV_CFL_PRED] +
- best_rate_overhead + best_rate_uv[best_joint_sign][CFL_PRED_U] +
- best_rate_uv[best_joint_sign][CFL_PRED_V];
-#endif // CONFIG_DEBUG
- } else {
- best_joint_sign = 0;
+ for (int cfl_idx = 0; cfl_idx < CFL_MAGS_SIZE; ++cfl_idx) {
+ av1_invalid_rd_stats(&cfl_rd_arr[cfl_idx]);
}
- mbmi->cfl_alpha_idx = ind;
- mbmi->cfl_alpha_signs = best_joint_sign;
+ int fast_mode = 0;
+ int start_cfl_idx = est_best_cfl_idx;
+ cfl_compute_rd(cpi, x, plane, tx_size, plane_bsize, start_cfl_idx, fast_mode,
+ &cfl_rd_arr[start_cfl_idx]);
+ for (int si = 0; si < 2; ++si) {
+ const int dir = dir_ls[si];
+ for (int i = 1; i < cfl_search_range; ++i) {
+ int cfl_idx = start_cfl_idx + dir * i;
+ if (cfl_idx < 0 || cfl_idx >= CFL_MAGS_SIZE) break;
+ cfl_compute_rd(cpi, x, plane, tx_size, plane_bsize, cfl_idx, fast_mode,
+ &cfl_rd_arr[cfl_idx]);
+ }
+ }
xd->cfl.use_dc_pred_cache = 0;
xd->cfl.dc_pred_is_cached[0] = 0;
xd->cfl.dc_pred_is_cached[1] = 0;
- return best_rate_overhead;
+}
+
+/*!\brief Pick the optimal parameters for Chroma to Luma (CFL) component
+ *
+ * \ingroup intra_mode_search
+ * \callergraph
+ *
+ * This function will use DCT_DCT followed by computing SATD (sum of absolute
+ * transformed differences) to estimate the RD score and find the best possible
+ * CFL parameter.
+ *
+ * Then the function will apply a full RD search near the best possible CFL
+ * parameter to find the best actual CFL parameter.
+ *
+ * Side effect:
+ * We use ths buffers in x->plane[] and xd->plane[] as throw-away buffers for RD
+ * search.
+ *
+ * \param[in] x Encoder prediction block structure.
+ * \param[in] cpi Top-level encoder instance structure.
+ * \param[in] tx_size Transform size.
+ * \param[in] ref_best_rd Reference best RD.
+ * \param[in] cfl_search_range The search range of full RD search near the
+ * estimated best CFL parameter.
+ *
+ * \param[out] best_rd_stats RD stats of the best CFL parameter
+ * \param[out] best_cfl_alpha_idx Best CFL alpha index
+ * \param[out] best_cfl_alpha_signs Best CFL joint signs
+ *
+ */
+static int cfl_rd_pick_alpha(MACROBLOCK *const x, const AV1_COMP *const cpi,
+ TX_SIZE tx_size, int64_t ref_best_rd,
+ int cfl_search_range, RD_STATS *best_rd_stats,
+ uint8_t *best_cfl_alpha_idx,
+ int8_t *best_cfl_alpha_signs) {
+ assert(cfl_search_range >= 1 && cfl_search_range <= CFL_MAGS_SIZE);
+ const ModeCosts *mode_costs = &x->mode_costs;
+ RD_STATS cfl_rd_arr_u[CFL_MAGS_SIZE];
+ RD_STATS cfl_rd_arr_v[CFL_MAGS_SIZE];
+
+ av1_invalid_rd_stats(best_rd_stats);
+
+ cfl_pick_plane_parameter(cpi, x, 1, tx_size, cfl_search_range, cfl_rd_arr_u);
+ cfl_pick_plane_parameter(cpi, x, 2, tx_size, cfl_search_range, cfl_rd_arr_v);
+
+ for (int ui = 0; ui < CFL_MAGS_SIZE; ++ui) {
+ if (cfl_rd_arr_u[ui].rate == INT_MAX) continue;
+ int cfl_alpha_u;
+ CFL_SIGN_TYPE cfl_sign_u;
+ cfl_idx_to_sign_and_alpha(ui, &cfl_sign_u, &cfl_alpha_u);
+ for (int vi = 0; vi < CFL_MAGS_SIZE; ++vi) {
+ if (cfl_rd_arr_v[vi].rate == INT_MAX) continue;
+ int cfl_alpha_v;
+ CFL_SIGN_TYPE cfl_sign_v;
+ cfl_idx_to_sign_and_alpha(vi, &cfl_sign_v, &cfl_alpha_v);
+ // cfl_sign_u == CFL_SIGN_ZERO && cfl_sign_v == CFL_SIGN_ZERO is not a
+ // valid parameter for CFL
+ if (cfl_sign_u == CFL_SIGN_ZERO && cfl_sign_v == CFL_SIGN_ZERO) continue;
+ int joint_sign = cfl_sign_u * CFL_SIGNS + cfl_sign_v - 1;
+ RD_STATS rd_stats = cfl_rd_arr_u[ui];
+ av1_merge_rd_stats(&rd_stats, &cfl_rd_arr_v[vi]);
+ if (rd_stats.rate != INT_MAX) {
+ rd_stats.rate +=
+ mode_costs->cfl_cost[joint_sign][CFL_PRED_U][cfl_alpha_u];
+ rd_stats.rate +=
+ mode_costs->cfl_cost[joint_sign][CFL_PRED_V][cfl_alpha_v];
+ }
+ av1_rd_cost_update(x->rdmult, &rd_stats);
+ if (rd_stats.rdcost < best_rd_stats->rdcost) {
+ *best_rd_stats = rd_stats;
+ *best_cfl_alpha_idx =
+ (cfl_alpha_u << CFL_ALPHABET_SIZE_LOG2) + cfl_alpha_v;
+ *best_cfl_alpha_signs = joint_sign;
+ }
+ }
+ }
+ if (best_rd_stats->rdcost >= ref_best_rd) {
+ av1_invalid_rd_stats(best_rd_stats);
+ // Set invalid CFL parameters here since the rdcost is not better than
+ // ref_best_rd.
+ *best_cfl_alpha_idx = 0;
+ *best_cfl_alpha_signs = 0;
+ return 0;
+ }
+ return 1;
}
int64_t av1_rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
@@ -532,19 +634,19 @@ int64_t av1_rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
mbmi->uv_mode = mode;
// Init variables for cfl and angle delta
- int cfl_alpha_rate = 0;
+ const SPEED_FEATURES *sf = &cpi->sf;
+ mbmi->angle_delta[PLANE_TYPE_UV] = 0;
if (mode == UV_CFL_PRED) {
if (!is_cfl_allowed(xd) || !intra_mode_cfg->enable_cfl_intra) continue;
assert(!is_directional_mode);
const TX_SIZE uv_tx_size = av1_get_tx_size(AOM_PLANE_U, xd);
- cfl_alpha_rate = cfl_rd_pick_alpha(x, cpi, uv_tx_size, best_rd);
- if (cfl_alpha_rate == INT_MAX) continue;
- }
- mbmi->angle_delta[PLANE_TYPE_UV] = 0;
-
- if (is_directional_mode && av1_use_angle_delta(mbmi->bsize) &&
- intra_mode_cfg->enable_angle_delta) {
- const SPEED_FEATURES *sf = &cpi->sf;
+ if (!cfl_rd_pick_alpha(x, cpi, uv_tx_size, best_rd,
+ sf->intra_sf.cfl_search_range, &tokenonly_rd_stats,
+ &mbmi->cfl_alpha_idx, &mbmi->cfl_alpha_signs)) {
+ continue;
+ }
+ } else if (is_directional_mode && av1_use_angle_delta(mbmi->bsize) &&
+ intra_mode_cfg->enable_angle_delta) {
if (sf->intra_sf.chroma_intra_pruning_with_hog &&
!intra_search_state.dir_mode_skip_mask_ready) {
static const float thresh[2][4] = {
@@ -554,7 +656,7 @@ int64_t av1_rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
const int is_chroma = 1;
const int is_intra_frame = frame_is_intra_only(cm);
prune_intra_mode_with_hog(
- x, bsize,
+ x, bsize, cm->seq_params->sb_size,
thresh[is_intra_frame]
[sf->intra_sf.chroma_intra_pruning_with_hog - 1],
intra_search_state.directional_mode_skip_mask, is_chroma);
@@ -577,17 +679,9 @@ int64_t av1_rd_pick_intra_sbuv_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
}
}
const int mode_cost =
- mode_costs->intra_uv_mode_cost[is_cfl_allowed(xd)][mbmi->mode][mode] +
- cfl_alpha_rate;
+ mode_costs->intra_uv_mode_cost[is_cfl_allowed(xd)][mbmi->mode][mode];
this_rate = tokenonly_rd_stats.rate +
intra_mode_info_cost_uv(cpi, x, mbmi, bsize, mode_cost);
- if (mode == UV_CFL_PRED) {
- assert(is_cfl_allowed(xd) && intra_mode_cfg->enable_cfl_intra);
-#if CONFIG_DEBUG
- if (!xd->lossless[mbmi->segment_id])
- assert(xd->cfl.rate == tokenonly_rd_stats.rate + mode_cost);
-#endif // CONFIG_DEBUG
- }
this_rd = RDCOST(x->rdmult, this_rate, tokenonly_rd_stats.dist);
if (this_rd < best_rd) {
@@ -633,8 +727,7 @@ int av1_search_palette_mode(IntraModeSearchState *intra_search_state,
const int num_planes = av1_num_planes(cm);
MACROBLOCKD *const xd = &x->e_mbd;
int rate2 = 0;
- int64_t distortion2 = 0, best_rd_palette = best_rd, this_rd,
- best_model_rd_palette = INT64_MAX;
+ int64_t distortion2 = 0, best_rd_palette = best_rd, this_rd;
int skippable = 0;
uint8_t *const best_palette_color_map =
x->palette_buffer->best_palette_color_map;
@@ -656,11 +749,11 @@ int av1_search_palette_mode(IntraModeSearchState *intra_search_state,
RD_STATS rd_stats_y;
av1_invalid_rd_stats(&rd_stats_y);
- av1_rd_pick_palette_intra_sby(
- cpi, x, bsize, intra_mode_cost[DC_PRED], &best_mbmi_palette,
- best_palette_color_map, &best_rd_palette, &best_model_rd_palette,
- &rd_stats_y.rate, NULL, &rd_stats_y.dist, &rd_stats_y.skip_txfm, NULL,
- ctx, best_blk_skip, best_tx_type_map);
+ av1_rd_pick_palette_intra_sby(cpi, x, bsize, intra_mode_cost[DC_PRED],
+ &best_mbmi_palette, best_palette_color_map,
+ &best_rd_palette, &rd_stats_y.rate, NULL,
+ &rd_stats_y.dist, &rd_stats_y.skip_txfm, NULL,
+ ctx, best_blk_skip, best_tx_type_map);
if (rd_stats_y.rate == INT_MAX || pmi->palette_size[0] == 0) {
this_rd_cost->rdcost = INT64_MAX;
return skippable;
@@ -766,81 +859,6 @@ static AOM_INLINE int intra_block_yrd(const AV1_COMP *const cpi, MACROBLOCK *x,
return 0;
}
-/*!\brief Search for the best angle delta for luma prediction
- *
- * \ingroup intra_mode_search
- * \callergraph
- * Given a luma directional intra prediction mode, this function will try to
- * estimate the best delta_angle.
- *
- * \return Returns the new rdcost of the best intra angle.
- */
-static int64_t rd_pick_intra_angle_sby(const AV1_COMP *const cpi, MACROBLOCK *x,
- int *rate, RD_STATS *rd_stats,
- BLOCK_SIZE bsize, int mode_cost,
- int64_t best_rd, int64_t *best_model_rd,
- int skip_model_rd_for_zero_deg) {
- MACROBLOCKD *xd = &x->e_mbd;
- MB_MODE_INFO *mbmi = xd->mi[0];
- assert(!is_inter_block(mbmi));
-
- int best_angle_delta = 0;
- int64_t rd_cost[2 * (MAX_ANGLE_DELTA + 2)];
- TX_SIZE best_tx_size = mbmi->tx_size;
- uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
- uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
-
- for (int i = 0; i < 2 * (MAX_ANGLE_DELTA + 2); ++i) rd_cost[i] = INT64_MAX;
-
- int first_try = 1;
- for (int angle_delta = 0; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {
- for (int i = 0; i < 2; ++i) {
- const int64_t best_rd_in =
- (best_rd == INT64_MAX) ? INT64_MAX
- : (best_rd + (best_rd >> (first_try ? 3 : 5)));
- const int64_t this_rd = calc_rd_given_intra_angle(
- cpi, x, bsize, mode_cost, best_rd_in, (1 - 2 * i) * angle_delta,
- MAX_ANGLE_DELTA, rate, rd_stats, &best_angle_delta, &best_tx_size,
- &best_rd, best_model_rd, best_tx_type_map, best_blk_skip,
- (skip_model_rd_for_zero_deg & !angle_delta));
- rd_cost[2 * angle_delta + i] = this_rd;
- if (first_try && this_rd == INT64_MAX) return best_rd;
- first_try = 0;
- if (angle_delta == 0) {
- rd_cost[1] = this_rd;
- break;
- }
- }
- }
-
- assert(best_rd != INT64_MAX);
- for (int angle_delta = 1; angle_delta <= MAX_ANGLE_DELTA; angle_delta += 2) {
- for (int i = 0; i < 2; ++i) {
- int skip_search = 0;
- const int64_t rd_thresh = best_rd + (best_rd >> 5);
- if (rd_cost[2 * (angle_delta + 1) + i] > rd_thresh &&
- rd_cost[2 * (angle_delta - 1) + i] > rd_thresh)
- skip_search = 1;
- if (!skip_search) {
- calc_rd_given_intra_angle(
- cpi, x, bsize, mode_cost, best_rd, (1 - 2 * i) * angle_delta,
- MAX_ANGLE_DELTA, rate, rd_stats, &best_angle_delta, &best_tx_size,
- &best_rd, best_model_rd, best_tx_type_map, best_blk_skip, 0);
- }
- }
- }
-
- if (rd_stats->rate != INT_MAX) {
- mbmi->tx_size = best_tx_size;
- mbmi->angle_delta[PLANE_TYPE_Y] = best_angle_delta;
- const int n4 = bsize_to_num_blk(bsize);
- memcpy(x->txfm_search_info.blk_skip, best_blk_skip,
- sizeof(best_blk_skip[0]) * n4);
- av1_copy_array(xd->tx_type_map, best_tx_type_map, n4);
- }
- return best_rd;
-}
-
/*!\brief Search for the best filter_intra mode when coding inter frame.
*
* \ingroup intra_mode_search
@@ -909,11 +927,14 @@ static INLINE void handle_filter_intra_mode(const AV1_COMP *cpi, MACROBLOCK *x,
}
}
+// Evaluate a given luma intra-mode in inter frames.
int av1_handle_intra_y_mode(IntraModeSearchState *intra_search_state,
const AV1_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, unsigned int ref_frame_cost,
const PICK_MODE_CONTEXT *ctx, RD_STATS *rd_stats_y,
- int64_t best_rd, int *mode_cost_y, int64_t *rd_y) {
+ int64_t best_rd, int *mode_cost_y, int64_t *rd_y,
+ int64_t *best_model_rd,
+ int64_t top_intra_model_rd[]) {
const AV1_COMMON *cm = &cpi->common;
const SPEED_FEATURES *const sf = &cpi->sf;
MACROBLOCKD *const xd = &x->e_mbd;
@@ -928,7 +949,7 @@ int av1_handle_intra_y_mode(IntraModeSearchState *intra_search_state,
int known_rate = mode_cost;
const int intra_cost_penalty = av1_get_intra_cost_penalty(
cm->quant_params.base_qindex, cm->quant_params.y_dc_delta_q,
- cm->seq_params.bit_depth);
+ cm->seq_params->bit_depth);
if (mode != DC_PRED && mode != PAETH_PRED) known_rate += intra_cost_penalty;
known_rate += AOMMIN(mode_costs->skip_txfm_cost[skip_ctx][0],
@@ -946,32 +967,34 @@ int av1_handle_intra_y_mode(IntraModeSearchState *intra_search_state,
!intra_search_state->dir_mode_skip_mask_ready) {
const float thresh[4] = { -1.2f, 0.0f, 0.0f, 1.2f };
const int is_chroma = 0;
- prune_intra_mode_with_hog(
- x, bsize, thresh[sf->intra_sf.intra_pruning_with_hog - 1],
- intra_search_state->directional_mode_skip_mask, is_chroma);
+ prune_intra_mode_with_hog(x, bsize, cm->seq_params->sb_size,
+ thresh[sf->intra_sf.intra_pruning_with_hog - 1],
+ intra_search_state->directional_mode_skip_mask,
+ is_chroma);
intra_search_state->dir_mode_skip_mask_ready = 1;
}
if (intra_search_state->directional_mode_skip_mask[mode]) return 0;
- av1_init_rd_stats(rd_stats_y);
- rd_stats_y->rate = INT_MAX;
- int64_t model_rd = INT64_MAX;
- int rate_dummy;
- rd_pick_intra_angle_sby(cpi, x, &rate_dummy, rd_stats_y, bsize, mode_cost,
- best_rd, &model_rd, 0);
-
- } else {
- av1_init_rd_stats(rd_stats_y);
- mbmi->angle_delta[PLANE_TYPE_Y] = 0;
- av1_pick_uniform_tx_size_type_yrd(cpi, x, rd_stats_y, bsize, best_rd);
}
+ const TX_SIZE tx_size = AOMMIN(TX_32X32, max_txsize_lookup[bsize]);
+ const int64_t this_model_rd =
+ intra_model_rd(&cpi->common, x, 0, bsize, tx_size, /*use_hadamard=*/1);
+ if (prune_intra_y_mode(this_model_rd, best_model_rd, top_intra_model_rd,
+ sf->intra_sf.top_intra_model_count_allowed))
+ return 0;
+ av1_init_rd_stats(rd_stats_y);
+ av1_pick_uniform_tx_size_type_yrd(cpi, x, rd_stats_y, bsize, best_rd);
// Pick filter intra modes.
if (mode == DC_PRED && av1_filter_intra_allowed_bsize(cm, bsize)) {
int try_filter_intra = 1;
int64_t best_rd_so_far = INT64_MAX;
if (rd_stats_y->rate != INT_MAX) {
- const int tmp_rate = rd_stats_y->rate +
- mode_costs->filter_intra_cost[bsize][0] + mode_cost;
+ // best_rd_so_far is the rdcost of DC_PRED without using filter_intra.
+ // Later, in filter intra search, best_rd_so_far is used for comparison.
+ mbmi->filter_intra_mode_info.use_filter_intra = 0;
+ const int tmp_rate =
+ rd_stats_y->rate +
+ intra_mode_info_cost_y(cpi, x, mbmi, bsize, mode_cost);
best_rd_so_far = RDCOST(x->rdmult, tmp_rate, rd_stats_y->dist);
try_filter_intra = (best_rd_so_far / 2) <= best_rd;
}
@@ -1095,7 +1118,8 @@ int64_t av1_rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
const float thresh[4] = { -1.2f, -1.2f, -0.6f, 0.4f };
const int is_chroma = 0;
prune_intra_mode_with_hog(
- x, bsize, thresh[cpi->sf.intra_sf.intra_pruning_with_hog - 1],
+ x, bsize, cpi->common.seq_params->sb_size,
+ thresh[cpi->sf.intra_sf.intra_pruning_with_hog - 1],
directional_mode_skip_mask, is_chroma);
}
mbmi->filter_intra_mode_info.use_filter_intra = 0;
@@ -1105,16 +1129,21 @@ int64_t av1_rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
set_mode_eval_params(cpi, x, MODE_EVAL);
MB_MODE_INFO best_mbmi = *mbmi;
- av1_zero(x->winner_mode_stats);
+ av1_zero_array(x->winner_mode_stats, MAX_WINNER_MODE_COUNT_INTRA);
x->winner_mode_count = 0;
// Searches the intra-modes except for intrabc, palette, and filter_intra.
- for (int mode_idx = INTRA_MODE_START; mode_idx < INTRA_MODE_END; ++mode_idx) {
+ int64_t top_intra_model_rd[TOP_INTRA_MODEL_COUNT];
+ for (int i = 0; i < TOP_INTRA_MODEL_COUNT; i++) {
+ top_intra_model_rd[i] = INT64_MAX;
+ }
+ for (int mode_idx = INTRA_MODE_START; mode_idx < LUMA_MODE_COUNT;
+ ++mode_idx) {
+ set_y_mode_and_delta_angle(mode_idx, mbmi);
RD_STATS this_rd_stats;
int this_rate, this_rate_tokenonly, s;
int is_diagonal_mode;
int64_t this_distortion, this_rd;
- mbmi->mode = intra_rd_search_mode_order[mode_idx];
is_diagonal_mode = av1_is_diagonal_mode(mbmi->mode);
if (is_diagonal_mode && !cpi->oxcf.intra_mode_cfg.enable_diagonal_intra)
@@ -1132,36 +1161,43 @@ int64_t av1_rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
continue;
// The functionality of filter intra modes and smooth prediction
- // overlap. Retain the smooth prediction if filter intra modes are
- // disabled.
+ // overlap. Hence smooth prediction is pruned only if all the
+ // filter intra modes are enabled.
if (cpi->sf.intra_sf.disable_smooth_intra &&
- !cpi->sf.intra_sf.disable_filter_intra && mbmi->mode == SMOOTH_PRED)
+ cpi->sf.intra_sf.prune_filter_intra_level == 0 &&
+ mbmi->mode == SMOOTH_PRED)
continue;
if (!cpi->oxcf.intra_mode_cfg.enable_paeth_intra &&
mbmi->mode == PAETH_PRED)
continue;
- mbmi->angle_delta[PLANE_TYPE_Y] = 0;
+
+ // Skip the evaluation of modes that do not match with the winner mode in
+ // x->mb_mode_cache.
+ if (x->use_mb_mode_cache && mbmi->mode != x->mb_mode_cache->mode) continue;
is_directional_mode = av1_is_directional_mode(mbmi->mode);
if (is_directional_mode && directional_mode_skip_mask[mbmi->mode]) continue;
- if (is_directional_mode && av1_use_angle_delta(bsize) &&
- cpi->oxcf.intra_mode_cfg.enable_angle_delta) {
- // Searches through the best angle_delta if this option is available.
- this_rd_stats.rate = INT_MAX;
- rd_pick_intra_angle_sby(cpi, x, &this_rate, &this_rd_stats, bsize,
- bmode_costs[mbmi->mode], best_rd, &best_model_rd,
- 1);
- } else {
- if (model_intra_yrd_and_prune(cpi, x, bsize, &best_model_rd)) {
- continue;
- }
+ if (is_directional_mode && av1_use_angle_delta(bsize) == 0 &&
+ mbmi->angle_delta[PLANE_TYPE_Y] != 0)
+ continue;
- // Builds the actual prediction. The prediction from
- // model_intra_yrd_and_prune was just an estimation that did not take into
- // account the effect of txfm pipeline, so we need to redo it for real
- // here.
- av1_pick_uniform_tx_size_type_yrd(cpi, x, &this_rd_stats, bsize, best_rd);
- }
+ // Use intra_y_mode_mask speed feature to skip intra mode evaluation.
+ if (!(cpi->sf.intra_sf.intra_y_mode_mask[max_txsize_lookup[bsize]] &
+ (1 << mbmi->mode)))
+ continue;
+
+ const TX_SIZE tx_size = AOMMIN(TX_32X32, max_txsize_lookup[bsize]);
+ const int64_t this_model_rd =
+ intra_model_rd(&cpi->common, x, 0, bsize, tx_size, /*use_hadamard=*/1);
+ if (prune_intra_y_mode(this_model_rd, &best_model_rd, top_intra_model_rd,
+ cpi->sf.intra_sf.top_intra_model_count_allowed))
+ continue;
+
+ // Builds the actual prediction. The prediction from
+ // model_intra_yrd_and_prune was just an estimation that did not take into
+ // account the effect of txfm pipeline, so we need to redo it for real
+ // here.
+ av1_pick_uniform_tx_size_type_yrd(cpi, x, &this_rd_stats, bsize, best_rd);
this_rate_tokenonly = this_rd_stats.rate;
this_distortion = this_rd_stats.dist;
s = this_rd_stats.skip_txfm;
@@ -1204,16 +1240,16 @@ int64_t av1_rd_pick_intra_sby_mode(const AV1_COMP *const cpi, MACROBLOCK *x,
if (try_palette) {
av1_rd_pick_palette_intra_sby(
cpi, x, bsize, bmode_costs[DC_PRED], &best_mbmi, best_palette_color_map,
- &best_rd, &best_model_rd, rate, rate_tokenonly, distortion, skippable,
- &beat_best_rd, ctx, ctx->blk_skip, ctx->tx_type_map);
+ &best_rd, rate, rate_tokenonly, distortion, skippable, &beat_best_rd,
+ ctx, ctx->blk_skip, ctx->tx_type_map);
}
// Searches filter_intra
- if (beat_best_rd && av1_filter_intra_allowed_bsize(&cpi->common, bsize) &&
- !cpi->sf.intra_sf.disable_filter_intra) {
+ if (beat_best_rd && av1_filter_intra_allowed_bsize(&cpi->common, bsize)) {
if (rd_pick_filter_intra_sby(cpi, x, rate, rate_tokenonly, distortion,
skippable, bsize, bmode_costs[DC_PRED],
- &best_rd, &best_model_rd, ctx)) {
+ best_mbmi.mode, &best_rd, &best_model_rd,
+ ctx)) {
best_mbmi = *mbmi;
}
}
diff --git a/third_party/libaom/source/libaom/av1/encoder/intra_mode_search.h b/third_party/libaom/source/libaom/av1/encoder/intra_mode_search.h
index cc2a87b098..5a52440909 100644
--- a/third_party/libaom/source/libaom/av1/encoder/intra_mode_search.h
+++ b/third_party/libaom/source/libaom/av1/encoder/intra_mode_search.h
@@ -95,6 +95,9 @@ typedef struct IntraModeSearchState {
* \param[out] mode_cost_y The cost needed to signal the current
* intra mode.
* \param[out] rd_y The rdcost of the chosen mode.
+ * \param[in] best_model_rd Best model RD seen for this block so far
+ * \param[in] top_intra_model_rd Top intra model RD seen for this
+ * block so far.
*
* \return Returns 1 if a valid intra mode is found, 0 otherwise.
* The corresponding values in x->e_mbd.mi[0], rd_stats_y, mode_cost_y, and
@@ -106,7 +109,9 @@ int av1_handle_intra_y_mode(IntraModeSearchState *intra_search_state,
const AV1_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, unsigned int ref_frame_cost,
const PICK_MODE_CONTEXT *ctx, RD_STATS *rd_stats_y,
- int64_t best_rd, int *mode_cost_y, int64_t *rd_y);
+ int64_t best_rd, int *mode_cost_y, int64_t *rd_y,
+ int64_t *best_model_rd,
+ int64_t top_intra_model_rd[]);
/*!\brief Search through all chroma intra-modes for inter frames.
*
@@ -262,6 +267,29 @@ static AOM_INLINE void init_intra_mode_search_state(
intra_search_state->rate_uv_intra = INT_MAX;
}
+/*! \brief set the luma intra mode and delta angles for a given mode index.
+ * The total number of luma intra modes is LUMA_MODE_COUNT = 61.
+ * The first 13 modes are from DC_PRED to PAETH_PRED, followed by directional
+ * modes. Each of the main 8 directional modes has 6 = MAX_ANGLE_DELTA * 2
+ * delta angles.
+ * \param[in] mode_idx mode index in intra mode decision
+ * process.
+ * \param[in] mbmi Pointer to structure holding
+ * the mode info for the current macroblock.
+ */
+void set_y_mode_and_delta_angle(const int mode_idx, MB_MODE_INFO *const mbmi);
+
+/*! \brief prune luma intra mode based on the model rd.
+ * \param[in] this_model_rd model rd for current mode.
+ * \param[in] best_model_rd Best model RD seen for this block so
+ * far.
+ * \param[in] top_intra_model_rd Top intra model RD seen for this
+ * block so far.
+ * \param[in] model_cnt_allowed The number of top intra model RD allowed.
+ */
+int prune_intra_y_mode(int64_t this_model_rd, int64_t *best_model_rd,
+ int64_t top_intra_model_rd[], int model_cnt_allowed);
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/third_party/libaom/source/libaom/av1/encoder/intra_mode_search_utils.h b/third_party/libaom/source/libaom/av1/encoder/intra_mode_search_utils.h
index 532482896a..0bf77ac9f5 100644
--- a/third_party/libaom/source/libaom/av1/encoder/intra_mode_search_utils.h
+++ b/third_party/libaom/source/libaom/av1/encoder/intra_mode_search_utils.h
@@ -22,8 +22,10 @@
#include "av1/common/reconintra.h"
#include "av1/encoder/encoder.h"
+#include "av1/encoder/encodeframe.h"
#include "av1/encoder/model_rd.h"
#include "av1/encoder/palette.h"
+#include "av1/encoder/hybrid_fwd_txfm.h"
#ifdef __cplusplus
extern "C" {
@@ -134,8 +136,13 @@ static AOM_INLINE int get_hist_bin_idx(int dx, int dy) {
}
#undef FIX_PREC_BITS
-static AOM_INLINE void generate_hog(const uint8_t *src, int stride, int rows,
- int cols, float *hist) {
+// Normalizes the hog data.
+static AOM_INLINE void normalize_hog(float total, float *hist) {
+ for (int i = 0; i < BINS; ++i) hist[i] /= total;
+}
+
+static AOM_INLINE void lowbd_generate_hog(const uint8_t *src, int stride,
+ int rows, int cols, float *hist) {
float total = 0.1f;
src += stride;
for (int r = 1; r < rows - 1; ++r) {
@@ -144,7 +151,7 @@ static AOM_INLINE void generate_hog(const uint8_t *src, int stride, int rows,
const uint8_t *below = &src[c + stride];
const uint8_t *left = &src[c - 1];
const uint8_t *right = &src[c + 1];
- // Calculate gradient using Sobel fitlers.
+ // Calculate gradient using Sobel filters.
const int dx = (right[-stride] + 2 * right[0] + right[stride]) -
(left[-stride] + 2 * left[0] + left[stride]);
const int dy = (below[-1] + 2 * below[0] + below[1]) -
@@ -165,13 +172,49 @@ static AOM_INLINE void generate_hog(const uint8_t *src, int stride, int rows,
src += stride;
}
- for (int i = 0; i < BINS; ++i) hist[i] /= total;
+ normalize_hog(total, hist);
}
-static AOM_INLINE void generate_hog_hbd(const uint8_t *src8, int stride,
- int rows, int cols, float *hist) {
+// Computes and stores pixel level gradient information of a given superblock
+// for LBD encode.
+static AOM_INLINE void lowbd_compute_gradient_info_sb(MACROBLOCK *const x,
+ BLOCK_SIZE sb_size,
+ PLANE_TYPE plane) {
+ PixelLevelGradientInfo *const grad_info_sb =
+ x->pixel_gradient_info + plane * MAX_SB_SQUARE;
+ const uint8_t *src = x->plane[plane].src.buf;
+ const int stride = x->plane[plane].src.stride;
+ const int ss_x = x->e_mbd.plane[plane].subsampling_x;
+ const int ss_y = x->e_mbd.plane[plane].subsampling_y;
+ const int sb_height = block_size_high[sb_size] >> ss_y;
+ const int sb_width = block_size_wide[sb_size] >> ss_x;
+ src += stride;
+ for (int r = 1; r < sb_height - 1; ++r) {
+ for (int c = 1; c < sb_width - 1; ++c) {
+ const uint8_t *above = &src[c - stride];
+ const uint8_t *below = &src[c + stride];
+ const uint8_t *left = &src[c - 1];
+ const uint8_t *right = &src[c + 1];
+ // Calculate gradient using Sobel filters.
+ const int dx = (right[-stride] + 2 * right[0] + right[stride]) -
+ (left[-stride] + 2 * left[0] + left[stride]);
+ const int dy = (below[-1] + 2 * below[0] + below[1]) -
+ (above[-1] + 2 * above[0] + above[1]);
+ grad_info_sb[r * sb_width + c].is_dx_zero = (dx == 0);
+ grad_info_sb[r * sb_width + c].abs_dx_abs_dy_sum =
+ (uint16_t)(abs(dx) + abs(dy));
+ grad_info_sb[r * sb_width + c].hist_bin_idx =
+ (dx != 0) ? get_hist_bin_idx(dx, dy) : -1;
+ }
+ src += stride;
+ }
+}
+
+#if CONFIG_AV1_HIGHBITDEPTH
+static AOM_INLINE void highbd_generate_hog(const uint8_t *src8, int stride,
+ int rows, int cols, float *hist) {
float total = 0.1f;
- uint16_t *src = CONVERT_TO_SHORTPTR(src8);
+ const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
src += stride;
for (int r = 1; r < rows - 1; ++r) {
for (int c = 1; c < cols - 1; ++c) {
@@ -179,7 +222,7 @@ static AOM_INLINE void generate_hog_hbd(const uint8_t *src8, int stride,
const uint16_t *below = &src[c + stride];
const uint16_t *left = &src[c - 1];
const uint16_t *right = &src[c + 1];
- // Calculate gradient using Sobel fitlers.
+ // Calculate gradient using Sobel filters.
const int dx = (right[-stride] + 2 * right[0] + right[stride]) -
(left[-stride] + 2 * left[0] + left[stride]);
const int dy = (below[-1] + 2 * below[0] + below[1]) -
@@ -200,11 +243,151 @@ static AOM_INLINE void generate_hog_hbd(const uint8_t *src8, int stride,
src += stride;
}
- for (int i = 0; i < BINS; ++i) hist[i] /= total;
+ normalize_hog(total, hist);
+}
+
+// Computes and stores pixel level gradient information of a given superblock
+// for HBD encode.
+static AOM_INLINE void highbd_compute_gradient_info_sb(MACROBLOCK *const x,
+ BLOCK_SIZE sb_size,
+ PLANE_TYPE plane) {
+ PixelLevelGradientInfo *const grad_info_sb =
+ x->pixel_gradient_info + plane * MAX_SB_SQUARE;
+ const uint16_t *src = CONVERT_TO_SHORTPTR(x->plane[plane].src.buf);
+ const int stride = x->plane[plane].src.stride;
+ const int ss_x = x->e_mbd.plane[plane].subsampling_x;
+ const int ss_y = x->e_mbd.plane[plane].subsampling_y;
+ const int sb_height = block_size_high[sb_size] >> ss_y;
+ const int sb_width = block_size_wide[sb_size] >> ss_x;
+ src += stride;
+ for (int r = 1; r < sb_height - 1; ++r) {
+ for (int c = 1; c < sb_width - 1; ++c) {
+ const uint16_t *above = &src[c - stride];
+ const uint16_t *below = &src[c + stride];
+ const uint16_t *left = &src[c - 1];
+ const uint16_t *right = &src[c + 1];
+ // Calculate gradient using Sobel filters.
+ const int dx = (right[-stride] + 2 * right[0] + right[stride]) -
+ (left[-stride] + 2 * left[0] + left[stride]);
+ const int dy = (below[-1] + 2 * below[0] + below[1]) -
+ (above[-1] + 2 * above[0] + above[1]);
+ grad_info_sb[r * sb_width + c].is_dx_zero = (dx == 0);
+ grad_info_sb[r * sb_width + c].abs_dx_abs_dy_sum =
+ (uint16_t)(abs(dx) + abs(dy));
+ grad_info_sb[r * sb_width + c].hist_bin_idx =
+ (dx != 0) ? get_hist_bin_idx(dx, dy) : -1;
+ }
+ src += stride;
+ }
+}
+#endif // CONFIG_AV1_HIGHBITDEPTH
+
+static AOM_INLINE void generate_hog(const uint8_t *src8, int stride, int rows,
+ int cols, float *hist, int highbd) {
+#if CONFIG_AV1_HIGHBITDEPTH
+ if (highbd) {
+ highbd_generate_hog(src8, stride, rows, cols, hist);
+ return;
+ }
+#else
+ (void)highbd;
+#endif // CONFIG_AV1_HIGHBITDEPTH
+ lowbd_generate_hog(src8, stride, rows, cols, hist);
+}
+
+static AOM_INLINE void compute_gradient_info_sb(MACROBLOCK *const x,
+ BLOCK_SIZE sb_size,
+ PLANE_TYPE plane) {
+#if CONFIG_AV1_HIGHBITDEPTH
+ if (is_cur_buf_hbd(&x->e_mbd)) {
+ highbd_compute_gradient_info_sb(x, sb_size, plane);
+ return;
+ }
+#endif // CONFIG_AV1_HIGHBITDEPTH
+ lowbd_compute_gradient_info_sb(x, sb_size, plane);
+}
+
+// Function to generate pixel level gradient information for a given superblock.
+// Sets the flags 'is_sb_gradient_cached' for the specific plane-type if
+// gradient info is generated for the same.
+static AOM_INLINE void produce_gradients_for_sb(AV1_COMP *cpi, MACROBLOCK *x,
+ BLOCK_SIZE sb_size, int mi_row,
+ int mi_col) {
+ const SPEED_FEATURES *sf = &cpi->sf;
+ // Initialise flags related to hog data caching.
+ x->is_sb_gradient_cached[PLANE_TYPE_Y] = false;
+ x->is_sb_gradient_cached[PLANE_TYPE_UV] = false;
+
+ // SB level caching of gradient data may not help in speedup for the following
+ // cases:
+ // (1) Inter frames (due to early intra gating)
+ // (2) When partition_search_type is not SEARCH_PARTITION
+ // Hence, gradient data is computed at block level in such cases.
+
+ if (!frame_is_intra_only(&cpi->common) ||
+ sf->part_sf.partition_search_type != SEARCH_PARTITION)
+ return;
+
+ const int num_planes = av1_num_planes(&cpi->common);
+
+ av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, sb_size);
+
+ if (sf->intra_sf.intra_pruning_with_hog) {
+ compute_gradient_info_sb(x, sb_size, PLANE_TYPE_Y);
+ x->is_sb_gradient_cached[PLANE_TYPE_Y] = true;
+ }
+ if (sf->intra_sf.chroma_intra_pruning_with_hog && num_planes > 1) {
+ compute_gradient_info_sb(x, sb_size, PLANE_TYPE_UV);
+ x->is_sb_gradient_cached[PLANE_TYPE_UV] = true;
+ }
+}
+
+// Reuses the pixel level gradient data generated at superblock level for block
+// level histogram computation.
+static AOM_INLINE void generate_hog_using_gradient_cache(const MACROBLOCK *x,
+ int rows, int cols,
+ BLOCK_SIZE sb_size,
+ PLANE_TYPE plane,
+ float *hist) {
+ float total = 0.1f;
+ const int ss_x = x->e_mbd.plane[plane].subsampling_x;
+ const int ss_y = x->e_mbd.plane[plane].subsampling_y;
+ const int sb_width = block_size_wide[sb_size] >> ss_x;
+
+ // Derive the offset from the starting of the superblock in order to locate
+ // the block level gradient data in the cache.
+ const int mi_row_in_sb = x->e_mbd.mi_row & (mi_size_high[sb_size] - 1);
+ const int mi_col_in_sb = x->e_mbd.mi_col & (mi_size_wide[sb_size] - 1);
+ const int block_offset_in_grad_cache =
+ sb_width * (mi_row_in_sb << (MI_SIZE_LOG2 - ss_y)) +
+ (mi_col_in_sb << (MI_SIZE_LOG2 - ss_x));
+ const PixelLevelGradientInfo *grad_info_blk = x->pixel_gradient_info +
+ plane * MAX_SB_SQUARE +
+ block_offset_in_grad_cache;
+
+ // Retrieve the cached gradient information and generate the histogram.
+ for (int r = 1; r < rows - 1; ++r) {
+ for (int c = 1; c < cols - 1; ++c) {
+ const uint16_t abs_dx_abs_dy_sum =
+ grad_info_blk[r * sb_width + c].abs_dx_abs_dy_sum;
+ if (!abs_dx_abs_dy_sum) continue;
+ total += abs_dx_abs_dy_sum;
+ const bool is_dx_zero = grad_info_blk[r * sb_width + c].is_dx_zero;
+ if (is_dx_zero) {
+ hist[0] += abs_dx_abs_dy_sum >> 1;
+ hist[BINS - 1] += abs_dx_abs_dy_sum >> 1;
+ } else {
+ const int8_t idx = grad_info_blk[r * sb_width + c].hist_bin_idx;
+ assert(idx >= 0 && idx < BINS);
+ hist[idx] += abs_dx_abs_dy_sum;
+ }
+ }
+ }
+ normalize_hog(total, hist);
}
static INLINE void collect_hog_data(const MACROBLOCK *x, BLOCK_SIZE bsize,
- int plane, float *hog) {
+ BLOCK_SIZE sb_size, int plane, float *hog) {
const MACROBLOCKD *xd = &x->e_mbd;
const struct macroblockd_plane *const pd = &xd->plane[plane];
const int ss_x = pd->subsampling_x;
@@ -217,12 +400,15 @@ static INLINE void collect_hog_data(const MACROBLOCK *x, BLOCK_SIZE bsize,
const int cols =
((xd->mb_to_right_edge >= 0) ? bw : (xd->mb_to_right_edge >> 3) + bw) >>
ss_x;
- const int src_stride = x->plane[plane].src.stride;
- const uint8_t *src = x->plane[plane].src.buf;
- if (is_cur_buf_hbd(xd)) {
- generate_hog_hbd(src, src_stride, rows, cols, hog);
+
+ // If gradient data is already generated at SB level, reuse the cached data.
+ // Otherwise, compute the data.
+ if (x->is_sb_gradient_cached[plane]) {
+ generate_hog_using_gradient_cache(x, rows, cols, sb_size, plane, hog);
} else {
- generate_hog(src, src_stride, rows, cols, hog);
+ const uint8_t *src = x->plane[plane].src.buf;
+ const int src_stride = x->plane[plane].src.stride;
+ generate_hog(src, src_stride, rows, cols, hog, is_cur_buf_hbd(xd));
}
// Scale the hog so the luma and chroma are on the same scale
@@ -232,13 +418,13 @@ static INLINE void collect_hog_data(const MACROBLOCK *x, BLOCK_SIZE bsize,
}
static AOM_INLINE void prune_intra_mode_with_hog(
- const MACROBLOCK *x, BLOCK_SIZE bsize, float th,
+ const MACROBLOCK *x, BLOCK_SIZE bsize, BLOCK_SIZE sb_size, float th,
uint8_t *directional_mode_skip_mask, int is_chroma) {
aom_clear_system_state();
const int plane = is_chroma ? AOM_PLANE_U : AOM_PLANE_Y;
float hist[BINS] = { 0.0f };
- collect_hog_data(x, bsize, plane, hist);
+ collect_hog_data(x, bsize, sb_size, plane, hist);
// Make prediction for each of the mode
float scores[DIRECTIONAL_MODES] = { 0.0f };
@@ -305,7 +491,7 @@ static AOM_INLINE int intra_mode_info_cost_y(const AV1_COMP *cpi,
const int n_cache = av1_get_palette_cache(xd, 0, color_cache);
palette_mode_cost +=
av1_palette_color_cost_y(&mbmi->palette_mode_info, color_cache,
- n_cache, cpi->common.seq_params.bit_depth);
+ n_cache, cpi->common.seq_params->bit_depth);
palette_mode_cost +=
av1_cost_color_map(x, 0, bsize, mbmi->tx_size, PALETTE_MAP);
total_rate += palette_mode_cost;
@@ -365,7 +551,7 @@ static AOM_INLINE int intra_mode_info_cost_uv(const AV1_COMP *cpi,
uint16_t color_cache[2 * PALETTE_MAX_SIZE];
const int n_cache = av1_get_palette_cache(xd, 1, color_cache);
palette_mode_cost += av1_palette_color_cost_uv(
- pmi, color_cache, n_cache, cpi->common.seq_params.bit_depth);
+ pmi, color_cache, n_cache, cpi->common.seq_params->bit_depth);
palette_mode_cost +=
av1_cost_color_map(x, 1, bsize, mbmi->tx_size, PALETTE_MAP);
total_rate += palette_mode_cost;
@@ -385,11 +571,11 @@ static AOM_INLINE int intra_mode_info_cost_uv(const AV1_COMP *cpi,
/*!\cond */
// Makes a quick intra prediction and estimate the rdcost with a model without
// going through the whole txfm/quantize/itxfm process.
-static int64_t intra_model_rd(const AV1_COMP *const cpi, MACROBLOCK *const x,
+static int64_t intra_model_rd(const AV1_COMMON *cm, MACROBLOCK *const x,
int plane, BLOCK_SIZE plane_bsize,
- TX_SIZE tx_size) {
- const AV1_COMMON *cm = &cpi->common;
+ TX_SIZE tx_size, int use_hadamard) {
MACROBLOCKD *const xd = &x->e_mbd;
+ const BitDepthInfo bd_info = get_bit_depth_info(xd);
int row, col;
assert(!is_inter_block(xd->mi[0]));
const int stepr = tx_size_high_unit[tx_size];
@@ -405,27 +591,16 @@ static int64_t intra_model_rd(const AV1_COMP *const cpi, MACROBLOCK *const x,
for (row = 0; row < max_blocks_high; row += stepr) {
for (col = 0; col < max_blocks_wide; col += stepc) {
av1_predict_intra_block_facade(cm, xd, plane, col, row, tx_size);
+ // Here we use p->src_diff and p->coeff as temporary buffers for
+ // prediction residue and transform coefficients. The buffers are only
+ // used in this for loop, therefore we don't need to properly add offset
+ // to the buffers.
av1_subtract_block(
- xd, txbh, txbw, p->src_diff, block_size_wide[plane_bsize],
+ bd_info, txbh, txbw, p->src_diff, block_size_wide[plane_bsize],
p->src.buf + (((row * p->src.stride) + col) << 2), p->src.stride,
pd->dst.buf + (((row * pd->dst.stride) + col) << 2), pd->dst.stride);
- switch (tx_size) {
- case TX_4X4:
- aom_hadamard_4x4(p->src_diff, block_size_wide[plane_bsize], p->coeff);
- break;
- case TX_8X8:
- aom_hadamard_8x8(p->src_diff, block_size_wide[plane_bsize], p->coeff);
- break;
- case TX_16X16:
- aom_hadamard_16x16(p->src_diff, block_size_wide[plane_bsize],
- p->coeff);
- break;
- case TX_32X32:
- aom_hadamard_32x32(p->src_diff, block_size_wide[plane_bsize],
- p->coeff);
- break;
- default: assert(0);
- }
+ av1_quick_txfm(use_hadamard, tx_size, bd_info, p->src_diff,
+ block_size_wide[plane_bsize], p->coeff);
satd_cost += aom_satd(p->coeff, tx_size_2d[tx_size]);
}
}
@@ -448,7 +623,9 @@ static AOM_INLINE int model_intra_yrd_and_prune(const AV1_COMP *const cpi,
int64_t *best_model_rd) {
const TX_SIZE tx_size = AOMMIN(TX_32X32, max_txsize_lookup[bsize]);
const int plane = 0;
- const int64_t this_model_rd = intra_model_rd(cpi, x, plane, bsize, tx_size);
+ const AV1_COMMON *cm = &cpi->common;
+ const int64_t this_model_rd =
+ intra_model_rd(cm, x, plane, bsize, tx_size, /*use_hadamard=*/1);
if (*best_model_rd != INT64_MAX &&
this_model_rd > *best_model_rd + (*best_model_rd >> 2)) {
return 1;
diff --git a/third_party/libaom/source/libaom/av1/encoder/level.c b/third_party/libaom/source/libaom/av1/encoder/level.c
index 7a74c460e4..4e1749a1dd 100644
--- a/third_party/libaom/source/libaom/av1/encoder/level.c
+++ b/third_party/libaom/source/libaom/av1/encoder/level.c
@@ -353,7 +353,7 @@ static double time_to_decode_frame(const AV1_COMMON *const cm,
if (spatial_layer_dimensions_present_flag) {
assert(0 && "Spatial layer dimensions not supported yet.");
} else {
- const SequenceHeader *const seq_params = &cm->seq_params;
+ const SequenceHeader *const seq_params = cm->seq_params;
const int max_frame_width = seq_params->max_frame_width;
const int max_frame_height = seq_params->max_frame_height;
luma_samples = max_frame_width * max_frame_height;
@@ -473,7 +473,7 @@ void av1_decoder_model_init(const AV1_COMP *const cpi, AV1_LEVEL level,
decoder_model->level = level;
const AV1_COMMON *const cm = &cpi->common;
- const SequenceHeader *const seq_params = &cm->seq_params;
+ const SequenceHeader *const seq_params = cm->seq_params;
decoder_model->bit_rate = get_max_bitrate(
av1_level_defs + level, seq_params->tier[op_index], seq_params->profile);
@@ -690,7 +690,7 @@ void av1_decoder_model_process_frame(const AV1_COMP *const cpi,
void av1_init_level_info(AV1_COMP *cpi) {
for (int op_index = 0; op_index < MAX_NUM_OPERATING_POINTS; ++op_index) {
AV1LevelInfo *const this_level_info =
- cpi->level_params.level_info[op_index];
+ cpi->ppi->level_params.level_info[op_index];
if (!this_level_info) continue;
memset(this_level_info, 0, sizeof(*this_level_info));
AV1LevelSpec *const level_spec = &this_level_info->level_spec;
@@ -1048,7 +1048,7 @@ static void scan_past_frames(const FrameWindowBuffer *const buffer,
void av1_update_level_info(AV1_COMP *cpi, size_t size, int64_t ts_start,
int64_t ts_end) {
AV1_COMMON *const cm = &cpi->common;
- const AV1LevelParams *const level_params = &cpi->level_params;
+ const AV1LevelParams *const level_params = &cpi->ppi->level_params;
const int upscaled_width = cm->superres_upscaled_width;
const int width = cm->width;
@@ -1057,7 +1057,7 @@ void av1_update_level_info(AV1_COMP *cpi, size_t size, int64_t ts_start,
const int tile_rows = cm->tiles.rows;
const int tiles = tile_cols * tile_rows;
const int luma_pic_size = upscaled_width * height;
- const int frame_header_count = level_params->frame_header_count;
+ const int frame_header_count = cpi->frame_header_count;
const int show_frame = cm->show_frame;
const int show_existing_frame = cm->show_existing_frame;
@@ -1075,7 +1075,7 @@ void av1_update_level_info(AV1_COMP *cpi, size_t size, int64_t ts_start,
const int temporal_layer_id = cm->temporal_layer_id;
const int spatial_layer_id = cm->spatial_layer_id;
- const SequenceHeader *const seq_params = &cm->seq_params;
+ const SequenceHeader *const seq_params = cm->seq_params;
const BITSTREAM_PROFILE profile = seq_params->profile;
const int is_still_picture = seq_params->still_picture;
// update level_stats
@@ -1148,7 +1148,7 @@ void av1_update_level_info(AV1_COMP *cpi, size_t size, int64_t ts_start,
if (fail_id != TARGET_LEVEL_OK) {
const int target_level_major = 2 + (target_level >> 2);
const int target_level_minor = target_level & 3;
- aom_internal_error(&cm->error, AOM_CODEC_ERROR,
+ aom_internal_error(cm->error, AOM_CODEC_ERROR,
"Failed to encode to the target level %d_%d. %s",
target_level_major, target_level_minor,
level_fail_messages[fail_id]);
diff --git a/third_party/libaom/source/libaom/av1/encoder/level.h b/third_party/libaom/source/libaom/av1/encoder/level.h
index 5e0cce2007..2800e3d40d 100644
--- a/third_party/libaom/source/libaom/av1/encoder/level.h
+++ b/third_party/libaom/source/libaom/av1/encoder/level.h
@@ -164,8 +164,6 @@ typedef struct AV1LevelParams {
uint32_t keep_level_stats;
// Level information for each operating point.
AV1LevelInfo *level_info[MAX_NUM_OPERATING_POINTS];
- // Count the number of OBU_FRAME and OBU_FRAME_HEADER for level calculation.
- int frame_header_count;
} AV1LevelParams;
static INLINE int is_in_operating_point(int operating_point,
diff --git a/third_party/libaom/source/libaom/av1/encoder/mcomp.c b/third_party/libaom/source/libaom/av1/encoder/mcomp.c
index 06f9386102..1a53c23c74 100644
--- a/third_party/libaom/source/libaom/av1/encoder/mcomp.c
+++ b/third_party/libaom/source/libaom/av1/encoder/mcomp.c
@@ -95,7 +95,7 @@ void av1_make_default_fullpel_ms_params(
// High level params
ms_params->bsize = bsize;
- ms_params->vfp = &cpi->fn_ptr[bsize];
+ ms_params->vfp = &cpi->ppi->fn_ptr[bsize];
init_ms_buffers(&ms_params->ms_buffers, x);
@@ -145,8 +145,8 @@ void av1_set_ms_to_intra_mode(FULLPEL_MOTION_SEARCH_PARAMS *ms_params,
MV_COST_PARAMS *mv_cost_params = &ms_params->mv_cost_params;
mv_cost_params->mvjcost = dv_costs->joint_mv;
- mv_cost_params->mvcost[0] = &dv_costs->mv_component[0][MV_MAX];
- mv_cost_params->mvcost[1] = &dv_costs->mv_component[1][MV_MAX];
+ mv_cost_params->mvcost[0] = dv_costs->dv_costs[0];
+ mv_cost_params->mvcost[1] = dv_costs->dv_costs[1];
}
void av1_make_default_subpel_ms_params(SUBPEL_MOTION_SEARCH_PARAMS *ms_params,
@@ -167,7 +167,7 @@ void av1_make_default_subpel_ms_params(SUBPEL_MOTION_SEARCH_PARAMS *ms_params,
x->errorperbit, x->sadperbit);
// Subpel variance params
- ms_params->var_params.vfp = &cpi->fn_ptr[bsize];
+ ms_params->var_params.vfp = &cpi->ppi->fn_ptr[bsize];
ms_params->var_params.subpel_search_type =
cpi->sf.mv_sf.use_accurate_subpel_search;
ms_params->var_params.w = block_size_wide[bsize];
@@ -253,7 +253,7 @@ static INLINE int mv_cost(const MV *mv, const int *joint_cost,
// nearest 2 ** 7.
// This is NOT used during motion compensation.
int av1_mv_bit_cost(const MV *mv, const MV *ref_mv, const int *mvjcost,
- int *mvcost[2], int weight) {
+ int *const mvcost[2], int weight) {
const MV diff = { mv->row - ref_mv->row, mv->col - ref_mv->col };
return ROUND_POWER_OF_TWO(
mv_cost(&diff, mvjcost, CONVERT_TO_CONST_MVCOST(mvcost)) * weight, 7);
@@ -290,6 +290,9 @@ static INLINE int mv_err_cost(const MV *mv, const MV *ref_mv,
static INLINE int mv_err_cost_(const MV *mv,
const MV_COST_PARAMS *mv_cost_params) {
+ if (mv_cost_params->mv_cost_type == MV_COST_NONE) {
+ return 0;
+ }
return mv_err_cost(mv, mv_cost_params->ref_mv, mv_cost_params->mvjcost,
mv_cost_params->mvcost, mv_cost_params->error_per_bit,
mv_cost_params->mv_cost_type);
@@ -1830,7 +1833,7 @@ int av1_intrabc_hash_search(const AV1_COMP *cpi, const MACROBLOCKD *xd,
const MV dv = { GET_MV_SUBPEL(ref_block_hash.y - y_pos),
GET_MV_SUBPEL(ref_block_hash.x - x_pos) };
if (!av1_is_dv_valid(dv, &cpi->common, xd, mi_row, mi_col, bsize,
- cpi->common.seq_params.mib_size_log2))
+ cpi->common.seq_params->mib_size_log2))
continue;
FULLPEL_MV hash_mv;
@@ -1957,8 +1960,8 @@ unsigned int av1_int_pro_motion_estimation(const AV1_COMP *cpi, MACROBLOCK *x,
if (xd->bd != 8) {
unsigned int sad;
best_int_mv->as_fullmv = kZeroFullMv;
- sad = cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf, src_stride,
- xd->plane[0].pre[0].buf, ref_stride);
+ sad = cpi->ppi->fn_ptr[bsize].sdf(x->plane[0].src.buf, src_stride,
+ xd->plane[0].pre[0].buf, ref_stride);
if (scaled_ref_frame) {
int i;
@@ -2001,7 +2004,8 @@ unsigned int av1_int_pro_motion_estimation(const AV1_COMP *cpi, MACROBLOCK *x,
FULLPEL_MV this_mv = best_int_mv->as_fullmv;
src_buf = x->plane[0].src.buf;
ref_buf = get_buf_from_fullmv(&xd->plane[0].pre[0], &this_mv);
- best_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride, ref_buf, ref_stride);
+ best_sad =
+ cpi->ppi->fn_ptr[bsize].sdf(src_buf, src_stride, ref_buf, ref_stride);
{
const uint8_t *const pos[4] = {
@@ -2011,7 +2015,8 @@ unsigned int av1_int_pro_motion_estimation(const AV1_COMP *cpi, MACROBLOCK *x,
ref_buf + ref_stride,
};
- cpi->fn_ptr[bsize].sdx4df(src_buf, src_stride, pos, ref_stride, this_sad);
+ cpi->ppi->fn_ptr[bsize].sdx4df(src_buf, src_stride, pos, ref_stride,
+ this_sad);
}
for (idx = 0; idx < 4; ++idx) {
@@ -2034,7 +2039,8 @@ unsigned int av1_int_pro_motion_estimation(const AV1_COMP *cpi, MACROBLOCK *x,
ref_buf = get_buf_from_fullmv(&xd->plane[0].pre[0], &this_mv);
- tmp_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride, ref_buf, ref_stride);
+ tmp_sad =
+ cpi->ppi->fn_ptr[bsize].sdf(src_buf, src_stride, ref_buf, ref_stride);
if (best_sad > tmp_sad) {
best_int_mv->as_fullmv = this_mv;
best_sad = tmp_sad;
@@ -2265,7 +2271,6 @@ static INLINE int get_subpel_part(int x) { return x & 7; }
// Gets the address of the ref buffer at subpel location (r, c), rounded to the
// nearest fullpel precision toward - \infty
-
static INLINE const uint8_t *get_buf_from_mv(const struct buf_2d *buf,
const MV mv) {
const int offset = (mv.row >> 3) * buf->stride + (mv.col >> 3);
diff --git a/third_party/libaom/source/libaom/av1/encoder/mcomp.h b/third_party/libaom/source/libaom/av1/encoder/mcomp.h
index 901671e27f..b2539f5100 100644
--- a/third_party/libaom/source/libaom/av1/encoder/mcomp.h
+++ b/third_party/libaom/source/libaom/av1/encoder/mcomp.h
@@ -84,7 +84,7 @@ typedef struct {
} MV_COST_PARAMS;
int av1_mv_bit_cost(const MV *mv, const MV *ref_mv, const int *mvjcost,
- int *mvcost[2], int weight);
+ int *const mvcost[2], int weight);
int av1_get_mvpred_sse(const MV_COST_PARAMS *mv_cost_params,
const FULLPEL_MV best_mv,
diff --git a/third_party/libaom/source/libaom/av1/encoder/motion_search_facade.c b/third_party/libaom/source/libaom/av1/encoder/motion_search_facade.c
index 96b77b754d..07485bd68c 100644
--- a/third_party/libaom/source/libaom/av1/encoder/motion_search_facade.c
+++ b/third_party/libaom/source/libaom/av1/encoder/motion_search_facade.c
@@ -15,6 +15,7 @@
#include "av1/encoder/encodemv.h"
#include "av1/encoder/encoder.h"
+#include "av1/encoder/interp_search.h"
#include "av1/encoder/mcomp.h"
#include "av1/encoder/motion_search_facade.h"
#include "av1/encoder/partition_strategy.h"
@@ -41,7 +42,7 @@ static int compare_weight(const void *a, const void *b) {
// Allow more mesh searches for screen content type on the ARF.
static int use_fine_search_interval(const AV1_COMP *const cpi) {
return cpi->is_screen_content_type &&
- cpi->gf_group.update_type[cpi->gf_group.index] == ARF_UPDATE &&
+ cpi->ppi->gf_group.update_type[cpi->gf_frame_index] == ARF_UPDATE &&
cpi->oxcf.speed <= 2;
}
@@ -62,15 +63,15 @@ static INLINE void get_mv_candidate_from_tpl(const AV1_COMP *const cpi,
const int mi_col = xd->mi_col;
const BLOCK_SIZE tpl_bsize =
- convert_length_to_bsize(cpi->tpl_data.tpl_bsize_1d);
+ convert_length_to_bsize(cpi->ppi->tpl_data.tpl_bsize_1d);
const int tplw = mi_size_wide[tpl_bsize];
const int tplh = mi_size_high[tpl_bsize];
const int nw = mi_size_wide[bsize] / tplw;
const int nh = mi_size_high[bsize] / tplh;
if (nw >= 1 && nh >= 1) {
- const int of_h = mi_row % mi_size_high[cm->seq_params.sb_size];
- const int of_w = mi_col % mi_size_wide[cm->seq_params.sb_size];
+ const int of_h = mi_row % mi_size_high[cm->seq_params->sb_size];
+ const int of_w = mi_col % mi_size_wide[cm->seq_params->sb_size];
const int start = of_h / tplh * sb_enc->tpl_stride + of_w / tplw;
int valid = 1;
@@ -119,7 +120,8 @@ static INLINE void get_mv_candidate_from_tpl(const AV1_COMP *const cpi,
void av1_single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, int ref_idx, int *rate_mv,
int search_range, inter_mode_info *mode_info,
- int_mv *best_mv) {
+ int_mv *best_mv,
+ struct HandleInterModeArgs *const args) {
MACROBLOCKD *xd = &x->e_mbd;
const AV1_COMMON *cm = &cpi->common;
const MotionVectorSearchParams *mv_search_params = &cpi->mv_search_params;
@@ -243,13 +245,9 @@ void av1_single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
}
}
- // Terminate search with the current ref_idx if we have already encountered
- // another ref_mv in the drl such that:
- // 1. The other drl has the same fullpel_mv during the SIMPLE_TRANSLATION
- // search process as the current fullpel_mv.
- // 2. The rate needed to encode the current fullpel_mv is larger than that
- // for the other ref_mv.
- if (cpi->sf.inter_sf.skip_repeated_full_newmv &&
+ // Terminate search with the current ref_idx based on fullpel mv, rate cost,
+  // and other known costs.
+ if (cpi->sf.inter_sf.skip_newmv_in_drl >= 2 &&
mbmi->motion_mode == SIMPLE_TRANSLATION &&
best_mv->as_int != INVALID_MV) {
int_mv this_mv;
@@ -260,6 +258,7 @@ void av1_single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
mv_costs->mv_cost_stack, MV_COST_WEIGHT);
mode_info[ref_mv_idx].full_search_mv.as_int = this_mv.as_int;
mode_info[ref_mv_idx].full_mv_rate = this_mv_rate;
+ mode_info[ref_mv_idx].full_mv_bestsme = bestsme;
for (int prev_ref_idx = 0; prev_ref_idx < ref_mv_idx; ++prev_ref_idx) {
// Check if the motion search result same as previous results
@@ -280,6 +279,19 @@ void av1_single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
return;
}
}
+
+      // Terminate the evaluation of the current ref_mv_idx based on bestsme and
+ // drl_cost.
+ const int psme = mode_info[prev_ref_idx].full_mv_bestsme;
+ if (psme == INT_MAX) continue;
+ const int thr =
+ cpi->sf.inter_sf.skip_newmv_in_drl == 3 ? (psme + (psme >> 2)) : psme;
+ if (cpi->sf.inter_sf.skip_newmv_in_drl >= 3 &&
+ mode_info[ref_mv_idx].full_mv_bestsme > thr &&
+ mode_info[prev_ref_idx].drl_cost < mode_info[ref_mv_idx].drl_cost) {
+ best_mv->as_int = INVALID_MV;
+ return;
+ }
}
}
@@ -289,6 +301,8 @@ void av1_single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
const int use_fractional_mv =
bestsme < INT_MAX && cpi->common.features.cur_frame_force_integer_mv == 0;
+ int best_mv_rate = 0;
+ int mv_rate_calculated = 0;
if (use_fractional_mv) {
int_mv fractional_ms_list[3];
av1_set_fractional_mv(fractional_ms_list);
@@ -337,9 +351,10 @@ void av1_single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
subpel_start_mv = get_mv_from_fullmv(&second_best_mv.as_fullmv);
if (av1_is_subpelmv_in_range(&ms_params.mv_limits,
subpel_start_mv)) {
+ unsigned int sse;
const int this_var = mv_search_params->find_fractional_mv_step(
xd, cm, &ms_params, subpel_start_mv, &this_best_mv, &dis,
- &x->pred_sse[ref], fractional_ms_list);
+ &sse, fractional_ms_list);
if (!cpi->sf.mv_sf.disable_second_mv) {
// If cpi->sf.mv_sf.disable_second_mv is 0, use actual rd cost
@@ -358,11 +373,17 @@ void av1_single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
int64_t tmp_rd =
RDCOST(x->rdmult, tmp_rd_stats.rate + tmp_mv_rate,
tmp_rd_stats.dist);
- if (tmp_rd < rd) best_mv->as_mv = this_best_mv;
+ if (tmp_rd < rd) {
+ best_mv->as_mv = this_best_mv;
+ x->pred_sse[ref] = sse;
+ }
} else {
// If cpi->sf.mv_sf.disable_second_mv = 1, use var to decide the
// best MV.
- if (this_var < best_mv_var) best_mv->as_mv = this_best_mv;
+ if (this_var < best_mv_var) {
+ best_mv->as_mv = this_best_mv;
+ x->pred_sse[ref] = sse;
+ }
}
}
}
@@ -379,9 +400,52 @@ void av1_single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
break;
default: assert(0 && "Invalid motion mode!\n");
}
+
+ // Terminate search with the current ref_idx based on subpel mv and rate
+ // cost.
+ if (cpi->sf.inter_sf.skip_newmv_in_drl >= 1 && args != NULL &&
+ mbmi->motion_mode == SIMPLE_TRANSLATION &&
+ best_mv->as_int != INVALID_MV) {
+ const int ref_mv_idx = mbmi->ref_mv_idx;
+ best_mv_rate =
+ av1_mv_bit_cost(&best_mv->as_mv, &ref_mv, mv_costs->nmv_joint_cost,
+ mv_costs->mv_cost_stack, MV_COST_WEIGHT);
+ mv_rate_calculated = 1;
+
+ for (int prev_ref_idx = 0; prev_ref_idx < ref_mv_idx; ++prev_ref_idx) {
+ if (!args->single_newmv_valid[prev_ref_idx][ref]) continue;
+ // Check if the motion vectors are the same.
+ if (best_mv->as_int == args->single_newmv[prev_ref_idx][ref].as_int) {
+ // Skip this evaluation if the previous one is skipped.
+ if (mode_info[prev_ref_idx].skip) {
+ mode_info[ref_mv_idx].skip = 1;
+ break;
+ }
+          // Compare the rate cost that we currently know.
+ const int prev_rate_cost =
+ args->single_newmv_rate[prev_ref_idx][ref] +
+ mode_info[prev_ref_idx].drl_cost;
+ const int this_rate_cost =
+ best_mv_rate + mode_info[ref_mv_idx].drl_cost;
+
+ if (prev_rate_cost <= this_rate_cost) {
+ // If the current rate_cost is worse than the previous rate_cost,
+ // then we terminate the search for this ref_mv_idx.
+ mode_info[ref_mv_idx].skip = 1;
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ if (mv_rate_calculated) {
+ *rate_mv = best_mv_rate;
+ } else {
+ *rate_mv =
+ av1_mv_bit_cost(&best_mv->as_mv, &ref_mv, mv_costs->nmv_joint_cost,
+ mv_costs->mv_cost_stack, MV_COST_WEIGHT);
}
- *rate_mv = av1_mv_bit_cost(&best_mv->as_mv, &ref_mv, mv_costs->nmv_joint_cost,
- mv_costs->mv_cost_stack, MV_COST_WEIGHT);
}
int av1_joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
@@ -920,7 +984,7 @@ int_mv av1_simple_motion_sse_var(AV1_COMP *cpi, MACROBLOCK *x, int mi_row,
const uint8_t *dst = xd->plane[0].dst.buf;
const int dst_stride = xd->plane[0].dst.stride;
- *var = cpi->fn_ptr[bsize].vf(src, src_stride, dst, dst_stride, sse);
+ *var = cpi->ppi->fn_ptr[bsize].vf(src, src_stride, dst, dst_stride, sse);
return best_mv;
}
diff --git a/third_party/libaom/source/libaom/av1/encoder/motion_search_facade.h b/third_party/libaom/source/libaom/av1/encoder/motion_search_facade.h
index 5736f2b756..bf81fe243a 100644
--- a/third_party/libaom/source/libaom/av1/encoder/motion_search_facade.h
+++ b/third_party/libaom/source/libaom/av1/encoder/motion_search_facade.h
@@ -21,20 +21,19 @@ extern "C" {
// TODO(any): rename this struct to something else. There is already another
// struct called inter_modes_info, which makes this terribly confusing.
typedef struct {
- int64_t rd;
int drl_cost;
-
- int rate_mv;
- int_mv mv;
-
int_mv full_search_mv;
int full_mv_rate;
+ int full_mv_bestsme;
+ int skip;
} inter_mode_info;
+struct HandleInterModeArgs;
void av1_single_motion_search(const AV1_COMP *const cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, int ref_idx, int *rate_mv,
int search_range, inter_mode_info *mode_info,
- int_mv *best_mv);
+ int_mv *best_mv,
+ struct HandleInterModeArgs *const args);
int av1_joint_motion_search(const AV1_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, int_mv *cur_mv,
diff --git a/third_party/libaom/source/libaom/av1/encoder/mv_prec.c b/third_party/libaom/source/libaom/av1/encoder/mv_prec.c
index cc81d72170..ae9dc35af4 100644
--- a/third_party/libaom/source/libaom/av1/encoder/mv_prec.c
+++ b/third_party/libaom/source/libaom/av1/encoder/mv_prec.c
@@ -230,7 +230,7 @@ static AOM_INLINE void collect_mv_stats_b(MV_STATS *mv_stats,
const int y_stride = cpi->source->y_stride;
const int px_row = 4 * mi_row, px_col = 4 * mi_col;
const int buf_is_hbd = cpi->source->flags & YV12_FLAG_HIGHBITDEPTH;
- const int bd = cm->seq_params.bit_depth;
+ const int bd = cm->seq_params->bit_depth;
if (buf_is_hbd) {
uint16_t *source_buf =
CONVERT_TO_SHORTPTR(cpi->source->y_buffer) + px_row * y_stride + px_col;
@@ -339,8 +339,8 @@ static AOM_INLINE void collect_mv_stats_tile(MV_STATS *mv_stats,
const int mi_row_end = tile_info->mi_row_end;
const int mi_col_start = tile_info->mi_col_start;
const int mi_col_end = tile_info->mi_col_end;
- const int sb_size_mi = cm->seq_params.mib_size;
- BLOCK_SIZE sb_size = cm->seq_params.sb_size;
+ const int sb_size_mi = cm->seq_params->mib_size;
+ BLOCK_SIZE sb_size = cm->seq_params->sb_size;
for (int mi_row = mi_row_start; mi_row < mi_row_end; mi_row += sb_size_mi) {
for (int mi_col = mi_col_start; mi_col < mi_col_end; mi_col += sb_size_mi) {
collect_mv_stats_sb(mv_stats, cpi, mi_row, mi_col, sb_size);
@@ -349,7 +349,7 @@ static AOM_INLINE void collect_mv_stats_tile(MV_STATS *mv_stats,
}
void av1_collect_mv_stats(AV1_COMP *cpi, int current_q) {
- MV_STATS *mv_stats = &cpi->mv_stats;
+ MV_STATS *mv_stats = &cpi->ppi->mv_stats;
const AV1_COMMON *cm = &cpi->common;
const int tile_cols = cm->tiles.cols;
const int tile_rows = cm->tiles.rows;
@@ -420,8 +420,8 @@ void av1_pick_and_set_high_precision_mv(AV1_COMP *cpi, int qindex) {
}
#if !CONFIG_REALTIME_ONLY
else if (cpi->sf.hl_sf.high_precision_mv_usage == LAST_MV_DATA &&
- av1_frame_allows_smart_mv(cpi) && cpi->mv_stats.valid) {
- use_hp = get_smart_mv_prec(cpi, &cpi->mv_stats, qindex);
+ av1_frame_allows_smart_mv(cpi) && cpi->ppi->mv_stats.valid) {
+ use_hp = get_smart_mv_prec(cpi, &cpi->ppi->mv_stats, qindex);
}
#endif // !CONFIG_REALTIME_ONLY
diff --git a/third_party/libaom/source/libaom/av1/encoder/mv_prec.h b/third_party/libaom/source/libaom/av1/encoder/mv_prec.h
index 89f95f553e..11dcdd8806 100644
--- a/third_party/libaom/source/libaom/av1/encoder/mv_prec.h
+++ b/third_party/libaom/source/libaom/av1/encoder/mv_prec.h
@@ -21,8 +21,8 @@
void av1_collect_mv_stats(AV1_COMP *cpi, int current_q);
static AOM_INLINE int av1_frame_allows_smart_mv(const AV1_COMP *cpi) {
- const int gf_group_index = cpi->gf_group.index;
- const int gf_update_type = cpi->gf_group.update_type[gf_group_index];
+ const int gf_group_index = cpi->gf_frame_index;
+ const int gf_update_type = cpi->ppi->gf_group.update_type[gf_group_index];
return !frame_is_intra_only(&cpi->common) &&
!(gf_update_type == INTNL_OVERLAY_UPDATE ||
gf_update_type == OVERLAY_UPDATE);
diff --git a/third_party/libaom/source/libaom/av1/encoder/nonrd_pickmode.c b/third_party/libaom/source/libaom/av1/encoder/nonrd_pickmode.c
index 279fd922dd..088135a2dd 100644
--- a/third_party/libaom/source/libaom/av1/encoder/nonrd_pickmode.c
+++ b/third_party/libaom/source/libaom/av1/encoder/nonrd_pickmode.c
@@ -353,6 +353,8 @@ static INLINE void find_predictors(AV1_COMP *cpi, MACROBLOCK *x,
(void)tile_data;
x->pred_mv_sad[ref_frame] = INT_MAX;
+ x->pred_mv0_sad[ref_frame] = INT_MAX;
+ x->pred_mv1_sad[ref_frame] = INT_MAX;
frame_mv[NEWMV][ref_frame].as_int = INVALID_MV;
// TODO(kyslov) this needs various further optimizations. to be continued..
assert(yv12 != NULL);
@@ -518,7 +520,7 @@ static TX_SIZE calculate_tx_size(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
TX_SIZE tx_size;
const TxfmSearchParams *txfm_params = &x->txfm_search_params;
if (txfm_params->tx_mode_search_type == TX_MODE_SELECT) {
- if (sse > (var << 2))
+ if (sse > (var << 1))
tx_size =
AOMMIN(max_txsize_lookup[bsize],
tx_mode_to_biggest_tx_size[txfm_params->tx_mode_search_type]);
@@ -729,9 +731,9 @@ static void model_skip_for_sb_y_large(AV1_COMP *cpi, BLOCK_SIZE bsize,
(puv->dequant_QTX[1] * puv->dequant_QTX[1]) >> 3;
av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, i,
i);
- var_uv[j] = cpi->fn_ptr[uv_bsize].vf(puv->src.buf, puv->src.stride,
- puvd->dst.buf, puvd->dst.stride,
- &sse_uv[j]);
+ var_uv[j] = cpi->ppi->fn_ptr[uv_bsize].vf(
+ puv->src.buf, puv->src.stride, puvd->dst.buf, puvd->dst.stride,
+ &sse_uv[j]);
if ((var_uv[j] < uv_ac_thr || var_uv[j] == 0) &&
(sse_uv[j] - var_uv[j] < uv_dc_thr || sse_uv[j] == var_uv[j]))
skip_uv[j] = 1;
@@ -776,8 +778,8 @@ static void model_rd_for_sb_y(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
int rate;
int64_t dist;
- unsigned int var = cpi->fn_ptr[bsize].vf(p->src.buf, p->src.stride,
- pd->dst.buf, pd->dst.stride, &sse);
+ unsigned int var = cpi->ppi->fn_ptr[bsize].vf(
+ p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, &sse);
xd->mi[0]->tx_size = calculate_tx_size(cpi, bsize, x, var, sse);
if (calculate_rd) {
@@ -1171,8 +1173,8 @@ static void model_rd_for_sb_uv(AV1_COMP *cpi, BLOCK_SIZE plane_bsize,
unsigned int var;
if (!x->color_sensitivity[i - 1]) continue;
- var = cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf,
- pd->dst.stride, &sse);
+ var = cpi->ppi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf,
+ pd->dst.stride, &sse);
assert(sse >= var);
tot_sse += sse;
@@ -1251,12 +1253,12 @@ static void estimate_block_intra(int plane, int block, int row, int col,
(void)block;
- p->src.buf = &src_buf_base[4 * (row * src_stride + col)];
- pd->dst.buf = &dst_buf_base[4 * (row * dst_stride + col)];
-
av1_predict_intra_block_facade(cm, xd, plane, col, row, tx_size);
av1_invalid_rd_stats(&this_rdc);
+ p->src.buf = &src_buf_base[4 * (row * src_stride + col)];
+ pd->dst.buf = &dst_buf_base[4 * (row * dst_stride + col)];
+
if (plane == 0) {
block_yrd(cpi, x, 0, 0, &this_rdc, &args->skippable, bsize_tx,
AOMMIN(tx_size, TX_16X16));
@@ -1562,7 +1564,7 @@ static void search_filter_ref(AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *this_rdc,
else
model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rd_stats[i], 1);
pf_rd_stats[i].rate += av1_get_switchable_rate(
- x, xd, cm->features.interp_filter, cm->seq_params.enable_dual_filter);
+ x, xd, cm->features.interp_filter, cm->seq_params->enable_dual_filter);
cost = RDCOST(x->rdmult, pf_rd_stats[i].rate, pf_rd_stats[i].dist);
pf_tx_size[i] = mi->tx_size;
if (cost < best_cost) {
@@ -1618,6 +1620,7 @@ typedef struct _mode_search_stat {
static void compute_intra_yprediction(const AV1_COMMON *cm,
PREDICTION_MODE mode, BLOCK_SIZE bsize,
MACROBLOCK *x, MACROBLOCKD *xd) {
+ const SequenceHeader *seq_params = cm->seq_params;
struct macroblockd_plane *const pd = &xd->plane[0];
struct macroblock_plane *const p = &x->plane[0];
uint8_t *const src_buf_base = p->src.buf;
@@ -1644,10 +1647,11 @@ static void compute_intra_yprediction(const AV1_COMMON *cm,
for (col = 0; col < max_blocks_wide; col += (1 << tx_size)) {
p->src.buf = &src_buf_base[4 * (row * (int64_t)src_stride + col)];
pd->dst.buf = &dst_buf_base[4 * (row * (int64_t)dst_stride + col)];
- av1_predict_intra_block(cm, xd, block_size_wide[bsize],
- block_size_high[bsize], tx_size, mode, 0, 0,
- FILTER_INTRA_MODES, pd->dst.buf, dst_stride,
- pd->dst.buf, dst_stride, 0, 0, plane);
+ av1_predict_intra_block(
+ xd, seq_params->sb_size, seq_params->enable_intra_edge_filter,
+ block_size_wide[bsize], block_size_high[bsize], tx_size, mode, 0, 0,
+ FILTER_INTRA_MODES, pd->dst.buf, dst_stride, pd->dst.buf, dst_stride,
+ 0, 0, plane);
}
}
p->src.buf = src_buf_base;
@@ -1671,7 +1675,9 @@ void av1_nonrd_pick_intra_mode(AV1_COMP *cpi, MACROBLOCK *x, RD_STATS *rd_cost,
const MB_MODE_INFO *left_mi = xd->left_mbmi;
const PREDICTION_MODE A = av1_above_block_mode(above_mi);
const PREDICTION_MODE L = av1_left_block_mode(left_mi);
- bmode_costs = x->mode_costs.y_mode_costs[A][L];
+ const int above_ctx = intra_mode_context[A];
+ const int left_ctx = intra_mode_context[L];
+ bmode_costs = x->mode_costs.y_mode_costs[above_ctx][left_ctx];
av1_invalid_rd_stats(&best_rdc);
av1_invalid_rd_stats(&this_rdc);
@@ -1734,10 +1740,11 @@ static AOM_INLINE void get_ref_frame_use_mask(AV1_COMP *cpi, MACROBLOCK *x,
int *force_skip_low_temp_var) {
AV1_COMMON *const cm = &cpi->common;
const struct segmentation *const seg = &cm->seg;
- const int is_small_sb = (cm->seq_params.sb_size == BLOCK_64X64);
+ const int is_small_sb = (cm->seq_params->sb_size == BLOCK_64X64);
// For SVC the usage of alt_ref is determined by the ref_frame_flags.
- int use_alt_ref_frame = cpi->use_svc || cpi->sf.rt_sf.use_nonrd_altref_frame;
+ int use_alt_ref_frame =
+ cpi->ppi->use_svc || cpi->sf.rt_sf.use_nonrd_altref_frame;
int use_golden_ref_frame = 1;
use_ref_frame[LAST_FRAME] = 1; // we never skip LAST
@@ -1832,7 +1839,7 @@ static void estimate_intra_mode(
int intra_cost_penalty = av1_get_intra_cost_penalty(
quant_params->base_qindex, quant_params->y_dc_delta_q,
- cm->seq_params.bit_depth);
+ cm->seq_params->bit_depth);
int64_t inter_mode_thresh = RDCOST(x->rdmult, intra_cost_penalty, 0);
int perform_intra_pred = cpi->sf.rt_sf.check_intra_pred_nonrd;
// For spatial enhancemanent layer: turn off intra prediction if the
@@ -1851,8 +1858,8 @@ static void estimate_intra_mode(
// Adjust thresholds to make intra mode likely tested if the other
// references (golden, alt) are skipped/not checked. For now always
// adjust for svc mode.
- if (cpi->use_svc || (cpi->sf.rt_sf.use_nonrd_altref_frame == 0 &&
- cpi->sf.rt_sf.nonrd_prune_ref_frame_search > 0)) {
+ if (cpi->ppi->use_svc || (cpi->sf.rt_sf.use_nonrd_altref_frame == 0 &&
+ cpi->sf.rt_sf.nonrd_prune_ref_frame_search > 0)) {
spatial_var_thresh = 150;
motion_thresh = 0;
}
@@ -2063,6 +2070,40 @@ static AOM_INLINE int skip_mode_by_bsize_and_ref_frame(
return 0;
}
+void set_color_sensitivity(AV1_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd,
+ BLOCK_SIZE bsize, int y_sad,
+ unsigned int source_variance) {
+ const int factor = (bsize >= BLOCK_32X32) ? 2 : 3;
+ NOISE_LEVEL noise_level = kLow;
+ int norm_sad =
+ y_sad >> (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
+ // If the spatial source variance is high and the normalized y_sad
+ // is low, then y-channel is likely good for mode estimation, so keep
+ // color_sensitivity off. For low noise content for now, since there is
+ // some bdrate regression for noisy color clip.
+ if (cpi->noise_estimate.enabled)
+ noise_level = av1_noise_estimate_extract_level(&cpi->noise_estimate);
+ if (noise_level == kLow && source_variance > 1000 && norm_sad < 50) {
+ x->color_sensitivity[0] = 0;
+ x->color_sensitivity[1] = 0;
+ return;
+ }
+ for (int i = 1; i <= 2; ++i) {
+ if (x->color_sensitivity[i - 1] == 2) {
+ struct macroblock_plane *const p = &x->plane[i];
+ struct macroblockd_plane *const pd = &xd->plane[i];
+ const BLOCK_SIZE bs =
+ get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
+ const int uv_sad = cpi->ppi->fn_ptr[bs].sdf(p->src.buf, p->src.stride,
+ pd->dst.buf, pd->dst.stride);
+ const int norm_uv_sad =
+ uv_sad >> (b_width_log2_lookup[bs] + b_height_log2_lookup[bs]);
+ x->color_sensitivity[i - 1] =
+ uv_sad > (factor * (y_sad >> 3)) && norm_uv_sad > 40;
+ }
+ }
+}
+
void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
MACROBLOCK *x, RD_STATS *rd_cost,
BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) {
@@ -2104,7 +2145,7 @@ void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
DECLARE_ALIGNED(16, uint8_t, pred_buf[3 * 128 * 128]);
PRED_BUFFER *this_mode_pred = NULL;
const int reuse_inter_pred = cpi->sf.rt_sf.reuse_inter_pred_nonrd &&
- cm->seq_params.bit_depth == AOM_BITS_8;
+ cm->seq_params->bit_depth == AOM_BITS_8;
const int bh = block_size_high[bsize];
const int bw = block_size_wide[bsize];
@@ -2135,7 +2176,8 @@ void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
cpi->common.height != cpi->resize_pending_params.height));
#endif
-
+ x->color_sensitivity[0] = x->color_sensitivity_sb[0];
+ x->color_sensitivity[1] = x->color_sensitivity_sb[1];
init_best_pickmode(&best_pickmode);
const ModeCosts *mode_costs = &x->mode_costs;
@@ -2170,7 +2212,8 @@ void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
#if CONFIG_AV1_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity > 0) {
- // if (cpi->use_svc) denoise_svc_pickmode = av1_denoise_svc_non_key(cpi);
+ // if (cpi->ppi->use_svc) denoise_svc_pickmode =
+ // av1_denoise_svc_non_key(cpi);
if (cpi->denoiser.denoising_level > kDenLowLow && denoise_svc_pickmode)
av1_denoiser_reset_frame_stats(ctx);
}
@@ -2183,7 +2226,7 @@ void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
// to source, so use subpel motion vector to compensate. The nonzero motion
// is half pixel shifted to left and top, so (-4, -4). This has more effect
// on higher resolutins, so condition it on that for now.
- if (cpi->use_svc && svc->spatial_layer_id > 0 &&
+ if (cpi->ppi->use_svc && svc->spatial_layer_id > 0 &&
svc->downsample_filter_phase[svc->spatial_layer_id - 1] == 8 &&
cm->width * cm->height > 640 * 480) {
svc_mv_col = -4;
@@ -2210,7 +2253,7 @@ void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
const int use_model_yrd_large =
cpi->oxcf.rc_cfg.mode == AOM_CBR && large_block &&
!cyclic_refresh_segment_id_boosted(xd->mi[0]->segment_id) &&
- quant_params->base_qindex && cm->seq_params.bit_depth == 8;
+ quant_params->base_qindex && cm->seq_params->bit_depth == 8;
const int enable_filter_search =
is_filter_search_enabled(cpi, mi_row, mi_col, bsize);
@@ -2264,7 +2307,7 @@ void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
if (!use_ref_frame_mask[ref_frame]) continue;
force_mv_inter_layer = 0;
- if (cpi->use_svc && svc->spatial_layer_id > 0 &&
+ if (cpi->ppi->use_svc && svc->spatial_layer_id > 0 &&
((ref_frame == LAST_FRAME && svc->skip_mvsearch_last) ||
(ref_frame == GOLDEN_FRAME && svc->skip_mvsearch_gf))) {
// Only test mode if NEARESTMV/NEARMV is (svc_mv_col, svc_mv_row),
@@ -2306,6 +2349,10 @@ void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
if ((int64_t)(x->pred_mv_sad[ref_frame]) > thresh_sad_pred) continue;
}
}
+ // Check for skipping NEARMV based on pred_mv_sad.
+ if (this_mode == NEARMV && x->pred_mv1_sad[ref_frame] != INT_MAX &&
+ x->pred_mv1_sad[ref_frame] > (x->pred_mv0_sad[ref_frame] << 1))
+ continue;
if (skip_mode_by_threshold(
this_mode, ref_frame, frame_mv[this_mode][ref_frame],
@@ -2357,6 +2404,22 @@ void av1_nonrd_pick_inter_mode_sb(AV1_COMP *cpi, TileDataEnc *tile_data,
#if COLLECT_PICK_MODE_STAT
ms_stat.num_nonskipped_searches[bsize][this_mode]++;
#endif
+
+ if (idx == 0) {
+ // Set color sensitivity on first tested mode only.
+ // Use y-sad already computed in find_predictors: take the sad with motion
+ // vector closest to 0; the uv-sad computed below in set_color_sensitivity
+ // is for zeromv.
+ int y_sad = x->pred_mv0_sad[LAST_FRAME];
+ if (x->pred_mv1_sad[LAST_FRAME] != INT_MAX &&
+ (abs(frame_mv[NEARMV][LAST_FRAME].as_mv.col) +
+ abs(frame_mv[NEARMV][LAST_FRAME].as_mv.row)) <
+ (abs(frame_mv[NEARESTMV][LAST_FRAME].as_mv.col) +
+ abs(frame_mv[NEARESTMV][LAST_FRAME].as_mv.row)))
+ y_sad = x->pred_mv1_sad[LAST_FRAME];
+ set_color_sensitivity(cpi, x, xd, bsize, y_sad, x->source_variance);
+ }
+
if (enable_filter_search && !force_mv_inter_layer &&
((mi->mv[0].as_mv.row & 0x07) || (mi->mv[0].as_mv.col & 0x07)) &&
(ref_frame == LAST_FRAME || !x->nonrd_prune_ref_frame_search)) {
diff --git a/third_party/libaom/source/libaom/av1/encoder/optical_flow.c b/third_party/libaom/source/libaom/av1/encoder/optical_flow.c
index 82ae9c5774..d2f03ed641 100644
--- a/third_party/libaom/source/libaom/av1/encoder/optical_flow.c
+++ b/third_party/libaom/source/libaom/av1/encoder/optical_flow.c
@@ -819,7 +819,7 @@ static void solve_horn_schunck(const double *ix, const double *iy,
}
av1_init_sparse_mtx(row_pos, col_pos, values, c, 2 * width * height,
2 * width * height, &A);
- // substract init mv part from b
+ // subtract init mv part from b
av1_mtx_vect_multi_left(&A, mv_init_vec, temp_b, 2 * width * height);
for (int i = 0; i < 2 * width * height; i++) {
b[i] = -temp_b[i];
@@ -882,10 +882,11 @@ static void solve_horn_schunck(const double *ix, const double *iy,
}
// Calculate optical flow from from_frame to to_frame using the H-S method.
-void horn_schunck(const YV12_BUFFER_CONFIG *from_frame,
- const YV12_BUFFER_CONFIG *to_frame, const int level,
- const int mv_stride, const int mv_height, const int mv_width,
- const OPFL_PARAMS *opfl_params, LOCALMV *mvs) {
+static void horn_schunck(const YV12_BUFFER_CONFIG *from_frame,
+ const YV12_BUFFER_CONFIG *to_frame, const int level,
+ const int mv_stride, const int mv_height,
+ const int mv_width, const OPFL_PARAMS *opfl_params,
+ LOCALMV *mvs) {
// mvs are always on level 0, here we define two new mv arrays that is of size
// of this level.
const int fw = from_frame->y_crop_width;
diff --git a/third_party/libaom/source/libaom/av1/encoder/palette.c b/third_party/libaom/source/libaom/av1/encoder/palette.c
index fd579b7f7f..fbc16ca742 100644
--- a/third_party/libaom/source/libaom/av1/encoder/palette.c
+++ b/third_party/libaom/source/libaom/av1/encoder/palette.c
@@ -218,12 +218,12 @@ static AOM_INLINE void palette_rd_y(
const AV1_COMP *const cpi, MACROBLOCK *x, MB_MODE_INFO *mbmi,
BLOCK_SIZE bsize, int dc_mode_cost, const int *data, int *centroids, int n,
uint16_t *color_cache, int n_cache, MB_MODE_INFO *best_mbmi,
- uint8_t *best_palette_color_map, int64_t *best_rd, int64_t *best_model_rd,
- int *rate, int *rate_tokenonly, int64_t *distortion, int *skippable,
- int *beat_best_rd, PICK_MODE_CONTEXT *ctx, uint8_t *blk_skip,
- uint8_t *tx_type_map, int *beat_best_palette_rd) {
+ uint8_t *best_palette_color_map, int64_t *best_rd, int *rate,
+ int *rate_tokenonly, int64_t *distortion, int *skippable, int *beat_best_rd,
+ PICK_MODE_CONTEXT *ctx, uint8_t *blk_skip, uint8_t *tx_type_map,
+ int *beat_best_palette_rd) {
optimize_palette_colors(color_cache, n_cache, n, 1, centroids,
- cpi->common.seq_params.bit_depth);
+ cpi->common.seq_params->bit_depth);
const int num_unique_colors = av1_remove_duplicates(centroids, n);
if (num_unique_colors < PALETTE_MIN_SIZE) {
// Too few unique colors to create a palette. And DC_PRED will work
@@ -231,10 +231,10 @@ static AOM_INLINE void palette_rd_y(
return;
}
PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
- if (cpi->common.seq_params.use_highbitdepth) {
+ if (cpi->common.seq_params->use_highbitdepth) {
for (int i = 0; i < num_unique_colors; ++i) {
pmi->palette_colors[i] = clip_pixel_highbd(
- (int)centroids[i], cpi->common.seq_params.bit_depth);
+ (int)centroids[i], cpi->common.seq_params->bit_depth);
}
} else {
for (int i = 0; i < num_unique_colors; ++i) {
@@ -251,10 +251,6 @@ static AOM_INLINE void palette_rd_y(
1);
extend_palette_color_map(color_map, cols, rows, block_width, block_height);
- if (model_intra_yrd_and_prune(cpi, x, bsize, best_model_rd)) {
- return;
- }
-
RD_STATS tokenonly_rd_stats;
av1_pick_uniform_tx_size_type_yrd(cpi, x, &tokenonly_rd_stats, bsize,
*best_rd);
@@ -304,10 +300,9 @@ static AOM_INLINE int perform_top_color_palette_search(
BLOCK_SIZE bsize, int dc_mode_cost, const int *data, int *top_colors,
int start_n, int end_n, int step_size, int *last_n_searched,
uint16_t *color_cache, int n_cache, MB_MODE_INFO *best_mbmi,
- uint8_t *best_palette_color_map, int64_t *best_rd, int64_t *best_model_rd,
- int *rate, int *rate_tokenonly, int64_t *distortion, int *skippable,
- int *beat_best_rd, PICK_MODE_CONTEXT *ctx, uint8_t *best_blk_skip,
- uint8_t *tx_type_map) {
+ uint8_t *best_palette_color_map, int64_t *best_rd, int *rate,
+ int *rate_tokenonly, int64_t *distortion, int *skippable, int *beat_best_rd,
+ PICK_MODE_CONTEXT *ctx, uint8_t *best_blk_skip, uint8_t *tx_type_map) {
int centroids[PALETTE_MAX_SIZE];
int n = start_n;
int top_color_winner = end_n;
@@ -320,8 +315,8 @@ static AOM_INLINE int perform_top_color_palette_search(
memcpy(centroids, top_colors, n * sizeof(top_colors[0]));
palette_rd_y(cpi, x, mbmi, bsize, dc_mode_cost, data, centroids, n,
color_cache, n_cache, best_mbmi, best_palette_color_map,
- best_rd, best_model_rd, rate, rate_tokenonly, distortion,
- skippable, beat_best_rd, ctx, best_blk_skip, tx_type_map,
+ best_rd, rate, rate_tokenonly, distortion, skippable,
+ beat_best_rd, ctx, best_blk_skip, tx_type_map,
&beat_best_palette_rd);
*last_n_searched = n;
if (beat_best_palette_rd) {
@@ -345,10 +340,9 @@ static AOM_INLINE int perform_k_means_palette_search(
int upper_bound, int start_n, int end_n, int step_size,
int *last_n_searched, uint16_t *color_cache, int n_cache,
MB_MODE_INFO *best_mbmi, uint8_t *best_palette_color_map, int64_t *best_rd,
- int64_t *best_model_rd, int *rate, int *rate_tokenonly, int64_t *distortion,
- int *skippable, int *beat_best_rd, PICK_MODE_CONTEXT *ctx,
- uint8_t *best_blk_skip, uint8_t *tx_type_map, uint8_t *color_map,
- int data_points) {
+ int *rate, int *rate_tokenonly, int64_t *distortion, int *skippable,
+ int *beat_best_rd, PICK_MODE_CONTEXT *ctx, uint8_t *best_blk_skip,
+ uint8_t *tx_type_map, uint8_t *color_map, int data_points) {
int centroids[PALETTE_MAX_SIZE];
const int max_itr = 50;
int n = start_n;
@@ -366,8 +360,8 @@ static AOM_INLINE int perform_k_means_palette_search(
av1_k_means(data, centroids, color_map, data_points, n, 1, max_itr);
palette_rd_y(cpi, x, mbmi, bsize, dc_mode_cost, data, centroids, n,
color_cache, n_cache, best_mbmi, best_palette_color_map,
- best_rd, best_model_rd, rate, rate_tokenonly, distortion,
- skippable, beat_best_rd, ctx, best_blk_skip, tx_type_map,
+ best_rd, rate, rate_tokenonly, distortion, skippable,
+ beat_best_rd, ctx, best_blk_skip, tx_type_map,
&beat_best_palette_rd);
*last_n_searched = n;
if (beat_best_palette_rd) {
@@ -434,9 +428,9 @@ static AOM_INLINE void fill_data_and_get_bounds(
void av1_rd_pick_palette_intra_sby(
const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int dc_mode_cost,
MB_MODE_INFO *best_mbmi, uint8_t *best_palette_color_map, int64_t *best_rd,
- int64_t *best_model_rd, int *rate, int *rate_tokenonly, int64_t *distortion,
- int *skippable, int *beat_best_rd, PICK_MODE_CONTEXT *ctx,
- uint8_t *best_blk_skip, uint8_t *tx_type_map) {
+ int *rate, int *rate_tokenonly, int64_t *distortion, int *skippable,
+ int *beat_best_rd, PICK_MODE_CONTEXT *ctx, uint8_t *best_blk_skip,
+ uint8_t *tx_type_map) {
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = xd->mi[0];
assert(!is_inter_block(mbmi));
@@ -450,7 +444,7 @@ void av1_rd_pick_palette_intra_sby(
int block_width, block_height, rows, cols;
av1_get_block_dimensions(bsize, 0, xd, &block_width, &block_height, &rows,
&cols);
- const SequenceHeader *const seq_params = &cpi->common.seq_params;
+ const SequenceHeader *const seq_params = cpi->common.seq_params;
const int is_hbd = seq_params->use_highbitdepth;
const int bit_depth = seq_params->bit_depth;
int unused;
@@ -532,8 +526,8 @@ void av1_rd_pick_palette_intra_sby(
const int top_color_winner = perform_top_color_palette_search(
cpi, x, mbmi, bsize, dc_mode_cost, data, top_colors, min_n, max_n + 1,
step_size, &unused, color_cache, n_cache, best_mbmi,
- best_palette_color_map, best_rd, best_model_rd, rate, rate_tokenonly,
- distortion, skippable, beat_best_rd, ctx, best_blk_skip, tx_type_map);
+ best_palette_color_map, best_rd, rate, rate_tokenonly, distortion,
+ skippable, beat_best_rd, ctx, best_blk_skip, tx_type_map);
// Evaluate neighbors for the winner color (if winner is found) in the
// above coarse search for dominant colors
if (top_color_winner <= max_n) {
@@ -544,18 +538,18 @@ void av1_rd_pick_palette_intra_sby(
perform_top_color_palette_search(
cpi, x, mbmi, bsize, dc_mode_cost, data, top_colors, stage2_min_n,
stage2_max_n + 1, stage2_step_size, &unused, color_cache, n_cache,
- best_mbmi, best_palette_color_map, best_rd, best_model_rd, rate,
- rate_tokenonly, distortion, skippable, beat_best_rd, ctx,
- best_blk_skip, tx_type_map);
+ best_mbmi, best_palette_color_map, best_rd, rate, rate_tokenonly,
+ distortion, skippable, beat_best_rd, ctx, best_blk_skip,
+ tx_type_map);
}
// K-means clustering.
// Perform k-means coarse palette search to find the winner candidate
const int k_means_winner = perform_k_means_palette_search(
cpi, x, mbmi, bsize, dc_mode_cost, data, lower_bound, upper_bound,
min_n, max_n + 1, step_size, &unused, color_cache, n_cache, best_mbmi,
- best_palette_color_map, best_rd, best_model_rd, rate, rate_tokenonly,
- distortion, skippable, beat_best_rd, ctx, best_blk_skip, tx_type_map,
- color_map, rows * cols);
+ best_palette_color_map, best_rd, rate, rate_tokenonly, distortion,
+ skippable, beat_best_rd, ctx, best_blk_skip, tx_type_map, color_map,
+ rows * cols);
// Evaluate neighbors for the winner color (if winner is found) in the
// above coarse search for k-means
if (k_means_winner <= max_n) {
@@ -567,9 +561,8 @@ void av1_rd_pick_palette_intra_sby(
cpi, x, mbmi, bsize, dc_mode_cost, data, lower_bound, upper_bound,
start_n_stage2, end_n_stage2 + 1, step_size_stage2, &unused,
color_cache, n_cache, best_mbmi, best_palette_color_map, best_rd,
- best_model_rd, rate, rate_tokenonly, distortion, skippable,
- beat_best_rd, ctx, best_blk_skip, tx_type_map, color_map,
- rows * cols);
+ rate, rate_tokenonly, distortion, skippable, beat_best_rd, ctx,
+ best_blk_skip, tx_type_map, color_map, rows * cols);
}
} else {
const int max_n = AOMMIN(colors, PALETTE_MAX_SIZE),
@@ -579,17 +572,16 @@ void av1_rd_pick_palette_intra_sby(
perform_top_color_palette_search(
cpi, x, mbmi, bsize, dc_mode_cost, data, top_colors, max_n, min_n - 1,
-1, &last_n_searched, color_cache, n_cache, best_mbmi,
- best_palette_color_map, best_rd, best_model_rd, rate, rate_tokenonly,
- distortion, skippable, beat_best_rd, ctx, best_blk_skip, tx_type_map);
+ best_palette_color_map, best_rd, rate, rate_tokenonly, distortion,
+ skippable, beat_best_rd, ctx, best_blk_skip, tx_type_map);
if (last_n_searched > min_n) {
// Search in ascending order until we get to the previous best
perform_top_color_palette_search(
cpi, x, mbmi, bsize, dc_mode_cost, data, top_colors, min_n,
last_n_searched, 1, &unused, color_cache, n_cache, best_mbmi,
- best_palette_color_map, best_rd, best_model_rd, rate,
- rate_tokenonly, distortion, skippable, beat_best_rd, ctx,
- best_blk_skip, tx_type_map);
+ best_palette_color_map, best_rd, rate, rate_tokenonly, distortion,
+ skippable, beat_best_rd, ctx, best_blk_skip, tx_type_map);
}
// K-means clustering.
if (colors == PALETTE_MIN_SIZE) {
@@ -599,26 +591,25 @@ void av1_rd_pick_palette_intra_sby(
centroids[1] = upper_bound;
palette_rd_y(cpi, x, mbmi, bsize, dc_mode_cost, data, centroids, colors,
color_cache, n_cache, best_mbmi, best_palette_color_map,
- best_rd, best_model_rd, rate, rate_tokenonly, distortion,
- skippable, beat_best_rd, ctx, best_blk_skip, tx_type_map,
- NULL);
+ best_rd, rate, rate_tokenonly, distortion, skippable,
+ beat_best_rd, ctx, best_blk_skip, tx_type_map, NULL);
} else {
// Perform k-means palette search in descending order
last_n_searched = max_n;
perform_k_means_palette_search(
cpi, x, mbmi, bsize, dc_mode_cost, data, lower_bound, upper_bound,
max_n, min_n - 1, -1, &last_n_searched, color_cache, n_cache,
- best_mbmi, best_palette_color_map, best_rd, best_model_rd, rate,
- rate_tokenonly, distortion, skippable, beat_best_rd, ctx,
- best_blk_skip, tx_type_map, color_map, rows * cols);
+ best_mbmi, best_palette_color_map, best_rd, rate, rate_tokenonly,
+ distortion, skippable, beat_best_rd, ctx, best_blk_skip,
+ tx_type_map, color_map, rows * cols);
if (last_n_searched > min_n) {
// Search in ascending order until we get to the previous best
perform_k_means_palette_search(
cpi, x, mbmi, bsize, dc_mode_cost, data, lower_bound, upper_bound,
min_n, last_n_searched, 1, &unused, color_cache, n_cache,
- best_mbmi, best_palette_color_map, best_rd, best_model_rd, rate,
- rate_tokenonly, distortion, skippable, beat_best_rd, ctx,
- best_blk_skip, tx_type_map, color_map, rows * cols);
+ best_mbmi, best_palette_color_map, best_rd, rate, rate_tokenonly,
+ distortion, skippable, beat_best_rd, ctx, best_blk_skip,
+ tx_type_map, color_map, rows * cols);
}
}
}
@@ -645,7 +636,7 @@ void av1_rd_pick_palette_intra_sbuv(const AV1_COMP *cpi, MACROBLOCK *x,
mbmi->bsize));
PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
const BLOCK_SIZE bsize = mbmi->bsize;
- const SequenceHeader *const seq_params = &cpi->common.seq_params;
+ const SequenceHeader *const seq_params = cpi->common.seq_params;
int this_rate;
int64_t this_rd;
int colors_u, colors_v, colors;
@@ -737,7 +728,7 @@ void av1_rd_pick_palette_intra_sbuv(const AV1_COMP *cpi, MACROBLOCK *x,
}
av1_k_means(data, centroids, color_map, rows * cols, n, 2, max_itr);
optimize_palette_colors(color_cache, n_cache, n, 2, centroids,
- cpi->common.seq_params.bit_depth);
+ cpi->common.seq_params->bit_depth);
// Sort the U channel colors in ascending order.
for (i = 0; i < 2 * (n - 1); i += 2) {
int min_idx = i;
@@ -811,7 +802,7 @@ void av1_restore_uv_color_map(const AV1_COMP *cpi, MACROBLOCK *x) {
for (r = 0; r < rows; ++r) {
for (c = 0; c < cols; ++c) {
- if (cpi->common.seq_params.use_highbitdepth) {
+ if (cpi->common.seq_params->use_highbitdepth) {
data[(r * cols + c) * 2] = src_u16[r * src_stride + c];
data[(r * cols + c) * 2 + 1] = src_v16[r * src_stride + c];
} else {
diff --git a/third_party/libaom/source/libaom/av1/encoder/palette.h b/third_party/libaom/source/libaom/av1/encoder/palette.h
index 85af473892..7d9a72f61d 100644
--- a/third_party/libaom/source/libaom/av1/encoder/palette.h
+++ b/third_party/libaom/source/libaom/av1/encoder/palette.h
@@ -185,10 +185,9 @@ int av1_palette_color_cost_uv(const PALETTE_MODE_INFO *const pmi,
void av1_rd_pick_palette_intra_sby(
const struct AV1_COMP *cpi, struct macroblock *x, BLOCK_SIZE bsize,
int dc_mode_cost, MB_MODE_INFO *best_mbmi, uint8_t *best_palette_color_map,
- int64_t *best_rd, int64_t *best_model_rd, int *rate, int *rate_tokenonly,
- int64_t *distortion, int *skippable, int *beat_best_rd,
- struct PICK_MODE_CONTEXT *ctx, uint8_t *best_blk_skip,
- uint8_t *tx_type_map);
+ int64_t *best_rd, int *rate, int *rate_tokenonly, int64_t *distortion,
+ int *skippable, int *beat_best_rd, struct PICK_MODE_CONTEXT *ctx,
+ uint8_t *best_blk_skip, uint8_t *tx_type_map);
/*!\brief Search for the best palette in the chroma plane.
*
diff --git a/third_party/libaom/source/libaom/av1/encoder/partition_search.c b/third_party/libaom/source/libaom/av1/encoder/partition_search.c
index 5d54a80b36..c5bfaf684f 100644
--- a/third_party/libaom/source/libaom/av1/encoder/partition_search.c
+++ b/third_party/libaom/source/libaom/av1/encoder/partition_search.c
@@ -25,6 +25,7 @@
#include "av1/encoder/encodemv.h"
#include "av1/encoder/motion_search_facade.h"
#include "av1/encoder/partition_search.h"
+#include "av1/encoder/partition_strategy.h"
#include "av1/encoder/reconinter_enc.h"
#include "av1/encoder/tokenize.h"
#include "av1/encoder/var_based_part.h"
@@ -34,6 +35,48 @@
#include "av1/encoder/tune_vmaf.h"
#endif
+void av1_reset_part_sf(PARTITION_SPEED_FEATURES *part_sf) {
+ part_sf->partition_search_type = SEARCH_PARTITION;
+ part_sf->less_rectangular_check_level = 0;
+ part_sf->use_square_partition_only_threshold = BLOCK_128X128;
+ part_sf->auto_max_partition_based_on_simple_motion = NOT_IN_USE;
+ part_sf->default_max_partition_size = BLOCK_LARGEST;
+ part_sf->default_min_partition_size = BLOCK_4X4;
+ part_sf->adjust_var_based_rd_partitioning = 0;
+ part_sf->allow_partition_search_skip = 0;
+ part_sf->max_intra_bsize = BLOCK_LARGEST;
+ // This setting only takes effect when partition_search_type is set
+ // to FIXED_PARTITION.
+ part_sf->fixed_partition_size = BLOCK_16X16;
+ // Recode loop tolerance %.
+ part_sf->partition_search_breakout_dist_thr = 0;
+ part_sf->partition_search_breakout_rate_thr = 0;
+ part_sf->prune_ext_partition_types_search_level = 0;
+ part_sf->prune_part4_search = 0;
+ part_sf->ml_prune_partition = 0;
+ part_sf->ml_early_term_after_part_split_level = 0;
+ for (int i = 0; i < PARTITION_BLOCK_SIZES; ++i) {
+ part_sf->ml_partition_search_breakout_thresh[i] =
+ -1; // -1 means not enabled.
+ }
+ part_sf->simple_motion_search_prune_agg = 0;
+ part_sf->simple_motion_search_split = 0;
+ part_sf->simple_motion_search_prune_rect = 0;
+ part_sf->simple_motion_search_early_term_none = 0;
+ part_sf->simple_motion_search_reduce_search_steps = 0;
+ part_sf->intra_cnn_split = 0;
+ part_sf->ext_partition_eval_thresh = BLOCK_8X8;
+ part_sf->prune_ext_part_using_split_info = 0;
+ part_sf->prune_rectangular_split_based_on_qidx = 0;
+ part_sf->early_term_after_none_split = 0;
+ part_sf->ml_predict_breakout_level = 0;
+ part_sf->prune_sub_8x8_partition_level = 0;
+ part_sf->simple_motion_search_rect_split = 0;
+ part_sf->reuse_prev_rd_results_for_part_ab = 0;
+ part_sf->reuse_best_prediction_for_part_ab = 0;
+ part_sf->use_best_rd_for_pruning = 0;
+}
+
static void update_txfm_count(MACROBLOCK *x, MACROBLOCKD *xd,
FRAME_COUNTS *counts, TX_SIZE tx_size, int depth,
int blk_row, int blk_col,
@@ -151,11 +194,14 @@ static void set_txfm_context(MACROBLOCKD *xd, TX_SIZE tx_size, int blk_row,
const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
const int bsw = tx_size_wide_unit[sub_txs];
const int bsh = tx_size_high_unit[sub_txs];
- for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh) {
- for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) {
- const int offsetr = blk_row + row;
+ const int row_end =
+ AOMMIN(tx_size_high_unit[tx_size], max_blocks_high - blk_row);
+ const int col_end =
+ AOMMIN(tx_size_wide_unit[tx_size], max_blocks_wide - blk_col);
+ for (int row = 0; row < row_end; row += bsh) {
+ const int offsetr = blk_row + row;
+ for (int col = 0; col < col_end; col += bsw) {
const int offsetc = blk_col + col;
- if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
set_txfm_context(xd, sub_txs, offsetr, offsetc);
}
}
@@ -281,7 +327,7 @@ static void encode_superblock(const AV1_COMP *const cpi, TileDataEnc *tile_data,
xd->block_ref_scale_factors[ref], num_planes);
}
const int start_plane = (cpi->sf.rt_sf.reuse_inter_pred_nonrd &&
- cm->seq_params.bit_depth == AOM_BITS_8)
+ cm->seq_params->bit_depth == AOM_BITS_8)
? 1
: 0;
av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize,
@@ -395,8 +441,8 @@ static void encode_superblock(const AV1_COMP *const cpi, TileDataEnc *tile_data,
if (!dry_run) {
if (cpi->oxcf.pass == 0 && cpi->svc.temporal_layer_id == 0 &&
cpi->sf.rt_sf.use_temporal_noise_estimate &&
- (!cpi->use_svc ||
- (cpi->use_svc &&
+ (!cpi->ppi->use_svc ||
+ (cpi->ppi->use_svc &&
!cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame &&
cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)))
update_zeromv_cnt(cpi, mbmi, mi_row, mi_col, bsize);
@@ -590,7 +636,7 @@ static void pick_sb_modes(AV1_COMP *const cpi, TileDataEnc *tile_data,
RD_STATS *rd_cost, PARTITION_TYPE partition,
BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
RD_STATS best_rd) {
- if (best_rd.rdcost < 0) {
+ if (cpi->sf.part_sf.use_best_rd_for_pruning && best_rd.rdcost < 0) {
ctx->rd_stats.rdcost = INT64_MAX;
ctx->rd_stats.skip_txfm = 0;
av1_invalid_rd_stats(rd_cost);
@@ -599,7 +645,8 @@ static void pick_sb_modes(AV1_COMP *const cpi, TileDataEnc *tile_data,
av1_set_offsets(cpi, &tile_data->tile_info, x, mi_row, mi_col, bsize);
- if (ctx->rd_mode_is_ready) {
+ if (cpi->sf.part_sf.reuse_prev_rd_results_for_part_ab &&
+ ctx->rd_mode_is_ready) {
assert(ctx->mic.bsize == bsize);
assert(ctx->mic.partition == partition);
rd_cost->rate = ctx->rd_stats.rate;
@@ -672,6 +719,13 @@ static void pick_sb_modes(AV1_COMP *const cpi, TileDataEnc *tile_data,
av1_set_error_per_bit(&x->errorperbit, x->rdmult);
av1_rd_cost_update(x->rdmult, &best_rd);
+ // If set best_rd.rdcost to INT64_MAX, the encoder will not use any previous
+ // rdcost information for the following mode search.
+ // Disabling the feature could get some coding gain, with encoder slowdown.
+ if (!cpi->sf.part_sf.use_best_rd_for_pruning) {
+ av1_invalid_rd_stats(&best_rd);
+ }
+
// Find best coding mode & reconstruct the MB so it is available
// as a predictor for MBs that follow in the SB
if (frame_is_intra_only(cm)) {
@@ -750,11 +804,11 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td) {
#if CONFIG_ENTROPY_STATS
// delta quant applies to both intra and inter
const int super_block_upper_left =
- ((xd->mi_row & (cm->seq_params.mib_size - 1)) == 0) &&
- ((xd->mi_col & (cm->seq_params.mib_size - 1)) == 0);
+ ((xd->mi_row & (cm->seq_params->mib_size - 1)) == 0) &&
+ ((xd->mi_col & (cm->seq_params->mib_size - 1)) == 0);
const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
if (delta_q_info->delta_q_present_flag &&
- (bsize != cm->seq_params.sb_size || !mbmi->skip_txfm) &&
+ (bsize != cm->seq_params->sb_size || !mbmi->skip_txfm) &&
super_block_upper_left) {
const int dq = (mbmi->current_qindex - xd->current_base_qindex) /
delta_q_info->delta_q_res;
@@ -798,10 +852,16 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td) {
}
if (av1_allow_intrabc(cm)) {
- update_cdf(fc->intrabc_cdf, is_intrabc_block(mbmi), 2);
+ const int is_intrabc = is_intrabc_block(mbmi);
+ update_cdf(fc->intrabc_cdf, is_intrabc, 2);
#if CONFIG_ENTROPY_STATS
- ++td->counts->intrabc[is_intrabc_block(mbmi)];
+ ++td->counts->intrabc[is_intrabc];
#endif // CONFIG_ENTROPY_STATS
+ if (is_intrabc) {
+ const int_mv dv_ref = x->mbmi_ext_frame->ref_mv_stack[0].this_mv;
+ av1_update_mv_stats(&mbmi->mv[0].as_mv, &dv_ref.as_mv, &fc->ndvc,
+ MV_SUBPEL_NONE);
+ }
}
if (frame_is_intra_only(cm) || mbmi->skip_mode) return;
@@ -947,7 +1007,7 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td) {
}
}
- if (cm->seq_params.enable_interintra_compound &&
+ if (cm->seq_params->enable_interintra_compound &&
is_interintra_allowed(mbmi)) {
const int bsize_group = size_group_lookup[bsize];
if (mbmi->ref_frame[1] == INTRA_FRAME) {
@@ -1008,7 +1068,7 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td) {
mbmi->motion_mode == SIMPLE_TRANSLATION);
const int masked_compound_used = is_any_masked_compound_used(bsize) &&
- cm->seq_params.enable_masked_compound;
+ cm->seq_params->enable_masked_compound;
if (masked_compound_used) {
const int comp_group_idx_ctx = get_comp_group_idx_context(xd);
#if CONFIG_ENTROPY_STATS
@@ -1053,7 +1113,7 @@ static void update_stats(const AV1_COMMON *const cm, ThreadData *td) {
if (inter_block && cm->features.interp_filter == SWITCHABLE &&
mbmi->motion_mode != WARPED_CAUSAL &&
!is_nontrans_global_motion(xd, mbmi)) {
- update_filter_type_cdf(xd, mbmi, cm->seq_params.enable_dual_filter);
+ update_filter_type_cdf(xd, mbmi, cm->seq_params->enable_dual_filter);
}
if (inter_block &&
!segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
@@ -1160,8 +1220,8 @@ static void encode_b(const AV1_COMP *const cpi, TileDataEnc *tile_data,
TileInfo *const tile = &tile_data->tile_info;
MACROBLOCK *const x = &td->mb;
MACROBLOCKD *xd = &x->e_mbd;
- const int subsampling_x = cm->seq_params.subsampling_x;
- const int subsampling_y = cm->seq_params.subsampling_y;
+ const int subsampling_x = cm->seq_params->subsampling_x;
+ const int subsampling_y = cm->seq_params->subsampling_y;
av1_set_offsets_without_segment_id(cpi, tile, x, mi_row, mi_col, bsize);
const int origin_mult = x->rdmult;
@@ -1174,9 +1234,9 @@ static void encode_b(const AV1_COMP *const cpi, TileDataEnc *tile_data,
set_cb_offsets(x->mbmi_ext_frame->cb_offset, x->cb_offset[PLANE_TYPE_Y],
x->cb_offset[PLANE_TYPE_UV]);
assert(x->cb_offset[PLANE_TYPE_Y] <
- (1 << num_pels_log2_lookup[cpi->common.seq_params.sb_size]));
+ (1 << num_pels_log2_lookup[cpi->common.seq_params->sb_size]));
assert(x->cb_offset[PLANE_TYPE_UV] <
- ((1 << num_pels_log2_lookup[cpi->common.seq_params.sb_size]) >>
+ ((1 << num_pels_log2_lookup[cpi->common.seq_params->sb_size]) >>
(subsampling_x + subsampling_y)));
}
@@ -1184,7 +1244,7 @@ static void encode_b(const AV1_COMP *const cpi, TileDataEnc *tile_data,
if (!dry_run) {
update_cb_offsets(x, bsize, subsampling_x, subsampling_y);
- if (bsize == cpi->common.seq_params.sb_size && mbmi->skip_txfm == 1 &&
+ if (bsize == cpi->common.seq_params->sb_size && mbmi->skip_txfm == 1 &&
cm->delta_q_info.delta_lf_present_flag) {
const int frame_lf_count =
av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
@@ -1202,11 +1262,11 @@ static void encode_b(const AV1_COMP *const cpi, TileDataEnc *tile_data,
// delta quant applies to both intra and inter
const int super_block_upper_left =
- ((mi_row & (cm->seq_params.mib_size - 1)) == 0) &&
- ((mi_col & (cm->seq_params.mib_size - 1)) == 0);
+ ((mi_row & (cm->seq_params->mib_size - 1)) == 0) &&
+ ((mi_col & (cm->seq_params->mib_size - 1)) == 0);
const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
if (delta_q_info->delta_q_present_flag &&
- (bsize != cm->seq_params.sb_size || !mbmi->skip_txfm) &&
+ (bsize != cm->seq_params->sb_size || !mbmi->skip_txfm) &&
super_block_upper_left) {
xd->current_base_qindex = mbmi->current_qindex;
if (delta_q_info->delta_lf_present_flag) {
@@ -1753,11 +1813,11 @@ void av1_rd_use_partition(AV1_COMP *cpi, ThreadData *td, TileDataEnc *tile_data,
// We must have chosen a partitioning and encoding or we'll fail later on.
// No other opportunities for success.
- if (bsize == cm->seq_params.sb_size)
+ if (bsize == cm->seq_params->sb_size)
assert(chosen_rdc.rate < INT_MAX && chosen_rdc.dist < INT64_MAX);
if (do_recon) {
- if (bsize == cm->seq_params.sb_size) {
+ if (bsize == cm->seq_params->sb_size) {
// NOTE: To get estimate for rate due to the tokens, use:
// int rate_coeffs = 0;
// encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_COSTCOEFFS,
@@ -1792,15 +1852,15 @@ static void encode_b_nonrd(const AV1_COMP *const cpi, TileDataEnc *tile_data,
// Nonrd pickmode does not currently support second/combined reference.
assert(!has_second_ref(mbmi));
av1_update_state(cpi, td, ctx, mi_row, mi_col, bsize, dry_run);
- const int subsampling_x = cpi->common.seq_params.subsampling_x;
- const int subsampling_y = cpi->common.seq_params.subsampling_y;
+ const int subsampling_x = cpi->common.seq_params->subsampling_x;
+ const int subsampling_y = cpi->common.seq_params->subsampling_y;
if (!dry_run) {
set_cb_offsets(x->mbmi_ext_frame->cb_offset, x->cb_offset[PLANE_TYPE_Y],
x->cb_offset[PLANE_TYPE_UV]);
assert(x->cb_offset[PLANE_TYPE_Y] <
- (1 << num_pels_log2_lookup[cpi->common.seq_params.sb_size]));
+ (1 << num_pels_log2_lookup[cpi->common.seq_params->sb_size]));
assert(x->cb_offset[PLANE_TYPE_UV] <
- ((1 << num_pels_log2_lookup[cpi->common.seq_params.sb_size]) >>
+ ((1 << num_pels_log2_lookup[cpi->common.seq_params->sb_size]) >>
(subsampling_x + subsampling_y)));
}
encode_superblock(cpi, tile_data, td, tp, dry_run, bsize, rate);
@@ -1808,6 +1868,8 @@ static void encode_b_nonrd(const AV1_COMP *const cpi, TileDataEnc *tile_data,
update_cb_offsets(x, bsize, subsampling_x, subsampling_y);
if (tile_data->allow_update_cdf) update_stats(&cpi->common, td);
}
+ if (cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ && mbmi->skip_txfm)
+ av1_cyclic_reset_segment_skip(cpi, x, mi_row, mi_col, bsize);
// TODO(Ravi/Remya): Move this copy function to a better logical place
// This function will copy the best mode information from block
// level (x->mbmi_ext) to frame level (cpi->mbmi_ext_info.frame_base). This
@@ -1889,8 +1951,8 @@ static void pick_sb_modes_nonrd(AV1_COMP *const cpi, TileDataEnc *tile_data,
int i;
wait_for_top_right_sb(&cpi->mt_info.enc_row_mt, &tile_data->row_mt_sync,
- &tile_data->tile_info, cm->seq_params.sb_size,
- cm->seq_params.mib_size_log2, bsize, mi_row, mi_col);
+ &tile_data->tile_info, cm->seq_params->sb_size,
+ cm->seq_params->mib_size_log2, bsize, mi_row, mi_col);
#if CONFIG_COLLECT_COMPONENT_TIMING
start_timing(cpi, rd_pick_sb_modes_time);
@@ -1947,6 +2009,30 @@ static void pick_sb_modes_nonrd(AV1_COMP *const cpi, TileDataEnc *tile_data,
end_timing(cpi, av1_rd_pick_inter_mode_sb_time);
#endif
}
+ if (cpi->sf.rt_sf.skip_cdef_sb) {
+ // Find the corresponding 64x64 block. It'll be the 128x128 block if that's
+ // the block size.
+ const int mi_row_sb = mi_row - mi_row % MI_SIZE_64X64;
+ const int mi_col_sb = mi_col - mi_col % MI_SIZE_64X64;
+ MB_MODE_INFO **mi_sb =
+ cm->mi_params.mi_grid_base +
+ get_mi_grid_idx(&cm->mi_params, mi_row_sb, mi_col_sb);
+ // Do not skip if intra or new mv is picked.
+ const int skip = mi_sb[0]->skip_cdef_curr_sb &&
+ !(mbmi->mode < INTRA_MODES || mbmi->mode == NEWMV);
+ // If 128x128 block is used, we need to set the flag for all 4 64x64 sub
+ // "blocks".
+ const int block64_in_sb = (bsize == BLOCK_128X128) ? 2 : 1;
+ for (int r = 0; r < block64_in_sb; ++r) {
+ for (int c = 0; c < block64_in_sb; ++c) {
+ const int idx_in_sb =
+ r * MI_SIZE_64X64 * cm->mi_params.mi_stride + c * MI_SIZE_64X64;
+ if (mi_sb[idx_in_sb]) mi_sb[idx_in_sb]->skip_cdef_curr_sb = skip;
+ }
+ }
+ // Store in the pickmode context.
+ ctx->mic.skip_cdef_curr_sb = mi_sb[0]->skip_cdef_curr_sb;
+ }
x->rdmult = orig_rdmult;
ctx->rd_stats.rate = rd_cost->rate;
ctx->rd_stats.dist = rd_cost->dist;
@@ -2301,15 +2387,15 @@ static bool rd_test_partition3(AV1_COMP *const cpi, ThreadData *td,
// Loop over sub-partitions in AB partition type.
for (int i = 0; i < SUB_PARTITIONS_AB; i++) {
if (mode_cache && mode_cache[i]) {
- x->use_intermode_cache = 1;
- x->intermode_cache = mode_cache[i];
+ x->use_mb_mode_cache = 1;
+ x->mb_mode_cache = mode_cache[i];
}
const int mode_search_success =
rd_try_subblock(cpi, td, tile_data, tp, i == SUB_PARTITIONS_AB - 1,
ab_mi_pos[i][0], ab_mi_pos[i][1], ab_subsize[i],
*best_rdc, &sum_rdc, partition, ctxs[i]);
- x->use_intermode_cache = 0;
- x->intermode_cache = NULL;
+ x->use_mb_mode_cache = 0;
+ x->mb_mode_cache = NULL;
if (!mode_search_success) {
return false;
}
@@ -2629,7 +2715,8 @@ static void rectangular_partition_search(
TokenExtra **tp, MACROBLOCK *x, PC_TREE *pc_tree,
RD_SEARCH_MACROBLOCK_CONTEXT *x_ctx,
PartitionSearchState *part_search_state, RD_STATS *best_rdc,
- RD_RECT_PART_WIN_INFO *rect_part_win_info) {
+ RD_RECT_PART_WIN_INFO *rect_part_win_info, const RECT_PART_TYPE start_type,
+ const RECT_PART_TYPE end_type) {
const AV1_COMMON *const cm = &cpi->common;
PartitionBlkParams blk_params = part_search_state->part_blk_params;
RD_STATS *sum_rdc = &part_search_state->sum_rdc;
@@ -2663,7 +2750,7 @@ static void rectangular_partition_search(
};
// Loop over rectangular partition types.
- for (RECT_PART_TYPE i = HORZ; i < NUM_RECT_PARTS; i++) {
+ for (RECT_PART_TYPE i = start_type; i <= end_type; i++) {
assert(IMPLIES(!cpi->oxcf.part_cfg.enable_rect_partitions,
!part_search_state->partition_rect_allowed[i]));
@@ -2879,7 +2966,8 @@ static void ab_partitions_search(
TokenExtra **tp, MACROBLOCK *x, RD_SEARCH_MACROBLOCK_CONTEXT *x_ctx,
PC_TREE *pc_tree, PartitionSearchState *part_search_state,
RD_STATS *best_rdc, RD_RECT_PART_WIN_INFO *rect_part_win_info,
- int pb_source_variance, int ext_partition_allowed) {
+ int pb_source_variance, int ext_partition_allowed,
+ const AB_PART_TYPE start_type, const AB_PART_TYPE end_type) {
PartitionBlkParams blk_params = part_search_state->part_blk_params;
const int mi_row = blk_params.mi_row;
const int mi_col = blk_params.mi_col;
@@ -2888,9 +2976,9 @@ static void ab_partitions_search(
int ab_partitions_allowed[NUM_AB_PARTS] = { 1, 1, 1, 1 };
// Prune AB partitions
av1_prune_ab_partitions(
- cpi, x, pc_tree, bsize, pb_source_variance, best_rdc->rdcost,
- part_search_state->rect_part_rd, part_search_state->split_rd,
- rect_part_win_info, ext_partition_allowed,
+ cpi, x, pc_tree, bsize, mi_row, mi_col, pb_source_variance,
+ best_rdc->rdcost, part_search_state->rect_part_rd,
+ part_search_state->split_rd, rect_part_win_info, ext_partition_allowed,
part_search_state->partition_rect_allowed[HORZ],
part_search_state->partition_rect_allowed[VERT],
&ab_partitions_allowed[HORZ_A], &ab_partitions_allowed[HORZ_B],
@@ -2946,7 +3034,7 @@ static void ab_partitions_search(
};
// Loop over AB partition types.
- for (AB_PART_TYPE ab_part_type = 0; ab_part_type < NUM_AB_PARTS;
+ for (AB_PART_TYPE ab_part_type = start_type; ab_part_type <= end_type;
ab_part_type++) {
const PARTITION_TYPE part_type = ab_part_type + PARTITION_HORZ_A;
@@ -2956,33 +3044,35 @@ static void ab_partitions_search(
continue;
blk_params.subsize = get_partition_subsize(bsize, part_type);
- for (int i = 0; i < SUB_PARTITIONS_AB; i++) {
- // Set AB partition context.
- cur_part_ctxs[ab_part_type][i] = av1_alloc_pmc(
- cpi, ab_subsize[ab_part_type][i], &td->shared_coeff_buf);
- // Set mode as not ready.
- cur_part_ctxs[ab_part_type][i]->rd_mode_is_ready = 0;
- }
+ if (cpi->sf.part_sf.reuse_prev_rd_results_for_part_ab) {
+ for (int i = 0; i < SUB_PARTITIONS_AB; i++) {
+ // Set AB partition context.
+ cur_part_ctxs[ab_part_type][i] = av1_alloc_pmc(
+ cpi, ab_subsize[ab_part_type][i], &td->shared_coeff_buf);
+ // Set mode as not ready.
+ cur_part_ctxs[ab_part_type][i]->rd_mode_is_ready = 0;
+ }
- // We can copy directly the mode search results if we have already searched
- // the current block and the contexts match.
- if (is_ctx_ready[ab_part_type][0]) {
- av1_copy_tree_context(cur_part_ctxs[ab_part_type][0],
- mode_srch_ctx[ab_part_type][0][0]);
- cur_part_ctxs[ab_part_type][0]->mic.partition = part_type;
- cur_part_ctxs[ab_part_type][0]->rd_mode_is_ready = 1;
- if (is_ctx_ready[ab_part_type][1]) {
- av1_copy_tree_context(cur_part_ctxs[ab_part_type][1],
- mode_srch_ctx[ab_part_type][1][0]);
- cur_part_ctxs[ab_part_type][1]->mic.partition = part_type;
- cur_part_ctxs[ab_part_type][1]->rd_mode_is_ready = 1;
+ // We can copy directly the mode search results if we have already
+ // searched the current block and the contexts match.
+ if (is_ctx_ready[ab_part_type][0]) {
+ av1_copy_tree_context(cur_part_ctxs[ab_part_type][0],
+ mode_srch_ctx[ab_part_type][0][0]);
+ cur_part_ctxs[ab_part_type][0]->mic.partition = part_type;
+ cur_part_ctxs[ab_part_type][0]->rd_mode_is_ready = 1;
+ if (is_ctx_ready[ab_part_type][1]) {
+ av1_copy_tree_context(cur_part_ctxs[ab_part_type][1],
+ mode_srch_ctx[ab_part_type][1][0]);
+ cur_part_ctxs[ab_part_type][1]->mic.partition = part_type;
+ cur_part_ctxs[ab_part_type][1]->rd_mode_is_ready = 1;
+ }
}
}
// Even if the contexts don't match, we can still speed up by reusing the
// previous prediction mode.
const MB_MODE_INFO *mode_cache[3] = { NULL, NULL, NULL };
- if (cpi->sf.inter_sf.reuse_best_prediction_for_part_ab) {
+ if (cpi->sf.part_sf.reuse_best_prediction_for_part_ab) {
set_mode_cache_for_partition_ab(mode_cache, pc_tree, ab_part_type);
}
@@ -3180,21 +3270,6 @@ static void prune_4_way_partition_search(
part4_search_allowed);
}
-// Set PARTITION_NONE allowed flag.
-static AOM_INLINE void set_part_none_allowed_flag(
- AV1_COMP *const cpi, PartitionSearchState *part_search_state) {
- PartitionBlkParams blk_params = part_search_state->part_blk_params;
- if ((blk_params.width <= blk_params.min_partition_size_1d) &&
- blk_params.has_rows && blk_params.has_cols)
- part_search_state->partition_none_allowed = 1;
- assert(part_search_state->terminate_partition_search == 0);
-
- // Set PARTITION_NONE for screen content.
- if (cpi->use_screen_content_tools)
- part_search_state->partition_none_allowed =
- blk_params.has_rows && blk_params.has_cols;
-}
-
// Set params needed for PARTITION_NONE search.
static void set_none_partition_params(const AV1_COMP *const cpi, ThreadData *td,
MACROBLOCK *x, PC_TREE *pc_tree,
@@ -3247,11 +3322,10 @@ static void prune_partitions_after_none(AV1_COMP *const cpi, MACROBLOCK *x,
bsize <= cpi->sf.part_sf.use_square_partition_only_threshold &&
bsize > BLOCK_4X4 && cpi->sf.part_sf.ml_predict_breakout_level >= 1;
if (use_ml_based_breakout) {
- if (av1_ml_predict_breakout(cpi, bsize, x, this_rdc, *pb_source_variance,
- xd->bd)) {
- part_search_state->do_square_split = 0;
- part_search_state->do_rectangular_split = 0;
- }
+ av1_ml_predict_breakout(cpi, bsize, x, this_rdc, blk_params,
+ *pb_source_variance, xd->bd,
+ &part_search_state->do_square_split,
+ &part_search_state->do_rectangular_split);
}
// Adjust dist breakout threshold according to the partition size.
@@ -3329,10 +3403,11 @@ static void prune_partitions_after_split(
!part_search_state->terminate_partition_search) {
av1_setup_src_planes(x, cpi->source, mi_row, mi_col, av1_num_planes(cm),
bsize);
- av1_ml_prune_rect_partition(
- cpi, x, bsize, best_rdc->rdcost, part_search_state->none_rd,
- part_search_state->split_rd, &part_search_state->prune_rect_part[HORZ],
- &part_search_state->prune_rect_part[VERT]);
+ av1_ml_prune_rect_partition(cpi, x, bsize, mi_row, mi_col, best_rdc->rdcost,
+ part_search_state->none_rd,
+ part_search_state->split_rd,
+ &part_search_state->prune_rect_part[HORZ],
+ &part_search_state->prune_rect_part[VERT]);
}
}
@@ -3351,12 +3426,11 @@ static void none_partition_search(
const BLOCK_SIZE bsize = blk_params.bsize;
assert(bsize < BLOCK_SIZES_ALL);
- // Set PARTITION_NONE allowed flag.
- set_part_none_allowed_flag(cpi, part_search_state);
if (!part_search_state->partition_none_allowed) return;
int pt_cost = 0;
RD_STATS best_remain_rdcost;
+ av1_invalid_rd_stats(&best_remain_rdcost);
// Set PARTITION_NONE context and cost.
set_none_partition_params(cpi, td, x, pc_tree, part_search_state,
@@ -3402,7 +3476,7 @@ static void none_partition_search(
if (cpi->sf.inter_sf.prune_ref_frame_for_rect_partitions) {
const int ref_type = av1_ref_frame_type(pc_tree->none->mic.ref_frame);
av1_update_picked_ref_frames_mask(
- x, ref_type, bsize, cm->seq_params.mib_size, mi_row, mi_col);
+ x, ref_type, bsize, cm->seq_params->mib_size, mi_row, mi_col);
}
// Calculate the total cost and update the best partition.
@@ -3553,6 +3627,376 @@ static void split_partition_search(
av1_restore_context(x, x_ctx, mi_row, mi_col, bsize, av1_num_planes(cm));
}
+// The max number of nodes in the partition tree.
+// The number of leaf nodes is (128x128) / (4x4) = 1024.
+// The number of all possible parent nodes is 1 + 2 + ... + 512 = 1023.
+#define NUM_NODES 2048
+
+static void write_partition_tree(AV1_COMP *const cpi,
+ const PC_TREE *const pc_tree,
+ const BLOCK_SIZE bsize, const int mi_row,
+ const int mi_col) {
+ (void)mi_row;
+ (void)mi_col;
+ const char *path = cpi->oxcf.partition_info_path;
+ char filename[256];
+ snprintf(filename, sizeof(filename), "%s/partition_tree_sb%d_c%d", path,
+ cpi->sb_counter, 0);
+ ++cpi->sb_counter;
+ FILE *pfile = fopen(filename, "w");
+ fprintf(pfile, "%d", bsize);
+
+ // Write partition type with BFS order.
+ const PC_TREE *tree_node_queue[NUM_NODES] = { NULL };
+ int q_idx = 0;
+ int depth = 0;
+ int last_idx = 1;
+ int num_nodes = 1;
+
+ // First traversal to get number of leaf nodes and depth.
+ tree_node_queue[q_idx] = pc_tree;
+ while (num_nodes > 0) {
+ const PC_TREE *node = tree_node_queue[q_idx];
+ if (node->partitioning == PARTITION_SPLIT) {
+ for (int i = 0; i < 4; ++i) {
+ tree_node_queue[last_idx] = node->split[i];
+ ++last_idx;
+ }
+ ++depth;
+ num_nodes += 4;
+ }
+ --num_nodes;
+ ++q_idx;
+ }
+ const int num_leafs = last_idx;
+ fprintf(pfile, ",%d,%d", num_leafs, /*num_configs=*/1);
+
+ // Write partitions for each node.
+ q_idx = 0;
+ depth = 0;
+ last_idx = 1;
+ num_nodes = 1;
+ tree_node_queue[q_idx] = pc_tree;
+ while (num_nodes > 0) {
+ const PC_TREE *node = tree_node_queue[q_idx];
+ fprintf(pfile, ",%d", node->partitioning);
+ if (node->partitioning == PARTITION_SPLIT) {
+ for (int i = 0; i < 4; ++i) {
+ tree_node_queue[last_idx] = node->split[i];
+ ++last_idx;
+ }
+ ++depth;
+ num_nodes += 4;
+ }
+ --num_nodes;
+ ++q_idx;
+ }
+ fprintf(pfile, "\n");
+
+ fclose(pfile);
+}
+
+static void verify_write_partition_tree(const AV1_COMP *const cpi,
+ const PC_TREE *const pc_tree,
+ const BLOCK_SIZE bsize,
+ const int config_id, const int mi_row,
+ const int mi_col) {
+ (void)mi_row;
+ (void)mi_col;
+ const char *path = cpi->oxcf.partition_info_path;
+ char filename[256];
+ snprintf(filename, sizeof(filename), "%s/verify_partition_tree_sb%d_c%d",
+ path, cpi->sb_counter, config_id);
+ FILE *pfile = fopen(filename, "w");
+ fprintf(pfile, "%d", bsize);
+
+ // Write partition type with BFS order.
+ const PC_TREE *tree_node_queue[NUM_NODES] = { NULL };
+ int q_idx = 0;
+ int depth = 0;
+ int last_idx = 1;
+ int num_nodes = 1;
+
+ // First traversal to get number of leaf nodes and depth.
+ tree_node_queue[q_idx] = pc_tree;
+ while (num_nodes > 0) {
+ const PC_TREE *node = tree_node_queue[q_idx];
+ if (node != NULL && node->partitioning == PARTITION_SPLIT) {
+ for (int i = 0; i < 4; ++i) {
+ tree_node_queue[last_idx] = node->split[i];
+ ++last_idx;
+ }
+ ++depth;
+ num_nodes += 4;
+ }
+ --num_nodes;
+ ++q_idx;
+ }
+ const int num_leafs = last_idx;
+ fprintf(pfile, ",%d,%d", num_leafs, /*num_configs=*/1);
+
+ // Write partitions for each node.
+ q_idx = 0;
+ depth = 0;
+ last_idx = 1;
+ num_nodes = 1;
+ tree_node_queue[q_idx] = pc_tree;
+ while (num_nodes > 0) {
+ const PC_TREE *node = tree_node_queue[q_idx];
+ if (node != NULL) { // suppress warning
+ fprintf(pfile, ",%d", node->partitioning);
+ if (node->partitioning == PARTITION_SPLIT) {
+ for (int i = 0; i < 4; ++i) {
+ tree_node_queue[last_idx] = node->split[i];
+ ++last_idx;
+ }
+ ++depth;
+ num_nodes += 4;
+ }
+ }
+ --num_nodes;
+ ++q_idx;
+ }
+ fprintf(pfile, "\n");
+
+ fclose(pfile);
+}
+
+static int read_partition_tree(AV1_COMP *const cpi, PC_TREE *const pc_tree,
+ const int config_id) {
+ const char *path = cpi->oxcf.partition_info_path;
+ char filename[256];
+ snprintf(filename, sizeof(filename), "%s/partition_tree_sb%d_c%d", path,
+ cpi->sb_counter, config_id);
+ FILE *pfile = fopen(filename, "r");
+ if (pfile == NULL) {
+ printf("Can't find the file: %s\n", filename);
+ exit(0);
+ }
+
+ int read_bsize;
+ int num_nodes;
+ int num_configs;
+ fscanf(pfile, "%d,%d,%d", &read_bsize, &num_nodes, &num_configs);
+ assert(read_bsize == cpi->common.seq_params->sb_size);
+ BLOCK_SIZE bsize = (BLOCK_SIZE)read_bsize;
+
+ PC_TREE *tree_node_queue[NUM_NODES] = { NULL };
+ int last_idx = 1;
+ int q_idx = 0;
+ tree_node_queue[q_idx] = pc_tree;
+ while (num_nodes > 0) {
+ int partitioning;
+ fscanf(pfile, ",%d", &partitioning);
+ assert(partitioning >= PARTITION_NONE &&
+ partitioning < EXT_PARTITION_TYPES);
+ PC_TREE *node = tree_node_queue[q_idx];
+ if (node != NULL) node->partitioning = partitioning;
+ if (partitioning == PARTITION_SPLIT) {
+ const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
+ for (int i = 0; i < 4; ++i) {
+ if (node != NULL) { // Suppress warning
+ node->split[i] = av1_alloc_pc_tree_node(subsize);
+ node->split[i]->index = i;
+ tree_node_queue[last_idx] = node->split[i];
+ ++last_idx;
+ }
+ }
+ bsize = subsize;
+ }
+ --num_nodes;
+ ++q_idx;
+ }
+ fclose(pfile);
+
+ return num_configs;
+}
+
+static RD_STATS rd_search_for_fixed_partition(
+ AV1_COMP *const cpi, ThreadData *td, TileDataEnc *tile_data,
+ TokenExtra **tp, SIMPLE_MOTION_DATA_TREE *sms_tree, int mi_row, int mi_col,
+ const BLOCK_SIZE bsize, PC_TREE *pc_tree) {
+ const PARTITION_TYPE partition = pc_tree->partitioning;
+ const AV1_COMMON *const cm = &cpi->common;
+ const int num_planes = av1_num_planes(cm);
+ MACROBLOCK *const x = &td->mb;
+ MACROBLOCKD *const xd = &x->e_mbd;
+ TileInfo *const tile_info = &tile_data->tile_info;
+ RD_STATS best_rdc;
+ av1_invalid_rd_stats(&best_rdc);
+ int sum_subblock_rate = 0;
+ int64_t sum_subblock_dist = 0;
+ PartitionSearchState part_search_state;
+ init_partition_search_state_params(x, cpi, &part_search_state, mi_row, mi_col,
+ bsize);
+ // Override partition costs at the edges of the frame in the same
+ // way as in read_partition (see decodeframe.c).
+ PartitionBlkParams blk_params = part_search_state.part_blk_params;
+ if (!(blk_params.has_rows && blk_params.has_cols))
+ set_partition_cost_for_edge_blk(cm, &part_search_state);
+
+ av1_set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
+
+ // Save rdmult before it might be changed, so it can be restored later.
+ const int orig_rdmult = x->rdmult;
+ setup_block_rdmult(cpi, x, mi_row, mi_col, bsize, NO_AQ, NULL);
+ (void)orig_rdmult;
+
+ // Set the context.
+ RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
+ xd->above_txfm_context =
+ cm->above_contexts.txfm[tile_info->tile_row] + mi_col;
+ xd->left_txfm_context =
+ xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
+ av1_save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
+
+ assert(bsize < BLOCK_SIZES_ALL);
+ unsigned int pb_source_variance = UINT_MAX;
+ int64_t part_none_rd = INT64_MAX;
+ int64_t none_rd = INT64_MAX;
+ int inc_step[NUM_PART4_TYPES] = { 0 };
+ if (partition == PARTITION_HORZ_4) inc_step[HORZ4] = mi_size_high[bsize] / 4;
+ if (partition == PARTITION_VERT_4) inc_step[VERT4] = mi_size_wide[bsize] / 4;
+
+ switch (partition) {
+ case PARTITION_NONE:
+ none_partition_search(cpi, td, tile_data, x, pc_tree, sms_tree, &x_ctx,
+ &part_search_state, &best_rdc, &pb_source_variance,
+ &none_rd, &part_none_rd);
+ break;
+ case PARTITION_HORZ:
+ rectangular_partition_search(cpi, td, tile_data, tp, x, pc_tree, &x_ctx,
+ &part_search_state, &best_rdc, NULL, HORZ,
+ HORZ);
+ break;
+ case PARTITION_VERT:
+ rectangular_partition_search(cpi, td, tile_data, tp, x, pc_tree, &x_ctx,
+ &part_search_state, &best_rdc, NULL, VERT,
+ VERT);
+ break;
+ case PARTITION_HORZ_A:
+ ab_partitions_search(cpi, td, tile_data, tp, x, &x_ctx, pc_tree,
+ &part_search_state, &best_rdc, NULL,
+ pb_source_variance, 1, HORZ_A, HORZ_A);
+ break;
+ case PARTITION_HORZ_B:
+ ab_partitions_search(cpi, td, tile_data, tp, x, &x_ctx, pc_tree,
+ &part_search_state, &best_rdc, NULL,
+ pb_source_variance, 1, HORZ_B, HORZ_B);
+ break;
+ case PARTITION_VERT_A:
+ ab_partitions_search(cpi, td, tile_data, tp, x, &x_ctx, pc_tree,
+ &part_search_state, &best_rdc, NULL,
+ pb_source_variance, 1, VERT_A, VERT_A);
+ break;
+ case PARTITION_VERT_B:
+ ab_partitions_search(cpi, td, tile_data, tp, x, &x_ctx, pc_tree,
+ &part_search_state, &best_rdc, NULL,
+ pb_source_variance, 1, VERT_B, VERT_B);
+ break;
+ case PARTITION_HORZ_4:
+ rd_pick_4partition(cpi, td, tile_data, tp, x, &x_ctx, pc_tree,
+ pc_tree->horizontal4, &part_search_state, &best_rdc,
+ inc_step, PARTITION_HORZ_4);
+ break;
+ case PARTITION_VERT_4:
+ rd_pick_4partition(cpi, td, tile_data, tp, x, &x_ctx, pc_tree,
+ pc_tree->vertical4, &part_search_state, &best_rdc,
+ inc_step, PARTITION_VERT_4);
+ break;
+ case PARTITION_SPLIT:
+ for (int idx = 0; idx < SUB_PARTITIONS_SPLIT; ++idx) {
+ const BLOCK_SIZE subsize =
+ get_partition_subsize(bsize, PARTITION_SPLIT);
+ assert(subsize < BLOCK_SIZES_ALL);
+ const int next_mi_row =
+ idx < 2 ? mi_row : mi_row + mi_size_high[subsize];
+ const int next_mi_col =
+ idx % 2 == 0 ? mi_col : mi_col + mi_size_wide[subsize];
+ if (next_mi_row >= cm->mi_params.mi_rows ||
+ next_mi_col >= cm->mi_params.mi_cols) {
+ continue;
+ }
+ const RD_STATS subblock_rdc = rd_search_for_fixed_partition(
+ cpi, td, tile_data, tp, sms_tree->split[idx], next_mi_row,
+ next_mi_col, subsize, pc_tree->split[idx]);
+ sum_subblock_rate += subblock_rdc.rate;
+ sum_subblock_dist += subblock_rdc.dist;
+ }
+ best_rdc.rate = sum_subblock_rate;
+ best_rdc.rate += part_search_state.partition_cost[PARTITION_SPLIT];
+ best_rdc.dist = sum_subblock_dist;
+ best_rdc.rdcost = RDCOST(x->rdmult, best_rdc.rate, best_rdc.dist);
+ break;
+ default: assert(0 && "invalid partition type."); exit(0);
+ }
+ // Note: it is necessary to restore context information.
+ av1_restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
+
+ if (bsize != cm->seq_params->sb_size) {
+ encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize,
+ pc_tree, NULL);
+ }
+ x->rdmult = orig_rdmult;
+
+ return best_rdc;
+}
+
+bool av1_rd_partition_search(AV1_COMP *const cpi, ThreadData *td,
+ TileDataEnc *tile_data, TokenExtra **tp,
+ SIMPLE_MOTION_DATA_TREE *sms_root, int mi_row,
+ int mi_col, const BLOCK_SIZE bsize,
+ RD_STATS *best_rd_cost) {
+ AV1_COMMON *const cm = &cpi->common;
+ MACROBLOCK *const x = &td->mb;
+ int best_idx = 0;
+ int64_t min_rdcost = INT64_MAX;
+ int num_configs;
+ RD_STATS *rdcost = NULL;
+ int i = 0;
+ do {
+ PC_TREE *const pc_tree = av1_alloc_pc_tree_node(bsize);
+ num_configs = read_partition_tree(cpi, pc_tree, i);
+ if (i == 0) {
+ rdcost = aom_calloc(num_configs, sizeof(*rdcost));
+ }
+ if (num_configs <= 0) {
+ av1_free_pc_tree_recursive(pc_tree, av1_num_planes(cm), 0, 0);
+ if (rdcost != NULL) aom_free(rdcost);
+ exit(0);
+ return false;
+ }
+ verify_write_partition_tree(cpi, pc_tree, bsize, i, mi_row, mi_col);
+ // Encode the block with the given partition tree. Get rdcost and encoding
+ // time.
+ rdcost[i] = rd_search_for_fixed_partition(cpi, td, tile_data, tp, sms_root,
+ mi_row, mi_col, bsize, pc_tree);
+
+ if (rdcost[i].rdcost < min_rdcost) {
+ min_rdcost = rdcost[i].rdcost;
+ best_idx = i;
+ *best_rd_cost = rdcost[i];
+ }
+ av1_free_pc_tree_recursive(pc_tree, av1_num_planes(cm), 0, 0);
+ ++i;
+ } while (i < num_configs);
+
+ // Encode with the partition configuration with the smallest rdcost.
+ PC_TREE *const pc_tree = av1_alloc_pc_tree_node(bsize);
+ read_partition_tree(cpi, pc_tree, best_idx);
+ rd_search_for_fixed_partition(cpi, td, tile_data, tp, sms_root, mi_row,
+ mi_col, bsize, pc_tree);
+ set_cb_offsets(x->cb_offset, 0, 0);
+ encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, OUTPUT_ENABLED, bsize,
+ pc_tree, NULL);
+
+ av1_free_pc_tree_recursive(pc_tree, av1_num_planes(cm), 0, 0);
+ aom_free(rdcost);
+ ++cpi->sb_counter;
+
+ return true;
+}
+
/*!\brief AV1 block partition search (full search).
*
* \ingroup partition_search
@@ -3617,7 +4061,7 @@ bool av1_rd_pick_partition(AV1_COMP *const cpi, ThreadData *td,
av1_invalid_rd_stats(rd_cost);
return part_search_state.found_best_partition;
}
- if (bsize == cm->seq_params.sb_size) x->must_find_valid_partition = 0;
+ if (bsize == cm->seq_params->sb_size) x->must_find_valid_partition = 0;
// Override skipping rectangular partition operations for edge blocks.
if (none_rd) *none_rd = 0;
@@ -3742,7 +4186,7 @@ BEGIN_PARTITION_SEARCH:
// when NONE and SPLIT partition rd_costs are INT64_MAX.
if (cpi->sf.part_sf.early_term_after_none_split &&
part_none_rd == INT64_MAX && part_split_rd == INT64_MAX &&
- !x->must_find_valid_partition && (bsize != cm->seq_params.sb_size)) {
+ !x->must_find_valid_partition && (bsize != cm->seq_params->sb_size)) {
part_search_state.terminate_partition_search = 1;
}
@@ -3755,7 +4199,7 @@ BEGIN_PARTITION_SEARCH:
// Rectangular partitions search stage.
rectangular_partition_search(cpi, td, tile_data, tp, x, pc_tree, &x_ctx,
&part_search_state, &best_rdc,
- rect_part_win_info);
+ rect_part_win_info, HORZ, VERT);
#if CONFIG_COLLECT_COMPONENT_TIMING
end_timing(cpi, rectangular_partition_search_time);
#endif
@@ -3784,7 +4228,8 @@ BEGIN_PARTITION_SEARCH:
// AB partitions search stage.
ab_partitions_search(cpi, td, tile_data, tp, x, &x_ctx, pc_tree,
&part_search_state, &best_rdc, rect_part_win_info,
- pb_source_variance, ext_partition_allowed);
+ pb_source_variance, ext_partition_allowed, HORZ_A,
+ VERT_B);
#if CONFIG_COLLECT_COMPONENT_TIMING
end_timing(cpi, ab_partitions_search_time);
#endif
@@ -3832,7 +4277,7 @@ BEGIN_PARTITION_SEARCH:
end_timing(cpi, rd_pick_4partition_time);
#endif
- if (bsize == cm->seq_params.sb_size &&
+ if (bsize == cm->seq_params->sb_size &&
!part_search_state.found_best_partition) {
// Did not find a valid partition, go back and search again, with less
// constraint on which partition types to search.
@@ -3859,7 +4304,7 @@ BEGIN_PARTITION_SEARCH:
// prediction block.
print_partition_timing_stats_with_rdcost(
part_timing_stats, mi_row, mi_col, bsize,
- cpi->gf_group.update_type[cpi->gf_group.index],
+ cpi->ppi->gf_group.update_type[cpi->gf_frame_index],
cm->current_frame.frame_number, &best_rdc, "part_timing.csv");
/*
print_partition_timing_stats(part_timing_stats, cm->show_frame,
@@ -3881,11 +4326,14 @@ BEGIN_PARTITION_SEARCH:
// If a valid partition is found and reconstruction is required for future
// sub-blocks in the same group.
if (part_search_state.found_best_partition && pc_tree->index != 3) {
- if (bsize == cm->seq_params.sb_size) {
+ if (bsize == cm->seq_params->sb_size) {
// Encode the superblock.
const int emit_output = multi_pass_mode != SB_DRY_PASS;
const RUN_TYPE run_type = emit_output ? OUTPUT_ENABLED : DRY_RUN_NORMAL;
+ // Write partition tree to file. Not used by default.
+ if (0) write_partition_tree(cpi, pc_tree, bsize, mi_row, mi_col);
+
set_cb_offsets(x->cb_offset, 0, 0);
encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, run_type, bsize,
pc_tree, NULL);
@@ -3907,7 +4355,7 @@ BEGIN_PARTITION_SEARCH:
if (pc_tree_dealloc == 0)
av1_free_pc_tree_recursive(pc_tree, num_planes, 1, 1);
- if (bsize == cm->seq_params.sb_size) {
+ if (bsize == cm->seq_params->sb_size) {
assert(best_rdc.rate < INT_MAX);
assert(best_rdc.dist < INT64_MAX);
} else {
@@ -3958,7 +4406,7 @@ static int ml_predict_var_paritioning(AV1_COMP *cpi, MACROBLOCK *x,
const float thresh = cpi->oxcf.speed <= 5 ? 1.25f : 0.0f;
float features[FEATURES] = { 0.0f };
const int dc_q = av1_dc_quant_QTX(cm->quant_params.base_qindex, 0,
- cm->seq_params.bit_depth);
+ cm->seq_params->bit_depth);
int feature_idx = 0;
float score[LABELS];
@@ -4038,7 +4486,7 @@ static int store_partition_data(AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
{
const int dc_q = av1_dc_quant_QTX(cm->quant_params.base_qindex, 0,
- cm->seq_params.bit_depth);
+ cm->seq_params->bit_depth);
int feature_idx = 0;
features[feature_idx++] = logf((float)(dc_q * dc_q) / 256.0f + 1.0f);
@@ -4186,7 +4634,7 @@ void av1_nonrd_pick_partition(AV1_COMP *cpi, ThreadData *td,
int partition_none_allowed = !force_horz_split && !force_vert_split;
assert(mi_size_wide[bsize] == mi_size_high[bsize]); // Square partition only
- assert(cm->seq_params.sb_size == BLOCK_64X64); // Small SB so far
+ assert(cm->seq_params->sb_size == BLOCK_64X64); // Small SB so far
(void)*tp_orig;
@@ -4293,7 +4741,7 @@ void av1_nonrd_pick_partition(AV1_COMP *cpi, ThreadData *td,
fill_mode_info_sb(cpi, x, mi_row, mi_col, bsize, pc_tree);
if (do_recon) {
- if (bsize == cm->seq_params.sb_size) {
+ if (bsize == cm->seq_params->sb_size) {
// NOTE: To get estimate for rate due to the tokens, use:
// int rate_coeffs = 0;
// encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_COSTCOEFFS,
diff --git a/third_party/libaom/source/libaom/av1/encoder/partition_search.h b/third_party/libaom/source/libaom/av1/encoder/partition_search.h
index 136548e3e6..8a6717690c 100644
--- a/third_party/libaom/source/libaom/av1/encoder/partition_search.h
+++ b/third_party/libaom/source/libaom/av1/encoder/partition_search.h
@@ -39,6 +39,13 @@ void av1_nonrd_pick_partition(AV1_COMP *cpi, ThreadData *td,
RD_STATS *rd_cost, int do_recon, int64_t best_rd,
PC_TREE *pc_tree);
#endif
+void av1_reset_part_sf(PARTITION_SPEED_FEATURES *part_sf);
+
+bool av1_rd_partition_search(AV1_COMP *const cpi, ThreadData *td,
+ TileDataEnc *tile_data, TokenExtra **tp,
+ SIMPLE_MOTION_DATA_TREE *sms_root, int mi_row,
+ int mi_col, BLOCK_SIZE bsize,
+ RD_STATS *best_rd_cost);
bool av1_rd_pick_partition(AV1_COMP *const cpi, ThreadData *td,
TileDataEnc *tile_data, TokenExtra **tp, int mi_row,
int mi_col, BLOCK_SIZE bsize, RD_STATS *rd_cost,
@@ -57,12 +64,14 @@ static AOM_INLINE void set_cb_offsets(uint16_t *cb_offset,
static AOM_INLINE void update_cb_offsets(MACROBLOCK *x, const BLOCK_SIZE bsize,
const int subsampling_x,
const int subsampling_y) {
- const BLOCK_SIZE plane_bsize =
- get_plane_block_size(bsize, subsampling_x, subsampling_y);
x->cb_offset[PLANE_TYPE_Y] += block_size_wide[bsize] * block_size_high[bsize];
- if (x->e_mbd.is_chroma_ref)
+ if (x->e_mbd.is_chroma_ref) {
+ const BLOCK_SIZE plane_bsize =
+ get_plane_block_size(bsize, subsampling_x, subsampling_y);
+ assert(plane_bsize != BLOCK_INVALID);
x->cb_offset[PLANE_TYPE_UV] +=
block_size_wide[plane_bsize] * block_size_high[plane_bsize];
+ }
}
#endif // AOM_AV1_ENCODER_PARTITION_SEARCH_H_
diff --git a/third_party/libaom/source/libaom/av1/encoder/partition_strategy.c b/third_party/libaom/source/libaom/av1/encoder/partition_strategy.c
index f846d595bc..bf678a452f 100644
--- a/third_party/libaom/source/libaom/av1/encoder/partition_strategy.c
+++ b/third_party/libaom/source/libaom/av1/encoder/partition_strategy.c
@@ -35,6 +35,48 @@ static AOM_INLINE void simple_motion_search_prune_part_features(
int mi_row, int mi_col, BLOCK_SIZE bsize, float *features,
int features_to_get);
+static bool ext_ml_model_decision_before_none(
+ AV1_COMP *cpi, const float features_from_motion[FEATURE_SIZE_SMS_SPLIT],
+ int *partition_none_allowed, int *partition_horz_allowed,
+ int *partition_vert_allowed, int *do_rectangular_split,
+ int *do_square_split);
+
+static bool ext_ml_model_decision_before_none_part2(
+ AV1_COMP *cpi,
+ const float features_from_motion[FEATURE_SIZE_SMS_PRUNE_PART],
+ int *prune_horz, int *prune_vert);
+
+static bool ext_ml_model_decision_after_none(
+ ExtPartController *const ext_part_controller, const int is_intra_frame,
+ const float *const features_after_none, int *do_square_split,
+ int *do_rectangular_split);
+
+static bool ext_ml_model_decision_after_none_part2(
+ AV1_COMP *const cpi, const float *const features_terminate,
+ int *terminate_partition_search);
+
+static bool ext_ml_model_decision_after_split(
+ AV1_COMP *const cpi, const float *const features_terminate,
+ int *terminate_partition_search);
+
+static bool ext_ml_model_decision_after_split_part2(
+ ExtPartController *const ext_part_controller, const int is_intra_frame,
+ const float *const features_prune, int *prune_rect_part_horz,
+ int *prune_rect_part_vert);
+
+static bool ext_ml_model_decision_after_rect(
+ ExtPartController *const ext_part_controller, const int is_intra_frame,
+ const float *const features_after_rect, int *horza_partition_allowed,
+ int *horzb_partition_allowed, int *verta_partition_allowed,
+ int *vertb_partition_allowed);
+
+static bool ext_ml_model_decision_after_part_ab(
+ AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize, int part_ctx,
+ int64_t best_rd, int64_t rect_part_rd[NUM_RECT_PARTS][SUB_PARTITIONS_RECT],
+ int64_t split_rd[SUB_PARTITIONS_SPLIT], int *const partition_horz4_allowed,
+ int *const partition_vert4_allowed, unsigned int pb_source_variance,
+ int mi_row, int mi_col);
+
static INLINE int convert_bsize_to_idx(BLOCK_SIZE bsize) {
switch (bsize) {
case BLOCK_128X128: return 0;
@@ -45,9 +87,45 @@ static INLINE int convert_bsize_to_idx(BLOCK_SIZE bsize) {
default: assert(0 && "Invalid bsize"); return -1;
}
}
-#endif
-#if !CONFIG_REALTIME_ONLY
+static char *get_feature_file_name(int id) {
+ static char *feature_file_names[] = {
+ "feature_before_partition_none",
+ "feature_before_partition_none_prune_rect",
+ "feature_after_partition_none_prune",
+ "feature_after_partition_none_terminate",
+ "feature_after_partition_split_terminate",
+ "feature_after_partition_split_prune_rect",
+ "feature_after_partition_rect",
+ "feature_after_partition_ab",
+ };
+
+ return feature_file_names[id];
+}
+
+static void write_features_to_file(const char *const path,
+ const bool is_test_mode,
+ const float *features,
+ const int feature_size, const int id,
+ const int bsize, const int mi_row,
+ const int mi_col) {
+ if (!WRITE_FEATURE_TO_FILE && !is_test_mode) return;
+
+ char filename[256];
+ snprintf(filename, sizeof(filename), "%s/%s", path,
+ get_feature_file_name(id));
+ FILE *pfile = fopen(filename, "a");
+ if (!is_test_mode) {
+ fprintf(pfile, "%d,%d,%d,%d,%d\n", id, bsize, mi_row, mi_col, feature_size);
+ }
+ for (int i = 0; i < feature_size; ++i) {
+ fprintf(pfile, "%.6f", features[i]);
+ if (i < feature_size - 1) fprintf(pfile, ",");
+ }
+ fprintf(pfile, "\n");
+ fclose(pfile);
+}
+
// TODO(chiyotsai@google.com): This is very much a work in progress. We still
// need to the following:
// -- add support for hdres
@@ -61,7 +139,7 @@ void av1_intra_mode_cnn_partition(const AV1_COMMON *const cm, MACROBLOCK *x,
int *partition_vert_allowed,
int *do_rectangular_split,
int *do_square_split) {
- assert(cm->seq_params.sb_size >= BLOCK_64X64 &&
+ assert(cm->seq_params->sb_size >= BLOCK_64X64 &&
"Invalid sb_size for intra_cnn!");
const int bsize_idx = convert_bsize_to_idx(bsize);
@@ -284,6 +362,20 @@ void av1_simple_motion_search_based_split(
simple_motion_search_prune_part_features(cpi, x, sms_tree, mi_row, mi_col,
bsize, features,
FEATURE_SMS_SPLIT_MODEL_FLAG);
+
+ // Write features to file
+ write_features_to_file(cpi->oxcf.partition_info_path,
+ cpi->ext_part_controller.test_mode, features,
+ FEATURE_SIZE_SMS_SPLIT, 0, bsize, mi_row, mi_col);
+
+ // Note: it is intended to not normalize the features here, to keep it
+ // consistent for all features collected and passed to the external model.
+ if (ext_ml_model_decision_before_none(
+ cpi, features, partition_none_allowed, partition_horz_allowed,
+ partition_vert_allowed, do_rectangular_split, do_square_split)) {
+ return;
+ }
+
for (int idx = 0; idx < FEATURE_SIZE_SMS_SPLIT; idx++) {
features[idx] = (features[idx] - ml_mean[idx]) / ml_std[idx];
}
@@ -308,7 +400,7 @@ void av1_simple_motion_search_based_split(
// If the score is very low, prune rectangular split since it is unlikely to
// occur.
if (cpi->sf.part_sf.simple_motion_search_rect_split) {
- const float scale = res_idx >= 2 ? 3 : 2;
+ const float scale = res_idx >= 2 ? 3.0f : 2.0f;
const float rect_split_thresh =
scale * av1_simple_motion_search_no_split_thresh
[cpi->sf.part_sf.simple_motion_search_rect_split][res_idx]
@@ -356,7 +448,7 @@ static int simple_motion_search_get_best_ref(
int_mv best_mv =
av1_simple_motion_search(cpi, x, mi_row, mi_col, bsize, ref,
start_mvs[ref], num_planes, use_subpixel);
- curr_var = cpi->fn_ptr[bsize].vf(
+ curr_var = cpi->ppi->fn_ptr[bsize].vf(
x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].dst.buf,
xd->plane[0].dst.stride, &curr_sse);
if (curr_sse < *best_sse) {
@@ -543,6 +635,24 @@ void av1_simple_motion_search_prune_rect(
simple_motion_search_prune_part_features(cpi, x, sms_tree, mi_row, mi_col,
bsize, features,
FEATURE_SMS_PRUNE_PART_FLAG);
+
+ // Note: it is intended to not normalize the features here, to keep it
+ // consistent for all features collected and passed to the external model.
+ if (cpi->sf.part_sf.simple_motion_search_prune_rect &&
+ !frame_is_intra_only(cm) &&
+ (partition_horz_allowed || partition_vert_allowed) &&
+ bsize >= BLOCK_8X8 && !av1_superres_scaled(cm)) {
+ // Write features to file
+ write_features_to_file(
+ cpi->oxcf.partition_info_path, cpi->ext_part_controller.test_mode,
+ features, FEATURE_SIZE_SMS_PRUNE_PART, 1, bsize, mi_row, mi_col);
+
+ if (ext_ml_model_decision_before_none_part2(cpi, features, prune_horz,
+ prune_vert)) {
+ return;
+ }
+ }
+
for (int f_idx = 0; f_idx < FEATURE_SIZE_SMS_PRUNE_PART; f_idx++) {
features[f_idx] = (features[f_idx] - ml_mean[f_idx]) / ml_std[f_idx];
}
@@ -617,6 +727,15 @@ void av1_simple_motion_search_early_term_none(
assert(0 && "Unexpected block size in simple_motion_term_none");
}
+ // Write features to file
+ write_features_to_file(cpi->oxcf.partition_info_path,
+ cpi->ext_part_controller.test_mode, features,
+ FEATURE_SIZE_SMS_TERM_NONE, 3, bsize, mi_row, mi_col);
+
+ if (ext_ml_model_decision_after_none_part2(cpi, features, early_terminate)) {
+ return;
+ }
+
if (ml_model) {
float score = 0.0f;
for (f_idx = 0; f_idx < FEATURE_SIZE_SMS_TERM_NONE; f_idx++) {
@@ -636,8 +755,9 @@ void av1_get_max_min_partition_features(AV1_COMP *const cpi, MACROBLOCK *x,
float *features) {
AV1_COMMON *const cm = &cpi->common;
MACROBLOCKD *xd = &x->e_mbd;
- const BLOCK_SIZE sb_size = cm->seq_params.sb_size;
+ const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
+ // Currently this only allows 128X128 SB size. May extend it to 64X64 SB size.
assert(sb_size == BLOCK_128X128);
int f_idx = 0;
@@ -701,14 +821,18 @@ void av1_get_max_min_partition_features(AV1_COMP *const cpi, MACROBLOCK *x,
if (log_sse > max_log_sse) max_log_sse = log_sse;
}
aom_clear_system_state();
- const float avg_mv_row = sum_mv_row / 64.0f;
- const float var_mv_row = sum_mv_row_sq / 64.0f - avg_mv_row * avg_mv_row;
+ const int blks = mb_rows * mb_cols;
+ const float avg_mv_row = sum_mv_row / (float)blks;
+ const float var_mv_row =
+ sum_mv_row_sq / (float)blks - avg_mv_row * avg_mv_row;
- const float avg_mv_col = sum_mv_col / 64.0f;
- const float var_mv_col = sum_mv_col_sq / 64.0f - avg_mv_col * avg_mv_col;
+ const float avg_mv_col = sum_mv_col / (float)blks;
+ const float var_mv_col =
+ sum_mv_col_sq / (float)blks - avg_mv_col * avg_mv_col;
- const float avg_log_sse = sum_log_sse / 64.0f;
- const float var_log_sse = sum_log_sse_sq / 64.0f - avg_log_sse * avg_log_sse;
+ const float avg_log_sse = sum_log_sse / (float)blks;
+ const float var_log_sse =
+ sum_log_sse_sq / (float)blks - avg_log_sse * avg_log_sse;
features[f_idx++] = avg_log_sse;
features[f_idx++] = avg_mv_col;
@@ -727,11 +851,20 @@ void av1_get_max_min_partition_features(AV1_COMP *const cpi, MACROBLOCK *x,
assert(f_idx == FEATURE_SIZE_MAX_MIN_PART_PRED);
}
+// Convert result index to block size.
+// result idx block size
+// 0 BLOCK_16X16
+// 1 BLOCK_32X32
+// 2 BLOCK_64X64
+// 3 BLOCK_128X128
+static BLOCK_SIZE get_block_size(int idx) {
+ return (BLOCK_SIZE)((idx + 2) * 3);
+}
+
BLOCK_SIZE av1_predict_max_partition(const AV1_COMP *const cpi,
const MACROBLOCK *const x,
const float *features) {
- float scores[MAX_NUM_CLASSES_MAX_MIN_PART_PRED] = { 0.0f },
- probs[MAX_NUM_CLASSES_MAX_MIN_PART_PRED] = { 0.0f };
+ float scores[MAX_NUM_CLASSES_MAX_MIN_PART_PRED] = { 0.0f };
const NN_CONFIG *nn_config = &av1_max_part_pred_nn_config;
assert(cpi->sf.part_sf.auto_max_partition_based_on_simple_motion !=
@@ -739,21 +872,26 @@ BLOCK_SIZE av1_predict_max_partition(const AV1_COMP *const cpi,
aom_clear_system_state();
av1_nn_predict(features, nn_config, 1, scores);
- av1_nn_softmax(scores, probs, MAX_NUM_CLASSES_MAX_MIN_PART_PRED);
int result = MAX_NUM_CLASSES_MAX_MIN_PART_PRED - 1;
if (cpi->sf.part_sf.auto_max_partition_based_on_simple_motion ==
DIRECT_PRED) {
result = 0;
- float max_prob = probs[0];
+ float max_score = scores[0];
for (int i = 1; i < MAX_NUM_CLASSES_MAX_MIN_PART_PRED; ++i) {
- if (probs[i] > max_prob) {
- max_prob = probs[i];
+ if (scores[i] > max_score) {
+ max_score = scores[i];
result = i;
}
}
- } else if (cpi->sf.part_sf.auto_max_partition_based_on_simple_motion ==
- RELAXED_PRED) {
+ return get_block_size(result);
+ }
+
+ float probs[MAX_NUM_CLASSES_MAX_MIN_PART_PRED] = { 0.0f };
+ av1_nn_softmax(scores, probs, MAX_NUM_CLASSES_MAX_MIN_PART_PRED);
+
+ if (cpi->sf.part_sf.auto_max_partition_based_on_simple_motion ==
+ RELAXED_PRED) {
for (result = MAX_NUM_CLASSES_MAX_MIN_PART_PRED - 1; result >= 0;
--result) {
if (result < MAX_NUM_CLASSES_MAX_MIN_PART_PRED - 1) {
@@ -763,7 +901,7 @@ BLOCK_SIZE av1_predict_max_partition(const AV1_COMP *const cpi,
}
} else if (cpi->sf.part_sf.auto_max_partition_based_on_simple_motion ==
ADAPT_PRED) {
- const BLOCK_SIZE sb_size = cpi->common.seq_params.sb_size;
+ const BLOCK_SIZE sb_size = cpi->common.seq_params->sb_size;
const MACROBLOCKD *const xd = &x->e_mbd;
// TODO(debargha): x->source_variance is unavailable at this point,
// so compute. The redundant recomputation later can be removed.
@@ -784,7 +922,7 @@ BLOCK_SIZE av1_predict_max_partition(const AV1_COMP *const cpi,
}
}
- return (BLOCK_SIZE)((result + 2) * 3);
+ return get_block_size(result);
}
// Get the minimum partition block width and height(in log scale) under a
@@ -911,6 +1049,16 @@ void av1_ml_early_term_after_split(AV1_COMP *const cpi, MACROBLOCK *const x,
assert(f_idx == FEATURES);
+ // Write features to file
+ write_features_to_file(cpi->oxcf.partition_info_path,
+ cpi->ext_part_controller.test_mode, features, FEATURES,
+ 4, bsize, mi_row, mi_col);
+
+ if (ext_ml_model_decision_after_split(cpi, features,
+ terminate_partition_search)) {
+ return;
+ }
+
float score = 0.0f;
av1_nn_predict(features, nn_config, 1, &score);
// Score is indicator of confidence that we should NOT terminate.
@@ -918,10 +1066,11 @@ void av1_ml_early_term_after_split(AV1_COMP *const cpi, MACROBLOCK *const x,
}
#undef FEATURES
-void av1_ml_prune_rect_partition(const AV1_COMP *const cpi,
- const MACROBLOCK *const x, BLOCK_SIZE bsize,
- int64_t best_rd, int64_t none_rd,
- int64_t *split_rd, int *const dst_prune_horz,
+void av1_ml_prune_rect_partition(AV1_COMP *const cpi, const MACROBLOCK *const x,
+ BLOCK_SIZE bsize, const int mi_row,
+ const int mi_col, int64_t best_rd,
+ int64_t none_rd, int64_t *split_rd,
+ int *const dst_prune_horz,
int *const dst_prune_vert) {
if (bsize < BLOCK_8X8 || best_rd >= 1000000000) return;
best_rd = AOMMAX(best_rd, 1);
@@ -998,6 +1147,17 @@ void av1_ml_prune_rect_partition(const AV1_COMP *const cpi,
for (int i = 0; i < SUB_PARTITIONS_SPLIT; i++)
features[5 + i] = (float)split_variance[i] / (float)whole_block_variance;
+ // Write features to file
+ write_features_to_file(cpi->oxcf.partition_info_path,
+ cpi->ext_part_controller.test_mode, features,
+ /*feature_size=*/9, 5, bsize, mi_row, mi_col);
+
+ if (ext_ml_model_decision_after_split_part2(
+ &cpi->ext_part_controller, frame_is_intra_only(&cpi->common),
+ features, dst_prune_horz, dst_prune_vert)) {
+ return;
+ }
+
// 2. Do the prediction and prune 0-2 partitions based on their probabilities
float raw_scores[3] = { 0.0f };
av1_nn_predict(features, nn_config, 1, raw_scores);
@@ -1014,7 +1174,8 @@ void av1_ml_prune_rect_partition(const AV1_COMP *const cpi,
// Use a ML model to predict if horz_a, horz_b, vert_a, and vert_b should be
// considered.
void av1_ml_prune_ab_partition(
- BLOCK_SIZE bsize, int part_ctx, int var_ctx, int64_t best_rd,
+ AV1_COMP *const cpi, BLOCK_SIZE bsize, const int mi_row, const int mi_col,
+ int part_ctx, int var_ctx, int64_t best_rd,
int64_t horz_rd[SUB_PARTITIONS_RECT], int64_t vert_rd[SUB_PARTITIONS_RECT],
int64_t split_rd[SUB_PARTITIONS_SPLIT], int *const horza_partition_allowed,
int *const horzb_partition_allowed, int *const verta_partition_allowed,
@@ -1065,6 +1226,20 @@ void av1_ml_prune_ab_partition(
}
assert(feature_index == 10);
+ // Write features to file
+ if (!frame_is_intra_only(&cpi->common)) {
+ write_features_to_file(cpi->oxcf.partition_info_path,
+ cpi->ext_part_controller.test_mode, features,
+ /*feature_size=*/10, 6, bsize, mi_row, mi_col);
+ }
+
+ if (ext_ml_model_decision_after_rect(
+ &cpi->ext_part_controller, frame_is_intra_only(&cpi->common),
+ features, horza_partition_allowed, horzb_partition_allowed,
+ verta_partition_allowed, vertb_partition_allowed)) {
+ return;
+ }
+
// Calculate scores using the NN model.
float score[16] = { 0.0f };
av1_nn_predict(features, nn_config, 1, score);
@@ -1101,12 +1276,17 @@ void av1_ml_prune_ab_partition(
#define LABELS 4
// Use a ML model to predict if horz4 and vert4 should be considered.
void av1_ml_prune_4_partition(
- const AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize,
- int part_ctx, int64_t best_rd,
- int64_t rect_part_rd[NUM_RECT_PARTS][SUB_PARTITIONS_RECT],
+ AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize, int part_ctx,
+ int64_t best_rd, int64_t rect_part_rd[NUM_RECT_PARTS][SUB_PARTITIONS_RECT],
int64_t split_rd[SUB_PARTITIONS_SPLIT], int *const partition_horz4_allowed,
int *const partition_vert4_allowed, unsigned int pb_source_variance,
int mi_row, int mi_col) {
+ if (ext_ml_model_decision_after_part_ab(
+ cpi, x, bsize, part_ctx, best_rd, rect_part_rd, split_rd,
+ partition_horz4_allowed, partition_vert4_allowed, pb_source_variance,
+ mi_row, mi_col))
+ return;
+
if (best_rd >= 1000000000) return;
int64_t *horz_rd = rect_part_rd[HORZ];
int64_t *vert_rd = rect_part_rd[VERT];
@@ -1206,6 +1386,13 @@ void av1_ml_prune_4_partition(
}
assert(feature_index == FEATURES);
+ // Write features to file
+ if (!frame_is_intra_only(&cpi->common)) {
+ write_features_to_file(cpi->oxcf.partition_info_path,
+ cpi->ext_part_controller.test_mode, features,
+ FEATURES, 7, bsize, mi_row, mi_col);
+ }
+
// Calculate scores using the NN model.
float score[LABELS] = { 0.0f };
av1_nn_predict(features, nn_config, 1, score);
@@ -1238,10 +1425,12 @@ void av1_ml_prune_4_partition(
#undef LABELS
#define FEATURES 4
-int av1_ml_predict_breakout(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
- const MACROBLOCK *const x,
- const RD_STATS *const rd_stats,
- unsigned int pb_source_variance, int bit_depth) {
+void av1_ml_predict_breakout(AV1_COMP *const cpi, BLOCK_SIZE bsize,
+ const MACROBLOCK *const x,
+ const RD_STATS *const rd_stats,
+ const PartitionBlkParams blk_params,
+ unsigned int pb_source_variance, int bit_depth,
+ int *do_square_split, int *do_rectangular_split) {
const NN_CONFIG *nn_config = NULL;
int thresh = 0;
switch (bsize) {
@@ -1267,7 +1456,7 @@ int av1_ml_predict_breakout(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
break;
default: assert(0 && "Unexpected bsize.");
}
- if (!nn_config || thresh < 0) return 0;
+ if (!nn_config || thresh < 0) return;
const float ml_predict_breakout_thresh_scale[3] = { 1.15f, 1.05f, 1.0f };
thresh = (int)((float)thresh *
@@ -1295,13 +1484,28 @@ int av1_ml_predict_breakout(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
features[feature_index++] = (float)(dc_q * dc_q) / 256.0f;
assert(feature_index == FEATURES);
+ // Write features to file
+ write_features_to_file(cpi->oxcf.partition_info_path,
+ cpi->ext_part_controller.test_mode, features, FEATURES,
+ 2, blk_params.bsize, blk_params.mi_row,
+ blk_params.mi_col);
+
+ if (ext_ml_model_decision_after_none(
+ &cpi->ext_part_controller, frame_is_intra_only(&cpi->common),
+ features, do_square_split, do_rectangular_split)) {
+ return;
+ }
+
// Calculate score using the NN model.
float score = 0.0f;
av1_nn_predict(features, nn_config, 1, &score);
aom_clear_system_state();
// Make decision.
- return (int)(score * 100) >= thresh;
+ if ((int)(score * 100) >= thresh) {
+ *do_square_split = 0;
+ *do_rectangular_split = 0;
+ }
}
#undef FEATURES
@@ -1361,7 +1565,7 @@ void av1_prune_partitions_before_search(
const int try_intra_cnn_split =
!cpi->use_screen_content_tools && frame_is_intra_only(cm) &&
cpi->sf.part_sf.intra_cnn_split &&
- cm->seq_params.sb_size >= BLOCK_64X64 && bsize <= BLOCK_64X64 &&
+ cm->seq_params->sb_size >= BLOCK_64X64 && bsize <= BLOCK_64X64 &&
bsize >= BLOCK_8X8 &&
mi_row + mi_size_high[bsize] <= mi_params->mi_rows &&
mi_col + mi_size_wide[bsize] <= mi_params->mi_cols;
@@ -1483,8 +1687,9 @@ int evaluate_ab_partition_based_on_split(
}
void av1_prune_ab_partitions(
- const AV1_COMP *cpi, const MACROBLOCK *x, const PC_TREE *pc_tree,
- BLOCK_SIZE bsize, int pb_source_variance, int64_t best_rdcost,
+ AV1_COMP *cpi, const MACROBLOCK *x, const PC_TREE *pc_tree,
+ BLOCK_SIZE bsize, const int mi_row, const int mi_col,
+ int pb_source_variance, int64_t best_rdcost,
int64_t rect_part_rd[NUM_RECT_PARTS][SUB_PARTITIONS_RECT],
int64_t split_rd[SUB_PARTITIONS_SPLIT],
const RD_RECT_PART_WIN_INFO *rect_part_win_info, int ext_partition_allowed,
@@ -1580,7 +1785,7 @@ void av1_prune_ab_partitions(
// TODO(huisu@google.com): x->source_variance may not be the current
// block's variance. The correct one to use is pb_source_variance. Need to
// re-train the model to fix it.
- av1_ml_prune_ab_partition(bsize, pc_tree->partitioning,
+ av1_ml_prune_ab_partition(cpi, bsize, mi_row, mi_col, pc_tree->partitioning,
get_unsigned_bits(x->source_variance),
best_rdcost, horz_rd, vert_rd, split_rd,
horza_partition_allowed, horzb_partition_allowed,
@@ -1617,4 +1822,390 @@ void av1_prune_ab_partitions(
}
}
+// Prepare features for the external model. Specifically, features after
+// ab partition is searched.
+static void prepare_features_after_part_ab(
+ const AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize,
+ int part_ctx, int64_t best_rd,
+ int64_t rect_part_rd[NUM_RECT_PARTS][SUB_PARTITIONS_RECT],
+ int64_t split_rd[SUB_PARTITIONS_SPLIT], unsigned int pb_source_variance,
+ int mi_row, int mi_col, aom_partition_features_t *const features) {
+ int64_t *horz_rd = rect_part_rd[HORZ];
+ int64_t *vert_rd = rect_part_rd[VERT];
+
+ aom_clear_system_state();
+
+ // Generate features.
+ int feature_index = 0;
+ features->after_part_ab.f[feature_index++] = (float)part_ctx;
+ features->after_part_ab.f[feature_index++] =
+ (float)get_unsigned_bits(pb_source_variance);
+
+ const int rdcost = (int)AOMMIN(INT_MAX, best_rd);
+ int sub_block_rdcost[8] = { 0 };
+ int rd_index = 0;
+ for (int i = 0; i < SUB_PARTITIONS_RECT; ++i) {
+ if (horz_rd[i] > 0 && horz_rd[i] < 1000000000)
+ sub_block_rdcost[rd_index] = (int)horz_rd[i];
+ ++rd_index;
+ }
+ for (int i = 0; i < SUB_PARTITIONS_RECT; ++i) {
+ if (vert_rd[i] > 0 && vert_rd[i] < 1000000000)
+ sub_block_rdcost[rd_index] = (int)vert_rd[i];
+ ++rd_index;
+ }
+ for (int i = 0; i < SUB_PARTITIONS_SPLIT; ++i) {
+ if (split_rd[i] > 0 && split_rd[i] < 1000000000)
+ sub_block_rdcost[rd_index] = (int)split_rd[i];
+ ++rd_index;
+ }
+ for (int i = 0; i < 8; ++i) {
+ // Ratio between the sub-block RD and the whole-block RD.
+ float rd_ratio = 1.0f;
+ if (sub_block_rdcost[i] > 0 && sub_block_rdcost[i] < rdcost)
+ rd_ratio = (float)sub_block_rdcost[i] / (float)rdcost;
+ features->after_part_ab.f[feature_index++] = rd_ratio;
+ }
+
+ // Get variance of the 1:4 and 4:1 sub-blocks.
+ unsigned int horz_4_source_var[SUB_PARTITIONS_PART4] = { 0 };
+ unsigned int vert_4_source_var[SUB_PARTITIONS_PART4] = { 0 };
+ {
+ BLOCK_SIZE horz_4_bs = get_partition_subsize(bsize, PARTITION_HORZ_4);
+ BLOCK_SIZE vert_4_bs = get_partition_subsize(bsize, PARTITION_VERT_4);
+ av1_setup_src_planes(x, cpi->source, mi_row, mi_col,
+ av1_num_planes(&cpi->common), bsize);
+ const int src_stride = x->plane[0].src.stride;
+ uint8_t *src = x->plane[0].src.buf;
+ const MACROBLOCKD *const xd = &x->e_mbd;
+
+ struct buf_2d horz_4_src, vert_4_src;
+ horz_4_src.stride = src_stride;
+ vert_4_src.stride = src_stride;
+
+ for (int i = 0; i < SUB_PARTITIONS_PART4; ++i) {
+ horz_4_src.buf = src + i * block_size_high[horz_4_bs] * src_stride;
+ vert_4_src.buf = src + i * block_size_wide[vert_4_bs];
+
+ if (is_cur_buf_hbd(xd)) {
+ horz_4_source_var[i] = av1_high_get_sby_perpixel_variance(
+ cpi, &horz_4_src, horz_4_bs, xd->bd);
+ vert_4_source_var[i] = av1_high_get_sby_perpixel_variance(
+ cpi, &vert_4_src, vert_4_bs, xd->bd);
+ } else {
+ horz_4_source_var[i] =
+ av1_get_sby_perpixel_variance(cpi, &horz_4_src, horz_4_bs);
+ vert_4_source_var[i] =
+ av1_get_sby_perpixel_variance(cpi, &vert_4_src, vert_4_bs);
+ }
+ }
+ }
+
+ const float denom = (float)(pb_source_variance + 1);
+ const float low_b = 0.1f;
+ const float high_b = 10.0f;
+ for (int i = 0; i < SUB_PARTITIONS_PART4; ++i) {
+ // Ratio between the 4:1 sub-block variance and the whole-block variance.
+ float var_ratio = (float)(horz_4_source_var[i] + 1) / denom;
+ if (var_ratio < low_b) var_ratio = low_b;
+ if (var_ratio > high_b) var_ratio = high_b;
+ features->after_part_ab.f[feature_index++] = var_ratio;
+ }
+ for (int i = 0; i < SUB_PARTITIONS_PART4; ++i) {
+ // Ratio between the 1:4 sub-block RD and the whole-block RD.
+ float var_ratio = (float)(vert_4_source_var[i] + 1) / denom;
+ if (var_ratio < low_b) var_ratio = low_b;
+ if (var_ratio > high_b) var_ratio = high_b;
+ features->after_part_ab.f[feature_index++] = var_ratio;
+ }
+ assert(feature_index == 18);
+}
+
+// If the external partition model is used, we let it determine partition
+// decisions before partition none. Specifically, these parameters:
+// partition_none_allowed
+// partition_horz_allowed
+// partition_vert_allowed
+// do_rectangular_split
+// do_square_split
+static bool ext_ml_model_decision_before_none(
+ AV1_COMP *cpi, const float features_from_motion[FEATURE_SIZE_SMS_SPLIT],
+ int *partition_none_allowed, int *partition_horz_allowed,
+ int *partition_vert_allowed, int *do_rectangular_split,
+ int *do_square_split) {
+ ExtPartController *const ext_part_controller = &cpi->ext_part_controller;
+ if (!ext_part_controller->ready) return false;
+
+ // Setup features.
+ aom_partition_features_t features;
+ features.id = FEATURE_BEFORE_PART_NONE;
+ for (int i = 0; i < FEATURE_SIZE_SMS_SPLIT; ++i) {
+ features.before_part_none.f[i] = features_from_motion[i];
+ }
+
+ // Send necessary features to the external model.
+ av1_ext_part_send_features(ext_part_controller, &features);
+
+ // Get partition decisions from the external model.
+ aom_partition_decision_t decision;
+ const bool valid_decision =
+ av1_ext_part_get_partition_decision(ext_part_controller, &decision);
+ if (!valid_decision) return false;
+
+ // Populate decisions
+ *partition_none_allowed = decision.partition_none_allowed;
+ *partition_horz_allowed = decision.partition_rect_allowed[HORZ];
+ *partition_vert_allowed = decision.partition_rect_allowed[VERT];
+ *do_rectangular_split = decision.do_rectangular_split;
+ *do_square_split = decision.do_square_split;
+
+ return true;
+}
+
+// If the external partition model is used, we let it determine partition
+// decisions before partition none. Specifically, these parameters:
+// prune_horz
+// prune_vert
+static bool ext_ml_model_decision_before_none_part2(
+ AV1_COMP *cpi,
+ const float features_from_motion[FEATURE_SIZE_SMS_PRUNE_PART],
+ int *prune_horz, int *prune_vert) {
+ ExtPartController *const ext_part_controller = &cpi->ext_part_controller;
+ if (!ext_part_controller->ready) return false;
+
+ // Setup features.
+ aom_partition_features_t features;
+ features.id = FEATURE_BEFORE_PART_NONE_PART2;
+ for (int i = 0; i < FEATURE_SIZE_SMS_PRUNE_PART; ++i) {
+ features.before_part_none.f_part2[i] = features_from_motion[i];
+ }
+
+ // Send necessary features to the external model.
+ av1_ext_part_send_features(ext_part_controller, &features);
+
+ // Get partition decisions from the external model.
+ aom_partition_decision_t decision;
+ const bool valid_decision =
+ av1_ext_part_get_partition_decision(ext_part_controller, &decision);
+ if (!valid_decision) return false;
+
+ // Populate decisions
+ *prune_horz = decision.prune_rect_part[HORZ];
+ *prune_vert = decision.prune_rect_part[VERT];
+
+ return true;
+}
+
+// If the external partition model is used, we let it determine partition
+// decisions after none partition. Specifically, these parameters:
+// do_square_split
+// do_rectangular_split
+bool ext_ml_model_decision_after_none(
+ ExtPartController *const ext_part_controller, const int is_intra_frame,
+ const float *const features_after_none, int *do_square_split,
+ int *do_rectangular_split) {
+ if (!ext_part_controller->ready || is_intra_frame) return false;
+
+ // Setup features.
+ aom_partition_features_t features;
+ features.id = FEATURE_AFTER_PART_NONE;
+ for (int i = 0; i < 4; ++i) {
+ features.after_part_none.f[i] = features_after_none[i];
+ }
+
+ // Send necessary features to the external model.
+ av1_ext_part_send_features(ext_part_controller, &features);
+
+ // Get partition decisions from the external model.
+ aom_partition_decision_t decision;
+ const bool valid_decision =
+ av1_ext_part_get_partition_decision(ext_part_controller, &decision);
+ if (!valid_decision) return false;
+
+ // Populate decisions
+ *do_square_split = decision.do_square_split;
+ *do_rectangular_split = decision.do_rectangular_split;
+
+ return true;
+}
+
+// If the external partition model is used, we let it determine partition
+// decisions after none partition. Specifically, these parameters:
+// terminate_partition_search
+bool ext_ml_model_decision_after_none_part2(
+ AV1_COMP *const cpi, const float *const features_terminate,
+ int *terminate_partition_search) {
+ AV1_COMMON *const cm = &cpi->common;
+ ExtPartController *const ext_part_controller = &cpi->ext_part_controller;
+ if (!ext_part_controller->ready || frame_is_intra_only(cm)) return false;
+
+ // Setup features.
+ aom_partition_features_t features;
+ features.id = FEATURE_AFTER_PART_NONE_PART2;
+ for (int i = 0; i < FEATURE_SIZE_SMS_TERM_NONE; ++i) {
+ features.after_part_none.f_terminate[i] = features_terminate[i];
+ }
+
+ // Send necessary features to the external model.
+ av1_ext_part_send_features(ext_part_controller, &features);
+
+ // Get partition decisions from the external model.
+ aom_partition_decision_t decision;
+ const bool valid_decision =
+ av1_ext_part_get_partition_decision(ext_part_controller, &decision);
+ if (!valid_decision) return false;
+
+ // Populate decisions
+ *terminate_partition_search = decision.terminate_partition_search;
+
+ return true;
+}
+
+// If the external partition model is used, we let it determine partition
+// decisions after none partition. Specifically, these parameters:
+// terminate_partition_search
+bool ext_ml_model_decision_after_split(AV1_COMP *const cpi,
+ const float *const features_terminate,
+ int *terminate_partition_search) {
+ const AV1_COMMON *const cm = &cpi->common;
+ ExtPartController *const ext_part_controller = &cpi->ext_part_controller;
+ if (frame_is_intra_only(cm) || !cpi->ext_part_controller.ready) {
+ return false;
+ }
+
+ // Setup features.
+ aom_partition_features_t features;
+ features.id = FEATURE_AFTER_PART_SPLIT;
+ for (int i = 0; i < 31; ++i) {
+ features.after_part_split.f_terminate[i] = features_terminate[i];
+ }
+
+ // Send necessary features to the external model.
+ av1_ext_part_send_features(ext_part_controller, &features);
+
+ // Get partition decisions from the external model.
+ aom_partition_decision_t decision;
+ const bool valid_decision =
+ av1_ext_part_get_partition_decision(ext_part_controller, &decision);
+ if (!valid_decision) return false;
+
+ // Populate decisions
+ *terminate_partition_search = decision.terminate_partition_search;
+
+ return true;
+}
+
+// If the external partition model is used, we let it determine partition
+// decisions after none partition. Specifically, these parameters:
+// prune_rect_part[HORZ]
+// prune_rect_part[VERT]
+bool ext_ml_model_decision_after_split_part2(
+ ExtPartController *const ext_part_controller, const int is_intra_frame,
+ const float *const features_prune, int *prune_rect_part_horz,
+ int *prune_rect_part_vert) {
+ if (is_intra_frame || !ext_part_controller->ready) {
+ return false;
+ }
+
+ // Setup features.
+ aom_partition_features_t features;
+ features.id = FEATURE_AFTER_PART_SPLIT_PART2;
+ for (int i = 0; i < 9; ++i) {
+ features.after_part_split.f_prune_rect[i] = features_prune[i];
+ }
+
+ // Send necessary features to the external model.
+ av1_ext_part_send_features(ext_part_controller, &features);
+
+ // Get partition decisions from the external model.
+ aom_partition_decision_t decision;
+ const bool valid_decision =
+ av1_ext_part_get_partition_decision(ext_part_controller, &decision);
+ if (!valid_decision) return false;
+
+ // Populate decisions
+ *prune_rect_part_horz = decision.prune_rect_part[0];
+ *prune_rect_part_vert = decision.prune_rect_part[1];
+
+ return true;
+}
+
+// If the external partition model is used, we let it determine partition
+// decisions after rectangular partition. Specifically, these parameters:
+// horza_partition_allowed
+// horzb_partition_allowed
+// verta_partition_allowed
+// vertb_partition_allowed
+static bool ext_ml_model_decision_after_rect(
+ ExtPartController *const ext_part_controller, const int is_intra_frame,
+ const float *const features_after_rect, int *horza_partition_allowed,
+ int *horzb_partition_allowed, int *verta_partition_allowed,
+ int *vertb_partition_allowed) {
+ if (is_intra_frame || !ext_part_controller->ready) return false;
+
+ // Setup features.
+ aom_partition_features_t features;
+ features.id = FEATURE_AFTER_PART_RECT;
+ for (int i = 0; i < 10; ++i) {
+ features.after_part_rect.f[i] = features_after_rect[i];
+ }
+
+ // Send necessary features to the external model.
+ av1_ext_part_send_features(ext_part_controller, &features);
+
+ // Get partition decisions from the external model.
+ aom_partition_decision_t decision;
+ const bool valid_decision =
+ av1_ext_part_get_partition_decision(ext_part_controller, &decision);
+ if (!valid_decision) return false;
+
+ // Populate decisions
+ *horza_partition_allowed = decision.horza_partition_allowed;
+ *horzb_partition_allowed = decision.horzb_partition_allowed;
+ *verta_partition_allowed = decision.verta_partition_allowed;
+ *vertb_partition_allowed = decision.vertb_partition_allowed;
+
+ return true;
+}
+
+// If the external partition model is used, we let it determine partition
+// decisions after AB partition. Specifically, these parameters:
+// partition_vert4_allowed
+// partition_horz4_allowed
+static bool ext_ml_model_decision_after_part_ab(
+ AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize, int part_ctx,
+ int64_t best_rd, int64_t rect_part_rd[NUM_RECT_PARTS][SUB_PARTITIONS_RECT],
+ int64_t split_rd[SUB_PARTITIONS_SPLIT], int *const partition_horz4_allowed,
+ int *const partition_vert4_allowed, unsigned int pb_source_variance,
+ int mi_row, int mi_col) {
+ const AV1_COMMON *const cm = &cpi->common;
+ ExtPartController *const ext_part_controller = &cpi->ext_part_controller;
+
+ if (!frame_is_intra_only(cm) && ext_part_controller->ready) {
+ // Setup features.
+ aom_partition_features_t features;
+ features.id = FEATURE_AFTER_PART_AB;
+ prepare_features_after_part_ab(cpi, x, bsize, part_ctx, best_rd,
+ rect_part_rd, split_rd, pb_source_variance,
+ mi_row, mi_col, &features);
+
+ // Send necessary features to the external model.
+ av1_ext_part_send_features(ext_part_controller, &features);
+
+ // Get partition decisions from the external model.
+ aom_partition_decision_t decision;
+ const bool valid_decision =
+ av1_ext_part_get_partition_decision(ext_part_controller, &decision);
+ if (!valid_decision) return false;
+
+ // Populate decisions
+ *partition_horz4_allowed = decision.partition_horz4_allowed;
+ *partition_vert4_allowed = decision.partition_vert4_allowed;
+
+ return true;
+ }
+
+ return false;
+}
+
#endif // !CONFIG_REALTIME_ONLY
diff --git a/third_party/libaom/source/libaom/av1/encoder/partition_strategy.h b/third_party/libaom/source/libaom/av1/encoder/partition_strategy.h
index 0527a944cd..ed66a364d9 100644
--- a/third_party/libaom/source/libaom/av1/encoder/partition_strategy.h
+++ b/third_party/libaom/source/libaom/av1/encoder/partition_strategy.h
@@ -13,58 +13,10 @@
#define AOM_AV1_ENCODER_PARTITION_STRATEGY_H_
#include "av1/encoder/encodeframe.h"
+#include "av1/encoder/encodeframe_utils.h"
#include "av1/encoder/encodemb.h"
#include "av1/encoder/encoder.h"
-#define FEATURE_SIZE_SMS_SPLIT_FAST 6
-#define FEATURE_SIZE_SMS_SPLIT 17
-#define FEATURE_SIZE_SMS_PRUNE_PART 25
-#define FEATURE_SIZE_SMS_TERM_NONE 28
-#define FEATURE_SIZE_FP_SMS_TERM_NONE 20
-#define FEATURE_SIZE_MAX_MIN_PART_PRED 13
-#define MAX_NUM_CLASSES_MAX_MIN_PART_PRED 4
-
-#define FEATURE_SMS_NONE_FLAG 1
-#define FEATURE_SMS_SPLIT_FLAG (1 << 1)
-#define FEATURE_SMS_RECT_FLAG (1 << 2)
-
-#define FEATURE_SMS_PRUNE_PART_FLAG \
- (FEATURE_SMS_NONE_FLAG | FEATURE_SMS_SPLIT_FLAG | FEATURE_SMS_RECT_FLAG)
-#define FEATURE_SMS_SPLIT_MODEL_FLAG \
- (FEATURE_SMS_NONE_FLAG | FEATURE_SMS_SPLIT_FLAG)
-
-// Number of sub-partitions in rectangular partition types.
-#define SUB_PARTITIONS_RECT 2
-
-// Number of sub-partitions in split partition type.
-#define SUB_PARTITIONS_SPLIT 4
-
-// Number of sub-partitions in AB partition types.
-#define SUB_PARTITIONS_AB 3
-
-// Number of sub-partitions in 4-way partition types.
-#define SUB_PARTITIONS_PART4 4
-
-// 4part parition types.
-enum { HORZ4 = 0, VERT4, NUM_PART4_TYPES } UENUM1BYTE(PART4_TYPES);
-
-// AB parition types.
-enum {
- HORZ_A = 0,
- HORZ_B,
- VERT_A,
- VERT_B,
- NUM_AB_PARTS
-} UENUM1BYTE(AB_PART_TYPE);
-
-// Rectangular parition types.
-enum { HORZ = 0, VERT, NUM_RECT_PARTS } UENUM1BYTE(RECT_PART_TYPE);
-
-// Structure to keep win flags for HORZ and VERT partition evaluations.
-typedef struct {
- int rect_part_win[NUM_RECT_PARTS];
-} RD_RECT_PART_WIN_INFO;
-
void av1_intra_mode_cnn_partition(const AV1_COMMON *const cm, MACROBLOCK *x,
int bsize, int label_idx,
int *partition_none_allowed,
@@ -129,16 +81,18 @@ void av1_ml_early_term_after_split(AV1_COMP *const cpi, MACROBLOCK *const x,
// no information about rectangular partitions. Preliminary experiments suggest
// that we can get better performance by adding in q_index and rectangular
// sse/var from SMS. We should retrain and tune this model later.
-void av1_ml_prune_rect_partition(const AV1_COMP *const cpi,
- const MACROBLOCK *const x, BLOCK_SIZE bsize,
- int64_t best_rd, int64_t none_rd,
- int64_t *split_rd, int *const dst_prune_horz,
+void av1_ml_prune_rect_partition(AV1_COMP *const cpi, const MACROBLOCK *const x,
+ BLOCK_SIZE bsize, const int mi_row,
+ const int mi_col, int64_t best_rd,
+ int64_t none_rd, int64_t *split_rd,
+ int *const dst_prune_horz,
int *const dst_prune_vert);
// Use a ML model to predict if horz_a, horz_b, vert_a, and vert_b should be
// considered.
void av1_ml_prune_ab_partition(
- BLOCK_SIZE bsize, int part_ctx, int var_ctx, int64_t best_rd,
+ AV1_COMP *const cpi, BLOCK_SIZE bsize, const int mi_row, const int mi_col,
+ int part_ctx, int var_ctx, int64_t best_rd,
int64_t horz_rd[SUB_PARTITIONS_RECT], int64_t vert_rd[SUB_PARTITIONS_RECT],
int64_t split_rd[SUB_PARTITIONS_SPLIT], int *const horza_partition_allowed,
int *const horzb_partition_allowed, int *const verta_partition_allowed,
@@ -146,18 +100,19 @@ void av1_ml_prune_ab_partition(
// Use a ML model to predict if horz4 and vert4 should be considered.
void av1_ml_prune_4_partition(
- const AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize,
- int part_ctx, int64_t best_rd,
- int64_t rect_part_rd[NUM_RECT_PARTS][SUB_PARTITIONS_RECT],
+ AV1_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize, int part_ctx,
+ int64_t best_rd, int64_t rect_part_rd[NUM_RECT_PARTS][SUB_PARTITIONS_RECT],
int64_t split_rd[SUB_PARTITIONS_SPLIT], int *const partition_horz4_allowed,
int *const partition_vert4_allowed, unsigned int pb_source_variance,
int mi_row, int mi_col);
// ML-based partition search breakout after PARTITION_NONE.
-int av1_ml_predict_breakout(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
- const MACROBLOCK *const x,
- const RD_STATS *const rd_stats,
- unsigned int pb_source_variance, int bit_depth);
+void av1_ml_predict_breakout(AV1_COMP *const cpi, BLOCK_SIZE bsize,
+ const MACROBLOCK *const x,
+ const RD_STATS *const rd_stats,
+ const PartitionBlkParams blk_params,
+ unsigned int pb_source_variance, int bit_depth,
+ int *do_square_split, int *do_rectangular_split);
// The first round of partition pruning determined before any partition
// has been tested. The decisions will be updated and passed back
@@ -183,8 +138,9 @@ void av1_prune_partitions_by_max_min_bsize(
// Prune out AB partitions based on rd decisions made from testing the
// basic partitions.
void av1_prune_ab_partitions(
- const AV1_COMP *cpi, const MACROBLOCK *x, const PC_TREE *pc_tree,
- BLOCK_SIZE bsize, int pb_source_variance, int64_t best_rdcost,
+ AV1_COMP *cpi, const MACROBLOCK *x, const PC_TREE *pc_tree,
+ BLOCK_SIZE bsize, const int mi_row, const int mi_col,
+ int pb_source_variance, int64_t best_rdcost,
int64_t rect_part_rd[NUM_RECT_PARTS][SUB_PARTITIONS_RECT],
int64_t split_rd[SUB_PARTITIONS_SPLIT],
const RD_RECT_PART_WIN_INFO *rect_part_win_info, int ext_partition_allowed,
@@ -261,22 +217,66 @@ static INLINE int is_full_sb(const CommonModeInfoParams *const mi_params,
(mi_col + sb_mi_wide) <= mi_params->mi_cols;
}
+#if !CONFIG_REALTIME_ONLY
// Do not use this criteria for screen content videos.
// Since screen content videos could often find good predictors and the largest
// block size is likely to be used.
static INLINE int use_auto_max_partition(const AV1_COMP *const cpi,
BLOCK_SIZE sb_size, int mi_row,
int mi_col) {
- assert(IMPLIES(cpi->gf_group.size > 0,
- cpi->gf_group.index < cpi->gf_group.size));
+ assert(IMPLIES(cpi->ppi->gf_group.size > 0,
+ cpi->gf_frame_index < cpi->ppi->gf_group.size));
const AV1_COMMON *const cm = &cpi->common;
return !frame_is_intra_only(cm) && !cpi->use_screen_content_tools &&
cpi->sf.part_sf.auto_max_partition_based_on_simple_motion !=
NOT_IN_USE &&
sb_size == BLOCK_128X128 &&
is_full_sb(&cm->mi_params, mi_row, mi_col, sb_size) &&
- cpi->gf_group.update_type[cpi->gf_group.index] != OVERLAY_UPDATE &&
- cpi->gf_group.update_type[cpi->gf_group.index] != INTNL_OVERLAY_UPDATE;
+ cpi->ppi->gf_group.update_type[cpi->gf_frame_index] !=
+ OVERLAY_UPDATE &&
+ cpi->ppi->gf_group.update_type[cpi->gf_frame_index] !=
+ INTNL_OVERLAY_UPDATE;
}
+static BLOCK_SIZE dim_to_size(int dim) {
+ switch (dim) {
+ case 4: return BLOCK_4X4;
+ case 8: return BLOCK_8X8;
+ case 16: return BLOCK_16X16;
+ case 32: return BLOCK_32X32;
+ case 64: return BLOCK_64X64;
+ case 128: return BLOCK_128X128;
+ default: assert(0); return 0;
+ }
+}
+
+static AOM_INLINE void set_max_min_partition_size(SuperBlockEnc *sb_enc,
+ AV1_COMP *cpi, MACROBLOCK *x,
+ const SPEED_FEATURES *sf,
+ BLOCK_SIZE sb_size,
+ int mi_row, int mi_col) {
+ const AV1_COMMON *cm = &cpi->common;
+
+ sb_enc->max_partition_size =
+ AOMMIN(sf->part_sf.default_max_partition_size,
+ dim_to_size(cpi->oxcf.part_cfg.max_partition_size));
+ sb_enc->min_partition_size =
+ AOMMAX(sf->part_sf.default_min_partition_size,
+ dim_to_size(cpi->oxcf.part_cfg.min_partition_size));
+ sb_enc->max_partition_size =
+ AOMMIN(sb_enc->max_partition_size, cm->seq_params->sb_size);
+ sb_enc->min_partition_size =
+ AOMMIN(sb_enc->min_partition_size, cm->seq_params->sb_size);
+
+ if (use_auto_max_partition(cpi, sb_size, mi_row, mi_col)) {
+ float features[FEATURE_SIZE_MAX_MIN_PART_PRED] = { 0.0f };
+
+ av1_get_max_min_partition_features(cpi, x, mi_row, mi_col, features);
+ sb_enc->max_partition_size =
+ AOMMAX(AOMMIN(av1_predict_max_partition(cpi, x, features),
+ sb_enc->max_partition_size),
+ sb_enc->min_partition_size);
+ }
+}
+#endif // !CONFIG_REALTIME_ONLY
#endif // AOM_AV1_ENCODER_PARTITION_STRATEGY_H_
diff --git a/third_party/libaom/source/libaom/av1/encoder/pass2_strategy.c b/third_party/libaom/source/libaom/av1/encoder/pass2_strategy.c
index 804fb3a510..e3639f7784 100644
--- a/third_party/libaom/source/libaom/av1/encoder/pass2_strategy.c
+++ b/third_party/libaom/source/libaom/av1/encoder/pass2_strategy.c
@@ -43,6 +43,13 @@
#define DEFAULT_KF_BOOST 2300
#define DEFAULT_GF_BOOST 2000
#define GROUP_ADAPTIVE_MAXQ 1
+
+static INLINE int is_fp_stats_to_predict_flat_gop_invalid(
+ const FIRSTPASS_STATS *fp_stats) {
+ return ((fp_stats->tr_coded_error < 0) || (fp_stats->pcnt_third_ref < 0) ||
+ (fp_stats->frame_avg_wavelet_energy < 0));
+}
+
static void init_gf_stats(GF_GROUP_STATS *gf_stats);
// Calculate an active area of the image that discounts formatting
@@ -182,7 +189,7 @@ static double calc_correction_factor(double err_per_mb, int q) {
// Based on history adjust expectations of bits per macroblock.
static void twopass_update_bpm_factor(AV1_COMP *cpi, int rate_err_tol) {
- TWO_PASS *twopass = &cpi->twopass;
+ TWO_PASS *twopass = &cpi->ppi->twopass;
const RATE_CONTROL *const rc = &cpi->rc;
int err_estimate = rc->rate_error_estimate;
@@ -194,14 +201,14 @@ static void twopass_update_bpm_factor(AV1_COMP *cpi, int rate_err_tol) {
const double max_fac = 1.0 + adj_limit;
if (rc->vbr_bits_off_target && rc->total_actual_bits > 0) {
- if (cpi->lap_enabled) {
+ if (cpi->ppi->lap_enabled) {
rate_err_factor =
(double)twopass->rolling_arf_group_actual_bits /
DOUBLE_DIVIDE_CHECK((double)twopass->rolling_arf_group_target_bits);
} else {
rate_err_factor =
1.0 - ((double)(rc->vbr_bits_off_target) /
- AOMMAX(rc->total_actual_bits, cpi->twopass.bits_left));
+ AOMMAX(rc->total_actual_bits, cpi->ppi->twopass.bits_left));
}
rate_err_factor = AOMMAX(min_fac, AOMMIN(max_fac, rate_err_factor));
@@ -209,7 +216,7 @@ static void twopass_update_bpm_factor(AV1_COMP *cpi, int rate_err_tol) {
// Adjustment is damped if this is 1 pass with look ahead processing
// (as there are only ever a few frames of data) and for all but the first
// GOP in normal two pass.
- if ((twopass->bpm_factor != 1.0) || cpi->lap_enabled) {
+ if ((twopass->bpm_factor != 1.0) || cpi->ppi->lap_enabled) {
rate_err_factor = 1.0 + ((rate_err_factor - 1.0) / damp_fac);
}
}
@@ -302,9 +309,9 @@ static int get_twopass_worst_quality(AV1_COMP *cpi, const double av_frame_err,
// Try and pick a max Q that will be high enough to encode the
// content at the given rate.
int q = find_qindex_by_rate_with_correction(
- target_norm_bits_per_mb, cpi->common.seq_params.bit_depth,
- av_err_per_mb, cpi->twopass.bpm_factor, rate_err_tol, rc->best_quality,
- rc->worst_quality);
+ target_norm_bits_per_mb, cpi->common.seq_params->bit_depth,
+ av_err_per_mb, cpi->ppi->twopass.bpm_factor, rate_err_tol,
+ rc->best_quality, rc->worst_quality);
// Restriction on active max q for constrained quality mode.
if (rc_cfg->mode == AOM_CQ) q = AOMMAX(q, rc_cfg->cq_level);
@@ -312,57 +319,63 @@ static int get_twopass_worst_quality(AV1_COMP *cpi, const double av_frame_err,
}
}
-#define SR_DIFF_PART 0.0015
-#define MOTION_AMP_PART 0.003
#define INTRA_PART 0.005
#define DEFAULT_DECAY_LIMIT 0.75
#define LOW_SR_DIFF_TRHESH 0.1
-#define SR_DIFF_MAX 128.0
#define NCOUNT_FRAME_II_THRESH 5.0
+#define LOW_CODED_ERR_PER_MB 10.0
-static double get_sr_decay_rate(const FRAME_INFO *frame_info,
- const FIRSTPASS_STATS *frame) {
- const int num_mbs = frame_info->num_mbs;
- double sr_diff = (frame->sr_coded_error - frame->coded_error) / num_mbs;
+/* This function considers how the quality of prediction may be deteriorating
+ * with distance. It comapres the coded error for the last frame and the
+ * second reference frame (usually two frames old) and also applies a factor
+ * based on the extent of INTRA coding.
+ *
+ * The decay factor is then used to reduce the contribution of frames further
+ * from the alt-ref or golden frame, to the bitframe boost calculation for that
+ * alt-ref or golden frame.
+ */
+static double get_sr_decay_rate(const FIRSTPASS_STATS *frame) {
+ double sr_diff = (frame->sr_coded_error - frame->coded_error);
double sr_decay = 1.0;
double modified_pct_inter;
double modified_pcnt_intra;
- const double motion_amplitude_factor =
- frame->pcnt_motion * ((frame->mvc_abs + frame->mvr_abs) / 2);
modified_pct_inter = frame->pcnt_inter;
- if ((frame->intra_error / DOUBLE_DIVIDE_CHECK(frame->coded_error)) <
- (double)NCOUNT_FRAME_II_THRESH) {
+ if ((frame->coded_error > LOW_CODED_ERR_PER_MB) &&
+ ((frame->intra_error / DOUBLE_DIVIDE_CHECK(frame->coded_error)) <
+ (double)NCOUNT_FRAME_II_THRESH)) {
modified_pct_inter = frame->pcnt_inter - frame->pcnt_neutral;
}
modified_pcnt_intra = 100 * (1.0 - modified_pct_inter);
if ((sr_diff > LOW_SR_DIFF_TRHESH)) {
- sr_diff = AOMMIN(sr_diff, SR_DIFF_MAX);
- sr_decay = 1.0 - (SR_DIFF_PART * sr_diff) -
- (MOTION_AMP_PART * motion_amplitude_factor) -
- (INTRA_PART * modified_pcnt_intra);
+ double sr_diff_part = ((sr_diff * 0.25) / frame->intra_error);
+ sr_decay = 1.0 - sr_diff_part - (INTRA_PART * modified_pcnt_intra);
}
- return AOMMAX(sr_decay, AOMMIN(DEFAULT_DECAY_LIMIT, modified_pct_inter));
+ return AOMMAX(sr_decay, DEFAULT_DECAY_LIMIT);
}
// This function gives an estimate of how badly we believe the prediction
// quality is decaying from frame to frame.
-static double get_zero_motion_factor(const FRAME_INFO *frame_info,
- const FIRSTPASS_STATS *frame) {
+static double get_zero_motion_factor(const FIRSTPASS_STATS *frame) {
const double zero_motion_pct = frame->pcnt_inter - frame->pcnt_motion;
- double sr_decay = get_sr_decay_rate(frame_info, frame);
+ double sr_decay = get_sr_decay_rate(frame);
return AOMMIN(sr_decay, zero_motion_pct);
}
-#define ZM_POWER_FACTOR 0.75
+#define DEFAULT_ZM_FACTOR 0.5
+static double get_prediction_decay_rate(const FIRSTPASS_STATS *frame_stats) {
+ const double sr_decay_rate = get_sr_decay_rate(frame_stats);
+ double zero_motion_factor =
+ DEFAULT_ZM_FACTOR * (frame_stats->pcnt_inter - frame_stats->pcnt_motion);
-static double get_prediction_decay_rate(const FRAME_INFO *frame_info,
- const FIRSTPASS_STATS *next_frame) {
- const double sr_decay_rate = get_sr_decay_rate(frame_info, next_frame);
- const double zero_motion_factor =
- (0.95 * pow((next_frame->pcnt_inter - next_frame->pcnt_motion),
- ZM_POWER_FACTOR));
+ // Clamp value to range 0.0 to 1.0
+ // This should happen anyway if input values are sensibly clamped but checked
+ // here just in case.
+ if (zero_motion_factor > 1.0)
+ zero_motion_factor = 1.0;
+ else if (zero_motion_factor < 0.0)
+ zero_motion_factor = 0.0;
return AOMMAX(zero_motion_factor,
(sr_decay_rate + ((1.0 - sr_decay_rate) * zero_motion_factor)));
@@ -449,7 +462,6 @@ static void accumulate_this_frame_stats(const FIRSTPASS_STATS *stats,
}
static void accumulate_next_frame_stats(const FIRSTPASS_STATS *stats,
- const FRAME_INFO *frame_info,
const int flash_detected,
const int frames_since_key,
const int cur_idx,
@@ -470,16 +482,15 @@ static void accumulate_next_frame_stats(const FIRSTPASS_STATS *stats,
// Accumulate the effect of prediction quality decay
if (!flash_detected) {
gf_stats->last_loop_decay_rate = gf_stats->loop_decay_rate;
- gf_stats->loop_decay_rate = get_prediction_decay_rate(frame_info, stats);
+ gf_stats->loop_decay_rate = get_prediction_decay_rate(stats);
gf_stats->decay_accumulator =
gf_stats->decay_accumulator * gf_stats->loop_decay_rate;
// Monitor for static sections.
if ((frames_since_key + cur_idx - 1) > 1) {
- gf_stats->zero_motion_accumulator =
- AOMMIN(gf_stats->zero_motion_accumulator,
- get_zero_motion_factor(frame_info, stats));
+ gf_stats->zero_motion_accumulator = AOMMIN(
+ gf_stats->zero_motion_accumulator, get_zero_motion_factor(stats));
}
}
}
@@ -618,8 +629,8 @@ static double calc_kf_frame_boost(const RATE_CONTROL *rc,
return AOMMIN(frame_boost, max_boost * boost_q_correction);
}
-static int get_projected_gfu_boost(const RATE_CONTROL *rc, int gfu_boost,
- int frames_to_project,
+static int get_projected_gfu_boost(const PRIMARY_RATE_CONTROL *p_rc,
+ int gfu_boost, int frames_to_project,
int num_stats_used_for_gfu_boost) {
/*
* If frames_to_project is equal to num_stats_used_for_gfu_boost,
@@ -629,7 +640,7 @@ static int get_projected_gfu_boost(const RATE_CONTROL *rc, int gfu_boost,
*/
if (num_stats_used_for_gfu_boost >= frames_to_project) return gfu_boost;
- double min_boost_factor = sqrt(rc->baseline_gf_interval);
+ double min_boost_factor = sqrt(p_rc->baseline_gf_interval);
// Get the current tpl factor (number of frames = frames_to_project).
double tpl_factor = av1_get_gfu_boost_projection_factor(
min_boost_factor, MAX_GFUBOOST_FACTOR, frames_to_project);
@@ -642,11 +653,13 @@ static int get_projected_gfu_boost(const RATE_CONTROL *rc, int gfu_boost,
}
#define GF_MAX_BOOST 90.0
+#define GF_MIN_BOOST 50
#define MIN_DECAY_FACTOR 0.01
-int av1_calc_arf_boost(const TWO_PASS *twopass, const RATE_CONTROL *rc,
+int av1_calc_arf_boost(const TWO_PASS *twopass,
+ const PRIMARY_RATE_CONTROL *p_rc, const RATE_CONTROL *rc,
FRAME_INFO *frame_info, int offset, int f_frames,
int b_frames, int *num_fpstats_used,
- int *num_fpstats_required) {
+ int *num_fpstats_required, int project_gfu_boost) {
int i;
GF_GROUP_STATS gf_stats;
init_gf_stats(&gf_stats);
@@ -670,8 +683,7 @@ int av1_calc_arf_boost(const TWO_PASS *twopass, const RATE_CONTROL *rc,
// Accumulate the effect of prediction quality decay.
if (!flash_detected) {
- gf_stats.decay_accumulator *=
- get_prediction_decay_rate(frame_info, this_frame);
+ gf_stats.decay_accumulator *= get_prediction_decay_rate(this_frame);
gf_stats.decay_accumulator = gf_stats.decay_accumulator < MIN_DECAY_FACTOR
? MIN_DECAY_FACTOR
: gf_stats.decay_accumulator;
@@ -704,8 +716,7 @@ int av1_calc_arf_boost(const TWO_PASS *twopass, const RATE_CONTROL *rc,
// Cumulative effect of prediction quality decay.
if (!flash_detected) {
- gf_stats.decay_accumulator *=
- get_prediction_decay_rate(frame_info, this_frame);
+ gf_stats.decay_accumulator *= get_prediction_decay_rate(this_frame);
gf_stats.decay_accumulator = gf_stats.decay_accumulator < MIN_DECAY_FACTOR
? MIN_DECAY_FACTOR
: gf_stats.decay_accumulator;
@@ -719,16 +730,16 @@ int av1_calc_arf_boost(const TWO_PASS *twopass, const RATE_CONTROL *rc,
}
arf_boost += (int)boost_score;
- if (num_fpstats_required) {
+ if (project_gfu_boost) {
+ assert(num_fpstats_required != NULL);
+ assert(num_fpstats_used != NULL);
*num_fpstats_required = f_frames + b_frames;
- if (num_fpstats_used) {
- arf_boost = get_projected_gfu_boost(rc, arf_boost, *num_fpstats_required,
- *num_fpstats_used);
- }
+ arf_boost = get_projected_gfu_boost(p_rc, arf_boost, *num_fpstats_required,
+ *num_fpstats_used);
}
- if (arf_boost < ((b_frames + f_frames) * 50))
- arf_boost = ((b_frames + f_frames) * 50);
+ if (arf_boost < ((b_frames + f_frames) * GF_MIN_BOOST))
+ arf_boost = ((b_frames + f_frames) * GF_MIN_BOOST);
return arf_boost;
}
@@ -767,7 +778,8 @@ static int calculate_section_intra_ratio(const FIRSTPASS_STATS *begin,
static int64_t calculate_total_gf_group_bits(AV1_COMP *cpi,
double gf_group_err) {
const RATE_CONTROL *const rc = &cpi->rc;
- const TWO_PASS *const twopass = &cpi->twopass;
+ const PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
+ const TWO_PASS *const twopass = &cpi->ppi->twopass;
const int max_bits = frame_max_bits(rc, &cpi->oxcf);
int64_t total_group_bits;
@@ -787,8 +799,8 @@ static int64_t calculate_total_gf_group_bits(AV1_COMP *cpi,
: total_group_bits;
// Clip based on user supplied data rate variability limit.
- if (total_group_bits > (int64_t)max_bits * rc->baseline_gf_interval)
- total_group_bits = (int64_t)max_bits * rc->baseline_gf_interval;
+ if (total_group_bits > (int64_t)max_bits * p_rc->baseline_gf_interval)
+ total_group_bits = (int64_t)max_bits * p_rc->baseline_gf_interval;
return total_group_bits;
}
@@ -834,7 +846,8 @@ static int adjust_boost_bits_for_target_level(const AV1_COMP *const cpi,
int64_t group_bits,
int frame_type) {
const AV1_COMMON *const cm = &cpi->common;
- const SequenceHeader *const seq_params = &cm->seq_params;
+ const SequenceHeader *const seq_params = cm->seq_params;
+ PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
const int temporal_layer_id = cm->temporal_layer_id;
const int spatial_layer_id = cm->spatial_layer_id;
for (int index = 0; index < seq_params->operating_points_cnt_minus_1 + 1;
@@ -845,7 +858,7 @@ static int adjust_boost_bits_for_target_level(const AV1_COMP *const cpi,
}
const AV1_LEVEL target_level =
- cpi->level_params.target_seq_level_idx[index];
+ cpi->ppi->level_params.target_seq_level_idx[index];
if (target_level >= SEQ_LEVELS) continue;
assert(is_valid_seq_level_idx(target_level));
@@ -859,18 +872,20 @@ static int adjust_boost_bits_for_target_level(const AV1_COMP *const cpi,
const int level_enforced_max_kf_bits = target_bits_per_frame * 8;
if (bits_assigned > level_enforced_max_kf_bits) {
const int frames = rc->frames_to_key - 1;
- rc->kf_boost = calculate_boost_factor(
+ p_rc->kf_boost = calculate_boost_factor(
frames, level_enforced_max_kf_bits, group_bits);
- bits_assigned = calculate_boost_bits(frames, rc->kf_boost, group_bits);
+ bits_assigned =
+ calculate_boost_bits(frames, p_rc->kf_boost, group_bits);
}
} else if (frame_type == 1) {
// Maximum bits for arf is 4 times the target_bits_per_frame.
const int level_enforced_max_arf_bits = target_bits_per_frame * 4;
if (bits_assigned > level_enforced_max_arf_bits) {
- rc->gfu_boost = calculate_boost_factor(
- rc->baseline_gf_interval, level_enforced_max_arf_bits, group_bits);
- bits_assigned = calculate_boost_bits(rc->baseline_gf_interval,
- rc->gfu_boost, group_bits);
+ p_rc->gfu_boost =
+ calculate_boost_factor(p_rc->baseline_gf_interval,
+ level_enforced_max_arf_bits, group_bits);
+ bits_assigned = calculate_boost_bits(p_rc->baseline_gf_interval,
+ p_rc->gfu_boost, group_bits);
}
} else {
assert(0);
@@ -883,7 +898,9 @@ static int adjust_boost_bits_for_target_level(const AV1_COMP *const cpi,
// Allocate bits to each frame in a GF / ARF group
double layer_fraction[MAX_ARF_LAYERS + 1] = { 1.0, 0.70, 0.55, 0.60,
0.60, 1.0, 1.0 };
-static void allocate_gf_group_bits(GF_GROUP *gf_group, RATE_CONTROL *const rc,
+static void allocate_gf_group_bits(GF_GROUP *gf_group,
+ PRIMARY_RATE_CONTROL *const p_rc,
+ RATE_CONTROL *const rc,
int64_t gf_group_bits, int gf_arf_bits,
int key_frame, int use_arf) {
int64_t total_group_bits = gf_group_bits;
@@ -900,7 +917,7 @@ static void allocate_gf_group_bits(GF_GROUP *gf_group, RATE_CONTROL *const rc,
if (use_arf) total_group_bits -= gf_arf_bits;
int num_frames =
- AOMMAX(1, rc->baseline_gf_interval - (rc->frames_since_key == 0));
+ AOMMAX(1, p_rc->baseline_gf_interval - (rc->frames_since_key == 0));
base_frame_bits = (int)(total_group_bits / num_frames);
// Check the number of frames in each layer in case we have a
@@ -943,7 +960,8 @@ static void allocate_gf_group_bits(GF_GROUP *gf_group, RATE_CONTROL *const rc,
// in the next GOP. For GF group, next GOP will overwrite the rate allocation.
// Setting this frame to use 0 bit (of out the current GOP budget) will
// simplify logics in reference frame management.
- gf_group->bit_allocation[gf_group_size] = 0;
+ if (gf_group_size < MAX_STATIC_GF_GROUP_LENGTH)
+ gf_group->bit_allocation[gf_group_size] = 0;
}
// Returns true if KF group and GF group both are almost completely static.
@@ -967,7 +985,7 @@ static INLINE int detect_gf_cut(AV1_COMP *cpi, int frame_index, int cur_start,
int active_min_gf_interval,
GF_GROUP_STATS *gf_stats) {
RATE_CONTROL *const rc = &cpi->rc;
- TWO_PASS *const twopass = &cpi->twopass;
+ TWO_PASS *const twopass = &cpi->ppi->twopass;
InitialDimensions *const initial_dimensions = &cpi->initial_dimensions;
// Motion breakout threshold for loop below depends on image size.
const double mv_ratio_accumulator_thresh =
@@ -997,12 +1015,71 @@ static INLINE int detect_gf_cut(AV1_COMP *cpi, int frame_index, int cur_start,
// so we can continue for more frames.
if (((frame_index - cur_start) >= active_max_gf_interval + 1) &&
!is_almost_static(gf_stats->zero_motion_accumulator,
- twopass->kf_zeromotion_pct, cpi->lap_enabled)) {
+ twopass->kf_zeromotion_pct, cpi->ppi->lap_enabled)) {
return 1;
}
return 0;
}
+static int is_shorter_gf_interval_better(AV1_COMP *cpi,
+ EncodeFrameParams *frame_params,
+ const EncodeFrameInput *frame_input) {
+ RATE_CONTROL *const rc = &cpi->rc;
+ PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
+ int gop_length_decision_method = cpi->sf.tpl_sf.gop_length_decision_method;
+ int shorten_gf_interval;
+
+ av1_tpl_preload_rc_estimate(cpi, frame_params);
+
+ if (gop_length_decision_method == 2) {
+ // GF group length is decided based on GF boost and tpl stats of ARFs from
+ // base layer, (base+1) layer.
+ shorten_gf_interval =
+ (p_rc->gfu_boost <
+ p_rc->num_stats_used_for_gfu_boost * GF_MIN_BOOST * 1.4) &&
+ !av1_tpl_setup_stats(cpi, 3, frame_params, frame_input);
+ } else {
+ int do_complete_tpl = 1;
+ GF_GROUP *const gf_group = &cpi->ppi->gf_group;
+ int is_temporal_filter_enabled =
+ (rc->frames_since_key > 0 && gf_group->arf_index > -1);
+
+ if (is_temporal_filter_enabled) {
+ int arf_src_index = gf_group->arf_src_offset[gf_group->arf_index];
+ FRAME_UPDATE_TYPE arf_update_type =
+ gf_group->update_type[gf_group->arf_index];
+ int is_forward_keyframe = 0;
+ av1_temporal_filter(cpi, arf_src_index, arf_update_type,
+ is_forward_keyframe, NULL);
+ aom_extend_frame_borders(&cpi->ppi->alt_ref_buffer,
+ av1_num_planes(&cpi->common));
+ }
+
+ if (gop_length_decision_method == 1) {
+ // Check if tpl stats of ARFs from base layer, (base+1) layer,
+ // (base+2) layer can decide the GF group length.
+ int gop_length_eval =
+ av1_tpl_setup_stats(cpi, 2, frame_params, frame_input);
+
+ if (gop_length_eval != 2) {
+ do_complete_tpl = 0;
+ shorten_gf_interval = !gop_length_eval;
+ }
+ }
+
+ if (do_complete_tpl) {
+ // Decide GF group length based on complete tpl stats.
+ shorten_gf_interval =
+ !av1_tpl_setup_stats(cpi, 1, frame_params, frame_input);
+ // Tpl stats is reused when the ARF is temporally filtered and GF
+ // interval is not shortened.
+ if (is_temporal_filter_enabled && !shorten_gf_interval)
+ cpi->skip_tpl_setup_stats = 1;
+ }
+ }
+ return shorten_gf_interval;
+}
+
#define MIN_FWD_KF_INTERVAL 8
#define MIN_SHRINK_LEN 6 // the minimum length of gf if we are shrinking
#define SMOOTH_FILT_LEN 7
@@ -1014,17 +1091,16 @@ const double smooth_filt[SMOOTH_FILT_LEN] = { 0.006, 0.061, 0.242, 0.383,
0.242, 0.061, 0.006 };
// Smooth filter intra_error and coded_error in firstpass stats.
-// If ignore[i]==1, the ith element should not be used in the filtering.
-static void smooth_filter_stats(const FIRSTPASS_STATS *stats, const int *ignore,
- int start_idx, int last_idx,
- double *filt_intra_err,
+// If stats[i].is_flash==1, the ith element should not be used in the filtering.
+static void smooth_filter_stats(const FIRSTPASS_STATS *stats, int start_idx,
+ int last_idx, double *filt_intra_err,
double *filt_coded_err) {
int i, j;
for (i = start_idx; i <= last_idx; i++) {
double total_wt = 0;
for (j = -HALF_FILT_LEN; j <= HALF_FILT_LEN; j++) {
int idx = AOMMIN(AOMMAX(i + j, start_idx), last_idx);
- if (ignore[idx]) continue;
+ if (stats[idx].is_flash) continue;
filt_intra_err[i] +=
smooth_filt[j + HALF_FILT_LEN] * stats[idx].intra_error;
@@ -1041,7 +1117,7 @@ static void smooth_filter_stats(const FIRSTPASS_STATS *stats, const int *ignore,
for (j = -HALF_FILT_LEN; j <= HALF_FILT_LEN; j++) {
int idx = AOMMIN(AOMMAX(i + j, start_idx), last_idx);
// Coded error involves idx and idx - 1.
- if (ignore[idx] || (idx > 0 && ignore[idx - 1])) continue;
+ if (stats[idx].is_flash || (idx > 0 && stats[idx - 1].is_flash)) continue;
filt_coded_err[i] +=
smooth_filt[j + HALF_FILT_LEN] * stats[idx].coded_error;
@@ -1070,7 +1146,7 @@ static void get_gradient(const double *values, int start, int last,
}
static int find_next_scenecut(const FIRSTPASS_STATS *const stats_start,
- int first, int last, int *ignore) {
+ int first, int last) {
// Identify unstable areas caused by scenecuts.
// Find the max and 2nd max coded error, and the average of the rest frames.
// If there is only one frame that yields a huge coded error, it is likely a
@@ -1081,14 +1157,16 @@ static int find_next_scenecut(const FIRSTPASS_STATS *const stats_start,
if (last - first == 0) return -1;
for (int i = first; i <= last; i++) {
- if (ignore[i] || (i > 0 && ignore[i - 1])) continue;
+ if (stats_start[i].is_flash || (i > 0 && stats_start[i - 1].is_flash))
+ continue;
double temp_intra = AOMMAX(stats_start[i].intra_error, 0.01);
this_ratio = stats_start[i].coded_error / temp_intra;
// find the avg ratio in the preceding neighborhood
max_prev_ratio = 0;
max_prev_coded = 0;
for (int j = AOMMAX(first, i - HALF_WIN); j < i; j++) {
- if (ignore[j] || (j > 0 && ignore[j - 1])) continue;
+ if (stats_start[j].is_flash || (j > 0 && stats_start[j - 1].is_flash))
+ continue;
temp_intra = AOMMAX(stats_start[j].intra_error, 0.01);
double temp_ratio = stats_start[j].coded_error / temp_intra;
if (temp_ratio > max_prev_ratio) {
@@ -1102,7 +1180,8 @@ static int find_next_scenecut(const FIRSTPASS_STATS *const stats_start,
max_next_ratio = 0;
max_next_coded = 0;
for (int j = i + 1; j <= AOMMIN(i + HALF_WIN, last); j++) {
- if (ignore[j] || (j > 0 && ignore[j - 1])) continue;
+ if (stats_start[j].is_flash || (j > 0 && stats_start[j - 1].is_flash))
+ continue;
temp_intra = AOMMAX(stats_start[j].intra_error, 0.01);
double temp_ratio = stats_start[j].coded_error / temp_intra;
if (temp_ratio > max_next_ratio) {
@@ -1135,19 +1214,6 @@ static int find_next_scenecut(const FIRSTPASS_STATS *const stats_start,
return -1;
}
-static void mark_flashes(const FIRSTPASS_STATS *stats, int start_idx,
- int last_idx, int *is_flash) {
- int i;
- for (i = start_idx; i < last_idx; i++) {
- if (stats[i + 1].pcnt_second_ref > stats[i + 1].pcnt_inter &&
- stats[i + 1].pcnt_second_ref >= 0.5) {
- // this is a new flash frame
- is_flash[i] = 1;
- continue;
- }
- }
-}
-
// Remove the region with index next_region.
// parameter merge: 0: merge with previous; 1: merge with next; 2:
// merge with both, take type from previous if possible
@@ -1220,46 +1286,10 @@ static void insert_region(int start, int last, REGION_TYPES type,
*cur_region_idx = k;
}
-// Estimate the noise variance of each frame from the first pass stats
-static void estimate_region_noise(const FIRSTPASS_STATS *stats,
- const int *is_flash, REGIONS *region) {
- double C1, C2, C3, noise;
- int count = 0;
- region->avg_noise_var = -1;
- for (int i = region->start + 2; i <= region->last; i++) {
- if (is_flash[i] || is_flash[i - 1] || is_flash[i - 2]) continue;
-
- C1 = stats[i - 1].intra_error *
- (stats[i].intra_error - stats[i].coded_error);
- C2 = stats[i - 2].intra_error *
- (stats[i - 1].intra_error - stats[i - 1].coded_error);
- C3 = stats[i - 2].intra_error *
- (stats[i].intra_error - stats[i].sr_coded_error);
- if (C1 <= 0 || C2 <= 0 || C3 <= 0) continue;
- C1 = sqrt(C1);
- C2 = sqrt(C2);
- C3 = sqrt(C3);
-
- noise = stats[i - 1].intra_error - C1 * C2 / C3;
- noise = AOMMAX(noise, 0.01);
- region->avg_noise_var = (region->avg_noise_var == -1)
- ? noise
- : AOMMIN(noise, region->avg_noise_var);
- count++;
- }
- if (count == 0) {
- region->avg_noise_var = 0;
- }
-}
-
-// Analyze the corrrelation coefficient of each frame with its previous frame in
-// a region. Also get the average of stats inside a region.
-// Before calling this function, the region's noise variance is needed.
-static void analyze_region(const FIRSTPASS_STATS *stats, int region_idx,
- REGIONS *regions, double *coeff) {
- double cor_coeff;
-
- int i, k = region_idx;
+// Get the average of stats inside a region.
+static void analyze_region(const FIRSTPASS_STATS *stats, int k,
+ REGIONS *regions) {
+ int i;
regions[k].avg_cor_coeff = 0;
regions[k].avg_sr_fr_ratio = 0;
regions[k].avg_intra_err = 0;
@@ -1268,12 +1298,6 @@ static void analyze_region(const FIRSTPASS_STATS *stats, int region_idx,
int check_first_sr = (k != 0);
for (i = regions[k].start; i <= regions[k].last; i++) {
- double C = sqrt(AOMMAX(stats[i - 1].intra_error *
- (stats[i].intra_error - stats[i].coded_error),
- 0.001));
- cor_coeff =
- C / AOMMAX(stats[i - 1].intra_error - regions[k].avg_noise_var, 0.001);
-
if (i > regions[k].start || check_first_sr) {
double num_frames =
(double)(regions[k].last - regions[k].start + check_first_sr);
@@ -1289,85 +1313,27 @@ static void analyze_region(const FIRSTPASS_STATS *stats, int region_idx,
regions[k].avg_coded_err +=
stats[i].coded_error / (double)(regions[k].last - regions[k].start + 1);
- coeff[i] =
- cor_coeff *
- sqrt(
- AOMMAX(stats[i - 1].intra_error - regions[k].avg_noise_var, 0.001) /
- AOMMAX(stats[i].intra_error - regions[k].avg_noise_var, 0.001));
- // clip correlation coefficient.
- coeff[i] = AOMMIN(AOMMAX(coeff[i], 0), 1);
-
regions[k].avg_cor_coeff +=
- coeff[i] / (double)(regions[k].last - regions[k].start + 1);
+ AOMMAX(stats[i].cor_coeff, 0.001) /
+ (double)(regions[k].last - regions[k].start + 1);
+ regions[k].avg_noise_var +=
+ AOMMAX(stats[i].noise_var, 0.001) /
+ (double)(regions[k].last - regions[k].start + 1);
}
}
-// Calculate the regions stats of every region. Uses the stable regions to
-// estimate noise variance of other regions. Then call analyze_region for each.
-static void get_region_stats(const FIRSTPASS_STATS *stats, const int *is_flash,
- REGIONS *regions, double *coeff, int num_regions) {
- int k, count_stable = 0;
- // Analyze stable regions.
- for (k = 0; k < num_regions; k++) {
- if (regions[k].type == STABLE_REGION) {
- estimate_region_noise(stats, is_flash, regions + k);
- analyze_region(stats, k, regions, coeff);
- count_stable++;
- }
- }
-
- if (count_stable == 0) {
- // no stable region, just use the lowest noise variance estimated.
- double lowest_noise = -1;
- for (k = 0; k < num_regions; k++) {
- if (regions[k].type == SCENECUT_REGION) continue;
- estimate_region_noise(stats, is_flash, regions + k);
- if (regions[k].avg_noise_var < 0.01) continue;
- if (lowest_noise < 0 || lowest_noise > regions[k].avg_noise_var) {
- lowest_noise = regions[k].avg_noise_var;
- }
- }
- lowest_noise = AOMMAX(lowest_noise, 0);
- for (k = 0; k < num_regions; k++) {
- regions[k].avg_noise_var = lowest_noise;
- analyze_region(stats, k, regions, coeff);
- }
- return;
- }
-
- // Analyze other regions
- for (k = 0; k < num_regions; k++) {
- if (regions[k].type != STABLE_REGION) {
- // use the average of the nearest previous and next stable regions
- int count = 0;
- regions[k].avg_noise_var = 0;
- for (int r = k - 1; r >= 0; r--) {
- if (regions[r].type == STABLE_REGION) {
- count++;
- regions[k].avg_noise_var += regions[r].avg_noise_var;
- break;
- }
- }
- for (int r = k + 1; r < num_regions; r++) {
- if (regions[r].type == STABLE_REGION) {
- count++;
- regions[k].avg_noise_var += regions[r].avg_noise_var;
- break;
- }
- }
- if (count) {
- regions[k].avg_noise_var /= (double)count;
- }
- analyze_region(stats, k, regions, coeff);
- }
+// Calculate the regions stats of every region.
+static void get_region_stats(const FIRSTPASS_STATS *stats, REGIONS *regions,
+ int num_regions) {
+ for (int k = 0; k < num_regions; k++) {
+ analyze_region(stats, k, regions);
}
}
// Find tentative stable regions
static int find_stable_regions(const FIRSTPASS_STATS *stats,
- const double *grad_coded, const int *ignore,
- int this_start, int this_last,
- REGIONS *regions) {
+ const double *grad_coded, int this_start,
+ int this_last, REGIONS *regions) {
int i, j, k = 0;
regions[k].start = this_start;
for (i = this_start; i <= this_last; i++) {
@@ -1377,7 +1343,7 @@ static int find_stable_regions(const FIRSTPASS_STATS *stats,
int count = 0;
for (j = -HALF_WIN; j <= HALF_WIN; j++) {
int idx = AOMMIN(AOMMAX(i + j, this_start), this_last);
- if (ignore[idx] || (idx > 0 && ignore[idx - 1])) continue;
+ if (stats[idx].is_flash || (idx > 0 && stats[idx - 1].is_flash)) continue;
mean_intra += stats[idx].intra_error;
var_intra += stats[idx].intra_error * stats[idx].intra_error;
mean_coded += stats[idx].coded_error;
@@ -1451,15 +1417,13 @@ static void remove_short_regions(REGIONS *regions, int *num_regions,
}
static void adjust_unstable_region_bounds(const FIRSTPASS_STATS *stats,
- const int *is_flash,
- const double *grad, REGIONS *regions,
- double *coeff, int *num_regions) {
+ REGIONS *regions, int *num_regions) {
int i, j, k;
// Remove regions that are too short. Likely noise.
remove_short_regions(regions, num_regions, STABLE_REGION, HALF_WIN);
remove_short_regions(regions, num_regions, HIGH_VAR_REGION, HALF_WIN);
- get_region_stats(stats, is_flash, regions, coeff, *num_regions);
+ get_region_stats(stats, regions, *num_regions);
// Adjust region boundaries. The thresholds are empirically obtained, but
// overall the performance is not very sensitive to small changes to them.
@@ -1469,34 +1433,24 @@ static void adjust_unstable_region_bounds(const FIRSTPASS_STATS *stats,
// Adjust previous boundary.
// First find the average intra/coded error in the previous
// neighborhood.
- double avg_intra_err = 0, avg_coded_err = 0, avg_coeff = 0;
- int starti = AOMMAX(regions[k - 1].last - WINDOW_SIZE + 1,
- regions[k - 1].start + 1);
- int lasti = regions[k - 1].last;
+ double avg_intra_err = 0;
+ const int starti = AOMMAX(regions[k - 1].last - WINDOW_SIZE + 1,
+ regions[k - 1].start + 1);
+ const int lasti = regions[k - 1].last;
int counti = 0;
for (i = starti; i <= lasti; i++) {
avg_intra_err += stats[i].intra_error;
- avg_coded_err += stats[i].coded_error;
- avg_coeff += coeff[i];
counti++;
}
if (counti > 0) {
avg_intra_err = AOMMAX(avg_intra_err / (double)counti, 0.001);
- avg_coded_err /= AOMMAX(avg_coded_err / (double)counti, 0.001);
- avg_coeff /= AOMMIN(avg_intra_err / (double)counti, 0.99999);
int count_coded = 0, count_grad = 0;
for (j = lasti + 1; j <= regions[k].last; j++) {
- int intra_close =
+ const int intra_close =
fabs(stats[j].intra_error - avg_intra_err) / avg_intra_err < 0.1;
- int coded_close =
- fabs(stats[j].coded_error - avg_coded_err) / avg_coded_err < 0.15;
- int grad_small = fabs(grad[j]) / avg_coded_err < 0.05;
- int coded_small = stats[j].coded_error / avg_intra_err < 0.03;
- int coeff_close =
- (1 - coeff[j]) / (1 - avg_coeff) < 1.5 || coeff[j] > 0.995;
- if (!coeff_close || (!coded_close && !coded_small)) count_coded--;
- if (!grad_small && !coded_small) count_grad--;
-
+ const int coded_small = stats[j].coded_error / avg_intra_err < 0.1;
+ const int coeff_close = stats[j].cor_coeff > 0.995;
+ if (!coeff_close || !coded_small) count_coded--;
if (intra_close && count_coded >= 0 && count_grad >= 0) {
// this frame probably belongs to the previous stable region
regions[k - 1].last = j;
@@ -1510,35 +1464,26 @@ static void adjust_unstable_region_bounds(const FIRSTPASS_STATS *stats,
if (k < *num_regions - 1) {
// Adjust next boundary.
// First find the average intra/coded error in the next neighborhood.
- double avg_intra_err = 0, avg_coded_err = 0, avg_coeff = 0;
- int starti = regions[k + 1].start;
- int lasti = AOMMIN(regions[k + 1].last - 1,
- regions[k + 1].start + WINDOW_SIZE - 1);
+ double avg_intra_err = 0;
+ const int starti = regions[k + 1].start;
+ const int lasti = AOMMIN(regions[k + 1].last - 1,
+ regions[k + 1].start + WINDOW_SIZE - 1);
int counti = 0;
for (i = starti; i <= lasti; i++) {
avg_intra_err += stats[i].intra_error;
- avg_coded_err += stats[i + 1].coded_error;
- avg_coeff += coeff[i];
counti++;
}
if (counti > 0) {
avg_intra_err = AOMMAX(avg_intra_err / (double)counti, 0.001);
- avg_coded_err /= AOMMAX(avg_coded_err / (double)counti, 0.001);
- avg_coeff /= AOMMIN(avg_intra_err / (double)counti, 0.99999);
// At the boundary, coded error is large, but still the frame is stable
int count_coded = 1, count_grad = 1;
for (j = starti - 1; j >= regions[k].start; j--) {
- int intra_close =
+ const int intra_close =
fabs(stats[j].intra_error - avg_intra_err) / avg_intra_err < 0.1;
- int coded_close =
- fabs(stats[j + 1].coded_error - avg_coded_err) / avg_coded_err <
- 0.15;
- int grad_small = fabs(grad[j + 1]) / avg_coded_err < 0.05;
- int coded_small = stats[j + 1].coded_error / avg_intra_err < 0.03;
- int coeff_close =
- (1 - coeff[j + 1]) / (1 - avg_coeff) < 1.5 || coeff[j] > 0.995;
- if (!coeff_close || (!coded_close && !coded_small)) count_coded--;
- if (!grad_small && !coded_small) count_grad--;
+ const int coded_small =
+ stats[j + 1].coded_error / avg_intra_err < 0.1;
+ const int coeff_close = stats[j].cor_coeff > 0.995;
+ if (!coeff_close || !coded_small) count_coded--;
if (intra_close && count_coded >= 0 && count_grad >= 0) {
// this frame probably belongs to the next stable region
regions[k + 1].start = j;
@@ -1553,7 +1498,7 @@ static void adjust_unstable_region_bounds(const FIRSTPASS_STATS *stats,
cleanup_regions(regions, num_regions);
remove_short_regions(regions, num_regions, HIGH_VAR_REGION, HALF_WIN);
- get_region_stats(stats, is_flash, regions, coeff, *num_regions);
+ get_region_stats(stats, regions, *num_regions);
// If a stable regions has higher error than neighboring high var regions,
// or if the stable region has a lower average correlation,
@@ -1561,25 +1506,31 @@ static void adjust_unstable_region_bounds(const FIRSTPASS_STATS *stats,
k = 0;
while (k < *num_regions && (*num_regions) > 1) {
if (regions[k].type == STABLE_REGION &&
+ (regions[k].last - regions[k].start + 1) < 2 * WINDOW_SIZE &&
((k > 0 && // previous regions
- (regions[k].avg_coded_err > regions[k - 1].avg_coded_err ||
- regions[k].avg_cor_coeff < regions[k - 1].avg_cor_coeff)) &&
+ (regions[k].avg_coded_err > regions[k - 1].avg_coded_err * 1.01 ||
+ regions[k].avg_cor_coeff < regions[k - 1].avg_cor_coeff * 0.999)) &&
(k < *num_regions - 1 && // next region
- (regions[k].avg_coded_err > regions[k + 1].avg_coded_err ||
- regions[k].avg_cor_coeff < regions[k + 1].avg_cor_coeff)))) {
+ (regions[k].avg_coded_err > regions[k + 1].avg_coded_err * 1.01 ||
+ regions[k].avg_cor_coeff < regions[k + 1].avg_cor_coeff * 0.999)))) {
// merge current region with the previous and next regions
remove_region(2, regions, num_regions, &k);
- analyze_region(stats, k - 1, regions, coeff);
+ analyze_region(stats, k - 1, regions);
} else if (regions[k].type == HIGH_VAR_REGION &&
+ (regions[k].last - regions[k].start + 1) < 2 * WINDOW_SIZE &&
((k > 0 && // previous regions
- (regions[k].avg_coded_err < regions[k - 1].avg_coded_err ||
- regions[k].avg_cor_coeff > regions[k - 1].avg_cor_coeff)) &&
+ (regions[k].avg_coded_err <
+ regions[k - 1].avg_coded_err * 0.99 ||
+ regions[k].avg_cor_coeff >
+ regions[k - 1].avg_cor_coeff * 1.001)) &&
(k < *num_regions - 1 && // next region
- (regions[k].avg_coded_err < regions[k + 1].avg_coded_err ||
- regions[k].avg_cor_coeff > regions[k + 1].avg_cor_coeff)))) {
+ (regions[k].avg_coded_err <
+ regions[k + 1].avg_coded_err * 0.99 ||
+ regions[k].avg_cor_coeff >
+ regions[k + 1].avg_cor_coeff * 1.001)))) {
// merge current region with the previous and next regions
remove_region(2, regions, num_regions, &k);
- analyze_region(stats, k - 1, regions, coeff);
+ analyze_region(stats, k - 1, regions);
} else {
k++;
}
@@ -1591,8 +1542,7 @@ static void adjust_unstable_region_bounds(const FIRSTPASS_STATS *stats,
// Identify blending regions.
static void find_blending_regions(const FIRSTPASS_STATS *stats,
- const int *is_flash, REGIONS *regions,
- int *num_regions, double *coeff) {
+ REGIONS *regions, int *num_regions) {
int i, k = 0;
// Blending regions will have large content change, therefore will have a
// large consistent change in intra error.
@@ -1607,7 +1557,8 @@ static void find_blending_regions(const FIRSTPASS_STATS *stats,
int start = 0, last;
for (i = regions[k].start; i <= regions[k].last; i++) {
// First mark the regions that has consistent large change of intra error.
- if (is_flash[i] || (i > 0 && is_flash[i - 1])) continue;
+ if (k == 0 && i == regions[k].start) continue;
+ if (stats[i].is_flash || (i > 0 && stats[i - 1].is_flash)) continue;
double grad = stats[i].intra_error - stats[i - 1].intra_error;
int large_change = fabs(grad) / AOMMAX(stats[i].intra_error, 0.01) > 0.05;
int this_dir = 0;
@@ -1622,7 +1573,11 @@ static void find_blending_regions(const FIRSTPASS_STATS *stats,
insert_region(start, last, BLENDING_REGION, regions, num_regions, &k);
}
dir = this_dir;
- start = i;
+ if (k == 0 && i == regions[k].start + 1) {
+ start = i - 1;
+ } else {
+ start = i;
+ }
}
if (dir != 0) {
last = regions[k].last;
@@ -1633,14 +1588,14 @@ static void find_blending_regions(const FIRSTPASS_STATS *stats,
// If the blending region has very low correlation, mark it as high variance
// since we probably cannot benefit from it anyways.
- get_region_stats(stats, is_flash, regions, coeff, *num_regions);
+ get_region_stats(stats, regions, *num_regions);
for (k = 0; k < *num_regions; k++) {
if (regions[k].type != BLENDING_REGION) continue;
if (regions[k].last == regions[k].start || regions[k].avg_cor_coeff < 0.6 ||
count_stable == 0)
regions[k].type = HIGH_VAR_REGION;
}
- get_region_stats(stats, is_flash, regions, coeff, *num_regions);
+ get_region_stats(stats, regions, *num_regions);
// It is possible for blending to result in a "dip" in intra error (first
// decrease then increase). Therefore we need to find the dip and combine the
@@ -1669,7 +1624,7 @@ static void find_blending_regions(const FIRSTPASS_STATS *stats,
if (regions[k].avg_sr_fr_ratio > ratio_thres) {
regions[k].type = BLENDING_REGION;
remove_region(2, regions, num_regions, &k);
- analyze_region(stats, k - 1, regions, coeff);
+ analyze_region(stats, k - 1, regions);
continue;
}
}
@@ -1727,7 +1682,7 @@ static void find_blending_regions(const FIRSTPASS_STATS *stats,
if (to_merge) {
remove_region(0, regions, num_regions, &k);
- analyze_region(stats, k - 1, regions, coeff);
+ analyze_region(stats, k - 1, regions);
continue;
} else {
// These are possibly two separate blending regions. Mark the boundary
@@ -1735,9 +1690,9 @@ static void find_blending_regions(const FIRSTPASS_STATS *stats,
int prev_k = k - 1;
insert_region(regions[prev_k].last, regions[prev_k].last,
HIGH_VAR_REGION, regions, num_regions, &prev_k);
- analyze_region(stats, prev_k, regions, coeff);
+ analyze_region(stats, prev_k, regions);
k = prev_k + 1;
- analyze_region(stats, k, regions, coeff);
+ analyze_region(stats, k, regions);
}
}
k++;
@@ -1793,16 +1748,13 @@ static void cleanup_blendings(REGIONS *regions, int *num_regions) {
// pointing to.
static void identify_regions(const FIRSTPASS_STATS *const stats_start,
int total_frames, int offset, REGIONS *regions,
- int *total_regions, double *cor_coeff) {
+ int *total_regions) {
int k;
if (total_frames <= 1) return;
- double *coeff = cor_coeff + offset;
-
// store the initial decisions
REGIONS temp_regions[MAX_FIRSTPASS_ANALYSIS_FRAMES];
av1_zero_array(temp_regions, MAX_FIRSTPASS_ANALYSIS_FRAMES);
- int is_flash[MAX_FIRSTPASS_ANALYSIS_FRAMES] = { 0 };
// buffers for filtered stats
double filt_intra_err[MAX_FIRSTPASS_ANALYSIS_FRAMES] = { 0 };
double filt_coded_err[MAX_FIRSTPASS_ANALYSIS_FRAMES] = { 0 };
@@ -1810,32 +1762,28 @@ static void identify_regions(const FIRSTPASS_STATS *const stats_start,
int cur_region = 0, this_start = 0, this_last;
- // find possible flash frames
- mark_flashes(stats_start, 0, total_frames - 1, is_flash);
-
- // first get the obvious scenecuts
int next_scenecut = -1;
-
do {
+ // first get the obvious scenecuts
next_scenecut =
- find_next_scenecut(stats_start, this_start, total_frames - 1, is_flash);
+ find_next_scenecut(stats_start, this_start, total_frames - 1);
this_last = (next_scenecut >= 0) ? (next_scenecut - 1) : total_frames - 1;
+
// low-pass filter the needed stats
- smooth_filter_stats(stats_start, is_flash, this_start, this_last,
- filt_intra_err, filt_coded_err);
+ smooth_filter_stats(stats_start, this_start, this_last, filt_intra_err,
+ filt_coded_err);
get_gradient(filt_coded_err, this_start, this_last, grad_coded);
// find tentative stable regions and unstable regions
- int num_regions = find_stable_regions(stats_start, grad_coded, is_flash,
- this_start, this_last, temp_regions);
- adjust_unstable_region_bounds(stats_start, is_flash, grad_coded,
- temp_regions, coeff, &num_regions);
+ int num_regions = find_stable_regions(stats_start, grad_coded, this_start,
+ this_last, temp_regions);
- get_region_stats(stats_start, is_flash, temp_regions, coeff, num_regions);
+ adjust_unstable_region_bounds(stats_start, temp_regions, &num_regions);
+
+ get_region_stats(stats_start, temp_regions, num_regions);
// Try to identify blending regions in the unstable regions
- find_blending_regions(stats_start, is_flash, temp_regions, &num_regions,
- coeff);
+ find_blending_regions(stats_start, temp_regions, &num_regions);
cleanup_blendings(temp_regions, &num_regions);
// The flash points should all be considered high variance points
@@ -1848,7 +1796,7 @@ static void identify_regions(const FIRSTPASS_STATS *const stats_start,
int start = temp_regions[k].start;
int last = temp_regions[k].last;
for (int i = start; i <= last; i++) {
- if (is_flash[i]) {
+ if (stats_start[i].is_flash) {
insert_region(i, i, HIGH_VAR_REGION, temp_regions, &num_regions, &k);
}
}
@@ -1858,6 +1806,11 @@ static void identify_regions(const FIRSTPASS_STATS *const stats_start,
// copy the regions in the scenecut group
for (k = 0; k < num_regions; k++) {
+ if (temp_regions[k].last < temp_regions[k].start &&
+ k == num_regions - 1) {
+ num_regions--;
+ break;
+ }
regions[k + cur_region] = temp_regions[k];
}
cur_region += num_regions;
@@ -1874,17 +1827,21 @@ static void identify_regions(const FIRSTPASS_STATS *const stats_start,
} while (next_scenecut >= 0);
*total_regions = cur_region;
- get_region_stats(stats_start, is_flash, regions, coeff, *total_regions);
+ get_region_stats(stats_start, regions, *total_regions);
for (k = 0; k < *total_regions; k++) {
// If scenecuts are very minor, mark them as high variance.
- if (regions[k].type != SCENECUT_REGION || regions[k].avg_cor_coeff < 0.8) {
+ if (regions[k].type != SCENECUT_REGION ||
+ regions[k].avg_cor_coeff *
+ (1 - stats_start[regions[k].start].noise_var /
+ regions[k].avg_intra_err) <
+ 0.8) {
continue;
}
regions[k].type = HIGH_VAR_REGION;
}
cleanup_regions(regions, total_regions);
- get_region_stats(stats_start, is_flash, regions, coeff, *total_regions);
+ get_region_stats(stats_start, regions, *total_regions);
for (k = 0; k < *total_regions; k++) {
regions[k].start += offset;
@@ -1911,16 +1868,17 @@ static int find_regions_index(const REGIONS *regions, int num_regions,
* \param[in] max_gop_length Maximum length of the GF group
* \param[in] max_intervals Maximum number of intervals to decide
*
- * \return Nothing is returned. Instead, cpi->rc.gf_intervals is
+ * \return Nothing is returned. Instead, cpi->ppi->rc.gf_intervals is
* changed to store the decided GF group lengths.
*/
static void calculate_gf_length(AV1_COMP *cpi, int max_gop_length,
int max_intervals) {
RATE_CONTROL *const rc = &cpi->rc;
- TWO_PASS *const twopass = &cpi->twopass;
+ PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
+ TWO_PASS *const twopass = &cpi->ppi->twopass;
FIRSTPASS_STATS next_frame;
const FIRSTPASS_STATS *const start_pos = twopass->stats_in;
- FRAME_INFO *frame_info = &cpi->frame_info;
+ const FIRSTPASS_STATS *const stats = start_pos - (rc->frames_since_key == 0);
int i;
int flash_detected;
@@ -1930,9 +1888,9 @@ static void calculate_gf_length(AV1_COMP *cpi, int max_gop_length,
if (has_no_stats_stage(cpi)) {
for (i = 0; i < MAX_NUM_GF_INTERVALS; i++) {
- rc->gf_intervals[i] = AOMMIN(rc->max_gf_interval, max_gop_length);
+ p_rc->gf_intervals[i] = AOMMIN(rc->max_gf_interval, max_gop_length);
}
- rc->cur_gf_index = 0;
+ p_rc->cur_gf_index = 0;
rc->intervals_till_gf_calculate_due = MAX_NUM_GF_INTERVALS;
return;
}
@@ -1944,17 +1902,17 @@ static void calculate_gf_length(AV1_COMP *cpi, int max_gop_length,
const int min_shrink_int = AOMMAX(MIN_SHRINK_LEN, active_min_gf_interval);
i = (rc->frames_since_key == 0);
- max_intervals = cpi->lap_enabled ? 1 : max_intervals;
+ max_intervals = cpi->ppi->lap_enabled ? 1 : max_intervals;
int count_cuts = 1;
// If cpi->gf_state.arf_gf_boost_lst is 0, we are starting with a KF or GF.
- int cur_start = -1 + !cpi->gf_state.arf_gf_boost_lst, cur_last;
+ int cur_start = -1 + !cpi->ppi->gf_state.arf_gf_boost_lst, cur_last;
int cut_pos[MAX_NUM_GF_INTERVALS + 1] = { -1 };
int cut_here;
GF_GROUP_STATS gf_stats;
init_gf_stats(&gf_stats);
while (count_cuts < max_intervals + 1) {
// reaches next key frame, break here
- if (i >= rc->frames_to_key + rc->next_is_fwd_key) {
+ if (i >= rc->frames_to_key + p_rc->next_is_fwd_key) {
cut_here = 2;
} else if (i - cur_start >= rc->static_scene_max_gf_interval) {
// reached maximum len, but nothing special yet (almost static)
@@ -1969,7 +1927,7 @@ static void calculate_gf_length(AV1_COMP *cpi, int max_gop_length,
flash_detected = detect_flash(twopass, 0);
// TODO(bohanli): remove redundant accumulations here, or unify
// this and the ones in define_gf_group
- accumulate_next_frame_stats(&next_frame, frame_info, flash_detected,
+ accumulate_next_frame_stats(&next_frame, flash_detected,
rc->frames_since_key, i, &gf_stats);
cut_here = detect_gf_cut(cpi, i, cur_start, flash_detected,
@@ -1981,10 +1939,10 @@ static void calculate_gf_length(AV1_COMP *cpi, int max_gop_length,
int ori_last = cur_last;
// The region frame idx does not start from the same frame as cur_start
// and cur_last. Need to offset them.
- int offset = rc->frames_since_key - rc->regions_offset;
- REGIONS *regions = rc->regions;
- int num_regions = rc->num_regions;
- if (cpi->oxcf.kf_cfg.fwd_kf_enabled && rc->next_is_fwd_key) {
+ int offset = rc->frames_since_key - p_rc->regions_offset;
+ REGIONS *regions = p_rc->regions;
+ int num_regions = p_rc->num_regions;
+ if (cpi->oxcf.kf_cfg.fwd_kf_enabled && p_rc->next_is_fwd_key) {
const int frames_left = rc->frames_to_key - i;
const int min_int = AOMMIN(MIN_FWD_KF_INTERVAL, active_min_gf_interval);
if (frames_left < min_int && frames_left > 0) {
@@ -2021,7 +1979,11 @@ static void calculate_gf_length(AV1_COMP *cpi, int max_gop_length,
// If we have a scenecut, then stop at it.
// TODO(bohanli): add logic here to stop before the scenecut and for
// the next gop start from the scenecut with GF
- int is_minor_sc = (regions[scenecut_idx].avg_cor_coeff > 0.6);
+ int is_minor_sc =
+ (regions[scenecut_idx].avg_cor_coeff *
+ (1 - stats[regions[scenecut_idx].start - offset].noise_var /
+ regions[scenecut_idx].avg_intra_err) >
+ 0.6);
cur_last = regions[scenecut_idx].last - offset - !is_minor_sc;
} else {
int is_last_analysed = (k_last == num_regions - 1) &&
@@ -2032,45 +1994,91 @@ static void calculate_gf_length(AV1_COMP *cpi, int max_gop_length,
// if we are very close to the end, then do not shrink since it may
// introduce intervals that are too short
if (!(is_last_analysed && not_enough_regions)) {
- int found = 0;
- // first try to end at a stable area
- for (int j = cur_last; j >= cur_start + min_shrink_int; j--) {
- if (regions[find_regions_index(regions, num_regions, j + offset)]
- .type == STABLE_REGION) {
- cur_last = j;
- found = 1;
- break;
- }
+ const double arf_length_factor = 0.1;
+ double best_score = 0;
+ int best_j = -1;
+ const int first_frame = regions[0].start - offset;
+ const int last_frame = regions[num_regions - 1].last - offset;
+ // score of how much the arf helps the whole GOP
+ double base_score = 0.0;
+ // Accumulate base_score in
+ for (int j = cur_start + 1; j < cur_start + min_shrink_int; j++) {
+ if (stats + j >= twopass->stats_buf_ctx->stats_in_end) break;
+ base_score = (base_score + 1.0) * stats[j].cor_coeff;
}
- if (!found) {
- // Could not find stable point,
- // try to find an OK point (high correlation, not blending)
- for (int j = cur_last; j >= cur_start + min_shrink_int; j--) {
- REGIONS *cur_region =
- regions +
- find_regions_index(regions, num_regions, j + offset);
- double avg_coeff = cur_region->avg_cor_coeff;
- if (rc->cor_coeff[j + offset] > avg_coeff &&
- cur_region->type != BLENDING_REGION) {
- cur_last = j;
- found = 1;
+ int met_blending = 0; // Whether we have met blending areas before
+ int last_blending = 0; // Whether the previous frame if blending
+ for (int j = cur_start + min_shrink_int; j <= cur_last; j++) {
+ if (stats + j >= twopass->stats_buf_ctx->stats_in_end) break;
+ base_score = (base_score + 1.0) * stats[j].cor_coeff;
+ int this_reg =
+ find_regions_index(regions, num_regions, j + offset);
+ if (this_reg < 0) continue;
+ // A GOP should include at most 1 blending region.
+ if (regions[this_reg].type == BLENDING_REGION) {
+ last_blending = 1;
+ if (met_blending) {
break;
+ } else {
+ base_score = 0;
+ continue;
}
+ } else {
+ if (last_blending) met_blending = 1;
+ last_blending = 0;
+ }
+
+ // Add the factor of how good the neighborhood is for this
+ // candidate arf.
+ double this_score = arf_length_factor * base_score;
+ double temp_accu_coeff = 1.0;
+ // following frames
+ int count_f = 0;
+ for (int n = j + 1; n <= j + 3 && n <= last_frame; n++) {
+ if (stats + n >= twopass->stats_buf_ctx->stats_in_end) break;
+ temp_accu_coeff *= stats[n].cor_coeff;
+ this_score +=
+ temp_accu_coeff *
+ (1 - stats[n].noise_var /
+ AOMMAX(regions[this_reg].avg_intra_err, 0.001));
+ count_f++;
+ }
+ // preceding frames
+ temp_accu_coeff = 1.0;
+ for (int n = j; n > j - 3 * 2 + count_f && n > first_frame; n--) {
+ if (stats + n < twopass->stats_buf_ctx->stats_in_start) break;
+ temp_accu_coeff *= stats[n].cor_coeff;
+ this_score +=
+ temp_accu_coeff *
+ (1 - stats[n].noise_var /
+ AOMMAX(regions[this_reg].avg_intra_err, 0.001));
+ }
+
+ if (this_score > best_score) {
+ best_score = this_score;
+ best_j = j;
}
}
- if (!found) {
- // Could not find a better point,
- // try not to cut in blending areas
- for (int j = cur_last; j >= cur_start + min_shrink_int; j--) {
- REGIONS *cur_region =
- regions +
- find_regions_index(regions, num_regions, j + offset);
- if (cur_region->type != BLENDING_REGION) {
- cur_last = j;
- break;
+
+ // For blending areas, move one more frame in case we missed the
+ // first blending frame.
+ int best_reg =
+ find_regions_index(regions, num_regions, best_j + offset);
+ if (best_reg < num_regions - 1 && best_reg > 0) {
+ if (regions[best_reg - 1].type == BLENDING_REGION &&
+ regions[best_reg + 1].type == BLENDING_REGION) {
+ if (best_j + offset == regions[best_reg].start &&
+ best_j + offset < regions[best_reg].last) {
+ best_j += 1;
+ } else if (best_j + offset == regions[best_reg].last &&
+ best_j + offset > regions[best_reg].start) {
+ best_j -= 1;
}
}
}
+
+ if (cur_last - best_j < 2) best_j = cur_last;
+ if (best_j > 0 && best_score > 0.1) cur_last = best_j;
// if cannot find anything, just cut at the original place.
}
}
@@ -2081,11 +2089,11 @@ static void calculate_gf_length(AV1_COMP *cpi, int max_gop_length,
// reset pointers to the shrinked location
twopass->stats_in = start_pos + cur_last;
cur_start = cur_last;
- if (regions[find_regions_index(regions, num_regions,
- cur_start + 1 + offset)]
- .type == SCENECUT_REGION) {
- cur_start++;
- }
+ int cur_region_idx =
+ find_regions_index(regions, num_regions, cur_start + 1 + offset);
+ if (cur_region_idx >= 0)
+ if (regions[cur_region_idx].type == SCENECUT_REGION) cur_start++;
+
i = cur_last;
if (cut_here > 1 && cur_last == ori_last) break;
@@ -2099,9 +2107,9 @@ static void calculate_gf_length(AV1_COMP *cpi, int max_gop_length,
// save intervals
rc->intervals_till_gf_calculate_due = count_cuts - 1;
for (int n = 1; n < count_cuts; n++) {
- rc->gf_intervals[n - 1] = cut_pos[n] - cut_pos[n - 1];
+ p_rc->gf_intervals[n - 1] = cut_pos[n] - cut_pos[n - 1];
}
- rc->cur_gf_index = 0;
+ p_rc->cur_gf_index = 0;
twopass->stats_in = start_pos;
}
@@ -2110,12 +2118,13 @@ static void correct_frames_to_key(AV1_COMP *cpi) {
(int)av1_lookahead_depth(cpi->ppi->lookahead, cpi->compressor_stage);
if (lookahead_size <
av1_lookahead_pop_sz(cpi->ppi->lookahead, cpi->compressor_stage)) {
- assert(IMPLIES(cpi->oxcf.pass != 0 && cpi->frames_left > 0,
- lookahead_size == cpi->frames_left));
+ assert(IMPLIES(cpi->oxcf.pass != 0 && cpi->ppi->frames_left > 0,
+ lookahead_size == cpi->ppi->frames_left));
cpi->rc.frames_to_key = AOMMIN(cpi->rc.frames_to_key, lookahead_size);
- } else if (cpi->frames_left > 0) {
+ } else if (cpi->ppi->frames_left > 0) {
// Correct frames to key based on limit
- cpi->rc.frames_to_key = AOMMIN(cpi->rc.frames_to_key, cpi->frames_left);
+ cpi->rc.frames_to_key =
+ AOMMIN(cpi->rc.frames_to_key, cpi->ppi->frames_left);
}
}
@@ -2129,11 +2138,12 @@ static void correct_frames_to_key(AV1_COMP *cpi) {
*
* \param[in] cpi Top-level encoder structure
*
- * \return Nothing is returned. Instead, cpi->gf_group is changed.
+ * \return Nothing is returned. Instead, cpi->ppi->gf_group is changed.
*/
static void define_gf_group_pass0(AV1_COMP *cpi) {
RATE_CONTROL *const rc = &cpi->rc;
- GF_GROUP *const gf_group = &cpi->gf_group;
+ PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
+ GF_GROUP *const gf_group = &cpi->ppi->gf_group;
const AV1EncoderConfig *const oxcf = &cpi->oxcf;
const GFConfig *const gf_cfg = &oxcf->gf_cfg;
int target;
@@ -2141,28 +2151,28 @@ static void define_gf_group_pass0(AV1_COMP *cpi) {
if (oxcf->q_cfg.aq_mode == CYCLIC_REFRESH_AQ) {
av1_cyclic_refresh_set_golden_update(cpi);
} else {
- rc->baseline_gf_interval = rc->gf_intervals[rc->cur_gf_index];
+ p_rc->baseline_gf_interval = p_rc->gf_intervals[p_rc->cur_gf_index];
rc->intervals_till_gf_calculate_due--;
- rc->cur_gf_index++;
+ p_rc->cur_gf_index++;
}
// correct frames_to_key when lookahead queue is flushing
correct_frames_to_key(cpi);
- if (rc->baseline_gf_interval > rc->frames_to_key)
- rc->baseline_gf_interval = rc->frames_to_key;
+ if (p_rc->baseline_gf_interval > rc->frames_to_key)
+ p_rc->baseline_gf_interval = rc->frames_to_key;
- rc->gfu_boost = DEFAULT_GF_BOOST;
- rc->constrained_gf_group =
- (rc->baseline_gf_interval >= rc->frames_to_key) ? 1 : 0;
+ p_rc->gfu_boost = DEFAULT_GF_BOOST;
+ p_rc->constrained_gf_group =
+ (p_rc->baseline_gf_interval >= rc->frames_to_key) ? 1 : 0;
gf_group->max_layer_depth_allowed = oxcf->gf_cfg.gf_max_pyr_height;
// Rare case when the look-ahead is less than the target GOP length, can't
// generate ARF frame.
- if (rc->baseline_gf_interval > gf_cfg->lag_in_frames ||
+ if (p_rc->baseline_gf_interval > gf_cfg->lag_in_frames ||
!is_altref_enabled(gf_cfg->lag_in_frames, gf_cfg->enable_auto_arf) ||
- rc->baseline_gf_interval < rc->min_gf_interval)
+ p_rc->baseline_gf_interval < rc->min_gf_interval)
gf_group->max_layer_depth_allowed = 0;
// Set up the structure of this Group-Of-Pictures (same as GF_GROUP)
@@ -2194,7 +2204,8 @@ static INLINE void set_baseline_gf_interval(AV1_COMP *cpi, int arf_position,
int use_alt_ref,
int is_final_pass) {
RATE_CONTROL *const rc = &cpi->rc;
- TWO_PASS *const twopass = &cpi->twopass;
+ PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
+ TWO_PASS *const twopass = &cpi->ppi->twopass;
// Set the interval until the next gf.
// If forward keyframes are enabled, ensure the final gf group obeys the
// MIN_FWD_KF_INTERVAL.
@@ -2203,27 +2214,28 @@ static INLINE void set_baseline_gf_interval(AV1_COMP *cpi, int arf_position,
twopass->stats_buf_ctx->stats_in_end;
if (cpi->oxcf.kf_cfg.fwd_kf_enabled && use_alt_ref && !is_last_kf &&
- cpi->rc.next_is_fwd_key) {
+ cpi->ppi->p_rc.next_is_fwd_key) {
if (arf_position == rc->frames_to_key + 1) {
- rc->baseline_gf_interval = arf_position;
+ p_rc->baseline_gf_interval = arf_position;
// if the last gf group will be smaller than MIN_FWD_KF_INTERVAL
} else if (rc->frames_to_key + 1 - arf_position <
AOMMAX(MIN_FWD_KF_INTERVAL, rc->min_gf_interval)) {
// if possible, merge the last two gf groups
if (rc->frames_to_key + 1 <= active_max_gf_interval) {
- rc->baseline_gf_interval = rc->frames_to_key + 1;
+ p_rc->baseline_gf_interval = rc->frames_to_key + 1;
if (is_final_pass) rc->intervals_till_gf_calculate_due = 0;
// if merging the last two gf groups creates a group that is too long,
// split them and force the last gf group to be the MIN_FWD_KF_INTERVAL
} else {
- rc->baseline_gf_interval = rc->frames_to_key + 1 - MIN_FWD_KF_INTERVAL;
+ p_rc->baseline_gf_interval =
+ rc->frames_to_key + 1 - MIN_FWD_KF_INTERVAL;
if (is_final_pass) rc->intervals_till_gf_calculate_due = 0;
}
} else {
- rc->baseline_gf_interval = arf_position;
+ p_rc->baseline_gf_interval = arf_position;
}
} else {
- rc->baseline_gf_interval = arf_position;
+ p_rc->baseline_gf_interval = arf_position;
}
}
@@ -2269,18 +2281,19 @@ static void init_gf_stats(GF_GROUP_STATS *gf_stats) {
* \param[in] is_final_pass Whether this is the final pass for the
* GF group, or a trial (non-zero)
*
- * \return Nothing is returned. Instead, cpi->gf_group is changed.
+ * \return Nothing is returned. Instead, cpi->ppi->gf_group is changed.
*/
static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame,
EncodeFrameParams *frame_params, int max_gop_length,
int is_final_pass) {
AV1_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
+ PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
const AV1EncoderConfig *const oxcf = &cpi->oxcf;
- TWO_PASS *const twopass = &cpi->twopass;
+ TWO_PASS *const twopass = &cpi->ppi->twopass;
FIRSTPASS_STATS next_frame;
const FIRSTPASS_STATS *const start_pos = twopass->stats_in;
- GF_GROUP *gf_group = &cpi->gf_group;
+ GF_GROUP *gf_group = &cpi->ppi->gf_group;
FRAME_INFO *frame_info = &cpi->frame_info;
const GFConfig *const gf_cfg = &oxcf->gf_cfg;
const RateControlCfg *const rc_cfg = &oxcf->rc_cfg;
@@ -2289,12 +2302,13 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame,
int64_t gf_group_bits;
const int is_intra_only = rc->frames_since_key == 0;
- cpi->internal_altref_allowed = (gf_cfg->gf_max_pyr_height > 1);
+ cpi->ppi->internal_altref_allowed = (gf_cfg->gf_max_pyr_height > 1);
// Reset the GF group data structures unless this is a key
// frame in which case it will already have been done.
if (!is_intra_only) {
- av1_zero(cpi->gf_group);
+ av1_zero(cpi->ppi->gf_group);
+ cpi->gf_frame_index = 0;
}
aom_clear_system_state();
@@ -2306,7 +2320,7 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame,
}
// correct frames_to_key when lookahead queue is emptying
- if (cpi->lap_enabled) {
+ if (cpi->ppi->lap_enabled) {
correct_frames_to_key(cpi);
}
@@ -2336,8 +2350,8 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame,
AOMMIN(rc->max_gf_interval, max_gop_length);
i = is_intra_only;
- // get the determined gf group length from rc->gf_intervals
- while (i < rc->gf_intervals[rc->cur_gf_index]) {
+ // get the determined gf group length from p_rc->gf_intervals
+ while (i < p_rc->gf_intervals[p_rc->cur_gf_index]) {
// read in the next frame
if (EOF == input_stats(twopass, &next_frame)) break;
// Accumulate error score of frames in this gf group.
@@ -2360,7 +2374,7 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame,
i = is_intra_only;
input_stats(twopass, &next_frame);
- while (i < rc->gf_intervals[rc->cur_gf_index]) {
+ while (i < p_rc->gf_intervals[p_rc->cur_gf_index]) {
// read in the next frame
if (EOF == input_stats(twopass, &next_frame)) break;
@@ -2369,13 +2383,13 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame,
flash_detected = detect_flash(twopass, 0);
// accumulate stats for next frame
- accumulate_next_frame_stats(&next_frame, frame_info, flash_detected,
+ accumulate_next_frame_stats(&next_frame, flash_detected,
rc->frames_since_key, i, &gf_stats);
++i;
}
- i = rc->gf_intervals[rc->cur_gf_index];
+ i = p_rc->gf_intervals[p_rc->cur_gf_index];
// save the errs for the last frame
last_frame_stats.frame_coded_error = next_frame.coded_error;
@@ -2384,11 +2398,11 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame,
if (is_final_pass) {
rc->intervals_till_gf_calculate_due--;
- rc->cur_gf_index++;
+ p_rc->cur_gf_index++;
}
// Was the group length constrained by the requirement for a new KF?
- rc->constrained_gf_group = (i >= rc->frames_to_key) ? 1 : 0;
+ p_rc->constrained_gf_group = (i >= rc->frames_to_key) ? 1 : 0;
const int num_mbs = (oxcf->resize_cfg.resize_mode != RESIZE_NONE)
? cpi->initial_mbs
@@ -2407,32 +2421,34 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame,
gf_stats.zero_motion_accumulator > MIN_ZERO_MOTION &&
gf_stats.avg_sr_coded_error / num_mbs < MAX_SR_CODED_ERROR &&
gf_stats.avg_raw_err_stdev < MAX_RAW_ERR_VAR) {
- cpi->internal_altref_allowed = 0;
+ cpi->ppi->internal_altref_allowed = 0;
}
int use_alt_ref;
if (can_disable_arf) {
use_alt_ref =
!is_almost_static(gf_stats.zero_motion_accumulator,
- twopass->kf_zeromotion_pct, cpi->lap_enabled) &&
- rc->use_arf_in_this_kf_group && (i < gf_cfg->lag_in_frames) &&
+ twopass->kf_zeromotion_pct, cpi->ppi->lap_enabled) &&
+ p_rc->use_arf_in_this_kf_group && (i < gf_cfg->lag_in_frames) &&
(i >= MIN_GF_INTERVAL);
+ FIRSTPASS_STATS *total_stats = twopass->stats_buf_ctx->total_stats;
// TODO(urvang): Improve and use model for VBR, CQ etc as well.
- if (use_alt_ref && rc_cfg->mode == AOM_Q && rc_cfg->cq_level <= 200) {
+ if (use_alt_ref && use_ml_model_to_decide_flat_gop(rc_cfg) &&
+ !is_fp_stats_to_predict_flat_gop_invalid(total_stats)) {
aom_clear_system_state();
float features[21];
get_features_from_gf_stats(
&gf_stats, &first_frame_stats, &last_frame_stats, num_mbs,
- rc->constrained_gf_group, twopass->kf_zeromotion_pct, i, features);
+ p_rc->constrained_gf_group, twopass->kf_zeromotion_pct, i, features);
// Infer using ML model.
float score;
av1_nn_predict(features, &av1_use_flat_gop_nn_config, 1, &score);
use_alt_ref = (score <= 0.0);
}
} else {
- use_alt_ref =
- rc->use_arf_in_this_kf_group && (i < gf_cfg->lag_in_frames) && (i > 2);
+ use_alt_ref = p_rc->use_arf_in_this_kf_group &&
+ (i < gf_cfg->lag_in_frames) && (i > 2);
}
#define REDUCE_GF_LENGTH_THRESH 4
@@ -2443,7 +2459,7 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame,
// work well for certain other cases.
const int allow_gf_length_reduction =
((rc_cfg->mode == AOM_Q && rc_cfg->cq_level <= 128) ||
- !cpi->internal_altref_allowed) &&
+ !cpi->ppi->internal_altref_allowed) &&
!is_lossless_requested(rc_cfg);
if (allow_gf_length_reduction && use_alt_ref) {
@@ -2485,48 +2501,48 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame,
: AOMMAX(0, rc->frames_to_key - i);
// Calculate the boost for alt ref.
- rc->gfu_boost = av1_calc_arf_boost(
- twopass, rc, frame_info, alt_offset, forward_frames, ext_len,
- cpi->lap_enabled ? &rc->num_stats_used_for_gfu_boost : NULL,
- cpi->lap_enabled ? &rc->num_stats_required_for_gfu_boost : NULL);
+ p_rc->gfu_boost = av1_calc_arf_boost(
+ twopass, p_rc, rc, frame_info, alt_offset, forward_frames, ext_len,
+ &p_rc->num_stats_used_for_gfu_boost,
+ &p_rc->num_stats_required_for_gfu_boost, cpi->ppi->lap_enabled);
} else {
reset_fpf_position(twopass, start_pos);
gf_group->max_layer_depth_allowed = 0;
set_baseline_gf_interval(cpi, i, active_max_gf_interval, use_alt_ref,
is_final_pass);
- rc->gfu_boost = AOMMIN(
+ p_rc->gfu_boost = AOMMIN(
MAX_GF_BOOST,
- av1_calc_arf_boost(
- twopass, rc, frame_info, alt_offset, ext_len, 0,
- cpi->lap_enabled ? &rc->num_stats_used_for_gfu_boost : NULL,
- cpi->lap_enabled ? &rc->num_stats_required_for_gfu_boost : NULL));
+ av1_calc_arf_boost(twopass, p_rc, rc, frame_info, alt_offset, ext_len,
+ 0, &p_rc->num_stats_used_for_gfu_boost,
+ &p_rc->num_stats_required_for_gfu_boost,
+ cpi->ppi->lap_enabled));
}
#define LAST_ALR_BOOST_FACTOR 0.2f
- rc->arf_boost_factor = 1.0;
+ p_rc->arf_boost_factor = 1.0;
if (use_alt_ref && !is_lossless_requested(rc_cfg)) {
// Reduce the boost of altref in the last gf group
if (rc->frames_to_key - ext_len == REDUCE_GF_LENGTH_BY ||
rc->frames_to_key - ext_len == 0) {
- rc->arf_boost_factor = LAST_ALR_BOOST_FACTOR;
+ p_rc->arf_boost_factor = LAST_ALR_BOOST_FACTOR;
}
}
- rc->frames_till_gf_update_due = rc->baseline_gf_interval;
+ rc->frames_till_gf_update_due = p_rc->baseline_gf_interval;
// Reset the file position.
reset_fpf_position(twopass, start_pos);
- if (cpi->lap_enabled) {
+ if (cpi->ppi->lap_enabled) {
// Since we don't have enough stats to know the actual error of the
// gf group, we assume error of each frame to be equal to 1 and set
// the error of the group as baseline_gf_interval.
- gf_stats.gf_group_err = rc->baseline_gf_interval;
+ gf_stats.gf_group_err = p_rc->baseline_gf_interval;
}
// Calculate the bits to be allocated to the gf/arf group as a whole
gf_group_bits = calculate_total_gf_group_bits(cpi, gf_stats.gf_group_err);
- rc->gf_group_bits = gf_group_bits;
+ p_rc->gf_group_bits = gf_group_bits;
#if GROUP_ADAPTIVE_MAXQ
// Calculate an estimate of the maxq needed for the group.
@@ -2534,17 +2550,17 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame,
// where there could be significant overshoot than for easier
// sections where we do not wish to risk creating an overshoot
// of the allocated bit budget.
- if ((rc_cfg->mode != AOM_Q) && (rc->baseline_gf_interval > 1) &&
+ if ((rc_cfg->mode != AOM_Q) && (p_rc->baseline_gf_interval > 1) &&
is_final_pass) {
const int vbr_group_bits_per_frame =
- (int)(gf_group_bits / rc->baseline_gf_interval);
+ (int)(gf_group_bits / p_rc->baseline_gf_interval);
const double group_av_err =
- gf_stats.gf_group_raw_error / rc->baseline_gf_interval;
+ gf_stats.gf_group_raw_error / p_rc->baseline_gf_interval;
const double group_av_skip_pct =
- gf_stats.gf_group_skip_pct / rc->baseline_gf_interval;
+ gf_stats.gf_group_skip_pct / p_rc->baseline_gf_interval;
const double group_av_inactive_zone =
((gf_stats.gf_group_inactive_zone_rows * 2) /
- (rc->baseline_gf_interval * (double)cm->mi_params.mb_rows));
+ (p_rc->baseline_gf_interval * (double)cm->mi_params.mb_rows));
int tmp_q;
tmp_q = get_twopass_worst_quality(
@@ -2568,7 +2584,7 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame,
if (rc->frames_since_key != 0) {
twopass->section_intra_rating = calculate_section_intra_ratio(
start_pos, twopass->stats_buf_ctx->stats_in_end,
- rc->baseline_gf_interval);
+ p_rc->baseline_gf_interval);
}
av1_gop_bit_allocation(cpi, rc, gf_group, rc->frames_since_key == 0,
@@ -2577,12 +2593,12 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame,
frame_params->frame_type =
rc->frames_since_key == 0 ? KEY_FRAME : INTER_FRAME;
frame_params->show_frame =
- !(gf_group->update_type[gf_group->index] == ARF_UPDATE ||
- gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE);
+ !(gf_group->update_type[cpi->gf_frame_index] == ARF_UPDATE ||
+ gf_group->update_type[cpi->gf_frame_index] == INTNL_ARF_UPDATE);
// TODO(jingning): Generalize this condition.
if (is_final_pass) {
- cpi->gf_state.arf_gf_boost_lst = use_alt_ref;
+ cpi->ppi->gf_state.arf_gf_boost_lst = use_alt_ref;
// Reset rolling actual and target bits counters for ARF groups.
twopass->rolling_arf_group_target_bits = 1;
@@ -2597,12 +2613,13 @@ static void define_gf_group(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame,
void av1_gop_bit_allocation(const AV1_COMP *cpi, RATE_CONTROL *const rc,
GF_GROUP *gf_group, int is_key_frame, int use_arf,
int64_t gf_group_bits) {
+ PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
// Calculate the extra bits to be used for boosted frame(s)
#ifdef FIXED_ARF_BITS
int gf_arf_bits = (int)(ARF_BITS_FRACTION * gf_group_bits);
#else
int gf_arf_bits = calculate_boost_bits(
- rc->baseline_gf_interval - (rc->frames_since_key == 0), rc->gfu_boost,
+ p_rc->baseline_gf_interval - (rc->frames_since_key == 0), p_rc->gfu_boost,
gf_group_bits);
#endif
@@ -2610,8 +2627,8 @@ void av1_gop_bit_allocation(const AV1_COMP *cpi, RATE_CONTROL *const rc,
gf_group_bits, 1);
// Allocate bits to each of the frames in the GF group.
- allocate_gf_group_bits(gf_group, rc, gf_group_bits, gf_arf_bits, is_key_frame,
- use_arf);
+ allocate_gf_group_bits(gf_group, p_rc, rc, gf_group_bits, gf_arf_bits,
+ is_key_frame, use_arf);
}
// Minimum % intra coding observed in first pass (1.0 = 100%)
@@ -2786,10 +2803,10 @@ static int test_candidate_kf(TWO_PASS *twopass,
#define MIN_STATIC_KF_BOOST 5400 // Minimum boost for static KF interval
static int detect_app_forced_key(AV1_COMP *cpi) {
- if (cpi->oxcf.kf_cfg.fwd_kf_enabled) cpi->rc.next_is_fwd_key = 1;
+ if (cpi->oxcf.kf_cfg.fwd_kf_enabled) cpi->ppi->p_rc.next_is_fwd_key = 1;
int num_frames_to_app_forced_key = is_forced_keyframe_pending(
cpi->ppi->lookahead, cpi->ppi->lookahead->max_sz, cpi->compressor_stage);
- if (num_frames_to_app_forced_key != -1) cpi->rc.next_is_fwd_key = 0;
+ if (num_frames_to_app_forced_key != -1) cpi->ppi->p_rc.next_is_fwd_key = 0;
return num_frames_to_app_forced_key;
}
@@ -2799,16 +2816,16 @@ static int get_projected_kf_boost(AV1_COMP *cpi) {
* all stats needed for prior boost calculation are available.
* Hence projecting the prior boost is not needed in this cases.
*/
- if (cpi->rc.num_stats_used_for_kf_boost >= cpi->rc.frames_to_key)
- return cpi->rc.kf_boost;
+ if (cpi->ppi->p_rc.num_stats_used_for_kf_boost >= cpi->rc.frames_to_key)
+ return cpi->ppi->p_rc.kf_boost;
// Get the current tpl factor (number of frames = frames_to_key).
double tpl_factor = av1_get_kf_boost_projection_factor(cpi->rc.frames_to_key);
// Get the tpl factor when number of frames = num_stats_used_for_kf_boost.
- double tpl_factor_num_stats =
- av1_get_kf_boost_projection_factor(cpi->rc.num_stats_used_for_kf_boost);
+ double tpl_factor_num_stats = av1_get_kf_boost_projection_factor(
+ cpi->ppi->p_rc.num_stats_used_for_kf_boost);
int projected_kf_boost =
- (int)rint((tpl_factor * cpi->rc.kf_boost) / tpl_factor_num_stats);
+ (int)rint((tpl_factor * cpi->ppi->p_rc.kf_boost) / tpl_factor_num_stats);
return projected_kf_boost;
}
@@ -2828,8 +2845,9 @@ static int get_projected_kf_boost(AV1_COMP *cpi) {
static int define_kf_interval(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame,
double *kf_group_err,
int num_frames_to_detect_scenecut) {
- TWO_PASS *const twopass = &cpi->twopass;
+ TWO_PASS *const twopass = &cpi->ppi->twopass;
RATE_CONTROL *const rc = &cpi->rc;
+ PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
const AV1EncoderConfig *const oxcf = &cpi->oxcf;
const KeyFrameCfg *const kf_cfg = &oxcf->kf_cfg;
double recent_loop_decay[FRAMES_TO_CHECK_DECAY];
@@ -2874,7 +2892,7 @@ static int define_kf_interval(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame,
input_stats(twopass, this_frame);
// Provided that we are not at the end of the file...
- if ((cpi->rc.enable_scenecut_detection > 0) && kf_cfg->auto_key &&
+ if ((cpi->ppi->p_rc.enable_scenecut_detection > 0) && kf_cfg->auto_key &&
twopass->stats_in < twopass->stats_buf_ctx->stats_in_end) {
double loop_decay_rate;
@@ -2882,14 +2900,13 @@ static int define_kf_interval(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame,
if (frames_since_key >= kf_cfg->key_freq_min &&
test_candidate_kf(twopass, &last_frame, this_frame, twopass->stats_in,
frames_since_key, oxcf->rc_cfg.mode,
- cpi->rc.enable_scenecut_detection)) {
+ cpi->ppi->p_rc.enable_scenecut_detection)) {
scenecut_detected = 1;
break;
}
// How fast is the prediction quality decaying?
- loop_decay_rate =
- get_prediction_decay_rate(frame_info, twopass->stats_in);
+ loop_decay_rate = get_prediction_decay_rate(twopass->stats_in);
// We want to know something about the recent past... rather than
// as used elsewhere where we are concerned with decay in prediction
@@ -2909,7 +2926,7 @@ static int define_kf_interval(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame,
// In the case of transition followed by a static scene, the key frame
// could be a good predictor for the following frames, therefore we
// do not use an arf.
- rc->use_arf_in_this_kf_group = 0;
+ p_rc->use_arf_in_this_kf_group = 0;
break;
}
@@ -2928,14 +2945,14 @@ static int define_kf_interval(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame,
}
if (kf_group_err != NULL)
- rc->num_stats_used_for_kf_boost = num_stats_used_for_kf_boost;
+ p_rc->num_stats_used_for_kf_boost = num_stats_used_for_kf_boost;
- if (cpi->lap_enabled && !scenecut_detected)
+ if (cpi->ppi->lap_enabled && !scenecut_detected)
frames_to_key = num_frames_to_next_key;
if (!kf_cfg->fwd_kf_enabled || scenecut_detected ||
twopass->stats_in >= twopass->stats_buf_ctx->stats_in_end)
- rc->next_is_fwd_key = 0;
+ p_rc->next_is_fwd_key = 0;
return frames_to_key;
}
@@ -2964,9 +2981,9 @@ static double get_kf_group_avg_error(TWO_PASS *twopass,
static int64_t get_kf_group_bits(AV1_COMP *cpi, double kf_group_err,
double kf_group_avg_error) {
RATE_CONTROL *const rc = &cpi->rc;
- TWO_PASS *const twopass = &cpi->twopass;
+ TWO_PASS *const twopass = &cpi->ppi->twopass;
int64_t kf_group_bits;
- if (cpi->lap_enabled) {
+ if (cpi->ppi->lap_enabled) {
kf_group_bits = (int64_t)rc->frames_to_key * rc->avg_frame_bandwidth;
if (cpi->oxcf.rc_cfg.vbr_corpus_complexity_lap) {
const int num_mbs = (cpi->oxcf.resize_cfg.resize_mode != RESIZE_NONE)
@@ -2990,7 +3007,7 @@ static int64_t get_kf_group_bits(AV1_COMP *cpi, double kf_group_err,
static int calc_avg_stats(AV1_COMP *cpi, FIRSTPASS_STATS *avg_frame_stat) {
RATE_CONTROL *const rc = &cpi->rc;
- TWO_PASS *const twopass = &cpi->twopass;
+ TWO_PASS *const twopass = &cpi->ppi->twopass;
FIRSTPASS_STATS cur_frame;
av1_zero(cur_frame);
int num_frames = 0;
@@ -3039,7 +3056,7 @@ static double get_kf_boost_score(AV1_COMP *cpi, double kf_raw_err,
double *zero_motion_accumulator,
double *sr_accumulator, int use_avg_stat) {
RATE_CONTROL *const rc = &cpi->rc;
- TWO_PASS *const twopass = &cpi->twopass;
+ TWO_PASS *const twopass = &cpi->ppi->twopass;
FRAME_INFO *const frame_info = &cpi->frame_info;
FIRSTPASS_STATS frame_stat;
av1_zero(frame_stat);
@@ -3061,8 +3078,7 @@ static double get_kf_boost_score(AV1_COMP *cpi, double kf_raw_err,
// For the first frame in kf group, the second ref indicator is invalid.
if (i > 0) {
*zero_motion_accumulator =
- AOMMIN(*zero_motion_accumulator,
- get_zero_motion_factor(frame_info, &frame_stat));
+ AOMMIN(*zero_motion_accumulator, get_zero_motion_factor(&frame_stat));
} else {
*zero_motion_accumulator = frame_stat.pcnt_inter - frame_stat.pcnt_motion;
}
@@ -3102,8 +3118,9 @@ static double get_kf_boost_score(AV1_COMP *cpi, double kf_raw_err,
*/
static void find_next_key_frame(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
RATE_CONTROL *const rc = &cpi->rc;
- TWO_PASS *const twopass = &cpi->twopass;
- GF_GROUP *const gf_group = &cpi->gf_group;
+ PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
+ TWO_PASS *const twopass = &cpi->ppi->twopass;
+ GF_GROUP *const gf_group = &cpi->ppi->gf_group;
FRAME_INFO *const frame_info = &cpi->frame_info;
AV1_COMMON *const cm = &cpi->common;
CurrentFrame *const current_frame = &cm->current_frame;
@@ -3115,27 +3132,26 @@ static void find_next_key_frame(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
rc->frames_since_key = 0;
// Use arfs if possible.
- rc->use_arf_in_this_kf_group = is_altref_enabled(
+ p_rc->use_arf_in_this_kf_group = is_altref_enabled(
oxcf->gf_cfg.lag_in_frames, oxcf->gf_cfg.enable_auto_arf);
// Reset the GF group data structures.
av1_zero(*gf_group);
+ cpi->gf_frame_index = 0;
// KF is always a GF so clear frames till next gf counter.
rc->frames_till_gf_update_due = 0;
- rc->frames_to_key = 1;
-
if (has_no_stats_stage(cpi)) {
int num_frames_to_app_forced_key = detect_app_forced_key(cpi);
- rc->this_key_frame_forced =
+ p_rc->this_key_frame_forced =
current_frame->frame_number != 0 && rc->frames_to_key == 0;
if (num_frames_to_app_forced_key != -1)
rc->frames_to_key = num_frames_to_app_forced_key;
else
rc->frames_to_key = AOMMAX(1, kf_cfg->key_freq_max);
correct_frames_to_key(cpi);
- rc->kf_boost = DEFAULT_KF_BOOST;
+ p_rc->kf_boost = DEFAULT_KF_BOOST;
gf_group->update_type[0] = KF_UPDATE;
return;
}
@@ -3153,7 +3169,7 @@ static void find_next_key_frame(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
int64_t kf_group_bits_clipped = INT64_MAX;
// Is this a forced key frame by interval.
- rc->this_key_frame_forced = rc->next_key_frame_forced;
+ p_rc->this_key_frame_forced = p_rc->next_key_frame_forced;
twopass->kf_group_bits = 0; // Total bits available to kf group
twopass->kf_group_error_left = 0; // Group modified error score.
@@ -3169,7 +3185,7 @@ static void find_next_key_frame(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
else
rc->frames_to_key = kf_cfg->key_freq_max;
- if (cpi->lap_enabled) correct_frames_to_key(cpi);
+ if (cpi->ppi->lap_enabled) correct_frames_to_key(cpi);
// If there is a max kf interval set by the user we must obey it.
// We already breakout of the loop above at 2x max.
@@ -3191,28 +3207,29 @@ static void find_next_key_frame(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
calculate_modified_err(frame_info, twopass, oxcf, &tmp_frame);
if (EOF == input_stats(twopass, &tmp_frame)) break;
}
- rc->next_key_frame_forced = 1;
+ p_rc->next_key_frame_forced = 1;
} else if ((twopass->stats_in == twopass->stats_buf_ctx->stats_in_end &&
is_stat_consumption_stage_twopass(cpi)) ||
rc->frames_to_key >= kf_cfg->key_freq_max) {
- rc->next_key_frame_forced = 1;
+ p_rc->next_key_frame_forced = 1;
} else {
- rc->next_key_frame_forced = 0;
+ p_rc->next_key_frame_forced = 0;
}
- if (kf_cfg->fwd_kf_enabled) rc->next_is_fwd_key |= rc->next_key_frame_forced;
+ if (kf_cfg->fwd_kf_enabled)
+ p_rc->next_is_fwd_key |= p_rc->next_key_frame_forced;
// Special case for the last key frame of the file.
if (twopass->stats_in >= twopass->stats_buf_ctx->stats_in_end) {
// Accumulate kf group error.
kf_group_err +=
calculate_modified_err(frame_info, twopass, oxcf, this_frame);
- rc->next_is_fwd_key = 0;
+ p_rc->next_is_fwd_key = 0;
}
// Calculate the number of bits that should be assigned to the kf group.
if ((twopass->bits_left > 0 && twopass->modified_error_left > 0.0) ||
- (cpi->lap_enabled && oxcf->rc_cfg.mode != AOM_Q)) {
+ (cpi->ppi->lap_enabled && oxcf->rc_cfg.mode != AOM_Q)) {
// Maximum number of bits for a single normal frame (not key frame).
const int max_bits = frame_max_bits(rc, oxcf);
@@ -3237,7 +3254,7 @@ static void find_next_key_frame(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
}
twopass->kf_group_bits = AOMMAX(0, twopass->kf_group_bits);
- if (cpi->lap_enabled) {
+ if (cpi->ppi->lap_enabled) {
// In the case of single pass based on LAP, frames to key may have an
// inaccurate value, and hence should be clipped to an appropriate
// interval.
@@ -3268,17 +3285,17 @@ static void find_next_key_frame(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
twopass->section_intra_rating = calculate_section_intra_ratio(
start_position, twopass->stats_buf_ctx->stats_in_end, rc->frames_to_key);
- rc->kf_boost = (int)boost_score;
+ p_rc->kf_boost = (int)boost_score;
- if (cpi->lap_enabled) {
+ if (cpi->ppi->lap_enabled) {
if (oxcf->rc_cfg.mode == AOM_Q) {
- rc->kf_boost = get_projected_kf_boost(cpi);
+ p_rc->kf_boost = get_projected_kf_boost(cpi);
} else {
// TODO(any): Explore using average frame stats for AOM_Q as well.
boost_score = get_kf_boost_score(
cpi, kf_raw_err, &zero_motion_accumulator, &sr_accumulator, 1);
reset_fpf_position(twopass, start_position);
- rc->kf_boost += (int)boost_score;
+ p_rc->kf_boost += (int)boost_score;
}
}
@@ -3286,13 +3303,13 @@ static void find_next_key_frame(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// if the kf group is very short.
if ((zero_motion_accumulator > STATIC_KF_GROUP_FLOAT_THRESH) &&
(rc->frames_to_key > 8)) {
- rc->kf_boost = AOMMAX(rc->kf_boost, MIN_STATIC_KF_BOOST);
+ p_rc->kf_boost = AOMMAX(p_rc->kf_boost, MIN_STATIC_KF_BOOST);
} else {
// Apply various clamps for min and max boost
- rc->kf_boost = AOMMAX(rc->kf_boost, (rc->frames_to_key * 3));
- rc->kf_boost = AOMMAX(rc->kf_boost, MIN_KF_BOOST);
+ p_rc->kf_boost = AOMMAX(p_rc->kf_boost, (rc->frames_to_key * 3));
+ p_rc->kf_boost = AOMMAX(p_rc->kf_boost, MIN_KF_BOOST);
#ifdef STRICT_RC
- rc->kf_boost = AOMMIN(rc->kf_boost, MAX_KF_BOOST);
+ p_rc->kf_boost = AOMMIN(p_rc->kf_boost, MAX_KF_BOOST);
#endif
}
@@ -3301,9 +3318,10 @@ static void find_next_key_frame(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
// very high, we calculate the bits based on a clipped value of
// frames_to_key.
kf_bits = calculate_boost_bits(
- AOMMIN(rc->frames_to_key, frames_to_key_clipped) - 1, rc->kf_boost,
+ AOMMIN(rc->frames_to_key, frames_to_key_clipped) - 1, p_rc->kf_boost,
AOMMIN(twopass->kf_group_bits, kf_group_bits_clipped));
- // printf("kf boost = %d kf_bits = %d kf_zeromotion_pct = %d\n", rc->kf_boost,
+ // printf("kf boost = %d kf_bits = %d kf_zeromotion_pct = %d\n",
+ // p_rc->kf_boost,
// kf_bits, twopass->kf_zeromotion_pct);
kf_bits = adjust_boost_bits_for_target_level(cpi, rc, kf_bits,
twopass->kf_group_bits, 0);
@@ -3315,7 +3333,7 @@ static void find_next_key_frame(AV1_COMP *cpi, FIRSTPASS_STATS *this_frame) {
gf_group->update_type[0] = KF_UPDATE;
// Note the total error score of the kf group minus the key frame itself.
- if (cpi->lap_enabled)
+ if (cpi->ppi->lap_enabled)
// As we don't have enough stats to know the actual error of the group,
// we assume the complexity of each frame to be equal to 1, and set the
// error as the number of frames in the group(minus the keyframe).
@@ -3335,7 +3353,7 @@ static int is_skippable_frame(const AV1_COMP *cpi) {
// first pass, and so do its previous and forward frames, then this frame
// can be skipped for partition check, and the partition size is assigned
// according to the variance
- const TWO_PASS *const twopass = &cpi->twopass;
+ const TWO_PASS *const twopass = &cpi->ppi->twopass;
return (!frame_is_intra_only(&cpi->common) &&
twopass->stats_in - 2 > twopass->stats_buf_ctx->stats_in_start &&
@@ -3358,34 +3376,78 @@ static int get_section_target_bandwidth(AV1_COMP *cpi) {
AV1_COMMON *const cm = &cpi->common;
CurrentFrame *const current_frame = &cm->current_frame;
RATE_CONTROL *const rc = &cpi->rc;
- TWO_PASS *const twopass = &cpi->twopass;
+ TWO_PASS *const twopass = &cpi->ppi->twopass;
int section_target_bandwidth;
const int frames_left = (int)(twopass->stats_buf_ctx->total_stats->count -
current_frame->frame_number);
- if (cpi->lap_enabled)
+ if (cpi->ppi->lap_enabled)
section_target_bandwidth = (int)rc->avg_frame_bandwidth;
else
section_target_bandwidth = (int)(twopass->bits_left / frames_left);
return section_target_bandwidth;
}
+static INLINE void set_twopass_params_based_on_fp_stats(
+ const AV1_COMP *cpi, const FIRSTPASS_STATS *this_frame_ptr) {
+ if (this_frame_ptr == NULL) return;
+
+ TWO_PASS *const twopass = &cpi->ppi->twopass;
+ const int num_mbs = (cpi->oxcf.resize_cfg.resize_mode != RESIZE_NONE)
+ ? cpi->initial_mbs
+ : cpi->common.mi_params.MBs;
+ // The multiplication by 256 reverses a scaling factor of (>> 8)
+ // applied when combining MB error values for the frame.
+ twopass->mb_av_energy = log((this_frame_ptr->intra_error / num_mbs) + 1.0);
+
+ const FIRSTPASS_STATS *const total_stats =
+ twopass->stats_buf_ctx->total_stats;
+ if (is_fp_wavelet_energy_invalid(total_stats) == 0) {
+ twopass->frame_avg_haar_energy =
+ log((this_frame_ptr->frame_avg_wavelet_energy / num_mbs) + 1.0);
+ }
+
+ // Set the frame content type flag.
+ if (this_frame_ptr->intra_skip_pct >= FC_ANIMATION_THRESH)
+ twopass->fr_content_type = FC_GRAPHICS_ANIMATION;
+ else
+ twopass->fr_content_type = FC_NORMAL;
+}
+
static void process_first_pass_stats(AV1_COMP *cpi,
FIRSTPASS_STATS *this_frame) {
AV1_COMMON *const cm = &cpi->common;
CurrentFrame *const current_frame = &cm->current_frame;
RATE_CONTROL *const rc = &cpi->rc;
- TWO_PASS *const twopass = &cpi->twopass;
+ TWO_PASS *const twopass = &cpi->ppi->twopass;
+ FIRSTPASS_STATS *total_stats = twopass->stats_buf_ctx->total_stats;
+
+ if (current_frame->frame_number == 0) {
+ const GFConfig *const gf_cfg = &cpi->oxcf.gf_cfg;
+ const RateControlCfg *const rc_cfg = &cpi->oxcf.rc_cfg;
+ if (use_ml_model_to_decide_flat_gop(rc_cfg) && can_disable_altref(gf_cfg) &&
+ is_fp_stats_to_predict_flat_gop_invalid(total_stats)) {
+ // warn(
+ // "First pass stats required in the ML model to predict a flat GOP "
+ // "structure is invalid. Continuing encoding by disabling the ML "
+ // "model.\n");
+ // The first pass statistics like tr_coded_error, pcnt_third_ref,
+ // frame_avg_wavelet_energy are invalid as their calculations were
+ // skipped in the first pass of encoding. As these stats are required
+ // in the ML model to predict a flat GOP structure, the ML model would be
+ // disabled. This case arises when the encode configuration used in first
+ // pass encoding is different from second pass encoding.
+ }
+ }
if (cpi->oxcf.rc_cfg.mode != AOM_Q && current_frame->frame_number == 0 &&
- cpi->gf_group.index == 0 && cpi->twopass.stats_buf_ctx->total_stats &&
- cpi->twopass.stats_buf_ctx->total_left_stats) {
- if (cpi->lap_enabled) {
+ cpi->gf_frame_index == 0 && total_stats &&
+ cpi->ppi->twopass.stats_buf_ctx->total_left_stats) {
+ if (cpi->ppi->lap_enabled) {
/*
* Accumulate total_stats using available limited number of stats,
* and assign it to total_left_stats.
*/
- *cpi->twopass.stats_buf_ctx->total_left_stats =
- *cpi->twopass.stats_buf_ctx->total_stats;
+ *cpi->ppi->twopass.stats_buf_ctx->total_left_stats = *total_stats;
}
// Special case code for first frame.
const int section_target_bandwidth = get_section_target_bandwidth(cpi);
@@ -3406,43 +3468,25 @@ static void process_first_pass_stats(AV1_COMP *cpi,
rc->active_worst_quality = tmp_q;
rc->ni_av_qi = tmp_q;
rc->last_q[INTER_FRAME] = tmp_q;
- rc->avg_q = av1_convert_qindex_to_q(tmp_q, cm->seq_params.bit_depth);
+ rc->avg_q = av1_convert_qindex_to_q(tmp_q, cm->seq_params->bit_depth);
rc->avg_frame_qindex[INTER_FRAME] = tmp_q;
rc->last_q[KEY_FRAME] = (tmp_q + cpi->oxcf.rc_cfg.best_allowed_q) / 2;
rc->avg_frame_qindex[KEY_FRAME] = rc->last_q[KEY_FRAME];
}
- int err = 0;
- if (cpi->lap_enabled) {
- err = input_stats_lap(twopass, this_frame);
+ if (cpi->ppi->lap_enabled) {
+ input_stats_lap(twopass, this_frame);
} else {
- err = input_stats(twopass, this_frame);
- }
- if (err == EOF) return;
-
- {
- const int num_mbs = (cpi->oxcf.resize_cfg.resize_mode != RESIZE_NONE)
- ? cpi->initial_mbs
- : cm->mi_params.MBs;
- // The multiplication by 256 reverses a scaling factor of (>> 8)
- // applied when combining MB error values for the frame.
- twopass->mb_av_energy = log((this_frame->intra_error / num_mbs) + 1.0);
- twopass->frame_avg_haar_energy =
- log((this_frame->frame_avg_wavelet_energy / num_mbs) + 1.0);
+ input_stats(twopass, this_frame);
}
-
- // Set the frame content type flag.
- if (this_frame->intra_skip_pct >= FC_ANIMATION_THRESH)
- twopass->fr_content_type = FC_GRAPHICS_ANIMATION;
- else
- twopass->fr_content_type = FC_NORMAL;
+ set_twopass_params_based_on_fp_stats(cpi, this_frame);
}
static void setup_target_rate(AV1_COMP *cpi) {
RATE_CONTROL *const rc = &cpi->rc;
- GF_GROUP *const gf_group = &cpi->gf_group;
+ GF_GROUP *const gf_group = &cpi->ppi->gf_group;
- int target_rate = gf_group->bit_allocation[gf_group->index];
+ int target_rate = gf_group->bit_allocation[cpi->gf_frame_index];
if (has_no_stats_stage(cpi)) {
av1_rc_set_frame_target(cpi, target_rate, cpi->common.width,
@@ -3452,24 +3496,160 @@ static void setup_target_rate(AV1_COMP *cpi) {
rc->base_frame_target = target_rate;
}
+static void mark_flashes(FIRSTPASS_STATS *first_stats,
+ FIRSTPASS_STATS *last_stats) {
+ FIRSTPASS_STATS *this_stats = first_stats, *next_stats;
+ while (this_stats < last_stats - 1) {
+ next_stats = this_stats + 1;
+ if (next_stats->pcnt_second_ref > next_stats->pcnt_inter &&
+ next_stats->pcnt_second_ref >= 0.5) {
+ this_stats->is_flash = 1;
+ } else {
+ this_stats->is_flash = 0;
+ }
+ this_stats = next_stats;
+ }
+ // We always treat the last one as none flash.
+ if (last_stats - 1 >= first_stats) {
+ (last_stats - 1)->is_flash = 0;
+ }
+}
+
+// Estimate the noise variance of each frame from the first pass stats
+static void estimate_noise(FIRSTPASS_STATS *first_stats,
+ FIRSTPASS_STATS *last_stats) {
+ FIRSTPASS_STATS *this_stats, *next_stats;
+ double C1, C2, C3, noise;
+ int count = 0;
+ for (this_stats = first_stats + 2; this_stats < last_stats; this_stats++) {
+ this_stats->noise_var = 0.0;
+ // flashes tend to have high correlation of innovations, so ignore them.
+ if (this_stats->is_flash || (this_stats - 1)->is_flash ||
+ (this_stats - 2)->is_flash)
+ continue;
+
+ C1 = (this_stats - 1)->intra_error *
+ (this_stats->intra_error - this_stats->coded_error);
+ C2 = (this_stats - 2)->intra_error *
+ ((this_stats - 1)->intra_error - (this_stats - 1)->coded_error);
+ C3 = (this_stats - 2)->intra_error *
+ (this_stats->intra_error - this_stats->sr_coded_error);
+ if (C1 <= 0 || C2 <= 0 || C3 <= 0) continue;
+ C1 = sqrt(C1);
+ C2 = sqrt(C2);
+ C3 = sqrt(C3);
+
+ noise = (this_stats - 1)->intra_error - C1 * C2 / C3;
+ noise = AOMMAX(noise, 0.01);
+ this_stats->noise_var = noise;
+ count++;
+ }
+
+ // Copy noise from the neighbor if the noise value is not trustworthy
+ for (this_stats = first_stats + 2; this_stats < last_stats; this_stats++) {
+ if (this_stats->is_flash || (this_stats - 1)->is_flash ||
+ (this_stats - 2)->is_flash)
+ continue;
+ if (this_stats->noise_var < 1.0) {
+ int found = 0;
+ // TODO(bohanli): consider expanding to two directions at the same time
+ for (next_stats = this_stats + 1; next_stats < last_stats; next_stats++) {
+ if (next_stats->is_flash || (next_stats - 1)->is_flash ||
+ (next_stats - 2)->is_flash || next_stats->noise_var < 1.0)
+ continue;
+ found = 1;
+ this_stats->noise_var = next_stats->noise_var;
+ break;
+ }
+ if (found) continue;
+ for (next_stats = this_stats - 1; next_stats >= first_stats + 2;
+ next_stats--) {
+ if (next_stats->is_flash || (next_stats - 1)->is_flash ||
+ (next_stats - 2)->is_flash || next_stats->noise_var < 1.0)
+ continue;
+ this_stats->noise_var = next_stats->noise_var;
+ break;
+ }
+ }
+ }
+
+ // copy the noise if this is a flash
+ for (this_stats = first_stats + 2; this_stats < last_stats; this_stats++) {
+ if (this_stats->is_flash || (this_stats - 1)->is_flash ||
+ (this_stats - 2)->is_flash) {
+ int found = 0;
+ for (next_stats = this_stats + 1; next_stats < last_stats; next_stats++) {
+ if (next_stats->is_flash || (next_stats - 1)->is_flash ||
+ (next_stats - 2)->is_flash)
+ continue;
+ found = 1;
+ this_stats->noise_var = next_stats->noise_var;
+ break;
+ }
+ if (found) continue;
+ for (next_stats = this_stats - 1; next_stats >= first_stats + 2;
+ next_stats--) {
+ if (next_stats->is_flash || (next_stats - 1)->is_flash ||
+ (next_stats - 2)->is_flash)
+ continue;
+ this_stats->noise_var = next_stats->noise_var;
+ break;
+ }
+ }
+ }
+
+ // if we are at the first 2 frames, copy the noise
+ for (this_stats = first_stats;
+ this_stats < first_stats + 2 && (first_stats + 2) < last_stats;
+ this_stats++) {
+ this_stats->noise_var = (first_stats + 2)->noise_var;
+ }
+}
+
+// Estimate correlation coefficient of each frame with its previous frame.
+static void estimate_coeff(FIRSTPASS_STATS *first_stats,
+ FIRSTPASS_STATS *last_stats) {
+ FIRSTPASS_STATS *this_stats;
+ for (this_stats = first_stats + 1; this_stats < last_stats; this_stats++) {
+ const double C =
+ sqrt(AOMMAX((this_stats - 1)->intra_error *
+ (this_stats->intra_error - this_stats->coded_error),
+ 0.001));
+ const double cor_coeff =
+ C /
+ AOMMAX((this_stats - 1)->intra_error - this_stats->noise_var, 0.001);
+
+ this_stats->cor_coeff =
+ cor_coeff *
+ sqrt(AOMMAX((this_stats - 1)->intra_error - this_stats->noise_var,
+ 0.001) /
+ AOMMAX(this_stats->intra_error - this_stats->noise_var, 0.001));
+ // clip correlation coefficient.
+ this_stats->cor_coeff = AOMMIN(AOMMAX(this_stats->cor_coeff, 0), 1);
+ }
+ first_stats->cor_coeff = 1.0;
+}
+
void av1_get_second_pass_params(AV1_COMP *cpi,
EncodeFrameParams *const frame_params,
const EncodeFrameInput *const frame_input,
unsigned int frame_flags) {
RATE_CONTROL *const rc = &cpi->rc;
- TWO_PASS *const twopass = &cpi->twopass;
- GF_GROUP *const gf_group = &cpi->gf_group;
+ PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
+ TWO_PASS *const twopass = &cpi->ppi->twopass;
+ GF_GROUP *const gf_group = &cpi->ppi->gf_group;
const AV1EncoderConfig *const oxcf = &cpi->oxcf;
const FIRSTPASS_STATS *const start_pos = twopass->stats_in;
if (is_stat_consumption_stage(cpi) && !twopass->stats_in) return;
- const int update_type = gf_group->update_type[gf_group->index];
- frame_params->frame_type = gf_group->frame_type[gf_group->index];
+ assert(twopass->stats_in != NULL);
+ const int update_type = gf_group->update_type[cpi->gf_frame_index];
+ frame_params->frame_type = gf_group->frame_type[cpi->gf_frame_index];
- if (gf_group->index < gf_group->size && !(frame_flags & FRAMEFLAGS_KEY)) {
- assert(gf_group->index < gf_group->size);
+ if (cpi->gf_frame_index < gf_group->size && !(frame_flags & FRAMEFLAGS_KEY)) {
+ assert(cpi->gf_frame_index < gf_group->size);
setup_target_rate(cpi);
@@ -3481,6 +3661,9 @@ void av1_get_second_pass_params(AV1_COMP *cpi,
if (cpi->sf.part_sf.allow_partition_search_skip && oxcf->pass == 2) {
cpi->partition_search_skippable_frame = is_skippable_frame(cpi);
}
+ const FIRSTPASS_STATS *const this_frame_ptr = read_frame_stats(
+ twopass, gf_group->arf_src_offset[cpi->gf_frame_index]);
+ set_twopass_params_based_on_fp_stats(cpi, this_frame_ptr);
return;
}
}
@@ -3493,7 +3676,7 @@ void av1_get_second_pass_params(AV1_COMP *cpi,
av1_zero(this_frame);
// call above fn
if (is_stat_consumption_stage(cpi)) {
- if (gf_group->index < gf_group->size || rc->frames_to_key == 0)
+ if (cpi->gf_frame_index < gf_group->size || rc->frames_to_key == 0)
process_first_pass_stats(cpi, &this_frame);
} else {
rc->active_worst_quality = oxcf->rc_cfg.cq_level;
@@ -3504,7 +3687,7 @@ void av1_get_second_pass_params(AV1_COMP *cpi,
this_frame_copy = this_frame;
int is_overlay_forward_kf =
rc->frames_to_key == 0 &&
- gf_group->update_type[gf_group->index] == OVERLAY_UPDATE;
+ gf_group->update_type[cpi->gf_frame_index] == OVERLAY_UPDATE;
if (rc->frames_to_key <= 0 && !is_overlay_forward_kf) {
assert(rc->frames_to_key >= -1);
// Define next KF group and assign bits to it.
@@ -3554,12 +3737,12 @@ void av1_get_second_pass_params(AV1_COMP *cpi,
}
// Define a new GF/ARF group. (Should always enter here for key frames).
- if (gf_group->index == gf_group->size) {
+ if (cpi->gf_frame_index == gf_group->size) {
assert(cpi->common.current_frame.frame_number == 0 ||
- gf_group->index == gf_group->size);
+ cpi->gf_frame_index == gf_group->size);
const FIRSTPASS_STATS *const start_position = twopass->stats_in;
- if (cpi->lap_enabled && cpi->rc.enable_scenecut_detection) {
+ if (cpi->ppi->lap_enabled && cpi->ppi->p_rc.enable_scenecut_detection) {
int num_frames_to_detect_scenecut, frames_to_key;
num_frames_to_detect_scenecut = MAX_GF_LENGTH_LAP + 1;
frames_to_key = define_kf_interval(cpi, &this_frame, NULL,
@@ -3578,41 +3761,45 @@ void av1_get_second_pass_params(AV1_COMP *cpi,
: MAX_GF_LENGTH_LAP;
// Identify regions if needed.
+ // TODO(bohanli): identify regions for all stats available.
if (rc->frames_since_key == 0 || rc->frames_since_key == 1 ||
- (rc->frames_till_regions_update - rc->frames_since_key <
+ (p_rc->frames_till_regions_update - rc->frames_since_key <
rc->frames_to_key &&
- rc->frames_till_regions_update - rc->frames_since_key <
+ p_rc->frames_till_regions_update - rc->frames_since_key <
max_gop_length + 1)) {
- int is_first_stat =
- twopass->stats_in == twopass->stats_buf_ctx->stats_in_start;
- const FIRSTPASS_STATS *stats_start = twopass->stats_in + is_first_stat;
- // offset of stats_start from the current frame
- int offset = is_first_stat || (rc->frames_since_key == 0);
- // offset of the region indices from the previous key frame
- rc->regions_offset = rc->frames_since_key;
// how many frames we can analyze from this frame
- int rest_frames = AOMMIN(rc->frames_to_key + rc->next_is_fwd_key,
+ int rest_frames = AOMMIN(rc->frames_to_key + p_rc->next_is_fwd_key,
MAX_FIRSTPASS_ANALYSIS_FRAMES);
- rest_frames =
- AOMMIN(rest_frames,
- (int)(twopass->stats_buf_ctx->stats_in_end - stats_start + 1) +
- offset);
-
- rc->frames_till_regions_update = rest_frames;
-
- identify_regions(stats_start, rest_frames - offset, offset, rc->regions,
- &rc->num_regions, rc->cor_coeff);
+ rest_frames = AOMMIN(
+ rest_frames, (int)(twopass->stats_buf_ctx->stats_in_end -
+ twopass->stats_in + (rc->frames_since_key == 0)));
+ p_rc->frames_till_regions_update = rest_frames;
+
+ if (cpi->ppi->lap_enabled) {
+ mark_flashes(twopass->stats_buf_ctx->stats_in_start,
+ twopass->stats_buf_ctx->stats_in_end);
+ estimate_noise(twopass->stats_buf_ctx->stats_in_start,
+ twopass->stats_buf_ctx->stats_in_end);
+ estimate_coeff(twopass->stats_buf_ctx->stats_in_start,
+ twopass->stats_buf_ctx->stats_in_end);
+ identify_regions(twopass->stats_in, rest_frames,
+ (rc->frames_since_key == 0), p_rc->regions,
+ &p_rc->num_regions);
+ } else {
+ identify_regions(twopass->stats_in - (rc->frames_since_key == 0),
+ rest_frames, 0, p_rc->regions, &p_rc->num_regions);
+ }
}
int cur_region_idx =
- find_regions_index(rc->regions, rc->num_regions,
- rc->frames_since_key - rc->regions_offset);
+ find_regions_index(p_rc->regions, p_rc->num_regions,
+ rc->frames_since_key - p_rc->regions_offset);
if ((cur_region_idx >= 0 &&
- rc->regions[cur_region_idx].type == SCENECUT_REGION) ||
+ p_rc->regions[cur_region_idx].type == SCENECUT_REGION) ||
rc->frames_since_key == 0) {
// If we start from a scenecut, then the last GOP's arf boost is not
// needed for this GOP.
- cpi->gf_state.arf_gf_boost_lst = 0;
+ cpi->ppi->gf_state.arf_gf_boost_lst = 0;
}
// TODO(jingning): Resoleve the redundant calls here.
@@ -3621,62 +3808,49 @@ void av1_get_second_pass_params(AV1_COMP *cpi,
}
if (max_gop_length > 16 && oxcf->algo_cfg.enable_tpl_model &&
- !cpi->sf.tpl_sf.disable_gop_length_decision) {
- int this_idx = rc->frames_since_key + rc->gf_intervals[rc->cur_gf_index] -
- rc->regions_offset - 1;
+ cpi->sf.tpl_sf.gop_length_decision_method != 3) {
+ int this_idx = rc->frames_since_key +
+ p_rc->gf_intervals[p_rc->cur_gf_index] -
+ p_rc->regions_offset - 1;
int this_region =
- find_regions_index(rc->regions, rc->num_regions, this_idx);
+ find_regions_index(p_rc->regions, p_rc->num_regions, this_idx);
int next_region =
- find_regions_index(rc->regions, rc->num_regions, this_idx + 1);
+ find_regions_index(p_rc->regions, p_rc->num_regions, this_idx + 1);
int is_last_scenecut =
- (rc->gf_intervals[rc->cur_gf_index] >= rc->frames_to_key ||
- rc->regions[this_region].type == SCENECUT_REGION ||
- rc->regions[next_region].type == SCENECUT_REGION);
- int ori_gf_int = rc->gf_intervals[rc->cur_gf_index];
+ (p_rc->gf_intervals[p_rc->cur_gf_index] >= rc->frames_to_key ||
+ p_rc->regions[this_region].type == SCENECUT_REGION ||
+ p_rc->regions[next_region].type == SCENECUT_REGION);
+ int ori_gf_int = p_rc->gf_intervals[p_rc->cur_gf_index];
- if (rc->gf_intervals[rc->cur_gf_index] > 16) {
+ if (p_rc->gf_intervals[p_rc->cur_gf_index] > 16 &&
+ rc->min_gf_interval <= 16) {
// The calculate_gf_length function is previously used with
// max_gop_length = 32 with look-ahead gf intervals.
define_gf_group(cpi, &this_frame, frame_params, max_gop_length, 0);
this_frame = this_frame_copy;
- int is_temporal_filter_enabled =
- (rc->frames_since_key > 0 && gf_group->arf_index > -1);
- if (is_temporal_filter_enabled) {
- int arf_src_index = gf_group->arf_src_offset[gf_group->arf_index];
- FRAME_UPDATE_TYPE arf_update_type =
- gf_group->update_type[gf_group->arf_index];
- int is_forward_keyframe = 0;
- av1_temporal_filter(cpi, arf_src_index, arf_update_type,
- is_forward_keyframe, NULL);
- aom_extend_frame_borders(&cpi->alt_ref_buffer,
- av1_num_planes(&cpi->common));
- }
- if (!av1_tpl_setup_stats(cpi, 1, frame_params, frame_input)) {
- // Tpl decides that a shorter gf interval is better.
+
+ if (is_shorter_gf_interval_better(cpi, frame_params, frame_input)) {
+ // A shorter gf interval is better.
// TODO(jingning): Remove redundant computations here.
max_gop_length = 16;
calculate_gf_length(cpi, max_gop_length, 1);
if (is_last_scenecut &&
- (ori_gf_int - rc->gf_intervals[rc->cur_gf_index] < 4)) {
- rc->gf_intervals[rc->cur_gf_index] = ori_gf_int;
+ (ori_gf_int - p_rc->gf_intervals[p_rc->cur_gf_index] < 4)) {
+ p_rc->gf_intervals[p_rc->cur_gf_index] = ori_gf_int;
}
- } else {
- // Tpl stats is reused only when the ARF frame is temporally filtered
- if (is_temporal_filter_enabled)
- cpi->tpl_data.skip_tpl_setup_stats = 1;
}
}
}
define_gf_group(cpi, &this_frame, frame_params, max_gop_length, 0);
- if (gf_group->update_type[gf_group->index] != ARF_UPDATE &&
+ if (gf_group->update_type[cpi->gf_frame_index] != ARF_UPDATE &&
rc->frames_since_key > 0)
process_first_pass_stats(cpi, &this_frame);
define_gf_group(cpi, &this_frame, frame_params, max_gop_length, 1);
- rc->frames_till_gf_update_due = rc->baseline_gf_interval;
- assert(gf_group->index == 0);
+ rc->frames_till_gf_update_due = p_rc->baseline_gf_interval;
+ assert(cpi->gf_frame_index == 0);
#if ARF_STATS_OUTPUT
{
FILE *fpfile;
@@ -3684,18 +3858,22 @@ void av1_get_second_pass_params(AV1_COMP *cpi,
++arf_count;
fprintf(fpfile, "%10d %10d %10d %10d %10d\n",
cpi->common.current_frame.frame_number,
- rc->frames_till_gf_update_due, rc->kf_boost, arf_count,
- rc->gfu_boost);
+ rc->frames_till_gf_update_due, cpi->ppi->p_rc.kf_boost, arf_count,
+ p_rc->gfu_boost);
fclose(fpfile);
}
#endif
}
- assert(gf_group->index < gf_group->size);
+ assert(cpi->gf_frame_index < gf_group->size);
- if (gf_group->update_type[gf_group->index] == ARF_UPDATE ||
- gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE) {
+ if (gf_group->update_type[cpi->gf_frame_index] == ARF_UPDATE ||
+ gf_group->update_type[cpi->gf_frame_index] == INTNL_ARF_UPDATE) {
reset_fpf_position(twopass, start_pos);
+
+ const FIRSTPASS_STATS *const this_frame_ptr = read_frame_stats(
+ twopass, gf_group->arf_src_offset[cpi->gf_frame_index]);
+ set_twopass_params_based_on_fp_stats(cpi, this_frame_ptr);
} else {
// Update the total stats remaining structure.
if (twopass->stats_buf_ctx->total_left_stats)
@@ -3703,7 +3881,7 @@ void av1_get_second_pass_params(AV1_COMP *cpi,
&this_frame_copy);
}
- frame_params->frame_type = gf_group->frame_type[gf_group->index];
+ frame_params->frame_type = gf_group->frame_type[cpi->gf_frame_index];
// Do the firstpass stats indicate that this frame is skippable for the
// partition search?
@@ -3716,13 +3894,20 @@ void av1_get_second_pass_params(AV1_COMP *cpi,
void av1_init_second_pass(AV1_COMP *cpi) {
const AV1EncoderConfig *const oxcf = &cpi->oxcf;
- TWO_PASS *const twopass = &cpi->twopass;
+ TWO_PASS *const twopass = &cpi->ppi->twopass;
FRAME_INFO *const frame_info = &cpi->frame_info;
double frame_rate;
FIRSTPASS_STATS *stats;
if (!twopass->stats_buf_ctx->stats_in_end) return;
+ mark_flashes(twopass->stats_buf_ctx->stats_in_start,
+ twopass->stats_buf_ctx->stats_in_end);
+ estimate_noise(twopass->stats_buf_ctx->stats_in_start,
+ twopass->stats_buf_ctx->stats_in_end);
+ estimate_coeff(twopass->stats_buf_ctx->stats_in_start,
+ twopass->stats_buf_ctx->stats_in_end);
+
stats = twopass->stats_buf_ctx->total_stats;
*stats = *twopass->stats_buf_ctx->stats_in_end;
@@ -3779,7 +3964,7 @@ void av1_init_second_pass(AV1_COMP *cpi) {
}
void av1_init_single_pass_lap(AV1_COMP *cpi) {
- TWO_PASS *const twopass = &cpi->twopass;
+ TWO_PASS *const twopass = &cpi->ppi->twopass;
if (!twopass->stats_buf_ctx->stats_in_end) return;
@@ -3813,7 +3998,7 @@ void av1_init_single_pass_lap(AV1_COMP *cpi) {
#define MINQ_ADJ_LIMIT_CQ 20
#define HIGH_UNDERSHOOT_RATIO 2
void av1_twopass_postencode_update(AV1_COMP *cpi) {
- TWO_PASS *const twopass = &cpi->twopass;
+ TWO_PASS *const twopass = &cpi->ppi->twopass;
RATE_CONTROL *const rc = &cpi->rc;
const RateControlCfg *const rc_cfg = &cpi->oxcf.rc_cfg;
@@ -3840,7 +4025,8 @@ void av1_twopass_postencode_update(AV1_COMP *cpi) {
// Update the active best quality pyramid.
if (!rc->is_src_frame_alt_ref) {
- const int pyramid_level = cpi->gf_group.layer_depth[cpi->gf_group.index];
+ const int pyramid_level =
+ cpi->ppi->gf_group.layer_depth[cpi->gf_frame_index];
int i;
for (i = pyramid_level; i <= MAX_ARF_LAYERS; ++i) {
rc->active_best_quality[i] = cpi->common.quant_params.base_qindex;
@@ -3871,9 +4057,9 @@ void av1_twopass_postencode_update(AV1_COMP *cpi) {
(double)twopass->rolling_arf_group_target_bits,
twopass->bpm_factor,
av1_convert_qindex_to_q(cpi->common.quant_params.base_qindex,
- cm->seq_params.bit_depth),
+ cm->seq_params->bit_depth),
av1_convert_qindex_to_q(rc->active_worst_quality,
- cm->seq_params.bit_depth));
+ cm->seq_params->bit_depth));
fclose(fpfile);
}
#endif
diff --git a/third_party/libaom/source/libaom/av1/encoder/pickcdef.c b/third_party/libaom/source/libaom/av1/encoder/pickcdef.c
index 55e466d601..f9758343dc 100644
--- a/third_party/libaom/source/libaom/av1/encoder/pickcdef.c
+++ b/third_party/libaom/source/libaom/av1/encoder/pickcdef.c
@@ -454,13 +454,13 @@ static AOM_INLINE void cdef_params_init(const YV12_BUFFER_CONFIG *frame,
(mi_params->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
cdef_search_ctx->nhfb =
(mi_params->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
- cdef_search_ctx->coeff_shift = AOMMAX(cm->seq_params.bit_depth - 8, 0);
+ cdef_search_ctx->coeff_shift = AOMMAX(cm->seq_params->bit_depth - 8, 0);
cdef_search_ctx->damping = 3 + (cm->quant_params.base_qindex >> 6);
cdef_search_ctx->total_strengths = nb_cdef_strengths[pick_method];
cdef_search_ctx->num_planes = num_planes;
cdef_search_ctx->pick_method = pick_method;
cdef_search_ctx->sb_count = 0;
- av1_setup_dst_planes(xd->plane, cm->seq_params.sb_size, frame, 0, 0, 0,
+ av1_setup_dst_planes(xd->plane, cm->seq_params->sb_size, frame, 0, 0, 0,
num_planes);
// Initialize plane wise information.
for (int pli = 0; pli < num_planes; pli++) {
@@ -478,7 +478,7 @@ static AOM_INLINE void cdef_params_init(const YV12_BUFFER_CONFIG *frame,
}
// Function pointer initialization.
#if CONFIG_AV1_HIGHBITDEPTH
- if (cm->seq_params.use_highbitdepth) {
+ if (cm->seq_params->use_highbitdepth) {
cdef_search_ctx->copy_fn = copy_sb16_16_highbd;
cdef_search_ctx->compute_cdef_dist_fn = compute_cdef_dist_highbd;
} else {
@@ -491,13 +491,20 @@ static AOM_INLINE void cdef_params_init(const YV12_BUFFER_CONFIG *frame,
#endif
}
-static void pick_cdef_from_qp(AV1_COMMON *const cm) {
- const int bd = cm->seq_params.bit_depth;
+static void pick_cdef_from_qp(AV1_COMMON *const cm, int skip_cdef,
+ int frames_since_key) {
+ const int bd = cm->seq_params->bit_depth;
const int q =
av1_ac_quant_QTX(cm->quant_params.base_qindex, 0, bd) >> (bd - 8);
CdefInfo *const cdef_info = &cm->cdef_info;
- cdef_info->cdef_bits = 0;
- cdef_info->nb_cdef_strengths = 1;
+ // Check the speed feature to avoid extra signaling.
+ if (skip_cdef) {
+ cdef_info->cdef_bits = 1;
+ cdef_info->nb_cdef_strengths = 2;
+ } else {
+ cdef_info->cdef_bits = 0;
+ cdef_info->nb_cdef_strengths = 1;
+ }
cdef_info->cdef_damping = 3 + (cm->quant_params.base_qindex >> 6);
int predicted_y_f1 = 0;
@@ -537,13 +544,22 @@ static void pick_cdef_from_qp(AV1_COMMON *const cm) {
cdef_info->cdef_uv_strengths[0] =
predicted_uv_f1 * CDEF_SEC_STRENGTHS + predicted_uv_f2;
+ if (skip_cdef) {
+ cdef_info->cdef_strengths[1] = 0;
+ cdef_info->cdef_uv_strengths[1] = 0;
+ }
const CommonModeInfoParams *const mi_params = &cm->mi_params;
const int nvfb = (mi_params->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
const int nhfb = (mi_params->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
MB_MODE_INFO **mbmi = mi_params->mi_grid_base;
for (int r = 0; r < nvfb; ++r) {
for (int c = 0; c < nhfb; ++c) {
- mbmi[MI_SIZE_64X64 * c]->cdef_strength = 0;
+ MB_MODE_INFO *current_mbmi = mbmi[MI_SIZE_64X64 * c];
+ current_mbmi->cdef_strength = 0;
+ if (skip_cdef && current_mbmi->skip_cdef_curr_sb &&
+ frames_since_key > 10) {
+ current_mbmi->cdef_strength = 1;
+ }
}
mbmi += MI_SIZE_64X64 * mi_params->mi_stride;
}
@@ -551,10 +567,10 @@ static void pick_cdef_from_qp(AV1_COMMON *const cm) {
void av1_cdef_search(MultiThreadInfo *mt_info, const YV12_BUFFER_CONFIG *frame,
const YV12_BUFFER_CONFIG *ref, AV1_COMMON *cm,
- MACROBLOCKD *xd, CDEF_PICK_METHOD pick_method,
- int rdmult) {
+ MACROBLOCKD *xd, CDEF_PICK_METHOD pick_method, int rdmult,
+ int skip_cdef_feature, int frames_since_key) {
if (pick_method == CDEF_PICK_FROM_Q) {
- pick_cdef_from_qp(cm);
+ pick_cdef_from_qp(cm, skip_cdef_feature, frames_since_key);
return;
}
const CommonModeInfoParams *const mi_params = &cm->mi_params;
diff --git a/third_party/libaom/source/libaom/av1/encoder/pickcdef.h b/third_party/libaom/source/libaom/av1/encoder/pickcdef.h
index 7fe1edb695..6bea1b0945 100644
--- a/third_party/libaom/source/libaom/av1/encoder/pickcdef.h
+++ b/third_party/libaom/source/libaom/av1/encoder/pickcdef.h
@@ -58,20 +58,6 @@ typedef uint64_t (*compute_cdef_dist_t)(void *dst, int dstride, uint16_t *src,
BLOCK_SIZE bsize, int coeff_shift,
int row, int col);
-// Data related to CDEF search multi-thread synchronization.
-typedef struct AV1CdefSyncData {
-#if CONFIG_MULTITHREAD
- // Mutex lock used while dispatching jobs.
- pthread_mutex_t *mutex_;
-#endif // CONFIG_MULTITHREAD
- // Flag to indicate all blocks are processed and end of frame is reached
- int end_of_frame;
- // Row index in units of 64x64 block
- int fbr;
- // Column index in units of 64x64 block
- int fbc;
-} AV1CdefSync;
-
/*! \brief CDEF search context.
*/
typedef struct {
@@ -224,6 +210,8 @@ void av1_cdef_mse_calc_block(CdefSearchCtx *cdef_search_ctx, int fbr, int fbc,
* \param[in] xd Pointer to common current coding block structure
* \param[in] pick_method The method used to select params
* \param[in] rdmult rd multiplier to use in making param choices
+ * \param[in] skip_cdef_feature Speed feature to skip cdef
+ * \param[in] frames_since_key Number of frames since key frame
*
* \return Nothing is returned. Instead, optimal CDEF parameters are stored
* in the \c cdef_info structure of type \ref CdefInfo inside \c cm:
@@ -239,7 +227,8 @@ void av1_cdef_mse_calc_block(CdefSearchCtx *cdef_search_ctx, int fbr, int fbc,
void av1_cdef_search(struct MultiThreadInfo *mt_info,
const YV12_BUFFER_CONFIG *frame,
const YV12_BUFFER_CONFIG *ref, AV1_COMMON *cm,
- MACROBLOCKD *xd, CDEF_PICK_METHOD pick_method, int rdmult);
+ MACROBLOCKD *xd, CDEF_PICK_METHOD pick_method, int rdmult,
+ int skip_cdef_feature, int frames_since_key);
#ifdef __cplusplus
} // extern "C"
diff --git a/third_party/libaom/source/libaom/av1/encoder/picklpf.c b/third_party/libaom/source/libaom/av1/encoder/picklpf.c
index 9b3924f5ce..44030767b5 100644
--- a/third_party/libaom/source/libaom/av1/encoder/picklpf.c
+++ b/third_party/libaom/source/libaom/av1/encoder/picklpf.c
@@ -39,8 +39,8 @@ static void yv12_copy_plane(const YV12_BUFFER_CONFIG *src_bc,
int av1_get_max_filter_level(const AV1_COMP *cpi) {
if (is_stat_consumption_stage_twopass(cpi)) {
- return cpi->twopass.section_intra_rating > 8 ? MAX_LOOP_FILTER * 3 / 4
- : MAX_LOOP_FILTER;
+ return cpi->ppi->twopass.section_intra_rating > 8 ? MAX_LOOP_FILTER * 3 / 4
+ : MAX_LOOP_FILTER;
} else {
return MAX_LOOP_FILTER;
}
@@ -78,16 +78,16 @@ static int64_t try_filter_frame(const YV12_BUFFER_CONFIG *sd,
0,
#endif
mt_info->workers, num_workers,
- &mt_info->lf_row_sync);
+ &mt_info->lf_row_sync, 0);
else
av1_loop_filter_frame(&cm->cur_frame->buf, cm, &cpi->td.mb.e_mbd,
#if CONFIG_LPF_MASK
0,
#endif
- plane, plane + 1, partial_frame);
+ plane, plane + 1, partial_frame, 0);
filt_err = aom_get_sse_plane(sd, &cm->cur_frame->buf, plane,
- cm->seq_params.use_highbitdepth);
+ cm->seq_params->use_highbitdepth);
// Re-instate the unfiltered frame
yv12_copy_plane(&cpi->last_frame_uf, &cm->cur_frame->buf, plane);
@@ -153,8 +153,8 @@ static int search_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
int64_t bias = (best_err >> (15 - (filt_mid / 8))) * filter_step;
if ((is_stat_consumption_stage_twopass(cpi)) &&
- (cpi->twopass.section_intra_rating < 20))
- bias = (bias * cpi->twopass.section_intra_rating) / 20;
+ (cpi->ppi->twopass.section_intra_rating < 20))
+ bias = (bias * cpi->ppi->twopass.section_intra_rating) / 20;
// yx, bias less for large block size
if (cm->features.tx_mode != ONLY_4X4) bias >>= 1;
@@ -205,7 +205,7 @@ static int search_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
if (best_cost_ret)
*best_cost_ret = RDCOST_DBL_WITH_NATIVE_BD_DIST(
- x->rdmult, 0, (best_err << 4), cm->seq_params.bit_depth);
+ x->rdmult, 0, (best_err << 4), cm->seq_params->bit_depth);
return filt_best;
}
@@ -226,7 +226,7 @@ void av1_pick_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
const int min_filter_level = 0;
const int max_filter_level = av1_get_max_filter_level(cpi);
const int q = av1_ac_quant_QTX(cm->quant_params.base_qindex, 0,
- cm->seq_params.bit_depth);
+ cm->seq_params->bit_depth);
// based on tests result for rtc test set
// 0.04590 boosted or 0.02295 non-booseted in 18-bit fixed point
const int strength_boost_q_treshold = 0;
@@ -244,7 +244,7 @@ void av1_pick_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
// And high bit depth separately:
// filt_guess = q * 0.316206 + 3.87252
int filt_guess;
- switch (cm->seq_params.bit_depth) {
+ switch (cm->seq_params->bit_depth) {
case AOM_BITS_8:
filt_guess =
(cm->current_frame.frame_type == KEY_FRAME)
@@ -263,7 +263,7 @@ void av1_pick_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
"or AOM_BITS_12");
return;
}
- if (cm->seq_params.bit_depth != AOM_BITS_8 &&
+ if (cm->seq_params->bit_depth != AOM_BITS_8 &&
cm->current_frame.frame_type == KEY_FRAME)
filt_guess -= 4;
// TODO(chengchen): retrain the model for Y, U, V filter levels
@@ -272,10 +272,20 @@ void av1_pick_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
lf->filter_level_u = clamp(filt_guess, min_filter_level, max_filter_level);
lf->filter_level_v = clamp(filt_guess, min_filter_level, max_filter_level);
} else {
- const int last_frame_filter_level[4] = { lf->filter_level[0],
- lf->filter_level[1],
- lf->filter_level_u,
- lf->filter_level_v };
+ int last_frame_filter_level[4] = { 0 };
+ if (!frame_is_intra_only(cm)) {
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ last_frame_filter_level[0] = cpi->ppi->filter_level[0];
+ last_frame_filter_level[1] = cpi->ppi->filter_level[1];
+ last_frame_filter_level[2] = cpi->ppi->filter_level_u;
+ last_frame_filter_level[3] = cpi->ppi->filter_level_v;
+#else
+ last_frame_filter_level[0] = lf->filter_level[0];
+ last_frame_filter_level[1] = lf->filter_level[1];
+ last_frame_filter_level[2] = lf->filter_level_u;
+ last_frame_filter_level[3] = lf->filter_level_v;
+#endif
+ }
lf->filter_level[0] = lf->filter_level[1] =
search_filter_level(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE,
@@ -297,5 +307,14 @@ void av1_pick_filter_level(const YV12_BUFFER_CONFIG *sd, AV1_COMP *cpi,
search_filter_level(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE,
last_frame_filter_level, NULL, 2, 0);
}
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ // Store current frame loopfilter levels if update flag is set.
+ if (cpi->do_frame_data_update) {
+ cpi->ppi->filter_level[0] = lf->filter_level[0];
+ cpi->ppi->filter_level[1] = lf->filter_level[1];
+ cpi->ppi->filter_level_u = lf->filter_level_u;
+ cpi->ppi->filter_level_v = lf->filter_level_v;
+ }
+#endif
}
}
diff --git a/third_party/libaom/source/libaom/av1/encoder/pickrst.c b/third_party/libaom/source/libaom/av1/encoder/pickrst.c
index 21965138be..2c12cb014f 100644
--- a/third_party/libaom/source/libaom/av1/encoder/pickrst.c
+++ b/third_party/libaom/source/libaom/av1/encoder/pickrst.c
@@ -199,8 +199,8 @@ static int64_t try_restoration_unit(const RestSearchCtxt *rsc,
const int is_uv = plane > 0;
const RestorationInfo *rsi = &cm->rst_info[plane];
RestorationLineBuffers rlbs;
- const int bit_depth = cm->seq_params.bit_depth;
- const int highbd = cm->seq_params.use_highbitdepth;
+ const int bit_depth = cm->seq_params->bit_depth;
+ const int highbd = cm->seq_params->use_highbitdepth;
const YV12_BUFFER_CONFIG *fts = &cm->cur_frame->buf;
// TODO(yunqing): For now, only use optimized LR filter in decoder. Can be
@@ -209,8 +209,8 @@ static int64_t try_restoration_unit(const RestSearchCtxt *rsc,
av1_loop_restoration_filter_unit(
limits, rui, &rsi->boundaries, &rlbs, tile_rect, rsc->tile_stripe0,
- is_uv && cm->seq_params.subsampling_x,
- is_uv && cm->seq_params.subsampling_y, highbd, bit_depth,
+ is_uv && cm->seq_params->subsampling_x,
+ is_uv && cm->seq_params->subsampling_y, highbd, bit_depth,
fts->buffers[plane], fts->strides[is_uv], rsc->dst->buffers[plane],
rsc->dst->strides[is_uv], cm->rst_tmpbuf, optimized_lr);
@@ -886,8 +886,8 @@ static AOM_INLINE void search_sgrproj(const RestorationTileLimits *limits,
const MACROBLOCK *const x = rsc->x;
const AV1_COMMON *const cm = rsc->cm;
- const int highbd = cm->seq_params.use_highbitdepth;
- const int bit_depth = cm->seq_params.bit_depth;
+ const int highbd = cm->seq_params->use_highbitdepth;
+ const int bit_depth = cm->seq_params->bit_depth;
const int64_t bits_none = x->mode_costs.sgrproj_restore_cost[0];
// Prune evaluation of RESTORE_SGRPROJ if 'skip_sgr_eval' is set
@@ -905,8 +905,8 @@ static AOM_INLINE void search_sgrproj(const RestorationTileLimits *limits,
rsc->src_buffer + limits->v_start * rsc->src_stride + limits->h_start;
const int is_uv = rsc->plane > 0;
- const int ss_x = is_uv && cm->seq_params.subsampling_x;
- const int ss_y = is_uv && cm->seq_params.subsampling_y;
+ const int ss_x = is_uv && cm->seq_params->subsampling_x;
+ const int ss_y = is_uv && cm->seq_params->subsampling_y;
const int procunit_width = RESTORATION_PROC_UNIT_SIZE >> ss_x;
const int procunit_height = RESTORATION_PROC_UNIT_SIZE >> ss_y;
@@ -1474,12 +1474,12 @@ static AOM_INLINE void search_wiener(const RestorationTileLimits *limits,
const int scale[3] = { 0, 1, 2 };
// Obtain the normalized Qscale
const int qs = av1_dc_quant_QTX(rsc->cm->quant_params.base_qindex, 0,
- rsc->cm->seq_params.bit_depth) >>
+ rsc->cm->seq_params->bit_depth) >>
3;
// Derive threshold as sqr(normalized Qscale) * scale / 16,
const uint64_t thresh =
(qs * qs * scale[rsc->lpf_sf->prune_wiener_based_on_src_var]) >> 4;
- const int highbd = rsc->cm->seq_params.use_highbitdepth;
+ const int highbd = rsc->cm->seq_params->use_highbitdepth;
const uint64_t src_var =
var_restoration_unit(limits, rsc->src, rsc->plane, highbd);
// Do not perform Wiener search if source variance is lower than threshold
@@ -1510,11 +1510,11 @@ static AOM_INLINE void search_wiener(const RestorationTileLimits *limits,
#if CONFIG_AV1_HIGHBITDEPTH
const AV1_COMMON *const cm = rsc->cm;
- if (cm->seq_params.use_highbitdepth) {
+ if (cm->seq_params->use_highbitdepth) {
av1_compute_stats_highbd(reduced_wiener_win, rsc->dgd_buffer,
rsc->src_buffer, limits->h_start, limits->h_end,
limits->v_start, limits->v_end, rsc->dgd_stride,
- rsc->src_stride, M, H, cm->seq_params.bit_depth);
+ rsc->src_stride, M, H, cm->seq_params->bit_depth);
} else {
av1_compute_stats(reduced_wiener_win, rsc->dgd_buffer, rsc->src_buffer,
limits->h_start, limits->h_end, limits->v_start,
@@ -1567,10 +1567,10 @@ static AOM_INLINE void search_wiener(const RestorationTileLimits *limits,
double cost_none = RDCOST_DBL_WITH_NATIVE_BD_DIST(
x->rdmult, bits_none >> 4, rusi->sse[RESTORE_NONE],
- rsc->cm->seq_params.bit_depth);
+ rsc->cm->seq_params->bit_depth);
double cost_wiener = RDCOST_DBL_WITH_NATIVE_BD_DIST(
x->rdmult, bits_wiener >> 4, rusi->sse[RESTORE_WIENER],
- rsc->cm->seq_params.bit_depth);
+ rsc->cm->seq_params->bit_depth);
RestorationType rtype =
(cost_wiener < cost_none) ? RESTORE_WIENER : RESTORE_NONE;
@@ -1601,7 +1601,7 @@ static AOM_INLINE void search_norestore(const RestorationTileLimits *limits,
RestSearchCtxt *rsc = (RestSearchCtxt *)priv;
RestUnitSearchInfo *rusi = &rsc->rusi[rest_unit_idx];
- const int highbd = rsc->cm->seq_params.use_highbitdepth;
+ const int highbd = rsc->cm->seq_params->use_highbitdepth;
rusi->sse[RESTORE_NONE] = sse_restoration_unit(
limits, rsc->src, &rsc->cm->cur_frame->buf, rsc->plane, highbd);
@@ -1653,8 +1653,8 @@ static AOM_INLINE void search_switchable(const RestorationTileLimits *limits,
}
const int64_t coeff_bits = coeff_pcost << AV1_PROB_COST_SHIFT;
const int64_t bits = x->mode_costs.switchable_restore_cost[r] + coeff_bits;
- double cost = RDCOST_DBL_WITH_NATIVE_BD_DIST(x->rdmult, bits >> 4, sse,
- rsc->cm->seq_params.bit_depth);
+ double cost = RDCOST_DBL_WITH_NATIVE_BD_DIST(
+ x->rdmult, bits >> 4, sse, rsc->cm->seq_params->bit_depth);
if (r == RESTORE_SGRPROJ && rusi->sgrproj.ep < 10)
cost *= (1 + DUAL_SGR_PENALTY_MULT * rsc->lpf_sf->dual_sgr_penalty_level);
if (r == 0 || cost < best_cost) {
@@ -1694,7 +1694,7 @@ static double search_rest_type(RestSearchCtxt *rsc, RestorationType rtype) {
av1_foreach_rest_unit_in_plane(rsc->cm, rsc->plane, funs[rtype], rsc,
&rsc->tile_rect, rsc->cm->rst_tmpbuf, NULL);
return RDCOST_DBL_WITH_NATIVE_BD_DIST(
- rsc->x->rdmult, rsc->bits >> 4, rsc->sse, rsc->cm->seq_params.bit_depth);
+ rsc->x->rdmult, rsc->bits >> 4, rsc->sse, rsc->cm->seq_params->bit_depth);
}
static int rest_tiles_in_plane(const AV1_COMMON *cm, int plane) {
@@ -1740,7 +1740,7 @@ void av1_pick_filter_restoration(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi) {
double best_cost = 0;
RestorationType best_rtype = RESTORE_NONE;
- const int highbd = rsc.cm->seq_params.use_highbitdepth;
+ const int highbd = rsc.cm->seq_params->use_highbitdepth;
if (!cpi->sf.lpf_sf.disable_loop_restoration_chroma || !plane) {
av1_extend_frame(rsc.dgd_buffer, rsc.plane_width, rsc.plane_height,
rsc.dgd_stride, RESTORATION_BORDER, RESTORATION_BORDER,
diff --git a/third_party/libaom/source/libaom/av1/encoder/ratectrl.c b/third_party/libaom/source/libaom/av1/encoder/ratectrl.c
index 33befa6147..c24c822b9b 100644
--- a/third_party/libaom/source/libaom/av1/encoder/ratectrl.c
+++ b/third_party/libaom/source/libaom/av1/encoder/ratectrl.c
@@ -233,11 +233,12 @@ static void update_layer_buffer_level(SVC *svc, int encoded_frame_size) {
LAYER_IDS_TO_IDX(svc->spatial_layer_id, i, svc->number_temporal_layers);
LAYER_CONTEXT *lc = &svc->layer_context[layer];
RATE_CONTROL *lrc = &lc->rc;
+ PRIMARY_RATE_CONTROL *lp_rc = &lc->p_rc;
lrc->bits_off_target +=
(int)(lc->target_bandwidth / lc->framerate) - encoded_frame_size;
// Clip buffer level to maximum buffer size for the layer.
lrc->bits_off_target =
- AOMMIN(lrc->bits_off_target, lrc->maximum_buffer_size);
+ AOMMIN(lrc->bits_off_target, lp_rc->maximum_buffer_size);
lrc->buffer_level = lrc->bits_off_target;
}
}
@@ -245,6 +246,7 @@ static void update_layer_buffer_level(SVC *svc, int encoded_frame_size) {
static void update_buffer_level(AV1_COMP *cpi, int encoded_frame_size) {
const AV1_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
+ PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
// Non-viewable frames are a special case and are treated as pure overhead.
if (!cm->show_frame)
@@ -253,10 +255,11 @@ static void update_buffer_level(AV1_COMP *cpi, int encoded_frame_size) {
rc->bits_off_target += rc->avg_frame_bandwidth - encoded_frame_size;
// Clip the buffer level to the maximum specified buffer size.
- rc->bits_off_target = AOMMIN(rc->bits_off_target, rc->maximum_buffer_size);
+ rc->bits_off_target = AOMMIN(rc->bits_off_target, p_rc->maximum_buffer_size);
rc->buffer_level = rc->bits_off_target;
- if (cpi->use_svc) update_layer_buffer_level(&cpi->svc, encoded_frame_size);
+ if (cpi->ppi->use_svc)
+ update_layer_buffer_level(&cpi->svc, encoded_frame_size);
}
int av1_rc_get_default_min_gf_interval(int width, int height,
@@ -285,7 +288,24 @@ int av1_rc_get_default_max_gf_interval(double framerate, int min_gf_interval) {
return AOMMAX(interval, min_gf_interval);
}
-void av1_rc_init(const AV1EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) {
+void av1_primary_rc_init(const AV1EncoderConfig *oxcf,
+ PRIMARY_RATE_CONTROL *p_rc) {
+ int min_gf_interval = oxcf->gf_cfg.min_gf_interval;
+ int max_gf_interval = oxcf->gf_cfg.max_gf_interval;
+ if (min_gf_interval == 0)
+ min_gf_interval = av1_rc_get_default_min_gf_interval(
+ oxcf->frm_dim_cfg.width, oxcf->frm_dim_cfg.height,
+ oxcf->input_cfg.init_framerate);
+ if (max_gf_interval == 0)
+ max_gf_interval = av1_rc_get_default_max_gf_interval(
+ oxcf->input_cfg.init_framerate, min_gf_interval);
+ p_rc->baseline_gf_interval = (min_gf_interval + max_gf_interval) / 2;
+ p_rc->this_key_frame_forced = 0;
+ p_rc->next_key_frame_forced = 0;
+}
+
+void av1_rc_init(const AV1EncoderConfig *oxcf, int pass, RATE_CONTROL *rc,
+ const PRIMARY_RATE_CONTROL *const p_rc) {
const RateControlCfg *const rc_cfg = &oxcf->rc_cfg;
int i;
@@ -302,8 +322,8 @@ void av1_rc_init(const AV1EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) {
rc->last_q[KEY_FRAME] = rc_cfg->best_allowed_q;
rc->last_q[INTER_FRAME] = rc_cfg->worst_allowed_q;
- rc->buffer_level = rc->starting_buffer_level;
- rc->bits_off_target = rc->starting_buffer_level;
+ rc->buffer_level = p_rc->starting_buffer_level;
+ rc->bits_off_target = p_rc->starting_buffer_level;
rc->rolling_target_bits = rc->avg_frame_bandwidth;
rc->rolling_actual_bits = rc->avg_frame_bandwidth;
@@ -312,8 +332,6 @@ void av1_rc_init(const AV1EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) {
rc->total_target_bits = 0;
rc->frames_since_key = 8; // Sensible default for first frame.
- rc->this_key_frame_forced = 0;
- rc->next_key_frame_forced = 0;
rc->frames_till_gf_update_due = 0;
rc->ni_av_qi = rc_cfg->worst_allowed_q;
@@ -337,7 +355,6 @@ void av1_rc_init(const AV1EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) {
if (rc->max_gf_interval == 0)
rc->max_gf_interval = av1_rc_get_default_max_gf_interval(
oxcf->input_cfg.init_framerate, rc->min_gf_interval);
- rc->baseline_gf_interval = (rc->min_gf_interval + rc->max_gf_interval) / 2;
rc->avg_frame_low_motion = 0;
rc->resize_state = ORIG;
@@ -349,6 +366,7 @@ void av1_rc_init(const AV1EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) {
int av1_rc_drop_frame(AV1_COMP *cpi) {
const AV1EncoderConfig *oxcf = &cpi->oxcf;
RATE_CONTROL *const rc = &cpi->rc;
+ PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
if (!oxcf->rc_cfg.drop_frames_water_mark) {
return 0;
@@ -360,7 +378,7 @@ int av1_rc_drop_frame(AV1_COMP *cpi) {
// If buffer is below drop_mark, for now just drop every other frame
// (starting with the next frame) until it increases back over drop_mark.
int drop_mark = (int)(oxcf->rc_cfg.drop_frames_water_mark *
- rc->optimal_buffer_level / 100);
+ p_rc->optimal_buffer_level / 100);
if ((rc->buffer_level > drop_mark) && (rc->decimation_factor > 0)) {
--rc->decimation_factor;
} else if (rc->buffer_level <= drop_mark && rc->decimation_factor == 0) {
@@ -384,6 +402,7 @@ int av1_rc_drop_frame(AV1_COMP *cpi) {
static int adjust_q_cbr(const AV1_COMP *cpi, int q, int active_worst_quality) {
const RATE_CONTROL *const rc = &cpi->rc;
+ const PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
const AV1_COMMON *const cm = &cpi->common;
const RefreshFrameFlagsInfo *const refresh_frame_flags = &cpi->refresh_frame;
const int max_delta = 16;
@@ -397,7 +416,7 @@ static int adjust_q_cbr(const AV1_COMP *cpi, int q, int active_worst_quality) {
(cm->width != cm->prev_frame->width ||
cm->height != cm->prev_frame->height || change_avg_frame_bandwidth);
// Apply some control/clamp to QP under certain conditions.
- if (cm->current_frame.frame_type != KEY_FRAME && !cpi->use_svc &&
+ if (cm->current_frame.frame_type != KEY_FRAME && !cpi->ppi->use_svc &&
rc->frames_since_key > 1 && !change_target_bits_mb &&
(!cpi->oxcf.rc_cfg.gf_cbr_boost_pct ||
!(refresh_frame_flags->alt_ref_frame ||
@@ -411,7 +430,7 @@ static int adjust_q_cbr(const AV1_COMP *cpi, int q, int active_worst_quality) {
// Adjust Q base on source content change from scene detection.
if (cpi->sf.rt_sf.check_scene_detection && rc->prev_avg_source_sad > 0 &&
rc->frames_since_key > 10) {
- const int bit_depth = cm->seq_params.bit_depth;
+ const int bit_depth = cm->seq_params->bit_depth;
double delta =
(double)rc->avg_source_sad / (double)rc->prev_avg_source_sad - 1.0;
// Push Q downwards if content change is decreasing and buffer level
@@ -419,14 +438,14 @@ static int adjust_q_cbr(const AV1_COMP *cpi, int q, int active_worst_quality) {
// only for high Q to avoid excess overshoot.
// Else reduce decrease in Q from previous frame if content change is
// increasing and buffer is below max (so not undershooting).
- if (delta < 0.0 && rc->buffer_level > (rc->optimal_buffer_level >> 2) &&
+ if (delta < 0.0 && rc->buffer_level > (p_rc->optimal_buffer_level >> 2) &&
q > (rc->worst_quality >> 1)) {
double q_adj_factor = 1.0 + 0.5 * tanh(4.0 * delta);
double q_val = av1_convert_qindex_to_q(q, bit_depth);
q += av1_compute_qdelta(rc, q_val, q_val * q_adj_factor, bit_depth);
} else if (rc->q_1_frame - q > 0 && delta > 0.1 &&
- rc->buffer_level < AOMMIN(rc->maximum_buffer_size,
- rc->optimal_buffer_level << 1)) {
+ rc->buffer_level < AOMMIN(p_rc->maximum_buffer_size,
+ p_rc->optimal_buffer_level << 1)) {
q = (3 * q + rc->q_1_frame) >> 2;
}
}
@@ -452,8 +471,9 @@ static const RATE_FACTOR_LEVEL rate_factor_levels[FRAME_UPDATE_TYPES] = {
GF_ARF_LOW, // INTNL_ARF_UPDATE
};
-static RATE_FACTOR_LEVEL get_rate_factor_level(const GF_GROUP *const gf_group) {
- const FRAME_UPDATE_TYPE update_type = gf_group->update_type[gf_group->index];
+static RATE_FACTOR_LEVEL get_rate_factor_level(const GF_GROUP *const gf_group,
+ int gf_frame_index) {
+ const FRAME_UPDATE_TYPE update_type = gf_group->update_type[gf_frame_index];
assert(update_type < FRAME_UPDATE_TYPES);
return rate_factor_levels[update_type];
}
@@ -480,12 +500,13 @@ static double get_rate_correction_factor(const AV1_COMP *cpi, int width,
if (cpi->common.current_frame.frame_type == KEY_FRAME) {
rcf = rc->rate_correction_factors[KF_STD];
} else if (is_stat_consumption_stage(cpi)) {
- const RATE_FACTOR_LEVEL rf_lvl = get_rate_factor_level(&cpi->gf_group);
+ const RATE_FACTOR_LEVEL rf_lvl =
+ get_rate_factor_level(&cpi->ppi->gf_group, cpi->gf_frame_index);
rcf = rc->rate_correction_factors[rf_lvl];
} else {
if ((refresh_frame_flags->alt_ref_frame ||
refresh_frame_flags->golden_frame) &&
- !rc->is_src_frame_alt_ref && !cpi->use_svc &&
+ !rc->is_src_frame_alt_ref && !cpi->ppi->use_svc &&
(cpi->oxcf.rc_cfg.mode != AOM_CBR ||
cpi->oxcf.rc_cfg.gf_cbr_boost_pct > 20))
rcf = rc->rate_correction_factors[GF_ARF_STD];
@@ -524,12 +545,13 @@ static void set_rate_correction_factor(AV1_COMP *cpi, double factor, int width,
if (cpi->common.current_frame.frame_type == KEY_FRAME) {
rc->rate_correction_factors[KF_STD] = factor;
} else if (is_stat_consumption_stage(cpi)) {
- const RATE_FACTOR_LEVEL rf_lvl = get_rate_factor_level(&cpi->gf_group);
+ const RATE_FACTOR_LEVEL rf_lvl =
+ get_rate_factor_level(&cpi->ppi->gf_group, cpi->gf_frame_index);
rc->rate_correction_factors[rf_lvl] = factor;
} else {
if ((refresh_frame_flags->alt_ref_frame ||
refresh_frame_flags->golden_frame) &&
- !rc->is_src_frame_alt_ref && !cpi->use_svc &&
+ !rc->is_src_frame_alt_ref && !cpi->ppi->use_svc &&
(cpi->oxcf.rc_cfg.mode != AOM_CBR ||
cpi->oxcf.rc_cfg.gf_cbr_boost_pct > 20))
rc->rate_correction_factors[GF_ARF_STD] = factor;
@@ -564,7 +586,7 @@ void av1_rc_update_rate_correction_factors(AV1_COMP *cpi, int width,
} else {
projected_size_based_on_q = av1_estimate_bits_at_q(
cm->current_frame.frame_type, cm->quant_params.base_qindex, MBs,
- rate_correction_factor, cm->seq_params.bit_depth,
+ rate_correction_factor, cm->seq_params->bit_depth,
cpi->is_screen_content_type);
}
// Work out a size correction factor.
@@ -620,7 +642,7 @@ static int get_bits_per_mb(const AV1_COMP *cpi, int use_cyclic_refresh,
return use_cyclic_refresh
? av1_cyclic_refresh_rc_bits_per_mb(cpi, q, correction_factor)
: av1_rc_bits_per_mb(cm->current_frame.frame_type, q,
- correction_factor, cm->seq_params.bit_depth,
+ correction_factor, cm->seq_params->bit_depth,
cpi->is_screen_content_type);
}
@@ -724,26 +746,31 @@ static int get_active_quality(int q, int gfu_boost, int low, int high,
}
}
-static int get_kf_active_quality(const RATE_CONTROL *const rc, int q,
+static int get_kf_active_quality(const PRIMARY_RATE_CONTROL *const p_rc, int q,
aom_bit_depth_t bit_depth) {
int *kf_low_motion_minq;
int *kf_high_motion_minq;
ASSIGN_MINQ_TABLE(bit_depth, kf_low_motion_minq);
ASSIGN_MINQ_TABLE(bit_depth, kf_high_motion_minq);
- return get_active_quality(q, rc->kf_boost, kf_low, kf_high,
+ return get_active_quality(q, p_rc->kf_boost, kf_low, kf_high,
kf_low_motion_minq, kf_high_motion_minq);
}
-static int get_gf_active_quality(const RATE_CONTROL *const rc, int q,
- aom_bit_depth_t bit_depth) {
+static int get_gf_active_quality_no_rc(int gfu_boost, int q,
+ aom_bit_depth_t bit_depth) {
int *arfgf_low_motion_minq;
int *arfgf_high_motion_minq;
ASSIGN_MINQ_TABLE(bit_depth, arfgf_low_motion_minq);
ASSIGN_MINQ_TABLE(bit_depth, arfgf_high_motion_minq);
- return get_active_quality(q, rc->gfu_boost, gf_low, gf_high,
+ return get_active_quality(q, gfu_boost, gf_low, gf_high,
arfgf_low_motion_minq, arfgf_high_motion_minq);
}
+static int get_gf_active_quality(const PRIMARY_RATE_CONTROL *const p_rc, int q,
+ aom_bit_depth_t bit_depth) {
+ return get_gf_active_quality_no_rc(p_rc->gfu_boost, q, bit_depth);
+}
+
static int get_gf_high_motion_quality(int q, aom_bit_depth_t bit_depth) {
int *arfgf_high_motion_minq;
ASSIGN_MINQ_TABLE(bit_depth, arfgf_high_motion_minq);
@@ -782,8 +809,9 @@ static int calc_active_worst_quality_no_stats_cbr(const AV1_COMP *cpi) {
// (at buffer = critical level).
const AV1_COMMON *const cm = &cpi->common;
const RATE_CONTROL *rc = &cpi->rc;
+ const PRIMARY_RATE_CONTROL *p_rc = &cpi->ppi->p_rc;
// Buffer level below which we push active_worst to worst_quality.
- int64_t critical_level = rc->optimal_buffer_level >> 3;
+ int64_t critical_level = p_rc->optimal_buffer_level >> 3;
int64_t buff_lvl_step = 0;
int adjustment = 0;
int active_worst_quality;
@@ -799,25 +827,26 @@ static int calc_active_worst_quality_no_stats_cbr(const AV1_COMP *cpi) {
rc->avg_frame_qindex[KEY_FRAME])
: rc->avg_frame_qindex[INTER_FRAME];
active_worst_quality = AOMMIN(rc->worst_quality, ambient_qp * 5 / 4);
- if (rc->buffer_level > rc->optimal_buffer_level) {
+ if (rc->buffer_level > p_rc->optimal_buffer_level) {
// Adjust down.
// Maximum limit for down adjustment, ~30%.
int max_adjustment_down = active_worst_quality / 3;
if (max_adjustment_down) {
- buff_lvl_step = ((rc->maximum_buffer_size - rc->optimal_buffer_level) /
- max_adjustment_down);
+ buff_lvl_step =
+ ((p_rc->maximum_buffer_size - p_rc->optimal_buffer_level) /
+ max_adjustment_down);
if (buff_lvl_step)
- adjustment = (int)((rc->buffer_level - rc->optimal_buffer_level) /
+ adjustment = (int)((rc->buffer_level - p_rc->optimal_buffer_level) /
buff_lvl_step);
active_worst_quality -= adjustment;
}
} else if (rc->buffer_level > critical_level) {
// Adjust up from ambient Q.
if (critical_level) {
- buff_lvl_step = (rc->optimal_buffer_level - critical_level);
+ buff_lvl_step = (p_rc->optimal_buffer_level - critical_level);
if (buff_lvl_step) {
adjustment = (int)((rc->worst_quality - ambient_qp) *
- (rc->optimal_buffer_level - rc->buffer_level) /
+ (p_rc->optimal_buffer_level - rc->buffer_level) /
buff_lvl_step);
}
active_worst_quality = ambient_qp + adjustment;
@@ -835,10 +864,11 @@ static int calc_active_best_quality_no_stats_cbr(const AV1_COMP *cpi,
int width, int height) {
const AV1_COMMON *const cm = &cpi->common;
const RATE_CONTROL *const rc = &cpi->rc;
+ const PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
const RefreshFrameFlagsInfo *const refresh_frame_flags = &cpi->refresh_frame;
const CurrentFrame *const current_frame = &cm->current_frame;
int *rtc_minq;
- const int bit_depth = cm->seq_params.bit_depth;
+ const int bit_depth = cm->seq_params->bit_depth;
int active_best_quality = rc->best_quality;
ASSIGN_MINQ_TABLE(bit_depth, rtc_minq);
@@ -846,7 +876,7 @@ static int calc_active_best_quality_no_stats_cbr(const AV1_COMP *cpi,
// Handle the special case for key frames forced when we have reached
// the maximum key frame interval. Here force the Q to a range
// based on the ambient Q to reduce the risk of popping.
- if (rc->this_key_frame_forced) {
+ if (p_rc->this_key_frame_forced) {
int qindex = rc->last_boosted_qindex;
double last_boosted_q = av1_convert_qindex_to_q(qindex, bit_depth);
int delta_qindex = av1_compute_qdelta(rc, last_boosted_q,
@@ -856,8 +886,8 @@ static int calc_active_best_quality_no_stats_cbr(const AV1_COMP *cpi,
// not first frame of one pass and kf_boost is set
double q_adj_factor = 1.0;
double q_val;
- active_best_quality =
- get_kf_active_quality(rc, rc->avg_frame_qindex[KEY_FRAME], bit_depth);
+ active_best_quality = get_kf_active_quality(
+ p_rc, rc->avg_frame_qindex[KEY_FRAME], bit_depth);
// Allow somewhat lower kf minq with small image formats.
if ((width * height) <= (352 * 288)) {
q_adj_factor -= 0.25;
@@ -868,7 +898,7 @@ static int calc_active_best_quality_no_stats_cbr(const AV1_COMP *cpi,
active_best_quality +=
av1_compute_qdelta(rc, q_val, q_val * q_adj_factor, bit_depth);
}
- } else if (!rc->is_src_frame_alt_ref && !cpi->use_svc &&
+ } else if (!rc->is_src_frame_alt_ref && !cpi->ppi->use_svc &&
cpi->oxcf.rc_cfg.gf_cbr_boost_pct &&
(refresh_frame_flags->golden_frame ||
refresh_frame_flags->alt_ref_frame)) {
@@ -880,7 +910,7 @@ static int calc_active_best_quality_no_stats_cbr(const AV1_COMP *cpi,
rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality) {
q = rc->avg_frame_qindex[INTER_FRAME];
}
- active_best_quality = get_gf_active_quality(rc, q, bit_depth);
+ active_best_quality = get_gf_active_quality(p_rc, q, bit_depth);
} else {
// Use the lower of active_worst_quality and recent/average Q.
FRAME_TYPE frame_type =
@@ -913,9 +943,10 @@ static int rc_pick_q_and_bounds_no_stats_cbr(const AV1_COMP *cpi, int width,
int *top_index) {
const AV1_COMMON *const cm = &cpi->common;
const RATE_CONTROL *const rc = &cpi->rc;
+ const PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
const CurrentFrame *const current_frame = &cm->current_frame;
int q;
- const int bit_depth = cm->seq_params.bit_depth;
+ const int bit_depth = cm->seq_params->bit_depth;
int active_worst_quality = calc_active_worst_quality_no_stats_cbr(cpi);
int active_best_quality = calc_active_best_quality_no_stats_cbr(
cpi, active_worst_quality, width, height);
@@ -932,7 +963,7 @@ static int rc_pick_q_and_bounds_no_stats_cbr(const AV1_COMP *cpi, int width,
*bottom_index = active_best_quality;
// Limit Q range for the adaptive loop.
- if (current_frame->frame_type == KEY_FRAME && !rc->this_key_frame_forced &&
+ if (current_frame->frame_type == KEY_FRAME && !p_rc->this_key_frame_forced &&
current_frame->frame_number != 0) {
int qdelta = 0;
aom_clear_system_state();
@@ -944,7 +975,7 @@ static int rc_pick_q_and_bounds_no_stats_cbr(const AV1_COMP *cpi, int width,
}
// Special case code to try and match quality with forced key frames
- if (current_frame->frame_type == KEY_FRAME && rc->this_key_frame_forced) {
+ if (current_frame->frame_type == KEY_FRAME && p_rc->this_key_frame_forced) {
q = rc->last_boosted_qindex;
} else {
q = av1_rc_regulate_q(cpi, rc->this_frame_target, active_best_quality,
@@ -1018,7 +1049,7 @@ static int get_active_cq_level(const RATE_CONTROL *rc,
* \c oxcf->cq_level, or slightly modified for some
* special cases)
* \param[in] bit_depth Bit depth of the codec (same as
- * \c cm->seq_params.bit_depth)
+ * \c cm->seq_params->bit_depth)
* \return Returns selected q index to be used for encoding this frame.
*/
static int get_q_using_fixed_offsets(const AV1EncoderConfig *const oxcf,
@@ -1037,13 +1068,16 @@ static int get_q_using_fixed_offsets(const AV1EncoderConfig *const oxcf,
return cq_level;
}
offset_idx = 0;
- } else if (update_type == ARF_UPDATE || update_type == GF_UPDATE) {
- offset_idx = 1;
- } else if (update_type == INTNL_ARF_UPDATE) {
- offset_idx =
- AOMMIN(gf_group->layer_depth[gf_index], FIXED_QP_OFFSET_COUNT - 1);
- } else { // Leaf level / overlay frame.
- assert(update_type == LF_UPDATE || update_type == OVERLAY_UPDATE ||
+ } else if (update_type == ARF_UPDATE || update_type == GF_UPDATE ||
+ update_type == INTNL_ARF_UPDATE || update_type == LF_UPDATE) {
+ if (gf_group->layer_depth[gf_index] >=
+ gf_group->max_layer_depth_allowed + 1) { // Leaf.
+ return cq_level; // Directly Return worst quality allowed.
+ }
+ offset_idx = AOMMIN(gf_group->layer_depth[gf_index],
+ gf_group->max_layer_depth_allowed);
+ } else { // Overlay frame.
+ assert(update_type == OVERLAY_UPDATE ||
update_type == INTNL_OVERLAY_UPDATE);
return cq_level; // Directly Return worst quality allowed.
}
@@ -1081,10 +1115,11 @@ static int rc_pick_q_and_bounds_no_stats(const AV1_COMP *cpi, int width,
int *bottom_index, int *top_index) {
const AV1_COMMON *const cm = &cpi->common;
const RATE_CONTROL *const rc = &cpi->rc;
+ const PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
const CurrentFrame *const current_frame = &cm->current_frame;
const AV1EncoderConfig *const oxcf = &cpi->oxcf;
const RefreshFrameFlagsInfo *const refresh_frame_flags = &cpi->refresh_frame;
- const GF_GROUP *const gf_group = &cpi->gf_group;
+ const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
const enum aom_rc_mode rc_mode = oxcf->rc_cfg.mode;
assert(has_no_stats_stage(cpi));
@@ -1097,7 +1132,7 @@ static int rc_pick_q_and_bounds_no_stats(const AV1_COMP *cpi, int width,
const int cq_level =
get_active_cq_level(rc, oxcf, frame_is_intra_only(cm), cpi->superres_mode,
cm->superres_scale_denominator);
- const int bit_depth = cm->seq_params.bit_depth;
+ const int bit_depth = cm->seq_params->bit_depth;
if (oxcf->q_cfg.use_fixed_qp_offsets) {
return get_q_using_fixed_offsets(oxcf, rc, gf_group, gf_index, cq_level,
@@ -1117,7 +1152,7 @@ static int rc_pick_q_and_bounds_no_stats(const AV1_COMP *cpi, int width,
const int delta_qindex =
av1_compute_qdelta(rc, q_val, q_val * 0.25, bit_depth);
active_best_quality = AOMMAX(qindex + delta_qindex, rc->best_quality);
- } else if (rc->this_key_frame_forced) {
+ } else if (p_rc->this_key_frame_forced) {
const int qindex = rc->last_boosted_qindex;
const double last_boosted_q = av1_convert_qindex_to_q(qindex, bit_depth);
const int delta_qindex = av1_compute_qdelta(
@@ -1126,8 +1161,8 @@ static int rc_pick_q_and_bounds_no_stats(const AV1_COMP *cpi, int width,
} else { // not first frame of one pass and kf_boost is set
double q_adj_factor = 1.0;
- active_best_quality =
- get_kf_active_quality(rc, rc->avg_frame_qindex[KEY_FRAME], bit_depth);
+ active_best_quality = get_kf_active_quality(
+ p_rc, rc->avg_frame_qindex[KEY_FRAME], bit_depth);
// Allow somewhat lower kf minq with small image formats.
if ((width * height) <= (352 * 288)) {
@@ -1148,14 +1183,29 @@ static int rc_pick_q_and_bounds_no_stats(const AV1_COMP *cpi, int width,
// Use the lower of active_worst_quality and recent
// average Q as basis for GF/ARF best Q limit unless last frame was
// a key frame.
+ int avg_frame_qindex_inter_frame;
+ int avg_frame_qindex_key_frame;
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ avg_frame_qindex_inter_frame =
+ (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0)
+ ? cpi->ppi->temp_avg_frame_qindex[INTER_FRAME]
+ : cpi->rc.avg_frame_qindex[INTER_FRAME];
+ avg_frame_qindex_key_frame =
+ (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0)
+ ? cpi->ppi->temp_avg_frame_qindex[KEY_FRAME]
+ : cpi->rc.avg_frame_qindex[KEY_FRAME];
+#else
+ avg_frame_qindex_inter_frame = rc->avg_frame_qindex[INTER_FRAME];
+ avg_frame_qindex_key_frame = rc->avg_frame_qindex[KEY_FRAME];
+#endif
q = (rc->frames_since_key > 1 &&
- rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality)
- ? rc->avg_frame_qindex[INTER_FRAME]
- : rc->avg_frame_qindex[KEY_FRAME];
+ avg_frame_qindex_inter_frame < active_worst_quality)
+ ? avg_frame_qindex_inter_frame
+ : avg_frame_qindex_key_frame;
// For constrained quality dont allow Q less than the cq level
if (rc_mode == AOM_CQ) {
if (q < cq_level) q = cq_level;
- active_best_quality = get_gf_active_quality(rc, q, bit_depth);
+ active_best_quality = get_gf_active_quality(p_rc, q, bit_depth);
// Constrained quality use slightly lower active best.
active_best_quality = active_best_quality * 15 / 16;
} else if (rc_mode == AOM_Q) {
@@ -1167,7 +1217,7 @@ static int rc_pick_q_and_bounds_no_stats(const AV1_COMP *cpi, int width,
: av1_compute_qdelta(rc, q_val, q_val * 0.50, bit_depth);
active_best_quality = AOMMAX(qindex + delta_qindex, rc->best_quality);
} else {
- active_best_quality = get_gf_active_quality(rc, q, bit_depth);
+ active_best_quality = get_gf_active_quality(p_rc, q, bit_depth);
}
} else {
if (rc_mode == AOM_Q) {
@@ -1206,8 +1256,8 @@ static int rc_pick_q_and_bounds_no_stats(const AV1_COMP *cpi, int width,
{
int qdelta = 0;
aom_clear_system_state();
- if (current_frame->frame_type == KEY_FRAME && !rc->this_key_frame_forced &&
- current_frame->frame_number != 0) {
+ if (current_frame->frame_type == KEY_FRAME &&
+ !p_rc->this_key_frame_forced && current_frame->frame_number != 0) {
qdelta = av1_compute_qdelta_by_rate(
&cpi->rc, current_frame->frame_type, active_worst_quality, 2.0,
cpi->is_screen_content_type, bit_depth);
@@ -1226,7 +1276,7 @@ static int rc_pick_q_and_bounds_no_stats(const AV1_COMP *cpi, int width,
q = active_best_quality;
// Special case code to try and match quality with forced key frames
} else if ((current_frame->frame_type == KEY_FRAME) &&
- rc->this_key_frame_forced) {
+ p_rc->this_key_frame_forced) {
q = rc->last_boosted_qindex;
} else {
q = av1_rc_regulate_q(cpi, rc->this_frame_target, active_best_quality,
@@ -1251,16 +1301,17 @@ static const double arf_layer_deltas[MAX_ARF_LAYERS + 1] = { 2.50, 2.00, 1.75,
1.50, 1.25, 1.15,
1.0 };
int av1_frame_type_qdelta(const AV1_COMP *cpi, int q) {
- const GF_GROUP *const gf_group = &cpi->gf_group;
- const RATE_FACTOR_LEVEL rf_lvl = get_rate_factor_level(gf_group);
- const FRAME_TYPE frame_type = gf_group->frame_type[gf_group->index];
- const int arf_layer = AOMMIN(gf_group->layer_depth[gf_group->index], 6);
+ const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
+ const RATE_FACTOR_LEVEL rf_lvl =
+ get_rate_factor_level(gf_group, cpi->gf_frame_index);
+ const FRAME_TYPE frame_type = gf_group->frame_type[cpi->gf_frame_index];
+ const int arf_layer = AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], 6);
const double rate_factor =
(rf_lvl == INTER_NORMAL) ? 1.0 : arf_layer_deltas[arf_layer];
return av1_compute_qdelta_by_rate(&cpi->rc, frame_type, q, rate_factor,
cpi->is_screen_content_type,
- cpi->common.seq_params.bit_depth);
+ cpi->common.seq_params->bit_depth);
}
// This unrestricted Q selection on CQ mode is useful when testing new features,
@@ -1275,7 +1326,7 @@ static int rc_pick_q_and_bounds_no_stats_cq(const AV1_COMP *cpi, int width,
const int cq_level =
get_active_cq_level(rc, oxcf, frame_is_intra_only(cm), cpi->superres_mode,
cm->superres_scale_denominator);
- const int bit_depth = cm->seq_params.bit_depth;
+ const int bit_depth = cm->seq_params->bit_depth;
const int q = (int)av1_convert_qindex_to_q(cq_level, bit_depth);
(void)width;
(void)height;
@@ -1295,10 +1346,11 @@ static void get_intra_q_and_bounds(const AV1_COMP *cpi, int width, int height,
int cq_level, int is_fwd_kf) {
const AV1_COMMON *const cm = &cpi->common;
const RATE_CONTROL *const rc = &cpi->rc;
+ const PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
const AV1EncoderConfig *const oxcf = &cpi->oxcf;
int active_best_quality;
int active_worst_quality = *active_worst;
- const int bit_depth = cm->seq_params.bit_depth;
+ const int bit_depth = cm->seq_params->bit_depth;
if (rc->frames_to_key <= 1 && oxcf->rc_cfg.mode == AOM_Q) {
// If the next frame is also a key frame or the current frame is the
@@ -1315,7 +1367,7 @@ static void get_intra_q_and_bounds(const AV1_COMP *cpi, int width, int height,
const int delta_qindex = av1_compute_qdelta(
rc, last_boosted_q, last_boosted_q * 0.25, bit_depth);
active_best_quality = AOMMAX(qindex + delta_qindex, rc->best_quality);
- } else if (rc->this_key_frame_forced) {
+ } else if (p_rc->this_key_frame_forced) {
// Handle the special case for key frames forced when we have reached
// the maximum key frame interval. Here force the Q to a range
// based on the ambient Q to reduce the risk of popping.
@@ -1324,8 +1376,8 @@ static void get_intra_q_and_bounds(const AV1_COMP *cpi, int width, int height,
int qindex;
if (is_stat_consumption_stage_twopass(cpi) &&
- cpi->twopass.last_kfgroup_zeromotion_pct >= STATIC_MOTION_THRESH) {
- qindex = AOMMIN(rc->last_kf_qindex, rc->last_boosted_qindex);
+ cpi->ppi->twopass.last_kfgroup_zeromotion_pct >= STATIC_MOTION_THRESH) {
+ qindex = AOMMIN(p_rc->last_kf_qindex, rc->last_boosted_qindex);
active_best_quality = qindex;
last_boosted_q = av1_convert_qindex_to_q(qindex, bit_depth);
delta_qindex = av1_compute_qdelta(rc, last_boosted_q,
@@ -1346,13 +1398,13 @@ static void get_intra_q_and_bounds(const AV1_COMP *cpi, int width, int height,
// Baseline value derived from cpi->active_worst_quality and kf boost.
active_best_quality =
- get_kf_active_quality(rc, active_worst_quality, bit_depth);
+ get_kf_active_quality(p_rc, active_worst_quality, bit_depth);
if (cpi->is_screen_content_type) {
active_best_quality /= 2;
}
if (is_stat_consumption_stage_twopass(cpi) &&
- cpi->twopass.kf_zeromotion_pct >= STATIC_KF_GROUP_THRESH) {
+ cpi->ppi->twopass.kf_zeromotion_pct >= STATIC_KF_GROUP_THRESH) {
active_best_quality /= 3;
}
@@ -1363,7 +1415,8 @@ static void get_intra_q_and_bounds(const AV1_COMP *cpi, int width, int height,
// Make a further adjustment based on the kf zero motion measure.
if (is_stat_consumption_stage_twopass(cpi))
- q_adj_factor += 0.05 - (0.001 * (double)cpi->twopass.kf_zeromotion_pct);
+ q_adj_factor +=
+ 0.05 - (0.001 * (double)cpi->ppi->twopass.kf_zeromotion_pct);
// Convert the adjustment factor to a qindex delta
// on active_best_quality.
@@ -1394,8 +1447,9 @@ static void adjust_active_best_and_worst_quality(const AV1_COMP *cpi,
int *active_best) {
const AV1_COMMON *const cm = &cpi->common;
const RATE_CONTROL *const rc = &cpi->rc;
+ const PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
const RefreshFrameFlagsInfo *const refresh_frame_flags = &cpi->refresh_frame;
- const int bit_depth = cpi->common.seq_params.bit_depth;
+ const int bit_depth = cpi->common.seq_params->bit_depth;
int active_best_quality = *active_best;
int active_worst_quality = *active_worst;
// Extension to max or min Q if undershoot or overshoot is outside
@@ -1406,20 +1460,21 @@ static void adjust_active_best_and_worst_quality(const AV1_COMP *cpi,
(refresh_frame_flags->golden_frame || is_intrl_arf_boost ||
refresh_frame_flags->alt_ref_frame))) {
active_best_quality -=
- (cpi->twopass.extend_minq + cpi->twopass.extend_minq_fast);
- active_worst_quality += (cpi->twopass.extend_maxq / 2);
+ (cpi->ppi->twopass.extend_minq + cpi->ppi->twopass.extend_minq_fast);
+ active_worst_quality += (cpi->ppi->twopass.extend_maxq / 2);
} else {
active_best_quality -=
- (cpi->twopass.extend_minq + cpi->twopass.extend_minq_fast) / 2;
- active_worst_quality += cpi->twopass.extend_maxq;
+ (cpi->ppi->twopass.extend_minq + cpi->ppi->twopass.extend_minq_fast) /
+ 2;
+ active_worst_quality += cpi->ppi->twopass.extend_maxq;
}
}
aom_clear_system_state();
#ifndef STRICT_RC
// Static forced key frames Q restrictions dealt with elsewhere.
- if (!(frame_is_intra_only(cm)) || !rc->this_key_frame_forced ||
- (cpi->twopass.last_kfgroup_zeromotion_pct < STATIC_MOTION_THRESH)) {
+ if (!(frame_is_intra_only(cm)) || !p_rc->this_key_frame_forced ||
+ (cpi->ppi->twopass.last_kfgroup_zeromotion_pct < STATIC_MOTION_THRESH)) {
const int qdelta = av1_frame_type_qdelta(cpi, active_worst_quality);
active_worst_quality =
AOMMAX(active_worst_quality + qdelta, active_best_quality);
@@ -1464,18 +1519,19 @@ static int get_q(const AV1_COMP *cpi, const int width, const int height,
const int active_best_quality) {
const AV1_COMMON *const cm = &cpi->common;
const RATE_CONTROL *const rc = &cpi->rc;
+ const PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
int q;
if (cpi->oxcf.rc_cfg.mode == AOM_Q ||
- (frame_is_intra_only(cm) && !rc->this_key_frame_forced &&
- cpi->twopass.kf_zeromotion_pct >= STATIC_KF_GROUP_THRESH &&
+ (frame_is_intra_only(cm) && !p_rc->this_key_frame_forced &&
+ cpi->ppi->twopass.kf_zeromotion_pct >= STATIC_KF_GROUP_THRESH &&
rc->frames_to_key > 1)) {
q = active_best_quality;
// Special case code to try and match quality with forced key frames.
- } else if (frame_is_intra_only(cm) && rc->this_key_frame_forced) {
+ } else if (frame_is_intra_only(cm) && p_rc->this_key_frame_forced) {
// If static since last kf use better of last boosted and last kf q.
- if (cpi->twopass.last_kfgroup_zeromotion_pct >= STATIC_MOTION_THRESH) {
- q = AOMMIN(rc->last_kf_qindex, rc->last_boosted_qindex);
+ if (cpi->ppi->twopass.last_kfgroup_zeromotion_pct >= STATIC_MOTION_THRESH) {
+ q = AOMMIN(p_rc->last_kf_qindex, rc->last_boosted_qindex);
} else {
q = AOMMIN(rc->last_boosted_qindex,
(active_best_quality + active_worst_quality) / 2);
@@ -1504,20 +1560,29 @@ static int get_active_best_quality(const AV1_COMP *const cpi,
const int active_worst_quality,
const int cq_level, const int gf_index) {
const AV1_COMMON *const cm = &cpi->common;
- const int bit_depth = cm->seq_params.bit_depth;
+ const int bit_depth = cm->seq_params->bit_depth;
const RATE_CONTROL *const rc = &cpi->rc;
+ const PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
const AV1EncoderConfig *const oxcf = &cpi->oxcf;
const RefreshFrameFlagsInfo *const refresh_frame_flags = &cpi->refresh_frame;
- const GF_GROUP *gf_group = &cpi->gf_group;
+ const GF_GROUP *gf_group = &cpi->ppi->gf_group;
const enum aom_rc_mode rc_mode = oxcf->rc_cfg.mode;
int *inter_minq;
+ int avg_frame_qindex_inter_frame;
ASSIGN_MINQ_TABLE(bit_depth, inter_minq);
int active_best_quality = 0;
const int is_intrl_arf_boost =
gf_group->update_type[gf_index] == INTNL_ARF_UPDATE;
- const int is_leaf_frame =
- !(refresh_frame_flags->golden_frame ||
- refresh_frame_flags->alt_ref_frame || is_intrl_arf_boost);
+ int is_leaf_frame =
+ !(gf_group->update_type[gf_index] == ARF_UPDATE ||
+ gf_group->update_type[gf_index] == GF_UPDATE || is_intrl_arf_boost);
+
+ // TODO(jingning): Consider to rework this hack that covers issues incurred
+ // in lightfield setting.
+ if (cm->tiles.large_scale) {
+ is_leaf_frame = !(refresh_frame_flags->golden_frame ||
+ refresh_frame_flags->alt_ref_frame || is_intrl_arf_boost);
+ }
const int is_overlay_frame = rc->is_src_frame_alt_ref;
if (is_leaf_frame || is_overlay_frame) {
@@ -1532,31 +1597,35 @@ static int get_active_best_quality(const AV1_COMP *const cpi,
return active_best_quality;
}
- // TODO(chengchen): can we remove this condition?
- if (rc_mode == AOM_Q && !refresh_frame_flags->alt_ref_frame &&
- !refresh_frame_flags->golden_frame && !is_intrl_arf_boost) {
- return cq_level;
- }
-
// Determine active_best_quality for frames that are not leaf or overlay.
int q = active_worst_quality;
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ // For quality simulation purpose - for parallel frames use previous
+ // avg_frame_qindex
+ avg_frame_qindex_inter_frame =
+ (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0)
+ ? cpi->ppi->temp_avg_frame_qindex[INTER_FRAME]
+ : rc->avg_frame_qindex[INTER_FRAME];
+#else
+ avg_frame_qindex_inter_frame = rc->avg_frame_qindex[INTER_FRAME];
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
// Use the lower of active_worst_quality and recent
// average Q as basis for GF/ARF best Q limit unless last frame was
// a key frame.
if (rc->frames_since_key > 1 &&
- rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality) {
- q = rc->avg_frame_qindex[INTER_FRAME];
+ avg_frame_qindex_inter_frame < active_worst_quality) {
+ q = avg_frame_qindex_inter_frame;
}
if (rc_mode == AOM_CQ && q < cq_level) q = cq_level;
- active_best_quality = get_gf_active_quality(rc, q, bit_depth);
+ active_best_quality = get_gf_active_quality(p_rc, q, bit_depth);
// Constrained quality use slightly lower active best.
if (rc_mode == AOM_CQ) active_best_quality = active_best_quality * 15 / 16;
const int min_boost = get_gf_high_motion_quality(q, bit_depth);
const int boost = min_boost - active_best_quality;
- active_best_quality = min_boost - (int)(boost * rc->arf_boost_factor);
+ active_best_quality = min_boost - (int)(boost * p_rc->arf_boost_factor);
if (!is_intrl_arf_boost) return active_best_quality;
- if (rc_mode == AOM_Q || rc_mode == AOM_CQ) active_best_quality = rc->arf_q;
+ if (rc_mode == AOM_Q || rc_mode == AOM_CQ) active_best_quality = p_rc->arf_q;
int this_height = gf_group_pyramid_level(gf_group, gf_index);
while (this_height > 1) {
active_best_quality = (active_best_quality + active_worst_quality + 1) / 2;
@@ -1565,6 +1634,87 @@ static int get_active_best_quality(const AV1_COMP *const cpi,
return active_best_quality;
}
+// Returns the q_index for a single frame in the GOP.
+// This function assumes that rc_mode == AOM_Q mode.
+int av1_q_mode_get_q_index(int base_q_index, const GF_GROUP *gf_group,
+ const int gf_index, int arf_q) {
+ const int is_intrl_arf_boost =
+ gf_group->update_type[gf_index] == INTNL_ARF_UPDATE;
+ int is_leaf_or_overlay_frame =
+ gf_group->update_type[gf_index] == LF_UPDATE ||
+ gf_group->update_type[gf_index] == OVERLAY_UPDATE ||
+ gf_group->update_type[gf_index] == INTNL_OVERLAY_UPDATE;
+
+ if (is_leaf_or_overlay_frame) return base_q_index;
+
+ if (!is_intrl_arf_boost) return arf_q;
+
+ int active_best_quality = arf_q;
+ int active_worst_quality = base_q_index;
+ int this_height = gf_group_pyramid_level(gf_group, gf_index);
+ while (this_height > 1) {
+ active_best_quality = (active_best_quality + active_worst_quality + 1) / 2;
+ --this_height;
+ }
+ return active_best_quality;
+}
+
+// Returns the q_index for the ARF in the GOP.
+int av1_get_arf_q_index(int base_q_index, int gfu_boost, int bit_depth,
+ int arf_boost_factor) {
+ int active_best_quality =
+ get_gf_active_quality_no_rc(gfu_boost, base_q_index, bit_depth);
+ const int min_boost = get_gf_high_motion_quality(base_q_index, bit_depth);
+ const int boost = min_boost - active_best_quality;
+ return min_boost - (int)(boost * arf_boost_factor);
+}
+
+static int rc_pick_q_and_bounds_q_mode(const AV1_COMP *cpi, int width,
+ int height, int gf_index,
+ int *bottom_index, int *top_index) {
+ const AV1_COMMON *const cm = &cpi->common;
+ const RATE_CONTROL *const rc = &cpi->rc;
+ const AV1EncoderConfig *const oxcf = &cpi->oxcf;
+ const int cq_level =
+ get_active_cq_level(rc, oxcf, frame_is_intra_only(cm), cpi->superres_mode,
+ cm->superres_scale_denominator);
+ int active_best_quality = 0;
+ int active_worst_quality = rc->active_worst_quality;
+ int q;
+
+ if (frame_is_intra_only(cm)) {
+ const int is_fwd_kf = cm->current_frame.frame_type == KEY_FRAME &&
+ cm->show_frame == 0 && cpi->no_show_fwd_kf;
+ get_intra_q_and_bounds(cpi, width, height, &active_best_quality,
+ &active_worst_quality, cq_level, is_fwd_kf);
+ } else {
+ // Active best quality limited by previous layer.
+ active_best_quality =
+ get_active_best_quality(cpi, active_worst_quality, cq_level, gf_index);
+ }
+
+ *top_index = active_worst_quality;
+ *bottom_index = active_best_quality;
+
+ *top_index = AOMMAX(*top_index, rc->best_quality);
+ *top_index = AOMMIN(*top_index, rc->worst_quality);
+
+ *bottom_index = AOMMAX(*bottom_index, rc->best_quality);
+ *bottom_index = AOMMIN(*bottom_index, rc->worst_quality);
+
+ q = active_best_quality;
+
+ q = AOMMAX(q, rc->best_quality);
+ q = AOMMIN(q, rc->worst_quality);
+
+ assert(*top_index <= rc->worst_quality && *top_index >= rc->best_quality);
+ assert(*bottom_index <= rc->worst_quality &&
+ *bottom_index >= rc->best_quality);
+ assert(q <= rc->worst_quality && q >= rc->best_quality);
+
+ return q;
+}
+
/*!\brief Picks q and q bounds given rate control parameters in \c cpi->rc.
*
* Handles the the general cases not covered by
@@ -1587,20 +1737,25 @@ static int rc_pick_q_and_bounds(const AV1_COMP *cpi, int width, int height,
const RATE_CONTROL *const rc = &cpi->rc;
const AV1EncoderConfig *const oxcf = &cpi->oxcf;
const RefreshFrameFlagsInfo *const refresh_frame_flags = &cpi->refresh_frame;
- const GF_GROUP *gf_group = &cpi->gf_group;
+ const GF_GROUP *gf_group = &cpi->ppi->gf_group;
assert(IMPLIES(has_no_stats_stage(cpi),
cpi->oxcf.rc_cfg.mode == AOM_Q &&
gf_group->update_type[gf_index] != ARF_UPDATE));
const int cq_level =
get_active_cq_level(rc, oxcf, frame_is_intra_only(cm), cpi->superres_mode,
cm->superres_scale_denominator);
- const int bit_depth = cm->seq_params.bit_depth;
+ const int bit_depth = cm->seq_params->bit_depth;
if (oxcf->q_cfg.use_fixed_qp_offsets) {
- return get_q_using_fixed_offsets(oxcf, rc, gf_group, gf_group->index,
+ return get_q_using_fixed_offsets(oxcf, rc, gf_group, cpi->gf_frame_index,
cq_level, bit_depth);
}
+ if (oxcf->rc_cfg.mode == AOM_Q) {
+ return rc_pick_q_and_bounds_q_mode(cpi, width, height, gf_index,
+ bottom_index, top_index);
+ }
+
int active_best_quality = 0;
int active_worst_quality = rc->active_worst_quality;
int q;
@@ -1620,8 +1775,7 @@ static int rc_pick_q_and_bounds(const AV1_COMP *cpi, int width, int height,
// Active best quality limited by previous layer.
const int pyramid_level = gf_group_pyramid_level(gf_group, gf_index);
- if ((pyramid_level <= 1) || (pyramid_level > MAX_ARF_LAYERS) ||
- (oxcf->rc_cfg.mode == AOM_Q)) {
+ if ((pyramid_level <= 1) || (pyramid_level > MAX_ARF_LAYERS)) {
active_best_quality = get_active_best_quality(cpi, active_worst_quality,
cq_level, gf_index);
} else {
@@ -1668,13 +1822,13 @@ static int rc_pick_q_and_bounds(const AV1_COMP *cpi, int width, int height,
return q;
}
-int av1_rc_pick_q_and_bounds(const AV1_COMP *cpi, RATE_CONTROL *rc, int width,
- int height, int gf_index, int *bottom_index,
- int *top_index) {
+int av1_rc_pick_q_and_bounds(const AV1_COMP *cpi, int width, int height,
+ int gf_index, int *bottom_index, int *top_index) {
+ PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
int q;
// TODO(sarahparker) merge no-stats vbr and altref q computation
// with rc_pick_q_and_bounds().
- const GF_GROUP *gf_group = &cpi->gf_group;
+ const GF_GROUP *gf_group = &cpi->ppi->gf_group;
if ((cpi->oxcf.rc_cfg.mode != AOM_Q ||
gf_group->update_type[gf_index] == ARF_UPDATE) &&
has_no_stats_stage(cpi)) {
@@ -1694,7 +1848,7 @@ int av1_rc_pick_q_and_bounds(const AV1_COMP *cpi, RATE_CONTROL *rc, int width,
q = rc_pick_q_and_bounds(cpi, width, height, gf_index, bottom_index,
top_index);
}
- if (gf_group->update_type[gf_index] == ARF_UPDATE) rc->arf_q = q;
+ if (gf_group->update_type[gf_index] == ARF_UPDATE) p_rc->arf_q = q;
return q;
}
@@ -1756,11 +1910,12 @@ void av1_rc_postencode_update(AV1_COMP *cpi, uint64_t bytes_used) {
const AV1_COMMON *const cm = &cpi->common;
const CurrentFrame *const current_frame = &cm->current_frame;
RATE_CONTROL *const rc = &cpi->rc;
- const GF_GROUP *const gf_group = &cpi->gf_group;
+ PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
+ const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
const RefreshFrameFlagsInfo *const refresh_frame_flags = &cpi->refresh_frame;
const int is_intrnl_arf =
- gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE;
+ gf_group->update_type[cpi->gf_frame_index] == INTNL_ARF_UPDATE;
const int qindex = cm->quant_params.base_qindex;
@@ -1776,7 +1931,7 @@ void av1_rc_postencode_update(AV1_COMP *cpi, uint64_t bytes_used) {
rc->avg_frame_qindex[KEY_FRAME] =
ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[KEY_FRAME] + qindex, 2);
} else {
- if ((cpi->use_svc && cpi->oxcf.rc_cfg.mode == AOM_CBR) ||
+ if ((cpi->ppi->use_svc && cpi->oxcf.rc_cfg.mode == AOM_CBR) ||
(!rc->is_src_frame_alt_ref &&
!(refresh_frame_flags->golden_frame || is_intrnl_arf ||
refresh_frame_flags->alt_ref_frame))) {
@@ -1784,7 +1939,7 @@ void av1_rc_postencode_update(AV1_COMP *cpi, uint64_t bytes_used) {
rc->avg_frame_qindex[INTER_FRAME] =
ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[INTER_FRAME] + qindex, 2);
rc->ni_frames++;
- rc->tot_q += av1_convert_qindex_to_q(qindex, cm->seq_params.bit_depth);
+ rc->tot_q += av1_convert_qindex_to_q(qindex, cm->seq_params->bit_depth);
rc->avg_q = rc->tot_q / rc->ni_frames;
// Calculate the average Q for normal inter frames (not key or GFU
// frames).
@@ -1792,7 +1947,23 @@ void av1_rc_postencode_update(AV1_COMP *cpi, uint64_t bytes_used) {
rc->ni_av_qi = rc->ni_tot_qi / rc->ni_frames;
}
}
-
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ /* TODO(FPMT): The current update is happening in cpi->rc.avg_frame_qindex;
+ * this needs to be handled appropriately in the final FPMT implementation
+ * to carry these values to subsequent frames. The avg_frame_qindex update
+ * is accumulated across frames, so the values from all individual parallel
+ * frames need to be taken into account after all the parallel frames are
+ * encoded.
+ *
+ * The variable temp_avg_frame_qindex is introduced only for quality
+ * simulation purposes; it retains the value from before the parallel encode
+ * frames. The variable is updated based on the update flag.
+ */
+ if (cpi->do_frame_data_update && !rc->is_src_frame_alt_ref) {
+ for (int index = 0; index < FRAME_TYPES; index++)
+ cpi->ppi->temp_avg_frame_qindex[index] = rc->avg_frame_qindex[index];
+ }
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
// Keep record of last boosted (KF/GF/ARF) Q value.
// If the current frame is coded at a lower Q then we also update it.
// If all mbs in this group are skipped only update if the Q value is
@@ -1800,12 +1971,12 @@ void av1_rc_postencode_update(AV1_COMP *cpi, uint64_t bytes_used) {
// This is used to help set quality in forced key frames to reduce popping
if ((qindex < rc->last_boosted_qindex) ||
(current_frame->frame_type == KEY_FRAME) ||
- (!rc->constrained_gf_group &&
+ (!p_rc->constrained_gf_group &&
(refresh_frame_flags->alt_ref_frame || is_intrnl_arf ||
(refresh_frame_flags->golden_frame && !rc->is_src_frame_alt_ref)))) {
rc->last_boosted_qindex = qindex;
}
- if (current_frame->frame_type == KEY_FRAME) rc->last_kf_qindex = qindex;
+ if (current_frame->frame_type == KEY_FRAME) p_rc->last_kf_qindex = qindex;
update_buffer_level(cpi, rc->projected_frame_size);
rc->prev_avg_frame_bandwidth = rc->avg_frame_bandwidth;
@@ -1853,6 +2024,7 @@ void av1_rc_postencode_update_drop_frame(AV1_COMP *cpi) {
cpi->rc.frames_to_key--;
cpi->rc.rc_2_frame = 0;
cpi->rc.rc_1_frame = 0;
+ cpi->rc.prev_avg_frame_bandwidth = cpi->rc.avg_frame_bandwidth;
}
int av1_find_qindex(double desired_q, aom_bit_depth_t bit_depth,
@@ -1954,7 +2126,7 @@ void av1_rc_set_gf_interval_range(const AV1_COMP *const cpi,
* The no.of.stats available in the case of LAP is limited,
* hence setting to max_gf_interval.
*/
- if (cpi->lap_enabled)
+ if (cpi->ppi->lap_enabled)
rc->static_scene_max_gf_interval = rc->max_gf_interval + 1;
else
rc->static_scene_max_gf_interval = MAX_STATIC_GF_GROUP_LENGTH;
@@ -2003,8 +2175,8 @@ static void vbr_rate_correction(AV1_COMP *cpi, int *this_frame_target) {
RATE_CONTROL *const rc = &cpi->rc;
int64_t vbr_bits_off_target = rc->vbr_bits_off_target;
const int stats_count =
- cpi->twopass.stats_buf_ctx->total_stats != NULL
- ? (int)cpi->twopass.stats_buf_ctx->total_stats->count
+ cpi->ppi->twopass.stats_buf_ctx->total_stats != NULL
+ ? (int)cpi->ppi->twopass.stats_buf_ctx->total_stats->count
: 0;
const int frame_window = AOMMIN(
16, (int)(stats_count - (int)cpi->common.current_frame.frame_number));
@@ -2048,16 +2220,17 @@ int av1_calc_pframe_target_size_one_pass_vbr(
const AV1_COMP *const cpi, FRAME_UPDATE_TYPE frame_update_type) {
static const int af_ratio = 10;
const RATE_CONTROL *const rc = &cpi->rc;
+ const PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
int64_t target;
#if USE_ALTREF_FOR_ONE_PASS
if (frame_update_type == KF_UPDATE || frame_update_type == GF_UPDATE ||
frame_update_type == ARF_UPDATE) {
- target = ((int64_t)rc->avg_frame_bandwidth * rc->baseline_gf_interval *
+ target = ((int64_t)rc->avg_frame_bandwidth * p_rc->baseline_gf_interval *
af_ratio) /
- (rc->baseline_gf_interval + af_ratio - 1);
+ (p_rc->baseline_gf_interval + af_ratio - 1);
} else {
- target = ((int64_t)rc->avg_frame_bandwidth * rc->baseline_gf_interval) /
- (rc->baseline_gf_interval + af_ratio - 1);
+ target = ((int64_t)rc->avg_frame_bandwidth * p_rc->baseline_gf_interval) /
+ (p_rc->baseline_gf_interval + af_ratio - 1);
}
if (target > INT_MAX) target = INT_MAX;
#else
@@ -2077,9 +2250,10 @@ int av1_calc_pframe_target_size_one_pass_cbr(
const AV1_COMP *cpi, FRAME_UPDATE_TYPE frame_update_type) {
const AV1EncoderConfig *oxcf = &cpi->oxcf;
const RATE_CONTROL *rc = &cpi->rc;
+ const PRIMARY_RATE_CONTROL *p_rc = &cpi->ppi->p_rc;
const RateControlCfg *rc_cfg = &oxcf->rc_cfg;
- const int64_t diff = rc->optimal_buffer_level - rc->buffer_level;
- const int64_t one_pct_bits = 1 + rc->optimal_buffer_level / 100;
+ const int64_t diff = p_rc->optimal_buffer_level - rc->buffer_level;
+ const int64_t one_pct_bits = 1 + p_rc->optimal_buffer_level / 100;
int min_frame_target =
AOMMAX(rc->avg_frame_bandwidth >> 4, FRAME_OVERHEAD_BITS);
int target;
@@ -2087,17 +2261,17 @@ int av1_calc_pframe_target_size_one_pass_cbr(
if (rc_cfg->gf_cbr_boost_pct) {
const int af_ratio_pct = rc_cfg->gf_cbr_boost_pct + 100;
if (frame_update_type == GF_UPDATE || frame_update_type == OVERLAY_UPDATE) {
- target =
- (rc->avg_frame_bandwidth * rc->baseline_gf_interval * af_ratio_pct) /
- (rc->baseline_gf_interval * 100 + af_ratio_pct - 100);
+ target = (rc->avg_frame_bandwidth * p_rc->baseline_gf_interval *
+ af_ratio_pct) /
+ (p_rc->baseline_gf_interval * 100 + af_ratio_pct - 100);
} else {
- target = (rc->avg_frame_bandwidth * rc->baseline_gf_interval * 100) /
- (rc->baseline_gf_interval * 100 + af_ratio_pct - 100);
+ target = (rc->avg_frame_bandwidth * p_rc->baseline_gf_interval * 100) /
+ (p_rc->baseline_gf_interval * 100 + af_ratio_pct - 100);
}
} else {
target = rc->avg_frame_bandwidth;
}
- if (cpi->use_svc) {
+ if (cpi->ppi->use_svc) {
// Note that for layers, avg_frame_bandwidth is the cumulative
// per-frame-bandwidth. For the target size of this frame, use the
// layer average frame size (i.e., non-cumulative per-frame-bw).
@@ -2129,11 +2303,12 @@ int av1_calc_pframe_target_size_one_pass_cbr(
int av1_calc_iframe_target_size_one_pass_cbr(const AV1_COMP *cpi) {
const RATE_CONTROL *rc = &cpi->rc;
+ const PRIMARY_RATE_CONTROL *p_rc = &cpi->ppi->p_rc;
int target;
if (cpi->common.current_frame.frame_number == 0) {
- target = ((rc->starting_buffer_level / 2) > INT_MAX)
+ target = ((p_rc->starting_buffer_level / 2) > INT_MAX)
? INT_MAX
- : (int)(rc->starting_buffer_level / 2);
+ : (int)(p_rc->starting_buffer_level / 2);
} else {
int kf_boost = 32;
double framerate = cpi->framerate;
@@ -2177,7 +2352,7 @@ void av1_set_reference_structure_one_pass_rt(AV1_COMP *cpi, int gf_update) {
int gld_idx = 0;
int alt_ref_idx = 0;
ext_refresh_frame_flags->update_pending = 1;
- svc->external_ref_frame_config = 1;
+ svc->set_ref_frame_config = 1;
ext_flags->ref_frame_flags = 0;
ext_refresh_frame_flags->last_frame = 1;
ext_refresh_frame_flags->golden_frame = 0;
@@ -2268,9 +2443,9 @@ static void rc_scene_detection_onepass_rt(AV1_COMP *cpi) {
int num_samples = 0;
const int thresh = 6;
// SAD is computed on 64x64 blocks
- const int sb_size_by_mb = (cm->seq_params.sb_size == BLOCK_128X128)
- ? (cm->seq_params.mib_size >> 1)
- : cm->seq_params.mib_size;
+ const int sb_size_by_mb = (cm->seq_params->sb_size == BLOCK_128X128)
+ ? (cm->seq_params->mib_size >> 1)
+ : cm->seq_params->mib_size;
const int sb_cols = (num_mi_cols + sb_size_by_mb - 1) / sb_size_by_mb;
const int sb_rows = (num_mi_rows + sb_size_by_mb - 1) / sb_size_by_mb;
uint64_t sum_sq_thresh = 10000; // sum = sqrt(thresh / 64*64)) ~1.5
@@ -2286,12 +2461,12 @@ static void rc_scene_detection_onepass_rt(AV1_COMP *cpi) {
(sbi_row < sb_rows - 1 && sbi_col < sb_cols - 1) &&
((sbi_row % 2 == 0 && sbi_col % 2 == 0) ||
(sbi_row % 2 != 0 && sbi_col % 2 != 0)))) {
- tmp_sad = cpi->fn_ptr[bsize].sdf(src_y, src_ystride, last_src_y,
- last_src_ystride);
+ tmp_sad = cpi->ppi->fn_ptr[bsize].sdf(src_y, src_ystride, last_src_y,
+ last_src_ystride);
if (check_light_change) {
unsigned int sse, variance;
- variance = cpi->fn_ptr[bsize].vf(src_y, src_ystride, last_src_y,
- last_src_ystride, &sse);
+ variance = cpi->ppi->fn_ptr[bsize].vf(
+ src_y, src_ystride, last_src_y, last_src_ystride, &sse);
// Note: sse - variance = ((sum * sum) >> 12)
// Detect large lighting change.
if (variance < (sse >> 1) && (sse - variance) > sum_sq_thresh) {
@@ -2344,7 +2519,8 @@ static void rc_scene_detection_onepass_rt(AV1_COMP *cpi) {
static int set_gf_interval_update_onepass_rt(AV1_COMP *cpi,
FRAME_TYPE frame_type) {
RATE_CONTROL *const rc = &cpi->rc;
- GF_GROUP *const gf_group = &cpi->gf_group;
+ PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
+ GF_GROUP *const gf_group = &cpi->ppi->gf_group;
ResizePendingParams *const resize_pending_params =
&cpi->resize_pending_params;
int gf_update = 0;
@@ -2360,34 +2536,34 @@ static int set_gf_interval_update_onepass_rt(AV1_COMP *cpi,
if (cpi->oxcf.q_cfg.aq_mode == CYCLIC_REFRESH_AQ)
av1_cyclic_refresh_set_golden_update(cpi);
else
- rc->baseline_gf_interval = MAX_GF_INTERVAL;
- if (rc->baseline_gf_interval > rc->frames_to_key)
- rc->baseline_gf_interval = rc->frames_to_key;
- rc->gfu_boost = DEFAULT_GF_BOOST_RT;
- rc->constrained_gf_group =
- (rc->baseline_gf_interval >= rc->frames_to_key) ? 1 : 0;
- rc->frames_till_gf_update_due = rc->baseline_gf_interval;
- gf_group->index = 0;
+ p_rc->baseline_gf_interval = MAX_GF_INTERVAL;
+ if (p_rc->baseline_gf_interval > rc->frames_to_key)
+ p_rc->baseline_gf_interval = rc->frames_to_key;
+ p_rc->gfu_boost = DEFAULT_GF_BOOST_RT;
+ p_rc->constrained_gf_group =
+ (p_rc->baseline_gf_interval >= rc->frames_to_key) ? 1 : 0;
+ rc->frames_till_gf_update_due = p_rc->baseline_gf_interval;
+ cpi->gf_frame_index = 0;
// SVC does not use GF as periodic boost.
// TODO(marpan): Find better way to disable this for SVC.
- if (cpi->use_svc) {
+ if (cpi->ppi->use_svc) {
SVC *const svc = &cpi->svc;
- rc->baseline_gf_interval = MAX_STATIC_GF_GROUP_LENGTH - 1;
- rc->gfu_boost = 1;
- rc->constrained_gf_group = 0;
- rc->frames_till_gf_update_due = rc->baseline_gf_interval;
+ p_rc->baseline_gf_interval = MAX_STATIC_GF_GROUP_LENGTH - 1;
+ p_rc->gfu_boost = 1;
+ p_rc->constrained_gf_group = 0;
+ rc->frames_till_gf_update_due = p_rc->baseline_gf_interval;
for (int layer = 0;
layer < svc->number_spatial_layers * svc->number_temporal_layers;
++layer) {
LAYER_CONTEXT *const lc = &svc->layer_context[layer];
- lc->rc.baseline_gf_interval = rc->baseline_gf_interval;
- lc->rc.gfu_boost = rc->gfu_boost;
- lc->rc.constrained_gf_group = rc->constrained_gf_group;
+ lc->p_rc.baseline_gf_interval = p_rc->baseline_gf_interval;
+ lc->p_rc.gfu_boost = p_rc->gfu_boost;
+ lc->p_rc.constrained_gf_group = p_rc->constrained_gf_group;
lc->rc.frames_till_gf_update_due = rc->frames_till_gf_update_due;
lc->group_index = 0;
}
}
- gf_group->size = rc->baseline_gf_interval;
+ gf_group->size = p_rc->baseline_gf_interval;
gf_group->update_type[0] =
(frame_type == KEY_FRAME) ? KF_UPDATE : GF_UPDATE;
gf_update = 1;
@@ -2398,6 +2574,7 @@ static int set_gf_interval_update_onepass_rt(AV1_COMP *cpi,
static void resize_reset_rc(AV1_COMP *cpi, int resize_width, int resize_height,
int prev_width, int prev_height) {
RATE_CONTROL *const rc = &cpi->rc;
+ PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
SVC *const svc = &cpi->svc;
double tot_scale_change = 1.0;
int target_bits_per_frame;
@@ -2406,8 +2583,8 @@ static void resize_reset_rc(AV1_COMP *cpi, int resize_width, int resize_height,
tot_scale_change = (double)(resize_width * resize_height) /
(double)(prev_width * prev_height);
// Reset buffer level to optimal, update target size.
- rc->buffer_level = rc->optimal_buffer_level;
- rc->bits_off_target = rc->optimal_buffer_level;
+ rc->buffer_level = p_rc->optimal_buffer_level;
+ rc->bits_off_target = p_rc->optimal_buffer_level;
rc->this_frame_target =
av1_calc_pframe_target_size_one_pass_cbr(cpi, INTER_FRAME);
target_bits_per_frame = rc->this_frame_target;
@@ -2431,8 +2608,8 @@ static void resize_reset_rc(AV1_COMP *cpi, int resize_width, int resize_height,
svc->number_temporal_layers +
tl];
lc->rc.resize_state = rc->resize_state;
- lc->rc.buffer_level = lc->rc.optimal_buffer_level;
- lc->rc.bits_off_target = lc->rc.optimal_buffer_level;
+ lc->rc.buffer_level = lc->p_rc.optimal_buffer_level;
+ lc->rc.bits_off_target = lc->p_rc.optimal_buffer_level;
lc->rc.rate_correction_factors[INTER_FRAME] =
rc->rate_correction_factors[INTER_FRAME];
}
@@ -2464,6 +2641,7 @@ static void resize_reset_rc(AV1_COMP *cpi, int resize_width, int resize_height,
static void dynamic_resize_one_pass_cbr(AV1_COMP *cpi) {
const AV1_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
+ PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
RESIZE_ACTION resize_action = NO_RESIZE;
const int avg_qp_thr1 = 70;
const int avg_qp_thr2 = 50;
@@ -2486,7 +2664,7 @@ static void dynamic_resize_one_pass_cbr(AV1_COMP *cpi) {
if (cpi->rc.frames_since_key > cpi->framerate) {
const int window = AOMMIN(30, (int)(2 * cpi->framerate));
rc->resize_avg_qp += rc->last_q[INTER_FRAME];
- if (cpi->rc.buffer_level < (int)(30 * rc->optimal_buffer_level / 100))
+ if (cpi->rc.buffer_level < (int)(30 * p_rc->optimal_buffer_level / 100))
++rc->resize_buffer_underflow;
++rc->resize_count;
// Check for resize action every "window" frames.
@@ -2548,8 +2726,9 @@ void av1_get_one_pass_rt_params(AV1_COMP *cpi,
EncodeFrameParams *const frame_params,
unsigned int frame_flags) {
RATE_CONTROL *const rc = &cpi->rc;
+ PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
AV1_COMMON *const cm = &cpi->common;
- GF_GROUP *const gf_group = &cpi->gf_group;
+ GF_GROUP *const gf_group = &cpi->ppi->gf_group;
SVC *const svc = &cpi->svc;
ResizePendingParams *const resize_pending_params =
&cpi->resize_pending_params;
@@ -2559,35 +2738,35 @@ void av1_get_one_pass_rt_params(AV1_COMP *cpi,
svc->number_temporal_layers);
// Turn this on to explicitly set the reference structure rather than
// relying on internal/default structure.
- if (cpi->use_svc) {
+ if (cpi->ppi->use_svc) {
av1_update_temporal_layer_framerate(cpi);
av1_restore_layer_context(cpi);
}
// Set frame type.
- if ((!cpi->use_svc && rc->frames_to_key == 0) ||
- (cpi->use_svc && svc->spatial_layer_id == 0 &&
+ if ((!cpi->ppi->use_svc && rc->frames_to_key == 0) ||
+ (cpi->ppi->use_svc && svc->spatial_layer_id == 0 &&
(cpi->oxcf.kf_cfg.key_freq_max == 0 ||
svc->current_superframe % cpi->oxcf.kf_cfg.key_freq_max == 0)) ||
(frame_flags & FRAMEFLAGS_KEY)) {
frame_params->frame_type = KEY_FRAME;
- rc->this_key_frame_forced =
+ p_rc->this_key_frame_forced =
cm->current_frame.frame_number != 0 && rc->frames_to_key == 0;
rc->frames_to_key = cpi->oxcf.kf_cfg.key_freq_max;
- rc->kf_boost = DEFAULT_KF_BOOST_RT;
- gf_group->update_type[gf_group->index] = KF_UPDATE;
- gf_group->frame_type[gf_group->index] = KEY_FRAME;
- gf_group->refbuf_state[gf_group->index] = REFBUF_RESET;
- if (cpi->use_svc) {
+ p_rc->kf_boost = DEFAULT_KF_BOOST_RT;
+ gf_group->update_type[cpi->gf_frame_index] = KF_UPDATE;
+ gf_group->frame_type[cpi->gf_frame_index] = KEY_FRAME;
+ gf_group->refbuf_state[cpi->gf_frame_index] = REFBUF_RESET;
+ if (cpi->ppi->use_svc) {
if (cm->current_frame.frame_number > 0)
av1_svc_reset_temporal_layers(cpi, 1);
svc->layer_context[layer].is_key_frame = 1;
}
} else {
frame_params->frame_type = INTER_FRAME;
- gf_group->update_type[gf_group->index] = LF_UPDATE;
- gf_group->frame_type[gf_group->index] = INTER_FRAME;
- gf_group->refbuf_state[gf_group->index] = REFBUF_UPDATE;
- if (cpi->use_svc) {
+ gf_group->update_type[cpi->gf_frame_index] = LF_UPDATE;
+ gf_group->frame_type[cpi->gf_frame_index] = INTER_FRAME;
+ gf_group->refbuf_state[cpi->gf_frame_index] = REFBUF_UPDATE;
+ if (cpi->ppi->use_svc) {
LAYER_CONTEXT *lc = &svc->layer_context[layer];
lc->is_key_frame =
svc->spatial_layer_id == 0
@@ -2596,7 +2775,7 @@ void av1_get_one_pass_rt_params(AV1_COMP *cpi,
}
}
// Check for scene change, for non-SVC for now.
- if (!cpi->use_svc && cpi->sf.rt_sf.check_scene_detection)
+ if (!cpi->ppi->use_svc && cpi->sf.rt_sf.check_scene_detection)
rc_scene_detection_onepass_rt(cpi);
// Check for dynamic resize, for single spatial layer for now.
// For temporal layers only check on base temporal layer.
@@ -2628,14 +2807,14 @@ void av1_get_one_pass_rt_params(AV1_COMP *cpi,
target = av1_calc_iframe_target_size_one_pass_cbr(cpi);
} else {
target = av1_calc_pframe_target_size_one_pass_cbr(
- cpi, gf_group->update_type[gf_group->index]);
+ cpi, gf_group->update_type[cpi->gf_frame_index]);
}
} else {
if (frame_params->frame_type == KEY_FRAME) {
target = av1_calc_iframe_target_size_one_pass_vbr(cpi);
} else {
target = av1_calc_pframe_target_size_one_pass_vbr(
- cpi, gf_group->update_type[gf_group->index]);
+ cpi, gf_group->update_type[cpi->gf_frame_index]);
}
}
if (cpi->oxcf.rc_cfg.mode == AOM_Q)
@@ -2644,11 +2823,21 @@ void av1_get_one_pass_rt_params(AV1_COMP *cpi,
av1_rc_set_frame_target(cpi, target, cm->width, cm->height);
rc->base_frame_target = target;
cm->current_frame.frame_type = frame_params->frame_type;
+ // For fixed mode SVC: if KSVC is enabled remove inter layer
+ // prediction on spatial enhancement layer frames for frames
+ // whose base is not KEY frame.
+ if (cpi->ppi->use_svc && !svc->use_flexible_mode && svc->ksvc_fixed_mode &&
+ svc->number_spatial_layers > 1 &&
+ !svc->layer_context[layer].is_key_frame) {
+ ExternalFlags *const ext_flags = &cpi->ext_flags;
+ ext_flags->ref_frame_flags ^= AOM_GOLD_FLAG;
+ }
}
int av1_encodedframe_overshoot_cbr(AV1_COMP *cpi, int *q) {
AV1_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
+ PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
SPEED_FEATURES *const sf = &cpi->sf;
int thresh_qp = 7 * (rc->worst_quality >> 3);
// Lower thresh_qp for video (more overshoot at lower Q) to be
@@ -2670,8 +2859,8 @@ int av1_encodedframe_overshoot_cbr(AV1_COMP *cpi, int *q) {
// have settled down to a very different (low QP) state, then not adjusting
// them may cause next frame to select low QP and overshoot again.
cpi->rc.avg_frame_qindex[INTER_FRAME] = *q;
- rc->buffer_level = rc->optimal_buffer_level;
- rc->bits_off_target = rc->optimal_buffer_level;
+ rc->buffer_level = p_rc->optimal_buffer_level;
+ rc->bits_off_target = p_rc->optimal_buffer_level;
// Reset rate under/over-shoot flags.
cpi->rc.rc_1_frame = 0;
cpi->rc.rc_2_frame = 0;
@@ -2680,7 +2869,7 @@ int av1_encodedframe_overshoot_cbr(AV1_COMP *cpi, int *q) {
(int)(((uint64_t)target_size << BPER_MB_NORMBITS) / cm->mi_params.MBs);
// Rate correction factor based on target_bits_per_mb and qp (==max_QP).
// This comes from the inverse computation of vp9_rc_bits_per_mb().
- q2 = av1_convert_qindex_to_q(*q, cm->seq_params.bit_depth);
+ q2 = av1_convert_qindex_to_q(*q, cm->seq_params->bit_depth);
enumerator = 1800000; // Factor for inter frame.
enumerator += (int)(enumerator * q2) >> 12;
new_correction_factor = (double)target_bits_per_mb * q2 / enumerator;
diff --git a/third_party/libaom/source/libaom/av1/encoder/ratectrl.h b/third_party/libaom/source/libaom/av1/encoder/ratectrl.h
index 3f1756f5ca..a1567f038c 100644
--- a/third_party/libaom/source/libaom/av1/encoder/ratectrl.h
+++ b/third_party/libaom/source/libaom/av1/encoder/ratectrl.h
@@ -129,11 +129,6 @@ typedef struct {
int this_frame_target; // Actual frame target after rc adjustment.
/*!
- * Target bit budget for the current GF / ARF group of frame.
- */
- int64_t gf_group_bits;
-
- /*!
* Projected size for current frame
*/
int projected_frame_size;
@@ -159,20 +154,6 @@ typedef struct {
int last_boosted_qindex;
/*!
- * Q used for last boosted (non leaf) frame
- */
- int last_kf_qindex;
-
- /*!
- * Boost factor used to calculate the extra bits allocated to ARFs and GFs
- */
- int gfu_boost;
- /*!
- * Boost factor used to calculate the extra bits allocated to the key frame
- */
- int kf_boost;
-
- /*!
* Correction factors used to adjust the q estimate for a given target rate
* in the encode loop.
*/
@@ -193,28 +174,10 @@ typedef struct {
*/
int intervals_till_gf_calculate_due;
- /*!
- * Stores the determined gf group lengths for a set of gf groups
- */
- int gf_intervals[MAX_NUM_GF_INTERVALS];
-
- /*!
- * The current group's index into gf_intervals[]
- */
- int cur_gf_index;
-
/*!\cond */
- int num_regions;
- REGIONS regions[MAX_FIRSTPASS_ANALYSIS_FRAMES];
- double cor_coeff[MAX_FIRSTPASS_ANALYSIS_FRAMES];
- int regions_offset; // offset of regions from the last keyframe
- int frames_till_regions_update;
-
int min_gf_interval;
int max_gf_interval;
int static_scene_max_gf_interval;
- int baseline_gf_interval;
- int constrained_gf_group;
/*!\endcond */
/*!
* Frames before the next key frame
@@ -222,8 +185,6 @@ typedef struct {
int frames_to_key;
/*!\cond */
int frames_since_key;
- int this_key_frame_forced;
- int next_key_frame_forced;
int is_src_frame_alt_ref;
int sframe_due;
@@ -269,18 +230,6 @@ typedef struct {
*/
int best_quality;
- /*!
- * Initial buffuer level in ms for CBR / low delay encoding
- */
- int64_t starting_buffer_level;
- /*!
- * Optimum / target buffuer level in ms for CBR / low delay encoding
- */
- int64_t optimal_buffer_level;
- /*!
- * Maximum target buffuer level in ms for CBR / low delay encoding
- */
- int64_t maximum_buffer_size;
/*!\cond */
// rate control history for last frame(1) and the frame before(2).
@@ -292,14 +241,8 @@ typedef struct {
int q_1_frame;
int q_2_frame;
- float_t arf_boost_factor;
-
/*!\endcond */
/*!
- * Q index used for ALT frame
- */
- int arf_q;
- /*!
* Proposed maximum alloed Q for current frame
*/
int active_worst_quality;
@@ -309,35 +252,119 @@ typedef struct {
int active_best_quality[MAX_ARF_LAYERS + 1];
/*!\cond */
+ // Track amount of low motion in scene
+ int avg_frame_low_motion;
+
+ // For dynamic resize, 1 pass cbr.
+ RESIZE_STATE resize_state;
+ int resize_avg_qp;
+ int resize_buffer_underflow;
+ int resize_count;
+ /*!\endcond */
+} RATE_CONTROL;
+
+/*!
+ * \brief Primary Rate Control parameters and status
+ */
+typedef struct {
+ // Sub-gop level rate targeting variables
+
+ /*!
+ * Target bit budget for the current GF / ARF group of frame.
+ */
+ int64_t gf_group_bits;
+
+ /*!
+ * Boost factor used to calculate the extra bits allocated to the key frame
+ */
+ int kf_boost;
+
+ /*!
+ * Boost factor used to calculate the extra bits allocated to ARFs and GFs
+ */
+ int gfu_boost;
+
+ /*!
+ * Stores the determined gf group lengths for a set of gf groups
+ */
+ int gf_intervals[MAX_NUM_GF_INTERVALS];
+
+ /*!
+ * The current group's index into gf_intervals[]
+ */
+ int cur_gf_index;
+
+ /*!\cond */
+ int num_regions;
+
+ REGIONS regions[MAX_FIRSTPASS_ANALYSIS_FRAMES];
+ int regions_offset; // offset of regions from the last keyframe
+ int frames_till_regions_update;
+
+ int baseline_gf_interval;
+
+ int constrained_gf_group;
+
+ int this_key_frame_forced;
+
+ int next_key_frame_forced;
+ /*!\endcond */
+
+ /*!
+ * Initial buffer level in ms for CBR / low delay encoding
+ */
+ int64_t starting_buffer_level;
+
+ /*!
+ * Optimum / target buffer level in ms for CBR / low delay encoding
+ */
+ int64_t optimal_buffer_level;
+
+ /*!
+ * Maximum target buffer level in ms for CBR / low delay encoding
+ */
+ int64_t maximum_buffer_size;
+
+ /*!
+ * Q index used for ALT frame
+ */
+ int arf_q;
+
+ /*!\cond */
+ float_t arf_boost_factor;
+
int base_layer_qp;
// Total number of stats used only for kf_boost calculation.
int num_stats_used_for_kf_boost;
+
// Total number of stats used only for gfu_boost calculation.
int num_stats_used_for_gfu_boost;
+
// Total number of stats required by gfu_boost calculation.
int num_stats_required_for_gfu_boost;
+
int next_is_fwd_key;
+
int enable_scenecut_detection;
- int use_arf_in_this_kf_group;
- // Track amount of low motion in scene
- int avg_frame_low_motion;
- // For dynamic resize, 1 pass cbr.
- RESIZE_STATE resize_state;
- int resize_avg_qp;
- int resize_buffer_underflow;
- int resize_count;
+ int use_arf_in_this_kf_group;
/*!\endcond */
-} RATE_CONTROL;
-/*!\cond */
+ /*!
+ * Q used for last boosted (non leaf) frame
+ */
+ int last_kf_qindex;
+} PRIMARY_RATE_CONTROL;
struct AV1_COMP;
struct AV1EncoderConfig;
+void av1_primary_rc_init(const struct AV1EncoderConfig *oxcf,
+ PRIMARY_RATE_CONTROL *p_rc);
+
void av1_rc_init(const struct AV1EncoderConfig *oxcf, int pass,
- RATE_CONTROL *rc);
+ RATE_CONTROL *rc, const PRIMARY_RATE_CONTROL *const p_rc);
int av1_estimate_bits_at_q(FRAME_TYPE frame_kind, int q, int mbs,
double correction_factor, aom_bit_depth_t bit_depth,
@@ -415,7 +442,6 @@ void av1_rc_compute_frame_size_bounds(const struct AV1_COMP *cpi,
*
* \ingroup rate_control
* \param[in] cpi Top level encoder structure
- * \param[in,out] rc Top level rate control structure
* \param[in] width Coded frame width
* \param[in] height Coded frame height
* \param[in] gf_index Index of this frame in the golden frame group
@@ -424,9 +450,8 @@ void av1_rc_compute_frame_size_bounds(const struct AV1_COMP *cpi,
* \return Returns selected q index to be used for encoding this frame.
* Also, updates \c rc->arf_q.
*/
-int av1_rc_pick_q_and_bounds(const struct AV1_COMP *cpi, RATE_CONTROL *rc,
- int width, int height, int gf_index,
- int *bottom_index, int *top_index);
+int av1_rc_pick_q_and_bounds(const struct AV1_COMP *cpi, int width, int height,
+ int gf_index, int *bottom_index, int *top_index);
/*!\brief Estimates q to achieve a target bits per frame
*
diff --git a/third_party/libaom/source/libaom/av1/encoder/rc_utils.h b/third_party/libaom/source/libaom/av1/encoder/rc_utils.h
index 98cec2e003..0a9d02d17b 100644
--- a/third_party/libaom/source/libaom/av1/encoder/rc_utils.h
+++ b/third_party/libaom/source/libaom/av1/encoder/rc_utils.h
@@ -19,18 +19,45 @@
extern "C" {
#endif
-static AOM_INLINE void set_rc_buffer_sizes(RATE_CONTROL *rc,
- const RateControlCfg *rc_cfg) {
+static AOM_INLINE void check_reset_rc_flag(AV1_COMP *cpi) {
+ RATE_CONTROL *rc = &cpi->rc;
+ PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
+ if (cpi->common.current_frame.frame_number >
+ (unsigned int)cpi->svc.number_spatial_layers) {
+ if (cpi->ppi->use_svc) {
+ av1_svc_check_reset_layer_rc_flag(cpi);
+ } else {
+ if (rc->avg_frame_bandwidth > (3 * rc->prev_avg_frame_bandwidth >> 1) ||
+ rc->avg_frame_bandwidth < (rc->prev_avg_frame_bandwidth >> 1)) {
+ rc->rc_1_frame = 0;
+ rc->rc_2_frame = 0;
+ rc->bits_off_target = p_rc->optimal_buffer_level;
+ rc->buffer_level = p_rc->optimal_buffer_level;
+ }
+ }
+ }
+}
+
+static AOM_INLINE void set_rc_buffer_sizes(AV1_COMP *cpi) {
+ RATE_CONTROL *rc = &cpi->rc;
+ PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
+ const RateControlCfg *const rc_cfg = &cpi->oxcf.rc_cfg;
+
const int64_t bandwidth = rc_cfg->target_bandwidth;
const int64_t starting = rc_cfg->starting_buffer_level_ms;
const int64_t optimal = rc_cfg->optimal_buffer_level_ms;
const int64_t maximum = rc_cfg->maximum_buffer_size_ms;
- rc->starting_buffer_level = starting * bandwidth / 1000;
- rc->optimal_buffer_level =
+ p_rc->starting_buffer_level = starting * bandwidth / 1000;
+ p_rc->optimal_buffer_level =
(optimal == 0) ? bandwidth / 8 : optimal * bandwidth / 1000;
- rc->maximum_buffer_size =
+ p_rc->maximum_buffer_size =
(maximum == 0) ? bandwidth / 8 : maximum * bandwidth / 1000;
+
+ // Under a configuration change, where maximum_buffer_size may change,
+ // keep buffer level clipped to the maximum allowed buffer size.
+ rc->bits_off_target = AOMMIN(rc->bits_off_target, p_rc->maximum_buffer_size);
+ rc->buffer_level = AOMMIN(rc->buffer_level, p_rc->maximum_buffer_size);
}
static AOM_INLINE void config_target_level(AV1_COMP *const cpi,
@@ -38,7 +65,7 @@ static AOM_INLINE void config_target_level(AV1_COMP *const cpi,
aom_clear_system_state();
AV1EncoderConfig *const oxcf = &cpi->oxcf;
- SequenceHeader *const seq_params = &cpi->common.seq_params;
+ SequenceHeader *const seq_params = cpi->common.seq_params;
TileConfig *const tile_cfg = &oxcf->tile_cfg;
RateControlCfg *const rc_cfg = &oxcf->rc_cfg;
@@ -48,11 +75,11 @@ static AOM_INLINE void config_target_level(AV1_COMP *const cpi,
av1_get_max_bitrate_for_level(target_level, tier, profile);
const int64_t max_bitrate = (int64_t)(level_bitrate_limit * 0.70);
rc_cfg->target_bandwidth = AOMMIN(rc_cfg->target_bandwidth, max_bitrate);
- // Also need to update cpi->twopass.bits_left.
- TWO_PASS *const twopass = &cpi->twopass;
+ // Also need to update cpi->ppi->twopass.bits_left.
+ TWO_PASS *const twopass = &cpi->ppi->twopass;
FIRSTPASS_STATS *stats = twopass->stats_buf_ctx->total_stats;
if (stats != NULL)
- cpi->twopass.bits_left =
+ cpi->ppi->twopass.bits_left =
(int64_t)(stats->duration * rc_cfg->target_bandwidth / 10000000.0);
// Adjust max over-shoot percentage.
@@ -226,6 +253,7 @@ static AOM_INLINE void recode_loop_update_q(
int *const low_cr_seen, const int loop_count) {
AV1_COMMON *const cm = &cpi->common;
RATE_CONTROL *const rc = &cpi->rc;
+ PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
const RateControlCfg *const rc_cfg = &cpi->oxcf.rc_cfg;
*loop = 0;
@@ -263,14 +291,15 @@ static AOM_INLINE void recode_loop_update_q(
&frame_over_shoot_limit);
if (frame_over_shoot_limit == 0) frame_over_shoot_limit = 1;
- if (cm->current_frame.frame_type == KEY_FRAME && rc->this_key_frame_forced &&
+ if (cm->current_frame.frame_type == KEY_FRAME &&
+ p_rc->this_key_frame_forced &&
rc->projected_frame_size < rc->max_frame_bandwidth) {
int64_t kf_err;
const int64_t high_err_target = cpi->ambient_err;
const int64_t low_err_target = cpi->ambient_err >> 1;
#if CONFIG_AV1_HIGHBITDEPTH
- if (cm->seq_params.use_highbitdepth) {
+ if (cm->seq_params->use_highbitdepth) {
kf_err = aom_highbd_get_y_sse(cpi->source, &cm->cur_frame->buf);
} else {
kf_err = aom_get_y_sse(cpi->source, &cm->cur_frame->buf);
@@ -323,11 +352,11 @@ static AOM_INLINE void recode_loop_update_q(
if (*q == *q_high &&
rc->projected_frame_size >= rc->max_frame_bandwidth) {
const double q_val_high_current =
- av1_convert_qindex_to_q(*q_high, cm->seq_params.bit_depth);
+ av1_convert_qindex_to_q(*q_high, cm->seq_params->bit_depth);
const double q_val_high_new =
q_val_high_current *
((double)rc->projected_frame_size / rc->max_frame_bandwidth);
- *q_high = av1_find_qindex(q_val_high_new, cm->seq_params.bit_depth,
+ *q_high = av1_find_qindex(q_val_high_new, cm->seq_params->bit_depth,
rc->best_quality, rc->worst_quality);
}
diff --git a/third_party/libaom/source/libaom/av1/encoder/rd.c b/third_party/libaom/source/libaom/av1/encoder/rd.c
index 389b4bfe3b..e361264f16 100644
--- a/third_party/libaom/source/libaom/av1/encoder/rd.c
+++ b/third_party/libaom/source/libaom/av1/encoder/rd.c
@@ -354,11 +354,45 @@ static const int rd_layer_depth_factor[7] = {
160, 160, 160, 160, 192, 208, 224
};
+// Returns the default rd multiplier for inter frames for a given qindex.
+// The function here is a first pass estimate based on data from
+// a previous Vizer run
+static double def_inter_rd_multiplier(int qindex) {
+ return 3.2 + (0.0035 * (double)qindex);
+}
+
+// Returns the default rd multiplier for ARF/Golden Frames for a given qindex.
+// The function here is a first pass estimate based on data from
+// a previous Vizer run
+static double def_arf_rd_multiplier(int qindex) {
+ return 3.25 + (0.0035 * (double)qindex);
+}
+
+// Returns the default rd multiplier for key frames for a given qindex.
+// The function here is a first pass estimate based on data from
+// a previous Vizer run
+static double def_kf_rd_multiplier(int qindex) {
+ return 3.3 + (0.0035 * (double)qindex);
+}
+
int av1_compute_rd_mult_based_on_qindex(const AV1_COMP *cpi, int qindex) {
- const int q = av1_dc_quant_QTX(qindex, 0, cpi->common.seq_params.bit_depth);
- int rdmult = (int)(((int64_t)88 * q * q) / 24);
+ const int q = av1_dc_quant_QTX(qindex, 0, cpi->common.seq_params->bit_depth);
+ const FRAME_UPDATE_TYPE update_type =
+ cpi->ppi->gf_group.update_type[cpi->gf_frame_index];
+ int rdmult = q * q;
+
+ if (update_type == KF_UPDATE) {
+ double def_rd_q_mult = def_kf_rd_multiplier(qindex);
+ rdmult = (int)((double)rdmult * def_rd_q_mult);
+ } else if ((update_type == GF_UPDATE) || (update_type == ARF_UPDATE)) {
+ double def_rd_q_mult = def_arf_rd_multiplier(qindex);
+ rdmult = (int)((double)rdmult * def_rd_q_mult);
+ } else {
+ double def_rd_q_mult = def_inter_rd_multiplier(qindex);
+ rdmult = (int)((double)rdmult * def_rd_q_mult);
+ }
- switch (cpi->common.seq_params.bit_depth) {
+ switch (cpi->common.seq_params->bit_depth) {
case AOM_BITS_8: break;
case AOM_BITS_10: rdmult = ROUND_POWER_OF_TWO(rdmult, 4); break;
case AOM_BITS_12: rdmult = ROUND_POWER_OF_TWO(rdmult, 8); break;
@@ -373,9 +407,10 @@ int av1_compute_rd_mult(const AV1_COMP *cpi, int qindex) {
int64_t rdmult = av1_compute_rd_mult_based_on_qindex(cpi, qindex);
if (is_stat_consumption_stage(cpi) &&
(cpi->common.current_frame.frame_type != KEY_FRAME)) {
- const GF_GROUP *const gf_group = &cpi->gf_group;
- const int boost_index = AOMMIN(15, (cpi->rc.gfu_boost / 100));
- const int layer_depth = AOMMIN(gf_group->layer_depth[gf_group->index], 6);
+ const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
+ const int boost_index = AOMMIN(15, (cpi->ppi->p_rc.gfu_boost / 100));
+ const int layer_depth =
+ AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], 6);
// Layer depth adjustment
rdmult = (rdmult * rd_layer_depth_factor[layer_depth]) >> 7;
@@ -386,21 +421,30 @@ int av1_compute_rd_mult(const AV1_COMP *cpi, int qindex) {
return (int)rdmult;
}
-int av1_get_deltaq_offset(const AV1_COMP *cpi, int qindex, double beta) {
+int av1_get_deltaq_offset(aom_bit_depth_t bit_depth, int qindex, double beta) {
assert(beta > 0.0);
- int q = av1_dc_quant_QTX(qindex, 0, cpi->common.seq_params.bit_depth);
+ int q = av1_dc_quant_QTX(qindex, 0, bit_depth);
int newq = (int)rint(q / sqrt(beta));
int orig_qindex = qindex;
+ if (newq == q) {
+ return 0;
+ }
if (newq < q) {
- do {
+ while (qindex > 0) {
qindex--;
- q = av1_dc_quant_QTX(qindex, 0, cpi->common.seq_params.bit_depth);
- } while (newq < q && qindex > 0);
+ q = av1_dc_quant_QTX(qindex, 0, bit_depth);
+ if (newq >= q) {
+ break;
+ }
+ }
} else {
- do {
+ while (qindex < MAXQ) {
qindex++;
- q = av1_dc_quant_QTX(qindex, 0, cpi->common.seq_params.bit_depth);
- } while (newq > q && qindex < MAXQ);
+ q = av1_dc_quant_QTX(qindex, 0, bit_depth);
+ if (newq <= q) {
+ break;
+ }
+ }
}
return qindex - orig_qindex;
}
@@ -409,7 +453,7 @@ int av1_get_adaptive_rdmult(const AV1_COMP *cpi, double beta) {
assert(beta > 0.0);
const AV1_COMMON *cm = &cpi->common;
int q = av1_dc_quant_QTX(cm->quant_params.base_qindex, 0,
- cm->seq_params.bit_depth);
+ cm->seq_params->bit_depth);
return (int)(av1_compute_rd_mult(cpi, q) / beta);
}
@@ -433,7 +477,7 @@ static int compute_rd_thresh_factor(int qindex, aom_bit_depth_t bit_depth) {
}
void av1_set_sad_per_bit(const AV1_COMP *cpi, int *sadperbit, int qindex) {
- switch (cpi->common.seq_params.bit_depth) {
+ switch (cpi->common.seq_params->bit_depth) {
case AOM_BITS_8: *sadperbit = sad_per_bit_lut_8[qindex]; break;
case AOM_BITS_10: *sadperbit = sad_per_bit_lut_10[qindex]; break;
case AOM_BITS_12: *sadperbit = sad_per_bit_lut_12[qindex]; break;
@@ -450,7 +494,7 @@ static void set_block_thresholds(const AV1_COMMON *cm, RD_OPT *rd) {
av1_get_qindex(&cm->seg, segment_id, cm->quant_params.base_qindex) +
cm->quant_params.y_dc_delta_q,
0, MAXQ);
- const int q = compute_rd_thresh_factor(qindex, cm->seq_params.bit_depth);
+ const int q = compute_rd_thresh_factor(qindex, cm->seq_params->bit_depth);
for (bsize = 0; bsize < BLOCK_SIZES_ALL; ++bsize) {
// Threshold here seems unnecessarily harsh but fine given actual
@@ -577,6 +621,13 @@ void av1_fill_mv_costs(const nmv_context *nmvc, int integer_mv, int usehp,
}
}
+void av1_fill_dv_costs(const nmv_context *ndvc, IntraBCMVCosts *dv_costs) {
+ dv_costs->dv_costs[0] = &dv_costs->dv_costs_alloc[0][MV_MAX];
+ dv_costs->dv_costs[1] = &dv_costs->dv_costs_alloc[1][MV_MAX];
+ av1_build_nmv_cost_table(dv_costs->joint_mv, dv_costs->dv_costs, ndvc,
+ MV_SUBPEL_NONE);
+}
+
void av1_initialize_rd_consts(AV1_COMP *cpi) {
AV1_COMMON *const cm = &cpi->common;
MACROBLOCK *const x = &cpi->td.mb;
@@ -610,14 +661,9 @@ void av1_initialize_rd_consts(AV1_COMP *cpi) {
cost_upd_freq.mode == COST_UPD_TILE || fill_costs)
av1_fill_mode_rates(cm, &x->mode_costs, cm->fc);
- if (!use_nonrd_pick_mode && frame_is_intra_only(cm) &&
- cm->features.allow_screen_content_tools &&
+ if (!use_nonrd_pick_mode && av1_allow_intrabc(cm) &&
!is_stat_generation_stage(cpi)) {
- IntraBCMVCosts *const dv_costs = &cpi->dv_costs;
- int *dvcost[2] = { &dv_costs->mv_component[0][MV_MAX],
- &dv_costs->mv_component[1][MV_MAX] };
- av1_build_nmv_cost_table(dv_costs->joint_mv, dvcost, &cm->fc->ndvc,
- MV_SUBPEL_NONE);
+ av1_fill_dv_costs(&cm->fc->ndvc, x->dv_costs);
}
}
@@ -1016,12 +1062,16 @@ void av1_mv_pred(const AV1_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
const uint8_t *const ref_y_ptr =
&ref_y_buffer[ref_y_stride * fp_row + fp_col];
// Find sad for current vector.
- const int this_sad = cpi->fn_ptr[block_size].sdf(
+ const int this_sad = cpi->ppi->fn_ptr[block_size].sdf(
src_y_ptr, x->plane[0].src.stride, ref_y_ptr, ref_y_stride);
// Note if it is the best so far.
if (this_sad < best_sad) {
best_sad = this_sad;
}
+ if (i == 0)
+ x->pred_mv0_sad[ref_frame] = this_sad;
+ else if (i == 1)
+ x->pred_mv1_sad[ref_frame] = this_sad;
}
// Note the index of the mv that worked best in the reference list.
@@ -1287,7 +1337,7 @@ void av1_update_rd_thresh_fact(const AV1_COMMON *const cm,
const THR_MODES top_mode = MAX_MODES;
const int max_rd_thresh_factor = use_adaptive_rd_thresh * RD_THRESH_MAX_FACT;
- const int bsize_is_1_to_4 = bsize > cm->seq_params.sb_size;
+ const int bsize_is_1_to_4 = bsize > cm->seq_params->sb_size;
BLOCK_SIZE min_size, max_size;
if (bsize_is_1_to_4) {
// This part handles block sizes with 1:4 and 4:1 aspect ratios
@@ -1296,7 +1346,7 @@ void av1_update_rd_thresh_fact(const AV1_COMMON *const cm,
max_size = bsize;
} else {
min_size = AOMMAX(bsize - 2, BLOCK_4X4);
- max_size = AOMMIN(bsize + 2, (int)cm->seq_params.sb_size);
+ max_size = AOMMIN(bsize + 2, (int)cm->seq_params->sb_size);
}
for (THR_MODES mode = 0; mode < top_mode; ++mode) {
diff --git a/third_party/libaom/source/libaom/av1/encoder/rd.h b/third_party/libaom/source/libaom/av1/encoder/rd.h
index e37c86b9d5..c1ba819ae2 100644
--- a/third_party/libaom/source/libaom/av1/encoder/rd.h
+++ b/third_party/libaom/source/libaom/av1/encoder/rd.h
@@ -81,20 +81,6 @@ typedef struct RD_OPT {
double r0;
} RD_OPT;
-typedef struct {
- // Cost of transmitting the actual motion vector.
- // mv_component[0][i] is the cost of motion vector with horizontal component
- // (mv_row) equal to i - MV_MAX.
- // mv_component[1][i] is the cost of motion vector with vertical component
- // (mv_col) equal to i - MV_MAX.
- int mv_component[2][MV_VALS];
-
- // joint_mv[i] is the cost of transmitting joint mv(MV_JOINT_TYPE) of
- // type i.
- // TODO(huisu@google.com): we can update dv_joint_cost per SB.
- int joint_mv[MV_JOINTS];
-} IntraBCMVCosts;
-
static INLINE void av1_init_rd_stats(RD_STATS *rd_stats) {
#if CONFIG_RD_DEBUG
int plane;
@@ -110,12 +96,6 @@ static INLINE void av1_init_rd_stats(RD_STATS *rd_stats) {
// encoded, as there will only be 1 plane
for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
rd_stats->txb_coeff_cost[plane] = 0;
- {
- int r, c;
- for (r = 0; r < TXB_COEFF_COST_MAP_SIZE; ++r)
- for (c = 0; c < TXB_COEFF_COST_MAP_SIZE; ++c)
- rd_stats->txb_coeff_cost_map[plane][r][c] = 0;
- }
}
#endif
}
@@ -135,19 +115,18 @@ static INLINE void av1_invalid_rd_stats(RD_STATS *rd_stats) {
// encoded, as there will only be 1 plane
for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
rd_stats->txb_coeff_cost[plane] = INT_MAX;
- {
- int r, c;
- for (r = 0; r < TXB_COEFF_COST_MAP_SIZE; ++r)
- for (c = 0; c < TXB_COEFF_COST_MAP_SIZE; ++c)
- rd_stats->txb_coeff_cost_map[plane][r][c] = INT16_MAX;
- }
}
#endif
}
static INLINE void av1_merge_rd_stats(RD_STATS *rd_stats_dst,
const RD_STATS *rd_stats_src) {
- assert(rd_stats_dst->rate != INT_MAX && rd_stats_src->rate != INT_MAX);
+ if (rd_stats_dst->rate == INT_MAX || rd_stats_src->rate == INT_MAX) {
+ // If rd_stats_dst or rd_stats_src has invalid rate, we will make
+ // rd_stats_dst invalid.
+ av1_invalid_rd_stats(rd_stats_dst);
+ return;
+ }
rd_stats_dst->rate = (int)AOMMIN(
((int64_t)rd_stats_dst->rate + (int64_t)rd_stats_src->rate), INT_MAX);
if (!rd_stats_dst->zero_rate)
@@ -160,18 +139,6 @@ static INLINE void av1_merge_rd_stats(RD_STATS *rd_stats_dst,
// encoded, as there will only be 1 plane
for (int plane = 0; plane < MAX_MB_PLANE; ++plane) {
rd_stats_dst->txb_coeff_cost[plane] += rd_stats_src->txb_coeff_cost[plane];
- {
- // TODO(angiebird): optimize this part
- int r, c;
- int ref_txb_coeff_cost = 0;
- for (r = 0; r < TXB_COEFF_COST_MAP_SIZE; ++r)
- for (c = 0; c < TXB_COEFF_COST_MAP_SIZE; ++c) {
- rd_stats_dst->txb_coeff_cost_map[plane][r][c] +=
- rd_stats_src->txb_coeff_cost_map[plane][r][c];
- ref_txb_coeff_cost += rd_stats_dst->txb_coeff_cost_map[plane][r][c];
- }
- assert(ref_txb_coeff_cost == rd_stats_dst->txb_coeff_cost[plane]);
- }
}
#endif
}
@@ -375,9 +342,11 @@ void av1_fill_coeff_costs(CoeffCosts *coeff_costs, FRAME_CONTEXT *fc,
void av1_fill_mv_costs(const nmv_context *nmvc, int integer_mv, int usehp,
MvCosts *mv_costs);
+void av1_fill_dv_costs(const nmv_context *ndvc, IntraBCMVCosts *dv_costs);
+
int av1_get_adaptive_rdmult(const struct AV1_COMP *cpi, double beta);
-int av1_get_deltaq_offset(const struct AV1_COMP *cpi, int qindex, double beta);
+int av1_get_deltaq_offset(aom_bit_depth_t bit_depth, int qindex, double beta);
#ifdef __cplusplus
} // extern "C"
diff --git a/third_party/libaom/source/libaom/av1/encoder/rdopt.c b/third_party/libaom/source/libaom/av1/encoder/rdopt.c
index 6200ac11dd..3ca0cb4143 100644
--- a/third_party/libaom/source/libaom/av1/encoder/rdopt.c
+++ b/third_party/libaom/source/libaom/av1/encoder/rdopt.c
@@ -627,8 +627,8 @@ static int64_t get_sse(const AV1_COMP *cpi, const MACROBLOCK *x,
get_plane_block_size(mbmi->bsize, pd->subsampling_x, pd->subsampling_y);
unsigned int sse;
- cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride,
- &sse);
+ cpi->ppi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf,
+ pd->dst.stride, &sse);
total_sse += sse;
if (!plane && sse_y) *sse_y = sse;
}
@@ -1156,13 +1156,16 @@ static int64_t handle_newmv(const AV1_COMP *const cpi, MACROBLOCK *const x,
int_mv best_mv;
av1_single_motion_search(cpi, x, bsize, ref_idx, rate_mv, search_range,
- mode_info, &best_mv);
+ mode_info, &best_mv, args);
if (best_mv.as_int == INVALID_MV) return INT64_MAX;
args->single_newmv[ref_mv_idx][refs[0]] = best_mv;
args->single_newmv_rate[ref_mv_idx][refs[0]] = *rate_mv;
args->single_newmv_valid[ref_mv_idx][refs[0]] = 1;
cur_mv[0].as_int = best_mv.as_int;
+
+ // Return after single_newmv is set.
+ if (mode_info[mbmi->ref_mv_idx].skip) return INT64_MAX;
}
return 0;
@@ -1276,7 +1279,7 @@ static int64_t motion_mode_rd(
uint8_t best_blk_skip[MAX_MIB_SIZE * MAX_MIB_SIZE];
uint8_t best_tx_type_map[MAX_MIB_SIZE * MAX_MIB_SIZE];
const int rate_mv0 = *rate_mv;
- const int interintra_allowed = cm->seq_params.enable_interintra_compound &&
+ const int interintra_allowed = cm->seq_params->enable_interintra_compound &&
is_interintra_allowed(mbmi) &&
mbmi->compound_idx;
WARP_SAMPLE_INFO *const warp_sample_info =
@@ -1319,7 +1322,7 @@ static int64_t motion_mode_rd(
const int switchable_rate =
av1_is_interp_needed(xd)
? av1_get_switchable_rate(x, xd, interp_filter,
- cm->seq_params.enable_dual_filter)
+ cm->seq_params->enable_dual_filter)
: 0;
int64_t best_rd = INT64_MAX;
int best_rate_mv = rate_mv0;
@@ -1355,11 +1358,18 @@ static int64_t motion_mode_rd(
// Do not search OBMC if the probability of selecting it is below a
// predetermined threshold for this update_type and block size.
- const FRAME_UPDATE_TYPE update_type = get_frame_update_type(&cpi->gf_group);
- const int prune_obmc = cpi->frame_probs.obmc_probs[update_type][bsize] <
- cpi->sf.inter_sf.prune_obmc_prob_thresh;
- if ((!cpi->oxcf.motion_mode_cfg.enable_obmc ||
- cpi->sf.rt_sf.use_nonrd_pick_mode || prune_obmc) &&
+ const FRAME_UPDATE_TYPE update_type =
+ get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
+ int obmc_probability;
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ obmc_probability =
+ cpi->ppi->temp_frame_probs.obmc_probs[update_type][bsize];
+#else
+ obmc_probability = cpi->frame_probs.obmc_probs[update_type][bsize];
+#endif
+ const int prune_obmc =
+ obmc_probability < cpi->sf.inter_sf.prune_obmc_prob_thresh;
+ if ((!cpi->oxcf.motion_mode_cfg.enable_obmc || prune_obmc) &&
mbmi->motion_mode == OBMC_CAUSAL)
continue;
@@ -1373,7 +1383,7 @@ static int64_t motion_mode_rd(
assert(!is_comp_pred);
if (have_newmv_in_inter_mode(this_mode)) {
av1_single_motion_search(cpi, x, bsize, 0, &tmp_rate_mv, INT_MAX, NULL,
- &mbmi->mv[0]);
+ &mbmi->mv[0], NULL);
tmp_rate2 = rate2_nocoeff - rate_mv0 + tmp_rate_mv;
}
if ((mbmi->mv[0].as_int != cur_mv) || eval_motion_mode) {
@@ -1897,10 +1907,11 @@ static bool ref_mv_idx_early_breakout(
}
// Compute the estimated RD cost for the motion vector with simple translation.
-static int64_t simple_translation_pred_rd(
- AV1_COMP *const cpi, MACROBLOCK *x, RD_STATS *rd_stats,
- HandleInterModeArgs *args, int ref_mv_idx, inter_mode_info *mode_info,
- int64_t ref_best_rd, BLOCK_SIZE bsize) {
+static int64_t simple_translation_pred_rd(AV1_COMP *const cpi, MACROBLOCK *x,
+ RD_STATS *rd_stats,
+ HandleInterModeArgs *args,
+ int ref_mv_idx, int64_t ref_best_rd,
+ BLOCK_SIZE bsize) {
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *mbmi = xd->mi[0];
MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
@@ -1933,7 +1944,6 @@ static int64_t simple_translation_pred_rd(
const int drl_cost =
get_drl_cost(mbmi, mbmi_ext, mode_costs->drl_mode_cost0, ref_frame_type);
rd_stats->rate += drl_cost;
- mode_info[ref_mv_idx].drl_cost = drl_cost;
int_mv cur_mv[2];
if (!build_cur_mv(cur_mv, mbmi->mode, cm, x, 0)) {
@@ -1987,8 +1997,8 @@ static INLINE bool mask_check_bit(int mask, int index) {
static int ref_mv_idx_to_search(AV1_COMP *const cpi, MACROBLOCK *x,
RD_STATS *rd_stats,
HandleInterModeArgs *const args,
- int64_t ref_best_rd, inter_mode_info *mode_info,
- BLOCK_SIZE bsize, const int ref_set) {
+ int64_t ref_best_rd, BLOCK_SIZE bsize,
+ const int ref_set) {
AV1_COMMON *const cm = &cpi->common;
const MACROBLOCKD *const xd = &x->e_mbd;
const MB_MODE_INFO *const mbmi = xd->mi[0];
@@ -2027,7 +2037,7 @@ static int ref_mv_idx_to_search(AV1_COMP *const cpi, MACROBLOCK *x,
continue;
}
idx_rdcost[ref_mv_idx] = simple_translation_pred_rd(
- cpi, x, rd_stats, args, ref_mv_idx, mode_info, ref_best_rd, bsize);
+ cpi, x, rd_stats, args, ref_mv_idx, ref_best_rd, bsize);
}
// Find the index with the best RD cost.
int best_idx = 0;
@@ -2171,14 +2181,17 @@ typedef struct {
static AOM_INLINE void get_block_level_tpl_stats(
AV1_COMP *cpi, BLOCK_SIZE bsize, int mi_row, int mi_col, int *valid_refs,
PruneInfoFromTpl *inter_cost_info_from_tpl) {
- const GF_GROUP *const gf_group = &cpi->gf_group;
AV1_COMMON *const cm = &cpi->common;
- assert(IMPLIES(gf_group->size > 0, gf_group->index < gf_group->size));
- const int tpl_idx = gf_group->index;
- TplParams *const tpl_data = &cpi->tpl_data;
+ assert(IMPLIES(cpi->ppi->gf_group.size > 0,
+ cpi->gf_frame_index < cpi->ppi->gf_group.size));
+ const int tpl_idx = cpi->gf_frame_index;
+ TplParams *const tpl_data = &cpi->ppi->tpl_data;
+ if (tpl_idx >= MAX_TPL_FRAME_IDX) {
+ return;
+ }
const TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_idx];
- if (tpl_idx >= MAX_TPL_FRAME_IDX || !tpl_frame->is_valid) {
+ if (!tpl_frame->is_valid) {
return;
}
@@ -2274,101 +2287,6 @@ static AOM_INLINE int prune_modes_based_on_tpl_stats(
return 0;
}
-// If the current mode being searched is NEWMV, this function will look
-// at previously searched MVs and check if they are the same
-// as the current MV. If it finds that this MV is repeated, it compares
-// the cost to the previous MV and skips the rest of the search if it is
-// more expensive.
-static int skip_repeated_newmv(
- AV1_COMP *const cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
- const int do_tx_search, const PREDICTION_MODE this_mode,
- MB_MODE_INFO *best_mbmi, motion_mode_candidate *motion_mode_cand,
- int64_t *ref_best_rd, RD_STATS *best_rd_stats, RD_STATS *best_rd_stats_y,
- RD_STATS *best_rd_stats_uv, inter_mode_info *mode_info,
- HandleInterModeArgs *args, int drl_cost, const int *refs, int_mv *cur_mv,
- int64_t *best_rd, const BUFFER_SET orig_dst, int ref_mv_idx) {
- // This feature only works for NEWMV when a previous mv has been searched
- if (this_mode != NEWMV || ref_mv_idx == 0) return 0;
- MACROBLOCKD *xd = &x->e_mbd;
- const AV1_COMMON *cm = &cpi->common;
- const int num_planes = av1_num_planes(cm);
-
- int skip = 0;
- int this_rate_mv = 0;
- int i;
- for (i = 0; i < ref_mv_idx; ++i) {
- // Check if the motion search result same as previous results
- if (cur_mv[0].as_int == args->single_newmv[i][refs[0]].as_int &&
- args->single_newmv_valid[i][refs[0]]) {
- // If the compared mode has no valid rd, it is unlikely this
- // mode will be the best mode
- if (mode_info[i].rd == INT64_MAX) {
- skip = 1;
- break;
- }
- // Compare the cost difference including drl cost and mv cost
- if (mode_info[i].mv.as_int != INVALID_MV) {
- const int compare_cost = mode_info[i].rate_mv + mode_info[i].drl_cost;
- const int_mv ref_mv = av1_get_ref_mv(x, 0);
- this_rate_mv = av1_mv_bit_cost(
- &mode_info[i].mv.as_mv, &ref_mv.as_mv, x->mv_costs->nmv_joint_cost,
- x->mv_costs->mv_cost_stack, MV_COST_WEIGHT);
- const int this_cost = this_rate_mv + drl_cost;
-
- if (compare_cost <= this_cost) {
- // Skip this mode if it is more expensive as the previous result
- // for this MV
- skip = 1;
- break;
- } else {
- // If the cost is less than current best result, make this
- // the best and update corresponding variables unless the
- // best_mv is the same as ref_mv. In this case we skip and
- // rely on NEAR(EST)MV instead
- if (best_mbmi->ref_mv_idx == i &&
- best_mbmi->mv[0].as_int != ref_mv.as_int) {
- assert(*best_rd != INT64_MAX);
- assert(best_mbmi->mv[0].as_int == mode_info[i].mv.as_int);
- best_mbmi->ref_mv_idx = ref_mv_idx;
- motion_mode_cand->rate_mv = this_rate_mv;
- best_rd_stats->rate += this_cost - compare_cost;
- *best_rd =
- RDCOST(x->rdmult, best_rd_stats->rate, best_rd_stats->dist);
- // We also need to update mode_info here because we are setting
- // (ref_)best_rd here. So we will not be able to search the same
- // mode again with the current configuration.
- mode_info[ref_mv_idx].mv.as_int = best_mbmi->mv[0].as_int;
- mode_info[ref_mv_idx].rate_mv = this_rate_mv;
- mode_info[ref_mv_idx].rd = *best_rd;
- if (*best_rd < *ref_best_rd) *ref_best_rd = *best_rd;
- break;
- }
- }
- }
- }
- }
- if (skip) {
- const THR_MODES mode_enum = get_prediction_mode_idx(
- best_mbmi->mode, best_mbmi->ref_frame[0], best_mbmi->ref_frame[1]);
- // Collect mode stats for multiwinner mode processing
- store_winner_mode_stats(
- &cpi->common, x, best_mbmi, best_rd_stats, best_rd_stats_y,
- best_rd_stats_uv, mode_enum, NULL, bsize, *best_rd,
- cpi->sf.winner_mode_sf.multi_winner_mode_type, do_tx_search);
- args->modelled_rd[this_mode][ref_mv_idx][refs[0]] =
- args->modelled_rd[this_mode][i][refs[0]];
- args->simple_rd[this_mode][ref_mv_idx][refs[0]] =
- args->simple_rd[this_mode][i][refs[0]];
- mode_info[ref_mv_idx].rd = mode_info[i].rd;
- mode_info[ref_mv_idx].rate_mv = this_rate_mv;
- mode_info[ref_mv_idx].mv.as_int = mode_info[i].mv.as_int;
-
- restore_dst_buf(xd, orig_dst, num_planes);
- return 1;
- }
- return 0;
-}
-
/*!\brief High level function to select parameters for compound mode.
*
* \ingroup inter_mode_search
@@ -2427,7 +2345,7 @@ static int process_compound_inter_mode(
MB_MODE_INFO *mbmi = xd->mi[0];
const AV1_COMMON *cm = &cpi->common;
const int masked_compound_used = is_any_masked_compound_used(bsize) &&
- cm->seq_params.enable_masked_compound;
+ cm->seq_params->enable_masked_compound;
int mode_search_mask = (1 << COMPOUND_AVERAGE) | (1 << COMPOUND_DISTWTD) |
(1 << COMPOUND_WEDGE) | (1 << COMPOUND_DIFFWTD);
@@ -2506,6 +2424,76 @@ static int prune_ref_mv_idx_search(int ref_mv_idx, int best_ref_mv_idx,
return 0;
}
+/*!\brief Prunes ZeroMV Search Using Best NEWMV's SSE
+ *
+ * \ingroup inter_mode_search
+ *
+ * Compares the sse of zero mv and the best sse found in single new_mv. If the
+ * sse of the zero_mv is higher, returns 1 to signal zero_mv can be skipped.
+ * Else returns 0.
+ *
+ * Note that the sse of here comes from single_motion_search. So it is
+ * interpolated with the filter in motion search, not the actual interpolation
+ * filter used in encoding.
+ *
+ * \param[in] fn_ptr A table of function pointers to compute SSE.
+ * \param[in] x Pointer to struct holding all the data for
+ * the current macroblock.
+ * \param[in] bsize The current block_size.
+ * \param[in] args The args to handle_inter_mode, used to track
+ * the best SSE.
+ * \return Returns 1 if zero_mv is pruned, 0 otherwise.
+ */
+static AOM_INLINE int prune_zero_mv_with_sse(
+ const aom_variance_fn_ptr_t *fn_ptr, const MACROBLOCK *x, BLOCK_SIZE bsize,
+ const HandleInterModeArgs *args) {
+ const MACROBLOCKD *xd = &x->e_mbd;
+ const MB_MODE_INFO *mbmi = xd->mi[0];
+
+ const int is_comp_pred = has_second_ref(mbmi);
+ const MV_REFERENCE_FRAME *refs = mbmi->ref_frame;
+
+ // Check that the global mv is the same as ZEROMV
+ assert(mbmi->mv[0].as_int == 0);
+ assert(IMPLIES(is_comp_pred, mbmi->mv[0].as_int == 0));
+ assert(xd->global_motion[refs[0]].wmtype == TRANSLATION ||
+ xd->global_motion[refs[0]].wmtype == IDENTITY);
+
+ // Don't prune if we have invalid data
+ for (int idx = 0; idx < 1 + is_comp_pred; idx++) {
+ assert(mbmi->mv[0].as_int == 0);
+ if (args->best_single_sse_in_refs[refs[idx]] == INT32_MAX) {
+ return 0;
+ }
+ }
+
+ // Sum up the sse of ZEROMV and best NEWMV
+ unsigned int this_sse_sum = 0;
+ unsigned int best_sse_sum = 0;
+ for (int idx = 0; idx < 1 + is_comp_pred; idx++) {
+ const struct macroblock_plane *const p = &x->plane[AOM_PLANE_Y];
+ const struct macroblockd_plane *pd = xd->plane;
+ const struct buf_2d *src_buf = &p->src;
+ const struct buf_2d *ref_buf = &pd->pre[idx];
+ const uint8_t *src = src_buf->buf;
+ const uint8_t *ref = ref_buf->buf;
+ const int src_stride = src_buf->stride;
+ const int ref_stride = ref_buf->stride;
+
+ unsigned int this_sse;
+ fn_ptr[bsize].vf(ref, ref_stride, src, src_stride, &this_sse);
+ this_sse_sum += this_sse;
+
+ const unsigned int best_sse = args->best_single_sse_in_refs[refs[idx]];
+ best_sse_sum += best_sse;
+ }
+ if (this_sse_sum > best_sse_sum) {
+ return 1;
+ }
+
+ return 0;
+}
+
/*!\brief AV1 inter mode RD computation
*
* \ingroup inter_mode_search
@@ -2589,12 +2577,11 @@ static int64_t handle_inter_mode(
const int is_comp_pred = has_second_ref(mbmi);
const PREDICTION_MODE this_mode = mbmi->mode;
- const GF_GROUP *const gf_group = &cpi->gf_group;
- const int tpl_idx = gf_group->index;
- TplDepFrame *tpl_frame = &cpi->tpl_data.tpl_frame[tpl_idx];
+ const int tpl_idx = cpi->gf_frame_index;
+ TplParams *const tpl_data = &cpi->ppi->tpl_data;
const int prune_modes_based_on_tpl =
cpi->sf.inter_sf.prune_inter_modes_based_on_tpl &&
- tpl_idx < MAX_TPL_FRAME_IDX && tpl_frame->is_valid;
+ tpl_idx < MAX_TPL_FRAME_IDX && tpl_data->tpl_frame[tpl_idx].is_valid;
int i;
// Reference frames for this mode
const int refs[2] = { mbmi->ref_frame[0],
@@ -2606,10 +2593,10 @@ static int64_t handle_inter_mode(
// of these currently holds the best predictor, and use the other
// one for future predictions. In the end, copy from tmp_buf to
// dst if necessary.
- struct macroblockd_plane *p = xd->plane;
+ struct macroblockd_plane *pd = xd->plane;
const BUFFER_SET orig_dst = {
- { p[0].dst.buf, p[1].dst.buf, p[2].dst.buf },
- { p[0].dst.stride, p[1].dst.stride, p[2].dst.stride },
+ { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
+ { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
};
const BUFFER_SET tmp_dst = { { tmp_buf, tmp_buf + 1 * MAX_SB_SQUARE,
tmp_buf + 2 * MAX_SB_SQUARE },
@@ -2645,8 +2632,8 @@ static int64_t handle_inter_mode(
// Save MV results from first 2 ref_mv_idx.
int_mv save_mv[MAX_REF_MV_SEARCH - 1][2];
int best_ref_mv_idx = -1;
- const int idx_mask = ref_mv_idx_to_search(cpi, x, rd_stats, args, ref_best_rd,
- mode_info, bsize, ref_set);
+ const int idx_mask =
+ ref_mv_idx_to_search(cpi, x, rd_stats, args, ref_best_rd, bsize, ref_set);
const int16_t mode_ctx =
av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
const ModeCosts *mode_costs = &x->mode_costs;
@@ -2669,9 +2656,14 @@ static int64_t handle_inter_mode(
// WARPED_CAUSAL)
// 6.) Update stats if best so far
for (int ref_mv_idx = 0; ref_mv_idx < ref_set; ++ref_mv_idx) {
+ mbmi->ref_mv_idx = ref_mv_idx;
+
mode_info[ref_mv_idx].full_search_mv.as_int = INVALID_MV;
- mode_info[ref_mv_idx].mv.as_int = INVALID_MV;
- mode_info[ref_mv_idx].rd = INT64_MAX;
+ mode_info[ref_mv_idx].full_mv_bestsme = INT_MAX;
+ const int drl_cost = get_drl_cost(
+ mbmi, mbmi_ext, mode_costs->drl_mode_cost0, ref_frame_type);
+ mode_info[ref_mv_idx].drl_cost = drl_cost;
+ mode_info[ref_mv_idx].skip = 0;
if (!mask_check_bit(idx_mask, ref_mv_idx)) {
// MV did not perform well in simple translation search. Skip it.
@@ -2695,14 +2687,10 @@ static int64_t handle_inter_mode(
mbmi->num_proj_ref = 0;
mbmi->motion_mode = SIMPLE_TRANSLATION;
- mbmi->ref_mv_idx = ref_mv_idx;
// Compute cost for signalling this DRL index
rd_stats->rate = base_rate;
- const int drl_cost = get_drl_cost(
- mbmi, mbmi_ext, mode_costs->drl_mode_cost0, ref_frame_type);
rd_stats->rate += drl_cost;
- mode_info[ref_mv_idx].drl_cost = drl_cost;
int rs = 0;
int compmode_interinter_cost = 0;
@@ -2731,17 +2719,16 @@ static int64_t handle_inter_mode(
if (newmv_ret_val != 0) continue;
- rd_stats->rate += rate_mv;
+ if (is_inter_singleref_mode(this_mode) &&
+ cur_mv[0].as_int != INVALID_MV) {
+ const MV_REFERENCE_FRAME ref = refs[0];
+ const unsigned int this_sse = x->pred_sse[ref];
+ if (this_sse < args->best_single_sse_in_refs[ref]) {
+ args->best_single_sse_in_refs[ref] = this_sse;
+ }
+ }
- // skip NEWMV mode in drl if the motion search result is the same
- // as a previous result
- if (cpi->sf.inter_sf.skip_repeated_newmv &&
- skip_repeated_newmv(cpi, x, bsize, do_tx_search, this_mode,
- &best_mbmi, motion_mode_cand, &ref_best_rd,
- &best_rd_stats, &best_rd_stats_y,
- &best_rd_stats_uv, mode_info, args, drl_cost,
- refs, cur_mv, &best_rd, orig_dst, ref_mv_idx))
- continue;
+ rd_stats->rate += rate_mv;
}
// Copy the motion vector for this mode into mbmi struct
for (i = 0; i < is_comp_pred + 1; ++i) {
@@ -2760,6 +2747,14 @@ static int64_t handle_inter_mode(
cpi->sf.inter_sf.prune_ref_mv_idx_search))
continue;
+ if (cpi->sf.gm_sf.prune_zero_mv_with_sse &&
+ cpi->sf.gm_sf.gm_search_type == GM_DISABLE_SEARCH &&
+ (this_mode == GLOBALMV || this_mode == GLOBAL_GLOBALMV)) {
+ if (prune_zero_mv_with_sse(cpi->ppi->fn_ptr, x, bsize, args)) {
+ continue;
+ }
+ }
+
#if CONFIG_COLLECT_COMPONENT_TIMING
start_timing(cpi, compound_type_rd_time);
#endif
@@ -2843,12 +2838,6 @@ static int64_t handle_inter_mode(
if (ret_val != INT64_MAX) {
int64_t tmp_rd = RDCOST(x->rdmult, rd_stats->rate, rd_stats->dist);
- if (tmp_rd < mode_info[ref_mv_idx].rd) {
- // Only update mode_info if the new result is actually better.
- mode_info[ref_mv_idx].mv.as_int = mbmi->mv[0].as_int;
- mode_info[ref_mv_idx].rate_mv = rate_mv;
- mode_info[ref_mv_idx].rd = tmp_rd;
- }
const THR_MODES mode_enum = get_prediction_mode_idx(
mbmi->mode, mbmi->ref_frame[0], mbmi->ref_frame[1]);
// Collect mode stats for multiwinner mode processing
@@ -2928,11 +2917,11 @@ static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
const int mi_col = xd->mi_col;
const int w = block_size_wide[bsize];
const int h = block_size_high[bsize];
- const int sb_row = mi_row >> cm->seq_params.mib_size_log2;
- const int sb_col = mi_col >> cm->seq_params.mib_size_log2;
+ const int sb_row = mi_row >> cm->seq_params->mib_size_log2;
+ const int sb_col = mi_col >> cm->seq_params->mib_size_log2;
MB_MODE_INFO_EXT *const mbmi_ext = &x->mbmi_ext;
- MV_REFERENCE_FRAME ref_frame = INTRA_FRAME;
+ const MV_REFERENCE_FRAME ref_frame = INTRA_FRAME;
av1_find_mv_refs(cm, xd, mbmi, ref_frame, mbmi_ext->ref_mv_count,
xd->ref_mv_stack, xd->weight, NULL, mbmi_ext->global_mvs,
mbmi_ext->mode_context);
@@ -2952,7 +2941,7 @@ static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
int_mv dv_ref = nearestmv.as_int == 0 ? nearmv : nearestmv;
if (dv_ref.as_int == 0) {
- av1_find_ref_dv(&dv_ref, tile, cm->seq_params.mib_size, mi_row);
+ av1_find_ref_dv(&dv_ref, tile, cm->seq_params->mib_size, mi_row);
}
// Ref DV should not have sub-pel.
assert((dv_ref.as_mv.col & 7) == 0);
@@ -2983,7 +2972,7 @@ static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
av1_make_default_fullpel_ms_params(&fullms_params, cpi, x, bsize,
&dv_ref.as_mv, lookahead_search_sites,
/*fine_search_interval=*/0);
- const IntraBCMVCosts *const dv_costs = &cpi->dv_costs;
+ const IntraBCMVCosts *const dv_costs = x->dv_costs;
av1_set_ms_to_intra_mode(&fullms_params, dv_costs);
for (enum IntrabcMotionDirection dir = IBC_MOTION_ABOVE;
@@ -2997,19 +2986,19 @@ static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
fullms_params.mv_limits.row_min =
(tile->mi_row_start - mi_row) * MI_SIZE;
fullms_params.mv_limits.row_max =
- (sb_row * cm->seq_params.mib_size - mi_row) * MI_SIZE - h;
+ (sb_row * cm->seq_params->mib_size - mi_row) * MI_SIZE - h;
break;
case IBC_MOTION_LEFT:
fullms_params.mv_limits.col_min =
(tile->mi_col_start - mi_col) * MI_SIZE;
fullms_params.mv_limits.col_max =
- (sb_col * cm->seq_params.mib_size - mi_col) * MI_SIZE - w;
+ (sb_col * cm->seq_params->mib_size - mi_col) * MI_SIZE - w;
// TODO(aconverse@google.com): Minimize the overlap between above and
// left areas.
fullms_params.mv_limits.row_min =
(tile->mi_row_start - mi_row) * MI_SIZE;
int bottom_coded_mi_edge =
- AOMMIN((sb_row + 1) * cm->seq_params.mib_size, tile->mi_row_end);
+ AOMMIN((sb_row + 1) * cm->seq_params->mib_size, tile->mi_row_end);
fullms_params.mv_limits.row_max =
(bottom_coded_mi_edge - mi_row) * MI_SIZE - h;
break;
@@ -3047,7 +3036,7 @@ static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
get_fullmv_from_mv(&dv)))
continue;
if (!av1_is_dv_valid(dv, cm, xd, mi_row, mi_col, bsize,
- cm->seq_params.mib_size_log2))
+ cm->seq_params->mib_size_log2))
continue;
// DV should not have sub-pel.
@@ -3065,12 +3054,10 @@ static int64_t rd_pick_intrabc_mode_sb(const AV1_COMP *cpi, MACROBLOCK *x,
av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
av1_num_planes(cm) - 1);
- int *dvcost[2] = { (int *)&dv_costs->mv_component[0][MV_MAX],
- (int *)&dv_costs->mv_component[1][MV_MAX] };
// TODO(aconverse@google.com): The full motion field defining discount
// in MV_COST_WEIGHT is too large. Explore other values.
const int rate_mv = av1_mv_bit_cost(&dv, &dv_ref.as_mv, dv_costs->joint_mv,
- dvcost, MV_COST_WEIGHT_SUB);
+ dv_costs->dv_costs, MV_COST_WEIGHT_SUB);
const int rate_mode = x->mode_costs.intrabc_cost[1];
RD_STATS rd_stats_yuv, rd_stats_y, rd_stats_uv;
if (!av1_txfm_search(cpi, x, bsize, &rd_stats_yuv, &rd_stats_y,
@@ -3186,7 +3173,6 @@ static AOM_INLINE void rd_pick_skip_mode(
const int num_planes = av1_num_planes(cm);
MACROBLOCKD *const xd = &x->e_mbd;
MB_MODE_INFO *const mbmi = xd->mi[0];
- const TxfmSearchParams *txfm_params = &x->txfm_search_params;
x->compound_idx = 1; // COMPOUND_AVERAGE
RD_STATS skip_mode_rd_stats;
@@ -3247,6 +3233,8 @@ static AOM_INLINE void rd_pick_skip_mode(
mbmi->motion_mode = SIMPLE_TRANSLATION;
mbmi->ref_mv_idx = 0;
mbmi->skip_mode = mbmi->skip_txfm = 1;
+ mbmi->palette_mode_info.palette_size[0] = 0;
+ mbmi->palette_mode_info.palette_size[1] = 0;
set_default_interp_filters(mbmi, cm->features.interp_filter);
@@ -3283,45 +3271,12 @@ static AOM_INLINE void rd_pick_skip_mode(
assert(mode_index != THR_INVALID);
search_state->best_mbmode.skip_mode = 1;
search_state->best_mbmode = *mbmi;
-
- search_state->best_mbmode.skip_mode = search_state->best_mbmode.skip_txfm =
- 1;
- search_state->best_mbmode.mode = NEAREST_NEARESTMV;
- search_state->best_mbmode.ref_frame[0] = mbmi->ref_frame[0];
- search_state->best_mbmode.ref_frame[1] = mbmi->ref_frame[1];
- search_state->best_mbmode.mv[0].as_int = mbmi->mv[0].as_int;
- search_state->best_mbmode.mv[1].as_int = mbmi->mv[1].as_int;
- search_state->best_mbmode.ref_mv_idx = 0;
-
- // Set up tx_size related variables for skip-specific loop filtering.
- search_state->best_mbmode.tx_size =
- block_signals_txsize(bsize)
- ? tx_size_from_tx_mode(bsize, txfm_params->tx_mode_search_type)
- : max_txsize_rect_lookup[bsize];
memset(search_state->best_mbmode.inter_tx_size,
search_state->best_mbmode.tx_size,
sizeof(search_state->best_mbmode.inter_tx_size));
set_txfm_ctxs(search_state->best_mbmode.tx_size, xd->width, xd->height,
search_state->best_mbmode.skip_txfm && is_inter_block(mbmi),
xd);
-
- // Set up color-related variables for skip mode.
- search_state->best_mbmode.uv_mode = UV_DC_PRED;
- search_state->best_mbmode.palette_mode_info.palette_size[0] = 0;
- search_state->best_mbmode.palette_mode_info.palette_size[1] = 0;
-
- search_state->best_mbmode.comp_group_idx = 0;
- search_state->best_mbmode.compound_idx = x->compound_idx;
- search_state->best_mbmode.interinter_comp.type = COMPOUND_AVERAGE;
- search_state->best_mbmode.motion_mode = SIMPLE_TRANSLATION;
-
- search_state->best_mbmode.interintra_mode =
- (INTERINTRA_MODE)(II_DC_PRED - 1);
- search_state->best_mbmode.filter_intra_mode_info.use_filter_intra = 0;
-
- set_default_interp_filters(&search_state->best_mbmode,
- cm->features.interp_filter);
-
search_state->best_mode_index = mode_index;
// Update rd_cost
@@ -3798,7 +3753,7 @@ static AOM_INLINE void set_params_rd_pick_inter_mode(
// compound ref.
if (skip_ref_frame_mask & (1 << ref_frame) &&
!is_ref_frame_used_by_compound_ref(ref_frame, skip_ref_frame_mask) &&
- !is_ref_frame_used_in_cache(ref_frame, x->intermode_cache)) {
+ !is_ref_frame_used_in_cache(ref_frame, x->mb_mode_cache)) {
continue;
}
assert(get_ref_frame_yv12_buf(cm, ref_frame) != NULL);
@@ -3824,7 +3779,7 @@ static AOM_INLINE void set_params_rd_pick_inter_mode(
}
if (skip_ref_frame_mask & (1 << ref_frame) &&
- !is_ref_frame_used_in_cache(ref_frame, x->intermode_cache)) {
+ !is_ref_frame_used_in_cache(ref_frame, x->mb_mode_cache)) {
continue;
}
// Ref mv list population is not required, when compound references are
@@ -3841,9 +3796,16 @@ static AOM_INLINE void set_params_rd_pick_inter_mode(
}
av1_count_overlappable_neighbors(cm, xd);
- const FRAME_UPDATE_TYPE update_type = get_frame_update_type(&cpi->gf_group);
- const int prune_obmc = cpi->frame_probs.obmc_probs[update_type][bsize] <
- cpi->sf.inter_sf.prune_obmc_prob_thresh;
+ const FRAME_UPDATE_TYPE update_type =
+ get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
+ int obmc_probability;
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ obmc_probability = cpi->ppi->temp_frame_probs.obmc_probs[update_type][bsize];
+#else
+ obmc_probability = cpi->frame_probs.obmc_probs[update_type][bsize];
+#endif
+ const int prune_obmc =
+ obmc_probability < cpi->sf.inter_sf.prune_obmc_prob_thresh;
if (cpi->oxcf.motion_mode_cfg.enable_obmc && !prune_obmc) {
if (check_num_overlappable_neighbors(mbmi) &&
is_motion_variation_allowed_bsize(bsize)) {
@@ -3874,6 +3836,10 @@ static AOM_INLINE void set_params_rd_pick_inter_mode(
set_mode_eval_params(cpi, x, MODE_EVAL);
x->comp_rd_stats_idx = 0;
+
+ for (int idx = 0; idx < REF_FRAMES; idx++) {
+ args->best_single_sse_in_refs[idx] = INT32_MAX;
+ }
}
static AOM_INLINE void init_inter_mode_search_state(
@@ -4060,8 +4026,8 @@ static int inter_mode_search_order_independent_skip(
}
// Reuse the prediction mode in cache
- if (x->use_intermode_cache) {
- const MB_MODE_INFO *cached_mi = x->intermode_cache;
+ if (x->use_mb_mode_cache) {
+ const MB_MODE_INFO *cached_mi = x->mb_mode_cache;
const PREDICTION_MODE cached_mode = cached_mi->mode;
const MV_REFERENCE_FRAME *cached_frame = cached_mi->ref_frame;
const int cached_mode_is_single = cached_frame[1] <= INTRA_FRAME;
@@ -4156,12 +4122,12 @@ static int inter_mode_search_order_independent_skip(
}
// If we are reusing the prediction from cache, and the current frame is
// required by the cache, then we cannot prune it.
- if (is_ref_frame_used_in_cache(ref_type, x->intermode_cache)) {
+ if (is_ref_frame_used_in_cache(ref_type, x->mb_mode_cache)) {
skip_ref = 0;
// If the cache only needs the current reference type for compound
// prediction, then we can skip motion mode search.
skip_motion_mode = (ref_type <= ALTREF_FRAME &&
- x->intermode_cache->ref_frame[1] > INTRA_FRAME);
+ x->mb_mode_cache->ref_frame[1] > INTRA_FRAME);
}
if (skip_ref) return 1;
}
@@ -4452,12 +4418,14 @@ static INLINE void match_ref_frame(const MB_MODE_INFO *const mbmi,
// Prune compound mode using ref frames of neighbor blocks.
static INLINE int compound_skip_using_neighbor_refs(
MACROBLOCKD *const xd, const PREDICTION_MODE this_mode,
- const MV_REFERENCE_FRAME *ref_frames, int prune_compound_using_neighbors) {
+ const MV_REFERENCE_FRAME *ref_frames, int prune_ext_comp_using_neighbors) {
// Exclude non-extended compound modes from pruning
if (this_mode == NEAREST_NEARESTMV || this_mode == NEAR_NEARMV ||
this_mode == NEW_NEWMV || this_mode == GLOBAL_GLOBALMV)
return 0;
+ if (prune_ext_comp_using_neighbors >= 3) return 1;
+
int is_ref_match[2] = { 0 }; // 0 - match for forward refs
// 1 - match for backward refs
// Check if ref frames of this block matches with left neighbor.
@@ -4472,7 +4440,7 @@ static INLINE int compound_skip_using_neighbor_refs(
const int track_ref_match = is_ref_match[0] + is_ref_match[1];
// Pruning based on ref frame match with neighbors.
- if (track_ref_match >= prune_compound_using_neighbors) return 0;
+ if (track_ref_match >= prune_ext_comp_using_neighbors) return 0;
return 1;
}
@@ -4629,10 +4597,10 @@ static AOM_INLINE void evaluate_motion_mode_for_winner_candidates(
if (!is_inter_singleref_mode(mbmi->mode)) continue;
x->txfm_search_info.skip_txfm = 0;
- struct macroblockd_plane *p = xd->plane;
+ struct macroblockd_plane *pd = xd->plane;
const BUFFER_SET orig_dst = {
- { p[0].dst.buf, p[1].dst.buf, p[2].dst.buf },
- { p[0].dst.stride, p[1].dst.stride, p[2].dst.stride },
+ { pd[0].dst.buf, pd[1].dst.buf, pd[2].dst.buf },
+ { pd[0].dst.stride, pd[1].dst.stride, pd[2].dst.stride },
};
set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
@@ -4681,8 +4649,7 @@ typedef struct {
int skip_ref_frame_mask;
int reach_first_comp_mode;
int mode_thresh_mul_fact;
- int intra_mode_idx_ls[INTRA_MODES];
- int intra_mode_num;
+ int *intra_mode_idx_ls;
int num_single_modes_processed;
int prune_cpd_using_sr_stats_ready;
} InterModeSFArgs;
@@ -4693,7 +4660,6 @@ static int skip_inter_mode(AV1_COMP *cpi, MACROBLOCK *x, const BLOCK_SIZE bsize,
InterModeSFArgs *args) {
const SPEED_FEATURES *const sf = &cpi->sf;
MACROBLOCKD *const xd = &x->e_mbd;
- MB_MODE_INFO *const mbmi = xd->mi[0];
// Get the actual prediction mode we are trying in this iteration
const THR_MODES mode_enum = av1_default_mode_order[midx];
const MODE_DEFINITION *mode_def = &av1_mode_defs[mode_enum];
@@ -4703,6 +4669,8 @@ static int skip_inter_mode(AV1_COMP *cpi, MACROBLOCK *x, const BLOCK_SIZE bsize,
const MV_REFERENCE_FRAME second_ref_frame = ref_frames[1];
const int comp_pred = second_ref_frame > INTRA_FRAME;
+ if (ref_frame == INTRA_FRAME) return 1;
+
// Check if this mode should be skipped because it is incompatible with the
// current frame
if (inter_mode_compatible_skip(cpi, x, bsize, this_mode, ref_frames))
@@ -4739,23 +4707,6 @@ static int skip_inter_mode(AV1_COMP *cpi, MACROBLOCK *x, const BLOCK_SIZE bsize,
return 1;
}
- // Speed features to prune out INTRA frames
- if (ref_frame == INTRA_FRAME) {
- if ((!cpi->oxcf.intra_mode_cfg.enable_smooth_intra ||
- sf->intra_sf.disable_smooth_intra) &&
- (mbmi->mode == SMOOTH_PRED || mbmi->mode == SMOOTH_H_PRED ||
- mbmi->mode == SMOOTH_V_PRED))
- return 1;
- if (!cpi->oxcf.intra_mode_cfg.enable_paeth_intra &&
- mbmi->mode == PAETH_PRED)
- return 1;
-
- // Intra modes will be handled in another loop later.
- assert(args->intra_mode_num < INTRA_MODES);
- args->intra_mode_idx_ls[args->intra_mode_num++] = mode_enum;
- return 1;
- }
-
if (sf->inter_sf.prune_compound_using_single_ref && comp_pred) {
// After we done with single reference modes, find the 2nd best RD
// for a reference frame. Only search compound modes that have a reference
@@ -4770,10 +4721,10 @@ static int skip_inter_mode(AV1_COMP *cpi, MACROBLOCK *x, const BLOCK_SIZE bsize,
return 1;
}
- if (sf->inter_sf.prune_compound_using_neighbors && comp_pred) {
+ if (sf->inter_sf.prune_ext_comp_using_neighbors && comp_pred) {
if (compound_skip_using_neighbor_refs(
xd, this_mode, ref_frames,
- sf->inter_sf.prune_compound_using_neighbors))
+ sf->inter_sf.prune_ext_comp_using_neighbors))
return 1;
}
@@ -4851,8 +4802,9 @@ static void tx_search_best_inter_candidates(
: INT64_MAX;
*yrd = INT64_MAX;
int64_t best_rd_in_this_partition = INT64_MAX;
+ int num_inter_mode_cands = inter_modes_info->num;
// Iterate over best inter mode candidates and perform tx search
- for (int j = 0; j < inter_modes_info->num; ++j) {
+ for (int j = 0; j < num_inter_mode_cands; ++j) {
const int data_idx = inter_modes_info->rd_idx_pair_arr[j].idx;
*mbmi = inter_modes_info->mbmi_arr[data_idx];
int64_t curr_est_rd = inter_modes_info->est_rd_arr[data_idx];
@@ -4930,6 +4882,27 @@ static void tx_search_best_inter_candidates(
update_search_state(search_state, rd_cost, ctx, &rd_stats, &rd_stats_y,
&rd_stats_uv, mode_enum, x, txfm_search_done);
search_state->best_skip_rd[0] = skip_rd;
+ // Limit the total number of modes to be evaluated if the first is valid
+ // and transform skip or compound
+ if (cpi->sf.inter_sf.inter_mode_txfm_breakout) {
+ if (!j && (search_state->best_mbmode.skip_txfm || rd_stats.skip_txfm)) {
+ // Evaluate more candidates at high quantizers where occurrence of
+ // transform skip is high.
+ const int max_cands_cap[5] = { 2, 3, 5, 7, 9 };
+ const int qindex_band = (5 * x->qindex) >> QINDEX_BITS;
+ num_inter_mode_cands =
+ AOMMIN(max_cands_cap[qindex_band], inter_modes_info->num);
+ } else if (!j && has_second_ref(&search_state->best_mbmode)) {
+ const int aggr = cpi->sf.inter_sf.inter_mode_txfm_breakout - 1;
+ // Evaluate more candidates at low quantizers where occurrence of
+ // single reference mode is high.
+ const int max_cands_cap_cmp[2][4] = { { 10, 7, 5, 4 },
+ { 10, 7, 5, 3 } };
+ const int qindex_band_cmp = (4 * x->qindex) >> QINDEX_BITS;
+ num_inter_mode_cands = AOMMIN(
+ max_cands_cap_cmp[aggr][qindex_band_cmp], inter_modes_info->num);
+ }
+ }
}
}
}
@@ -5050,13 +5023,41 @@ static AOM_INLINE void search_intra_modes_in_interframe(
const int num_4x4 = bsize_to_num_blk(bsize);
// Performs luma search
- for (int j = 0; j < sf_args->intra_mode_num; ++j) {
+ int64_t best_model_rd = INT64_MAX;
+ int64_t top_intra_model_rd[TOP_INTRA_MODEL_COUNT];
+ for (int i = 0; i < TOP_INTRA_MODEL_COUNT; i++) {
+ top_intra_model_rd[i] = INT64_MAX;
+ }
+ for (int mode_idx = INTRA_MODE_START; mode_idx < LUMA_MODE_COUNT;
+ ++mode_idx) {
if (sf->intra_sf.skip_intra_in_interframe &&
search_state->intra_search_state.skip_intra_modes)
break;
- const THR_MODES mode_enum = sf_args->intra_mode_idx_ls[j];
- const MODE_DEFINITION *mode_def = &av1_mode_defs[mode_enum];
- const PREDICTION_MODE this_mode = mode_def->mode;
+ set_y_mode_and_delta_angle(mode_idx, mbmi);
+
+ // Use intra_y_mode_mask speed feature to skip intra mode evaluation.
+ if (sf_args->mode_skip_mask->pred_modes[INTRA_FRAME] & (1 << mbmi->mode))
+ continue;
+
+ THR_MODES mode_enum = 0;
+ for (int i = 0; i < INTRA_MODE_END; ++i) {
+ if (mbmi->mode == av1_mode_defs[sf_args->intra_mode_idx_ls[i]].mode) {
+ mode_enum = sf_args->intra_mode_idx_ls[i];
+ break;
+ }
+ }
+ if ((!cpi->oxcf.intra_mode_cfg.enable_smooth_intra ||
+ cpi->sf.intra_sf.disable_smooth_intra) &&
+ (mbmi->mode == SMOOTH_PRED || mbmi->mode == SMOOTH_H_PRED ||
+ mbmi->mode == SMOOTH_V_PRED))
+ continue;
+ if (!cpi->oxcf.intra_mode_cfg.enable_paeth_intra &&
+ mbmi->mode == PAETH_PRED)
+ continue;
+ if (av1_is_directional_mode(mbmi->mode) &&
+ av1_use_angle_delta(bsize) == 0 && mbmi->angle_delta[PLANE_TYPE_Y] != 0)
+ continue;
+ const PREDICTION_MODE this_mode = mbmi->mode;
assert(av1_mode_defs[mode_enum].ref_frame[0] == INTRA_FRAME);
assert(av1_mode_defs[mode_enum].ref_frame[1] == NONE_FRAME);
@@ -5084,7 +5085,8 @@ static AOM_INLINE void search_intra_modes_in_interframe(
int64_t intra_rd_y = INT64_MAX;
const int is_luma_result_valid = av1_handle_intra_y_mode(
intra_search_state, cpi, x, bsize, intra_ref_frame_cost, ctx,
- &intra_rd_stats_y, search_state->best_rd, &mode_cost_y, &intra_rd_y);
+ &intra_rd_stats_y, search_state->best_rd, &mode_cost_y, &intra_rd_y,
+ &best_model_rd, top_intra_model_rd);
if (is_luma_result_valid && intra_rd_y < yrd_threshold) {
is_best_y_mode_intra = 1;
if (intra_rd_y < best_rd_y) {
@@ -5147,12 +5149,6 @@ static AOM_INLINE void search_intra_modes_in_interframe(
intra_rd_stats_uv.rate +
intra_mode_info_cost_uv(cpi, x, mbmi, bsize, uv_mode_cost);
}
- if (mode != DC_PRED && mode != PAETH_PRED) {
- const int intra_cost_penalty = av1_get_intra_cost_penalty(
- cm->quant_params.base_qindex, cm->quant_params.y_dc_delta_q,
- cm->seq_params.bit_depth);
- intra_rd_stats.rate += intra_cost_penalty;
- }
// Intra block is always coded as non-skip
intra_rd_stats.skip_txfm = 0;
@@ -5189,6 +5185,84 @@ static AOM_INLINE void search_intra_modes_in_interframe(
}
}
+#if !CONFIG_REALTIME_ONLY
+// Prepare inter_cost and intra_cost from TPL stats, which are used as ML
+// features in intra mode pruning.
+static AOM_INLINE void calculate_cost_from_tpl_data(
+ const AV1_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row,
+ int mi_col, int64_t *inter_cost, int64_t *intra_cost) {
+ const AV1_COMMON *const cm = &cpi->common;
+ // Only consider full SB.
+ const BLOCK_SIZE sb_size = cm->seq_params->sb_size;
+ const int tpl_bsize_1d = cpi->ppi->tpl_data.tpl_bsize_1d;
+ const int len = (block_size_wide[sb_size] / tpl_bsize_1d) *
+ (block_size_high[sb_size] / tpl_bsize_1d);
+ SuperBlockEnc *sb_enc = &x->sb_enc;
+ if (sb_enc->tpl_data_count == len) {
+ const BLOCK_SIZE tpl_bsize = convert_length_to_bsize(tpl_bsize_1d);
+ const int tpl_stride = sb_enc->tpl_stride;
+ const int tplw = mi_size_wide[tpl_bsize];
+ const int tplh = mi_size_high[tpl_bsize];
+ const int nw = mi_size_wide[bsize] / tplw;
+ const int nh = mi_size_high[bsize] / tplh;
+ if (nw >= 1 && nh >= 1) {
+ const int of_h = mi_row % mi_size_high[sb_size];
+ const int of_w = mi_col % mi_size_wide[sb_size];
+ const int start = of_h / tplh * tpl_stride + of_w / tplw;
+
+ for (int k = 0; k < nh; k++) {
+ for (int l = 0; l < nw; l++) {
+ *inter_cost += sb_enc->tpl_inter_cost[start + k * tpl_stride + l];
+ *intra_cost += sb_enc->tpl_intra_cost[start + k * tpl_stride + l];
+ }
+ }
+ *inter_cost /= nw * nh;
+ *intra_cost /= nw * nh;
+ }
+ }
+}
+#endif // !CONFIG_REALTIME_ONLY
+
+// When the speed feature skip_intra_in_interframe > 0, enable ML model to prune
+// intra mode search.
+static AOM_INLINE void skip_intra_modes_in_interframe(
+ AV1_COMMON *const cm, struct macroblock *x, BLOCK_SIZE bsize,
+ InterModeSearchState *search_state, int64_t inter_cost, int64_t intra_cost,
+ int skip_intra_in_interframe) {
+ MACROBLOCKD *const xd = &x->e_mbd;
+ if (inter_cost >= 0 && intra_cost >= 0) {
+ aom_clear_system_state();
+ const NN_CONFIG *nn_config = (AOMMIN(cm->width, cm->height) <= 480)
+ ? &av1_intrap_nn_config
+ : &av1_intrap_hd_nn_config;
+ float nn_features[6];
+ float scores[2] = { 0.0f };
+
+ nn_features[0] = (float)search_state->best_mbmode.skip_txfm;
+ nn_features[1] = (float)mi_size_wide_log2[bsize];
+ nn_features[2] = (float)mi_size_high_log2[bsize];
+ nn_features[3] = (float)intra_cost;
+ nn_features[4] = (float)inter_cost;
+ const int ac_q = av1_ac_quant_QTX(x->qindex, 0, xd->bd);
+ const int ac_q_max = av1_ac_quant_QTX(255, 0, xd->bd);
+ nn_features[5] = (float)(ac_q_max / ac_q);
+
+ av1_nn_predict(nn_features, nn_config, 1, scores);
+ aom_clear_system_state();
+
+ // For two parameters, the max prob returned from av1_nn_softmax equals
+ // 1.0 / (1.0 + e^(-|diff_score|)). Here use scores directly to avoid the
+ // calling of av1_nn_softmax.
+ const float thresh[2] = { 1.4f, 1.4f };
+ if (scores[1] > scores[0] + thresh[skip_intra_in_interframe - 1]) {
+ search_state->intra_search_state.skip_intra_modes = 1;
+ }
+ } else if ((search_state->best_mbmode.skip_txfm) &&
+ (skip_intra_in_interframe >= 2)) {
+ search_state->intra_search_state.skip_intra_modes = 1;
+ }
+}
+
// TODO(chiyotsai@google.com): See the todo for av1_rd_pick_intra_mode_sb.
void av1_rd_pick_inter_mode(struct AV1_COMP *cpi, struct TileDataEnc *tile_data,
struct macroblock *x, struct RD_STATS *rd_cost,
@@ -5231,6 +5305,7 @@ void av1_rd_pick_inter_mode(struct AV1_COMP *cpi, struct TileDataEnc *tile_data,
-1,
-1,
-1,
+ { 0 },
{ 0 } };
for (i = 0; i < MODE_CTX_REF_FRAMES; ++i) args.cmp_mode[i] = -1;
// Indicates the appropriate number of simple translation winner modes for
@@ -5265,10 +5340,13 @@ void av1_rd_pick_inter_mode(struct AV1_COMP *cpi, struct TileDataEnc *tile_data,
mbmi->partition != PARTITION_HORZ) ||
cpi->sf.inter_sf.prune_ref_frame_for_rect_partitions >= 2) {
picked_ref_frames_mask =
- fetch_picked_ref_frames_mask(x, bsize, cm->seq_params.mib_size);
+ fetch_picked_ref_frames_mask(x, bsize, cm->seq_params->mib_size);
}
}
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ start_timing(cpi, set_params_rd_pick_inter_mode_time);
+#endif
// Skip ref frames that never selected by square blocks.
const int skip_ref_frame_mask =
picked_ref_frames_mask ? ~picked_ref_frames_mask : 0;
@@ -5280,6 +5358,9 @@ void av1_rd_pick_inter_mode(struct AV1_COMP *cpi, struct TileDataEnc *tile_data,
set_params_rd_pick_inter_mode(cpi, x, &args, bsize, &mode_skip_mask,
skip_ref_frame_mask, ref_costs_single,
ref_costs_comp, yv12_mb);
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ end_timing(cpi, set_params_rd_pick_inter_mode_time);
+#endif
int64_t best_est_rd = INT64_MAX;
const InterModeRdModel *md = &tile_data->inter_mode_rd_models[bsize];
@@ -5292,6 +5373,10 @@ void av1_rd_pick_inter_mode(struct AV1_COMP *cpi, struct TileDataEnc *tile_data,
cpi->sf.rt_sf.force_tx_search_off);
InterModesInfo *inter_modes_info = x->inter_modes_info;
inter_modes_info->num = 0;
+ int intra_mode_idx_ls[INTRA_MODES];
+ for (i = 0; i < INTRA_MODES; ++i) {
+ intra_mode_idx_ls[i] = i + THR_DC;
+ }
// Temporary buffers used by handle_inter_mode().
uint8_t *const tmp_buf = get_buf_by_bd(xd, x->tmp_pred_bufs[0]);
@@ -5337,40 +5422,13 @@ void av1_rd_pick_inter_mode(struct AV1_COMP *cpi, struct TileDataEnc *tile_data,
const int do_pruning =
(AOMMIN(cm->width, cm->height) > 480 && cpi->speed <= 1) ? 0 : 1;
if (do_pruning && sf->intra_sf.skip_intra_in_interframe &&
- cpi->oxcf.algo_cfg.enable_tpl_model) {
- // Only consider full SB.
- const BLOCK_SIZE sb_size = cm->seq_params.sb_size;
- const int tpl_bsize_1d = cpi->tpl_data.tpl_bsize_1d;
- const int len = (block_size_wide[sb_size] / tpl_bsize_1d) *
- (block_size_high[sb_size] / tpl_bsize_1d);
- SuperBlockEnc *sb_enc = &x->sb_enc;
- if (sb_enc->tpl_data_count == len) {
- const BLOCK_SIZE tpl_bsize = convert_length_to_bsize(tpl_bsize_1d);
- const int tpl_stride = sb_enc->tpl_stride;
- const int tplw = mi_size_wide[tpl_bsize];
- const int tplh = mi_size_high[tpl_bsize];
- const int nw = mi_size_wide[bsize] / tplw;
- const int nh = mi_size_high[bsize] / tplh;
- if (nw >= 1 && nh >= 1) {
- const int of_h = mi_row % mi_size_high[sb_size];
- const int of_w = mi_col % mi_size_wide[sb_size];
- const int start = of_h / tplh * tpl_stride + of_w / tplw;
-
- for (int k = 0; k < nh; k++) {
- for (int l = 0; l < nw; l++) {
- inter_cost += sb_enc->tpl_inter_cost[start + k * tpl_stride + l];
- intra_cost += sb_enc->tpl_intra_cost[start + k * tpl_stride + l];
- }
- }
- inter_cost /= nw * nh;
- intra_cost /= nw * nh;
- }
- }
- }
+ cpi->oxcf.algo_cfg.enable_tpl_model)
+ calculate_cost_from_tpl_data(cpi, x, bsize, mi_row, mi_col, &inter_cost,
+ &intra_cost);
#endif // !CONFIG_REALTIME_ONLY
// Initialize best mode stats for winner mode processing
- av1_zero(x->winner_mode_stats);
+ av1_zero_array(x->winner_mode_stats, MAX_WINNER_MODE_COUNT_INTER);
x->winner_mode_count = 0;
store_winner_mode_stats(&cpi->common, x, mbmi, NULL, NULL, NULL, THR_INVALID,
NULL, bsize, best_rd_so_far,
@@ -5389,20 +5447,20 @@ void av1_rd_pick_inter_mode(struct AV1_COMP *cpi, struct TileDataEnc *tile_data,
skip_ref_frame_mask,
0,
mode_thresh_mul_fact,
- { 0 },
- 0,
+ intra_mode_idx_ls,
0,
0 };
int64_t best_inter_yrd = INT64_MAX;
- // This is the main loop of this function. It loops over all possible modes
- // and calls handle_inter_mode() to compute the RD for each.
+ // This is the main loop of this function. It loops over all possible inter
+ // modes and calls handle_inter_mode() to compute the RD for each.
// Here midx is just an iterator index that should not be used by itself
// except to keep track of the number of modes searched. It should be used
// with av1_default_mode_order to get the enum that defines the mode, which
// can be used with av1_mode_defs to get the prediction mode and the ref
// frames.
- for (THR_MODES midx = THR_MODE_START; midx < THR_MODE_END; ++midx) {
+ for (THR_MODES midx = THR_INTER_MODE_START; midx < THR_INTER_MODE_END;
+ ++midx) {
// Get the actual prediction mode we are trying in this iteration
const THR_MODES mode_enum = av1_default_mode_order[midx];
const MODE_DEFINITION *mode_def = &av1_mode_defs[mode_enum];
@@ -5420,9 +5478,16 @@ void av1_rd_pick_inter_mode(struct AV1_COMP *cpi, struct TileDataEnc *tile_data,
txfm_info->skip_txfm = 0;
sf_args.num_single_modes_processed += is_single_pred;
set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);
-
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ start_timing(cpi, skip_inter_mode_time);
+#endif
// Apply speed features to decide if this inter mode can be skipped
- if (skip_inter_mode(cpi, x, bsize, ref_frame_rd, midx, &sf_args)) continue;
+ const int is_skip_inter_mode =
+ skip_inter_mode(cpi, x, bsize, ref_frame_rd, midx, &sf_args);
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ end_timing(cpi, skip_inter_mode_time);
+#endif
+ if (is_skip_inter_mode) continue;
// Select prediction reference frames.
for (i = 0; i < num_planes; i++) {
@@ -5549,36 +5614,11 @@ void av1_rd_pick_inter_mode(struct AV1_COMP *cpi, struct TileDataEnc *tile_data,
// Gate intra mode evaluation if best of inter is skip except when source
// variance is extremely low
const unsigned int src_var_thresh_intra_skip = 1;
- if (sf->intra_sf.skip_intra_in_interframe &&
- (x->source_variance > src_var_thresh_intra_skip)) {
- if (inter_cost >= 0 && intra_cost >= 0) {
- aom_clear_system_state();
- const NN_CONFIG *nn_config = (AOMMIN(cm->width, cm->height) <= 480)
- ? &av1_intrap_nn_config
- : &av1_intrap_hd_nn_config;
- float nn_features[6];
- float scores[2] = { 0.0f };
- float probs[2] = { 0.0f };
-
- nn_features[0] = (float)search_state.best_mbmode.skip_txfm;
- nn_features[1] = (float)mi_size_wide_log2[bsize];
- nn_features[2] = (float)mi_size_high_log2[bsize];
- nn_features[3] = (float)intra_cost;
- nn_features[4] = (float)inter_cost;
- const int ac_q = av1_ac_quant_QTX(x->qindex, 0, xd->bd);
- const int ac_q_max = av1_ac_quant_QTX(255, 0, xd->bd);
- nn_features[5] = (float)(ac_q_max / ac_q);
-
- av1_nn_predict(nn_features, nn_config, 1, scores);
- aom_clear_system_state();
- av1_nn_softmax(scores, probs, 2);
-
- if (probs[1] > 0.8) search_state.intra_search_state.skip_intra_modes = 1;
- } else if ((search_state.best_mbmode.skip_txfm) &&
- (sf->intra_sf.skip_intra_in_interframe >= 2)) {
- search_state.intra_search_state.skip_intra_modes = 1;
- }
- }
+ const int skip_intra_in_interframe = sf->intra_sf.skip_intra_in_interframe;
+ if (skip_intra_in_interframe &&
+ (x->source_variance > src_var_thresh_intra_skip))
+ skip_intra_modes_in_interframe(cm, x, bsize, &search_state, inter_cost,
+ intra_cost, skip_intra_in_interframe);
const unsigned int intra_ref_frame_cost = ref_costs_single[INTRA_FRAME];
search_intra_modes_in_interframe(&search_state, cpi, x, rd_cost, bsize, ctx,
@@ -5588,6 +5628,9 @@ void av1_rd_pick_inter_mode(struct AV1_COMP *cpi, struct TileDataEnc *tile_data,
end_timing(cpi, handle_intra_mode_time);
#endif
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ start_timing(cpi, refine_winner_mode_tx_time);
+#endif
int winner_mode_count =
cpi->sf.winner_mode_sf.multi_winner_mode_type ? x->winner_mode_count : 1;
// In effect only when fast tx search speed features are enabled.
@@ -5595,6 +5638,9 @@ void av1_rd_pick_inter_mode(struct AV1_COMP *cpi, struct TileDataEnc *tile_data,
cpi, x, rd_cost, bsize, ctx, &search_state.best_mode_index,
&search_state.best_mbmode, yv12_mb, search_state.best_rate_y,
search_state.best_rate_uv, &search_state.best_skip2, winner_mode_count);
+#if CONFIG_COLLECT_COMPONENT_TIMING
+ end_timing(cpi, refine_winner_mode_tx_time);
+#endif
// Initialize default mode evaluation params
set_mode_eval_params(cpi, x, DEFAULT_EVAL);
@@ -5803,7 +5849,7 @@ void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
for (i = 0; i < SWITCHABLE_FILTERS; ++i) {
mbmi->interp_filters = av1_broadcast_interp_filter(i);
rs = av1_get_switchable_rate(x, xd, interp_filter,
- cm->seq_params.enable_dual_filter);
+ cm->seq_params->enable_dual_filter);
if (rs < best_rs) {
best_rs = rs;
best_filter = mbmi->interp_filters.as_filters.y_filter;
@@ -5814,7 +5860,7 @@ void av1_rd_pick_inter_mode_sb_seg_skip(const AV1_COMP *cpi,
// Set the appropriate filter
mbmi->interp_filters = av1_broadcast_interp_filter(best_filter);
rate2 += av1_get_switchable_rate(x, xd, interp_filter,
- cm->seq_params.enable_dual_filter);
+ cm->seq_params->enable_dual_filter);
if (cm->current_frame.reference_mode == REFERENCE_MODE_SELECT)
rate2 += comp_inter_cost[comp_pred];
diff --git a/third_party/libaom/source/libaom/av1/encoder/rdopt.h b/third_party/libaom/source/libaom/av1/encoder/rdopt.h
index 362da7b798..055a49e9f1 100644
--- a/third_party/libaom/source/libaom/av1/encoder/rdopt.h
+++ b/third_party/libaom/source/libaom/av1/encoder/rdopt.h
@@ -217,10 +217,10 @@ static INLINE int av1_encoder_get_relative_dist(int a, int b) {
static INLINE int av1_get_sb_mi_size(const AV1_COMMON *const cm) {
const int mi_alloc_size_1d = mi_size_wide[cm->mi_params.mi_alloc_bsize];
int sb_mi_rows =
- (mi_size_wide[cm->seq_params.sb_size] + mi_alloc_size_1d - 1) /
+ (mi_size_wide[cm->seq_params->sb_size] + mi_alloc_size_1d - 1) /
mi_alloc_size_1d;
- assert(mi_size_wide[cm->seq_params.sb_size] ==
- mi_size_high[cm->seq_params.sb_size]);
+ assert(mi_size_wide[cm->seq_params->sb_size] ==
+ mi_size_high[cm->seq_params->sb_size]);
int sb_mi_size = sb_mi_rows * sb_mi_rows;
return sb_mi_size;
diff --git a/third_party/libaom/source/libaom/av1/encoder/rdopt_utils.h b/third_party/libaom/source/libaom/av1/encoder/rdopt_utils.h
index ddd180f7ed..f00037992e 100644
--- a/third_party/libaom/source/libaom/av1/encoder/rdopt_utils.h
+++ b/third_party/libaom/source/libaom/av1/encoder/rdopt_utils.h
@@ -433,8 +433,10 @@ static INLINE void set_tx_type_prune(const SPEED_FEATURES *sf,
txfm_params->prune_2d_txfm_mode = sf->tx_sf.tx_type_search.prune_2d_txfm_mode;
if (!winner_mode_tx_type_pruning) return;
- const int prune_mode[2][2] = { { TX_TYPE_PRUNE_4, TX_TYPE_PRUNE_0 },
- { TX_TYPE_PRUNE_5, TX_TYPE_PRUNE_2 } };
+ const int prune_mode[4][2] = { { TX_TYPE_PRUNE_3, TX_TYPE_PRUNE_0 },
+ { TX_TYPE_PRUNE_4, TX_TYPE_PRUNE_0 },
+ { TX_TYPE_PRUNE_5, TX_TYPE_PRUNE_2 },
+ { TX_TYPE_PRUNE_5, TX_TYPE_PRUNE_3 } };
txfm_params->prune_2d_txfm_mode =
prune_mode[winner_mode_tx_type_pruning - 1][is_winner_mode];
}
@@ -569,7 +571,7 @@ static INLINE CFL_ALLOWED_TYPE store_cfl_required_rdo(const AV1_COMMON *cm,
const MACROBLOCK *x) {
const MACROBLOCKD *xd = &x->e_mbd;
- if (cm->seq_params.monochrome || !xd->is_chroma_ref) return CFL_DISALLOWED;
+ if (cm->seq_params->monochrome || !xd->is_chroma_ref) return CFL_DISALLOWED;
if (!xd->is_chroma_ref) {
// For non-chroma-reference blocks, we should always store the luma pixels,
diff --git a/third_party/libaom/source/libaom/av1/encoder/segmentation.c b/third_party/libaom/source/libaom/av1/encoder/segmentation.c
index de17d571ff..edb6ef67fa 100644
--- a/third_party/libaom/source/libaom/av1/encoder/segmentation.c
+++ b/third_party/libaom/source/libaom/av1/encoder/segmentation.c
@@ -175,6 +175,14 @@ void av1_choose_segmap_coding_method(AV1_COMMON *cm, MACROBLOCKD *xd) {
int no_pred_cost;
int t_pred_cost = INT_MAX;
int tile_col, tile_row, mi_row, mi_col;
+
+ if (!seg->update_map) return;
+ if (cm->features.primary_ref_frame == PRIMARY_REF_NONE) {
+ seg->temporal_update = 0;
+ assert(seg->update_data == 1);
+ return;
+ }
+
unsigned temporal_predictor_count[SEG_TEMPORAL_PRED_CTXS][2] = { { 0 } };
unsigned no_pred_segcounts[MAX_SEGMENTS] = { 0 };
unsigned t_unpred_seg_counts[MAX_SEGMENTS] = { 0 };
@@ -194,15 +202,15 @@ void av1_choose_segmap_coding_method(AV1_COMMON *cm, MACROBLOCKD *xd) {
tile_info.mi_row_start * cm->mi_params.mi_stride +
tile_info.mi_col_start;
for (mi_row = tile_info.mi_row_start; mi_row < tile_info.mi_row_end;
- mi_row += cm->seq_params.mib_size,
- mi_ptr += cm->seq_params.mib_size * cm->mi_params.mi_stride) {
+ mi_row += cm->seq_params->mib_size,
+ mi_ptr += cm->seq_params->mib_size * cm->mi_params.mi_stride) {
MB_MODE_INFO **mi = mi_ptr;
for (mi_col = tile_info.mi_col_start; mi_col < tile_info.mi_col_end;
- mi_col += cm->seq_params.mib_size,
- mi += cm->seq_params.mib_size) {
+ mi_col += cm->seq_params->mib_size,
+ mi += cm->seq_params->mib_size) {
count_segs_sb(cm, xd, &tile_info, mi, no_pred_segcounts,
temporal_predictor_count, t_unpred_seg_counts, mi_row,
- mi_col, cm->seq_params.sb_size);
+ mi_col, cm->seq_params->sb_size);
}
}
}
diff --git a/third_party/libaom/source/libaom/av1/encoder/sparse_linear_solver.c b/third_party/libaom/source/libaom/av1/encoder/sparse_linear_solver.c
index 1c556c2a09..dbfcaabbd6 100644
--- a/third_party/libaom/source/libaom/av1/encoder/sparse_linear_solver.c
+++ b/third_party/libaom/source/libaom/av1/encoder/sparse_linear_solver.c
@@ -8,7 +8,6 @@
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
-#include <float.h>
#include "av1/common/av1_common_int.h"
#include "av1/encoder/sparse_linear_solver.h"
#include "config/aom_config.h"
@@ -408,4 +407,4 @@ void av1_steepest_descent_sparse(const SPARSE_MTX *A, const double *b, int bl,
aom_free(Ad);
}
-#endif // CONFIG_OPFL
+#endif // CONFIG_OPTICAL_FLOW_API
diff --git a/third_party/libaom/source/libaom/av1/encoder/sparse_linear_solver.h b/third_party/libaom/source/libaom/av1/encoder/sparse_linear_solver.h
index 3cacb51b93..a3f2f7b964 100644
--- a/third_party/libaom/source/libaom/av1/encoder/sparse_linear_solver.h
+++ b/third_party/libaom/source/libaom/av1/encoder/sparse_linear_solver.h
@@ -9,8 +9,8 @@
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
-#ifndef AV1_COMMON_SPARSE_LINEAR_SOLVER_H_
-#define AV1_COMMON_SPARSE_LINEAR_SOLVER_H_
+#ifndef AOM_AV1_ENCODER_SPARSE_LINEAR_SOLVER_H_
+#define AOM_AV1_ENCODER_SPARSE_LINEAR_SOLVER_H_
#ifdef __cplusplus
extern "C" {
@@ -64,4 +64,4 @@ void av1_steepest_descent_sparse(const SPARSE_MTX *A, const double *b, int bl,
} // extern "C"
#endif
-#endif /* AV1_COMMON_SPARSE_LINEAR_SOLVER_H_ */
+#endif /* AOM_AV1_ENCODER_SPARSE_LINEAR_SOLVER_H_ */
diff --git a/third_party/libaom/source/libaom/av1/encoder/speed_features.c b/third_party/libaom/source/libaom/av1/encoder/speed_features.c
index 2244aaae91..916a818513 100644
--- a/third_party/libaom/source/libaom/av1/encoder/speed_features.c
+++ b/third_party/libaom/source/libaom/av1/encoder/speed_features.c
@@ -274,6 +274,20 @@ static void set_allintra_speed_feature_framesize_dependent(
sf->part_sf.use_square_partition_only_threshold = BLOCK_16X16;
}
+
+ if (speed >= 7) {
+ if (!is_480p_or_larger) {
+ sf->rt_sf.nonrd_check_partition_merge_mode = 2;
+ }
+ }
+
+ if (speed >= 8) {
+ // TODO(kyslov): add more speed features to control speed/quality
+ }
+
+ if (speed >= 9) {
+ // TODO(kyslov): add more speed features to control speed/quality
+ }
}
static void set_allintra_speed_features_framesize_independent(
@@ -289,8 +303,11 @@ static void set_allintra_speed_features_framesize_independent(
sf->part_sf.prune_part4_search = 2;
sf->part_sf.simple_motion_search_prune_rect = 1;
sf->part_sf.ml_predict_breakout_level = use_hbd ? 1 : 3;
+ sf->part_sf.reuse_prev_rd_results_for_part_ab = 1;
+ sf->part_sf.use_best_rd_for_pruning = 1;
sf->intra_sf.intra_pruning_with_hog = 1;
+ sf->intra_sf.dv_cost_upd_level = INTERNAL_COST_UPD_OFF;
sf->tx_sf.adaptive_txb_search_level = 1;
sf->tx_sf.intra_tx_size_search_init_depth_sqr = 1;
@@ -300,7 +317,7 @@ static void set_allintra_speed_features_framesize_independent(
sf->rt_sf.use_nonrd_pick_mode = 0;
sf->rt_sf.use_real_time_ref_set = 0;
- if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION ||
+ if (cpi->ppi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION ||
cpi->use_screen_content_tools) {
sf->mv_sf.exhaustive_searches_thresh = (1 << 20);
} else {
@@ -318,10 +335,12 @@ static void set_allintra_speed_features_framesize_independent(
// speed feature accordingly
sf->part_sf.simple_motion_search_split = allow_screen_content_tools ? 1 : 2;
sf->part_sf.ml_predict_breakout_level = use_hbd ? 2 : 3;
+ sf->part_sf.reuse_best_prediction_for_part_ab = 1;
sf->mv_sf.exhaustive_searches_thresh <<= 1;
sf->intra_sf.prune_palette_search_level = 1;
+ sf->intra_sf.top_intra_model_count_allowed = 3;
sf->tx_sf.adaptive_txb_search_level = 2;
sf->tx_sf.inter_tx_size_search_init_depth_rect = 1;
@@ -348,6 +367,7 @@ static void set_allintra_speed_features_framesize_independent(
sf->intra_sf.disable_smooth_intra = 1;
sf->intra_sf.intra_pruning_with_hog = 2;
+ sf->intra_sf.prune_filter_intra_level = 1;
sf->rd_sf.perform_coeff_opt = 3;
@@ -397,9 +417,6 @@ static void set_allintra_speed_features_framesize_independent(
sf->intra_sf.intra_uv_mode_mask[TX_16X16] = UV_INTRA_DC_H_V_CFL;
sf->intra_sf.intra_uv_mode_mask[TX_32X32] = UV_INTRA_DC_H_V_CFL;
sf->intra_sf.intra_uv_mode_mask[TX_64X64] = UV_INTRA_DC_H_V_CFL;
- sf->intra_sf.intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V;
- sf->intra_sf.intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
- sf->intra_sf.intra_y_mode_mask[TX_64X64] = INTRA_DC_H_V;
sf->intra_sf.prune_chroma_modes_using_luma_winner = 1;
sf->mv_sf.simple_motion_subpel_force_stop = HALF_PEL;
@@ -408,7 +425,7 @@ static void set_allintra_speed_features_framesize_independent(
sf->tpl_sf.subpel_force_stop = HALF_PEL;
sf->tpl_sf.search_method = FAST_BIGDIA;
- sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 1;
+ sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 2;
sf->tx_sf.tx_type_search.fast_intra_tx_type_search = 1;
sf->tx_sf.tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_3;
sf->tx_sf.tx_type_search.prune_tx_type_est_rd = 1;
@@ -443,9 +460,10 @@ static void set_allintra_speed_features_framesize_independent(
}
if (speed >= 6) {
- sf->intra_sf.disable_filter_intra = 1;
+ sf->intra_sf.prune_filter_intra_level = 2;
sf->intra_sf.chroma_intra_pruning_with_hog = 4;
sf->intra_sf.intra_pruning_with_hog = 4;
+ sf->intra_sf.cfl_search_range = 1;
sf->part_sf.prune_rectangular_split_based_on_qidx =
allow_screen_content_tools ? 0 : 1;
@@ -458,7 +476,7 @@ static void set_allintra_speed_features_framesize_independent(
sf->mv_sf.use_bsize_dependent_search_method = 1;
- sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 2;
+ sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 3;
sf->tx_sf.tx_type_search.prune_tx_type_est_rd = 0;
// Use largest txfm block size for square coding blocks.
sf->tx_sf.intra_tx_size_search_init_depth_sqr = 2;
@@ -466,10 +484,39 @@ static void set_allintra_speed_features_framesize_independent(
sf->rd_sf.perform_coeff_opt = 6;
sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL4;
+ sf->lpf_sf.lpf_pick = LPF_PICK_FROM_Q;
sf->winner_mode_sf.multi_winner_mode_type = MULTI_WINNER_MODE_OFF;
}
+ if (speed >= 7) {
+ sf->part_sf.default_min_partition_size = BLOCK_8X8;
+ sf->part_sf.partition_search_type = VAR_BASED_PARTITION;
+
+ sf->lpf_sf.cdef_pick_method = CDEF_PICK_FROM_Q;
+
+ sf->rt_sf.mode_search_skip_flags |= FLAG_SKIP_INTRA_DIRMISMATCH;
+ sf->rt_sf.use_nonrd_pick_mode = 1;
+ sf->rt_sf.nonrd_check_partition_merge_mode = 1;
+ sf->rt_sf.nonrd_check_partition_split = 0;
+ sf->rt_sf.skip_intra_pred_if_tx_skip = 1;
+ // Set mask for intra modes.
+ for (int i = 0; i < BLOCK_SIZES; ++i)
+ if (i >= BLOCK_32X32)
+ sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC;
+ else
+ // Use DC, H, V intra mode for block sizes < 32X32.
+ sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC_H_V;
+ }
+
+ if (speed >= 8) {
+ // TODO(kyslov): add more speed features to control speed/quality
+ }
+
+ if (speed >= 9) {
+ // TODO(kyslov): add more speed features to control speed/quality
+ }
+
// Intra txb hash is currently not compatible with multi-winner mode as the
// hashes got reset during multi-winner mode processing.
assert(IMPLIES(
@@ -480,6 +527,7 @@ static void set_allintra_speed_features_framesize_independent(
static void set_good_speed_feature_framesize_dependent(
const AV1_COMP *const cpi, SPEED_FEATURES *const sf, int speed) {
const AV1_COMMON *const cm = &cpi->common;
+ const int is_480p_or_lesser = AOMMIN(cm->width, cm->height) <= 480;
const int is_480p_or_larger = AOMMIN(cm->width, cm->height) >= 480;
const int is_720p_or_larger = AOMMIN(cm->width, cm->height) >= 720;
const int is_1080p_or_larger = AOMMIN(cm->width, cm->height) >= 1080;
@@ -518,7 +566,16 @@ static void set_good_speed_feature_framesize_dependent(
sf->mv_sf.use_downsampled_sad = 1;
}
+ if (!is_720p_or_larger) {
+ const RateControlCfg *const rc_cfg = &cpi->oxcf.rc_cfg;
+ const int rate_tolerance =
+ AOMMIN(rc_cfg->under_shoot_pct, rc_cfg->over_shoot_pct);
+ sf->hl_sf.recode_tolerance = 25 + (rate_tolerance >> 2);
+ }
+
if (speed >= 1) {
+ if (is_480p_or_lesser) sf->inter_sf.skip_newmv_in_drl = 1;
+
if (is_720p_or_larger) {
sf->part_sf.use_square_partition_only_threshold = BLOCK_128X128;
} else if (is_480p_or_larger) {
@@ -561,6 +618,12 @@ static void set_good_speed_feature_framesize_dependent(
}
if (is_480p_or_larger) {
+ sf->inter_sf.disable_interintra_wedge_var_thresh = 100;
+ } else {
+ sf->inter_sf.disable_interintra_wedge_var_thresh = UINT_MAX;
+ }
+
+ if (is_480p_or_larger) {
sf->tx_sf.tx_type_search.prune_tx_type_using_stats = 1;
if (use_hbd) sf->tx_sf.prune_tx_size_level = 2;
} else {
@@ -573,6 +636,8 @@ static void set_good_speed_feature_framesize_dependent(
}
if (speed >= 3) {
+ sf->inter_sf.skip_newmv_in_drl = 2;
+
sf->part_sf.ml_early_term_after_part_split_level = 0;
if (is_720p_or_larger) {
@@ -584,6 +649,10 @@ static void set_good_speed_feature_framesize_dependent(
sf->part_sf.partition_search_breakout_rate_thr = 120;
}
if (use_hbd) sf->tx_sf.prune_tx_size_level = 3;
+
+ if (is_480p_or_larger) sf->intra_sf.top_intra_model_count_allowed = 2;
+
+ sf->inter_sf.disable_interintra_wedge_var_thresh = UINT_MAX;
}
if (speed >= 4) {
@@ -598,11 +667,14 @@ static void set_good_speed_feature_framesize_dependent(
}
sf->inter_sf.prune_obmc_prob_thresh = INT_MAX;
+ if (is_480p_or_lesser) sf->inter_sf.skip_newmv_in_drl = 3;
if (is_720p_or_larger)
sf->hl_sf.recode_tolerance = 32;
else
sf->hl_sf.recode_tolerance = 55;
+
+ sf->intra_sf.top_intra_model_count_allowed = 2;
}
if (speed >= 5) {
@@ -612,6 +684,8 @@ static void set_good_speed_feature_framesize_dependent(
sf->inter_sf.prune_warped_prob_thresh = 8;
}
if (is_720p_or_larger) sf->hl_sf.recode_tolerance = 40;
+
+ sf->inter_sf.skip_newmv_in_drl = 4;
}
if (speed >= 6) {
@@ -630,7 +704,9 @@ static void set_good_speed_feature_framesize_dependent(
}
if (!is_720p_or_larger) {
- sf->inter_sf.mv_cost_upd_level = 2;
+ sf->inter_sf.mv_cost_upd_level = INTERNAL_COST_UPD_SBROW_SET;
+ sf->inter_sf.coeff_cost_upd_level = INTERNAL_COST_UPD_SBROW;
+ sf->inter_sf.mode_cost_upd_level = INTERNAL_COST_UPD_SBROW;
}
if (is_720p_or_larger) {
@@ -650,10 +726,10 @@ static void set_good_speed_feature_framesize_dependent(
static void set_good_speed_features_framesize_independent(
const AV1_COMP *const cpi, SPEED_FEATURES *const sf, int speed) {
const AV1_COMMON *const cm = &cpi->common;
- const GF_GROUP *const gf_group = &cpi->gf_group;
+ const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
const int boosted = frame_is_boosted(cpi);
const int is_boosted_arf2_bwd_type =
- boosted || gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE;
+ boosted || gf_group->update_type[cpi->gf_frame_index] == INTNL_ARF_UPDATE;
const int allow_screen_content_tools =
cm->features.allow_screen_content_tools;
const int use_hbd = cpi->oxcf.use_highbitdepth;
@@ -670,6 +746,8 @@ static void set_good_speed_features_framesize_independent(
sf->part_sf.prune_part4_search = 2;
sf->part_sf.simple_motion_search_prune_rect = 1;
sf->part_sf.ml_predict_breakout_level = use_hbd ? 1 : 3;
+ sf->part_sf.reuse_prev_rd_results_for_part_ab = 1;
+ sf->part_sf.use_best_rd_for_pruning = 1;
// TODO(debargha): Test, tweak and turn on either 1 or 2
sf->inter_sf.inter_mode_rd_model_estimation = 1;
@@ -698,7 +776,7 @@ static void set_good_speed_features_framesize_independent(
sf->rt_sf.use_nonrd_pick_mode = 0;
sf->rt_sf.use_real_time_ref_set = 0;
- if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION ||
+ if (cpi->ppi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION ||
cpi->use_screen_content_tools) {
sf->mv_sf.exhaustive_searches_thresh = (1 << 20);
} else {
@@ -725,7 +803,6 @@ static void set_good_speed_features_framesize_independent(
sf->mv_sf.use_accurate_subpel_search = USE_4_TAPS;
sf->mv_sf.disable_extensive_joint_motion_search = 1;
- sf->inter_sf.disable_interinter_wedge_newmv_search = boosted ? 0 : 1;
sf->inter_sf.prune_comp_search_by_single_result = boosted ? 2 : 1;
sf->inter_sf.prune_comp_type_by_comp_avg = 1;
sf->inter_sf.prune_comp_type_by_model_rd = boosted ? 0 : 1;
@@ -736,7 +813,6 @@ static void set_good_speed_features_framesize_independent(
sf->inter_sf.reduce_inter_modes = boosted ? 1 : 3;
sf->inter_sf.reuse_inter_intra_mode = 1;
sf->inter_sf.selective_ref_frame = 2;
- sf->inter_sf.skip_repeated_newmv = 1;
sf->interp_sf.use_interp_filter = 1;
@@ -766,7 +842,11 @@ static void set_good_speed_features_framesize_independent(
if (speed >= 2) {
sf->hl_sf.recode_loop = ALLOW_RECODE_KFARFGF;
+ sf->fp_sf.skip_motion_search_threshold = 25;
+
sf->part_sf.allow_partition_search_skip = 1;
+ sf->part_sf.reuse_best_prediction_for_part_ab =
+ !frame_is_intra_only(&cpi->common);
sf->mv_sf.auto_mv_step_size = 1;
sf->mv_sf.simple_motion_subpel_force_stop = QUARTER_PEL;
@@ -778,20 +858,21 @@ static void set_good_speed_features_framesize_independent(
// bit more closely to figure out why.
sf->inter_sf.adaptive_rd_thresh = 1;
sf->inter_sf.comp_inter_joint_search_thresh = BLOCK_SIZES_ALL;
- sf->inter_sf.disable_interintra_wedge_var_thresh = 100;
sf->inter_sf.disable_interinter_wedge_var_thresh = 100;
sf->inter_sf.fast_interintra_wedge_search = 1;
sf->inter_sf.prune_comp_search_by_single_result = boosted ? 4 : 1;
- sf->inter_sf.prune_compound_using_neighbors = 1;
+ sf->inter_sf.prune_ext_comp_using_neighbors = 1;
sf->inter_sf.prune_comp_using_best_single_mode_ref = 2;
sf->inter_sf.prune_comp_type_by_comp_avg = 2;
- sf->inter_sf.reuse_best_prediction_for_part_ab = 1;
sf->inter_sf.selective_ref_frame = 3;
sf->inter_sf.use_dist_wtd_comp_flag = DIST_WTD_COMP_DISABLED;
// Enable fast search only for COMPOUND_DIFFWTD type.
sf->inter_sf.enable_fast_compound_mode_search = 1;
sf->inter_sf.reuse_mask_search_results = 1;
sf->inter_sf.txfm_rd_gate_level = boosted ? 0 : 1;
+ sf->inter_sf.disable_interinter_wedge_newmv_search =
+ is_boosted_arf2_bwd_type ? 0 : 1;
+ sf->inter_sf.inter_mode_txfm_breakout = boosted ? 0 : 1;
// TODO(Sachin): Enable/Enhance this speed feature for speed 2 & 3
sf->interp_sf.adaptive_interp_filter_search = 1;
@@ -831,7 +912,8 @@ static void set_good_speed_features_framesize_independent(
sf->mv_sf.search_method = DIAMOND;
sf->mv_sf.disable_second_mv = 2;
- sf->inter_sf.mv_cost_upd_level = 1;
+ sf->inter_sf.disable_interinter_wedge_newmv_search = boosted ? 0 : 1;
+ sf->inter_sf.mv_cost_upd_level = INTERNAL_COST_UPD_SBROW;
sf->inter_sf.disable_onesided_comp = 1;
// TODO(yunqing): evaluate this speed feature for speed 1 & 2, and combine
// it with cpi->sf.disable_wedge_search_var_thresh.
@@ -843,10 +925,11 @@ static void set_good_speed_features_framesize_independent(
sf->inter_sf.prune_comp_search_by_single_result = boosted ? 4 : 2;
sf->inter_sf.selective_ref_frame = 5;
sf->inter_sf.skip_repeated_ref_mv = 1;
- sf->inter_sf.skip_repeated_full_newmv = 1;
sf->inter_sf.reuse_compound_type_decision = 1;
sf->inter_sf.txfm_rd_gate_level =
boosted ? 0 : (is_boosted_arf2_bwd_type ? 1 : 2);
+ sf->inter_sf.enable_fast_wedge_mask_search = 1;
+ sf->inter_sf.inter_mode_txfm_breakout = boosted ? 0 : 2;
sf->interp_sf.adaptive_interp_filter_search = 2;
@@ -865,6 +948,8 @@ static void set_good_speed_features_framesize_independent(
sf->tx_sf.adaptive_txb_search_level = boosted ? 2 : 3;
sf->tx_sf.tx_type_search.use_skip_flag_prediction = 2;
sf->tx_sf.use_intra_txb_hash = 1;
+ sf->tx_sf.tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_3;
+ sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 1;
// TODO(any): Refactor the code related to following winner mode speed
// features
@@ -874,10 +959,10 @@ static void set_good_speed_features_framesize_independent(
frame_is_intra_only(&cpi->common) ? 0 : 1;
sf->winner_mode_sf.enable_winner_mode_for_use_tx_domain_dist = 1;
sf->winner_mode_sf.motion_mode_for_winner_cand =
- boosted
- ? 0
- : gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE ? 1
- : 2;
+ boosted ? 0
+ : gf_group->update_type[cpi->gf_frame_index] == INTNL_ARF_UPDATE
+ ? 1
+ : 2;
// TODO(any): evaluate if these lpf features can be moved to speed 2.
// For screen content, "prune_sgr_based_on_wiener = 2" cause large quality
@@ -889,6 +974,8 @@ static void set_good_speed_features_framesize_independent(
}
if (speed >= 4) {
+ sf->gm_sf.prune_zero_mv_with_sse = 1;
+
sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
sf->part_sf.simple_motion_search_prune_agg = 2;
@@ -901,7 +988,7 @@ static void set_good_speed_features_framesize_independent(
sf->inter_sf.txfm_rd_gate_level = boosted ? 0 : 3;
sf->inter_sf.prune_inter_modes_based_on_tpl = boosted ? 0 : 2;
- sf->inter_sf.prune_compound_using_neighbors = 2;
+ sf->inter_sf.prune_ext_comp_using_neighbors = 2;
sf->inter_sf.prune_obmc_prob_thresh = INT_MAX;
sf->interp_sf.cb_pred_filter_search = 1;
@@ -911,9 +998,10 @@ static void set_good_speed_features_framesize_independent(
sf->intra_sf.intra_uv_mode_mask[TX_16X16] = UV_INTRA_DC_H_V_CFL;
sf->intra_sf.intra_uv_mode_mask[TX_32X32] = UV_INTRA_DC_H_V_CFL;
sf->intra_sf.intra_uv_mode_mask[TX_64X64] = UV_INTRA_DC_H_V_CFL;
- sf->intra_sf.intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V;
- sf->intra_sf.intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
- sf->intra_sf.intra_y_mode_mask[TX_64X64] = INTRA_DC_H_V;
+ // TODO(any): "intra_y_mode_mask" doesn't help much at speed 4.
+ // sf->intra_sf.intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V;
+ // sf->intra_sf.intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
+ // sf->intra_sf.intra_y_mode_mask[TX_64X64] = INTRA_DC_H_V;
// TODO(any): Experiment with this speed feature set to 2 for higher quality
// presets as well
sf->intra_sf.skip_intra_in_interframe = 2;
@@ -923,10 +1011,10 @@ static void set_good_speed_features_framesize_independent(
sf->tpl_sf.prune_starting_mv = 2;
sf->tpl_sf.subpel_force_stop = HALF_PEL;
sf->tpl_sf.search_method = FAST_BIGDIA;
+ sf->tpl_sf.gop_length_decision_method = 1;
- sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 1;
+ sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 2;
sf->tx_sf.tx_type_search.fast_intra_tx_type_search = 1;
- sf->tx_sf.tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_3;
sf->tx_sf.tx_type_search.prune_tx_type_est_rd = 1;
// TODO(any): Experiment with enabling of this speed feature as hash state
// is reset during winner mode processing
@@ -948,9 +1036,14 @@ static void set_good_speed_features_framesize_independent(
}
if (speed >= 5) {
+ sf->fp_sf.reduce_mv_step_param = 4;
+
sf->part_sf.simple_motion_search_prune_agg = 3;
sf->part_sf.ext_partition_eval_thresh =
allow_screen_content_tools ? BLOCK_8X8 : BLOCK_16X16;
+ sf->part_sf.prune_sub_8x8_partition_level =
+ (allow_screen_content_tools || frame_is_intra_only(&cpi->common)) ? 0
+ : 2;
sf->inter_sf.disable_interinter_wedge_var_thresh = UINT_MAX;
sf->inter_sf.prune_inter_modes_if_skippable = 1;
@@ -974,8 +1067,11 @@ static void set_good_speed_features_framesize_independent(
sf->tpl_sf.prune_starting_mv = 3;
sf->tpl_sf.use_y_only_rate_distortion = 1;
sf->tpl_sf.subpel_force_stop = FULL_PEL;
+ sf->tpl_sf.gop_length_decision_method = 2;
sf->winner_mode_sf.dc_blk_pred_level = 1;
+
+ sf->fp_sf.disable_recon = 1;
}
if (speed >= 6) {
@@ -986,9 +1082,14 @@ static void set_good_speed_features_framesize_independent(
sf->inter_sf.prune_inter_modes_based_on_tpl = boosted ? 0 : 3;
sf->inter_sf.prune_nearmv_using_neighbors = 1;
sf->inter_sf.selective_ref_frame = 6;
+ sf->inter_sf.prune_ext_comp_using_neighbors = 3;
sf->intra_sf.chroma_intra_pruning_with_hog = 4;
sf->intra_sf.intra_pruning_with_hog = 4;
+ sf->intra_sf.intra_uv_mode_mask[TX_32X32] = UV_INTRA_DC;
+ sf->intra_sf.intra_uv_mode_mask[TX_64X64] = UV_INTRA_DC;
+ sf->intra_sf.intra_y_mode_mask[TX_32X32] = INTRA_DC;
+ sf->intra_sf.intra_y_mode_mask[TX_64X64] = INTRA_DC;
sf->part_sf.prune_rectangular_split_based_on_qidx =
boosted || allow_screen_content_tools ? 0 : 1;
@@ -1000,10 +1101,10 @@ static void set_good_speed_features_framesize_independent(
sf->mv_sf.simple_motion_subpel_force_stop = FULL_PEL;
sf->mv_sf.use_bsize_dependent_search_method = 1;
- sf->tpl_sf.disable_gop_length_decision = 1;
+ sf->tpl_sf.gop_length_decision_method = 3;
sf->tpl_sf.disable_filtered_key_tpl = 1;
- sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 2;
+ sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 4;
sf->tx_sf.use_intra_txb_hash = 1;
sf->tx_sf.tx_type_search.prune_tx_type_est_rd = 0;
@@ -1052,10 +1153,13 @@ static void set_rt_speed_feature_framesize_dependent(const AV1_COMP *const cpi,
#endif
}
} else {
- if (speed == 8 && !cpi->use_svc) {
+ if (speed == 8 && !cpi->ppi->use_svc) {
sf->rt_sf.short_circuit_low_temp_var = 0;
sf->rt_sf.use_nonrd_altref_frame = 1;
}
+ if (speed >= 9) {
+ sf->rt_sf.skip_cdef_sb = 1;
+ }
}
if (!is_480p_or_larger) {
if (speed == 7) {
@@ -1088,6 +1192,8 @@ static void set_rt_speed_features_framesize_independent(AV1_COMP *cpi,
sf->part_sf.less_rectangular_check_level = 1;
sf->part_sf.ml_prune_partition = 1;
sf->part_sf.prune_ext_partition_types_search_level = 1;
+ sf->part_sf.reuse_prev_rd_results_for_part_ab = 1;
+ sf->part_sf.use_best_rd_for_pruning = 1;
// TODO(debargha): Test, tweak and turn on either 1 or 2
sf->inter_sf.inter_mode_rd_model_estimation = 0;
@@ -1103,6 +1209,7 @@ static void set_rt_speed_features_framesize_independent(AV1_COMP *cpi,
sf->interp_sf.use_fast_interpolation_filter_search = 1;
+ sf->intra_sf.dv_cost_upd_level = INTERNAL_COST_UPD_OFF;
sf->intra_sf.intra_pruning_with_hog = 1;
sf->mv_sf.full_pixel_search_level = 1;
@@ -1140,7 +1247,6 @@ static void set_rt_speed_features_framesize_independent(AV1_COMP *cpi,
sf->inter_sf.prune_comp_search_by_single_result = 1;
sf->inter_sf.reuse_inter_intra_mode = 1;
sf->inter_sf.selective_ref_frame = 2;
- sf->inter_sf.skip_repeated_newmv = 1;
sf->inter_sf.disable_interintra_wedge_var_thresh = 0;
sf->inter_sf.disable_interinter_wedge_var_thresh = 0;
sf->inter_sf.prune_comp_type_by_comp_avg = 1;
@@ -1191,7 +1297,7 @@ static void set_rt_speed_features_framesize_independent(AV1_COMP *cpi,
if (speed >= 3) {
sf->hl_sf.recode_loop = ALLOW_RECODE_KFARFGF;
- sf->gm_sf.gm_search_type = GM_DISABLE_SEARCH;
+ sf->gm_sf.gm_search_type = GM_REDUCED_REF_SEARCH_SKIP_L2_L3_ARF2;
sf->part_sf.less_rectangular_check_level = 2;
@@ -1202,7 +1308,7 @@ static void set_rt_speed_features_framesize_independent(AV1_COMP *cpi,
// sf->mv_sf.adaptive_motion_search = 1;
sf->inter_sf.adaptive_rd_thresh = 2;
- sf->inter_sf.mv_cost_upd_level = 1;
+ sf->inter_sf.mv_cost_upd_level = INTERNAL_COST_UPD_SBROW;
// TODO(yunqing): evaluate this speed feature for speed 1 & 2, and combine
// it with cpi->sf.disable_wedge_search_var_thresh.
sf->inter_sf.disable_interintra_wedge_var_thresh = UINT_MAX;
@@ -1306,12 +1412,20 @@ static void set_rt_speed_features_framesize_independent(AV1_COMP *cpi,
sf->part_sf.default_min_partition_size = BLOCK_8X8;
sf->part_sf.partition_search_type = VAR_BASED_PARTITION;
+ sf->gm_sf.gm_search_type = GM_DISABLE_SEARCH;
+
sf->mv_sf.search_method = FAST_DIAMOND;
sf->mv_sf.subpel_force_stop = QUARTER_PEL;
sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED;
sf->inter_sf.inter_mode_rd_model_estimation = 2;
+ // Disable intra_y_mode_mask pruning since the performance at speed 7 isn't
+ // good. May need more study.
+ for (int i = 0; i < TX_SIZES; ++i) {
+ sf->intra_sf.intra_y_mode_mask[i] = INTRA_ALL;
+ }
+
sf->lpf_sf.lpf_pick = LPF_PICK_FROM_Q;
sf->rt_sf.mode_search_skip_flags |= FLAG_SKIP_INTRA_DIRMISMATCH;
@@ -1348,7 +1462,7 @@ static void set_rt_speed_features_framesize_independent(AV1_COMP *cpi,
// TODO(marpan): Look into why enabling skip_loopfilter_non_reference is
// not bitexact on rtc testset, its very close (< ~0.01 bdrate), but not
// always bitexact.
- if (cpi->use_svc && cpi->svc.non_reference_frame &&
+ if (cpi->ppi->use_svc && cpi->svc.non_reference_frame &&
sf->lpf_sf.cdef_pick_method == CDEF_PICK_FROM_Q &&
sf->lpf_sf.lpf_pick == LPF_PICK_FROM_Q)
sf->rt_sf.skip_loopfilter_non_reference = 1;
@@ -1398,8 +1512,14 @@ static AOM_INLINE void init_hl_sf(HIGH_LEVEL_SPEED_FEATURES *hl_sf) {
hl_sf->second_alt_ref_filtering = 1;
}
+static AOM_INLINE void init_fp_sf(FIRST_PASS_SPEED_FEATURES *fp_sf) {
+ fp_sf->reduce_mv_step_param = 3;
+ fp_sf->skip_motion_search_threshold = 0;
+ fp_sf->disable_recon = 0;
+}
+
static AOM_INLINE void init_tpl_sf(TPL_SPEED_FEATURES *tpl_sf) {
- tpl_sf->disable_gop_length_decision = 0;
+ tpl_sf->gop_length_decision_method = 0;
tpl_sf->prune_intra_modes = 0;
tpl_sf->prune_starting_mv = 0;
tpl_sf->reduce_first_step_size = 0;
@@ -1415,6 +1535,7 @@ static AOM_INLINE void init_tpl_sf(TPL_SPEED_FEATURES *tpl_sf) {
static AOM_INLINE void init_gm_sf(GLOBAL_MOTION_SPEED_FEATURES *gm_sf) {
gm_sf->gm_search_type = GM_FULL_SEARCH;
gm_sf->prune_ref_frame_for_gm_search = 0;
+ gm_sf->prune_zero_mv_with_sse = 0;
}
static AOM_INLINE void init_part_sf(PARTITION_SPEED_FEATURES *part_sf) {
@@ -1454,6 +1575,9 @@ static AOM_INLINE void init_part_sf(PARTITION_SPEED_FEATURES *part_sf) {
part_sf->ml_predict_breakout_level = 0;
part_sf->prune_sub_8x8_partition_level = 0;
part_sf->simple_motion_search_rect_split = 0;
+ part_sf->reuse_prev_rd_results_for_part_ab = 0;
+ part_sf->reuse_best_prediction_for_part_ab = 0;
+ part_sf->use_best_rd_for_pruning = 0;
}
static AOM_INLINE void init_mv_sf(MV_SPEED_FEATURES *mv_sf) {
@@ -1487,16 +1611,17 @@ static AOM_INLINE void init_inter_sf(INTER_MODE_SPEED_FEATURES *inter_sf) {
inter_sf->fast_wedge_sign_estimate = 0;
inter_sf->use_dist_wtd_comp_flag = DIST_WTD_COMP_ENABLED;
inter_sf->reuse_inter_intra_mode = 0;
- inter_sf->mv_cost_upd_level = 0;
+ inter_sf->mv_cost_upd_level = INTERNAL_COST_UPD_SB;
+ inter_sf->coeff_cost_upd_level = INTERNAL_COST_UPD_SB;
+ inter_sf->mode_cost_upd_level = INTERNAL_COST_UPD_SB;
inter_sf->prune_inter_modes_based_on_tpl = 0;
inter_sf->prune_nearmv_using_neighbors = 0;
inter_sf->prune_comp_search_by_single_result = 0;
inter_sf->skip_repeated_ref_mv = 0;
- inter_sf->skip_repeated_newmv = 0;
- inter_sf->skip_repeated_full_newmv = 0;
+ inter_sf->skip_newmv_in_drl = 0;
inter_sf->inter_mode_rd_model_estimation = 0;
inter_sf->prune_compound_using_single_ref = 0;
- inter_sf->prune_compound_using_neighbors = 0;
+ inter_sf->prune_ext_comp_using_neighbors = 0;
inter_sf->prune_comp_using_best_single_mode_ref = 0;
inter_sf->disable_onesided_comp = 0;
inter_sf->prune_mode_search_simple_translation = 0;
@@ -1514,9 +1639,10 @@ static AOM_INLINE void init_inter_sf(INTER_MODE_SPEED_FEATURES *inter_sf) {
inter_sf->txfm_rd_gate_level = 0;
inter_sf->prune_inter_modes_if_skippable = 0;
inter_sf->disable_masked_comp = 0;
- inter_sf->reuse_best_prediction_for_part_ab = 0;
inter_sf->enable_fast_compound_mode_search = 0;
inter_sf->reuse_mask_search_results = 0;
+ inter_sf->enable_fast_wedge_mask_search = 0;
+ inter_sf->inter_mode_txfm_breakout = 0;
}
static AOM_INLINE void init_interp_sf(INTERP_FILTER_SPEED_FEATURES *interp_sf) {
@@ -1529,6 +1655,7 @@ static AOM_INLINE void init_interp_sf(INTERP_FILTER_SPEED_FEATURES *interp_sf) {
}
static AOM_INLINE void init_intra_sf(INTRA_MODE_SPEED_FEATURES *intra_sf) {
+ intra_sf->dv_cost_upd_level = INTERNAL_COST_UPD_SB;
intra_sf->skip_intra_in_interframe = 1;
intra_sf->intra_pruning_with_hog = 0;
intra_sf->chroma_intra_pruning_with_hog = 0;
@@ -1539,8 +1666,10 @@ static AOM_INLINE void init_intra_sf(INTRA_MODE_SPEED_FEATURES *intra_sf) {
intra_sf->intra_uv_mode_mask[i] = UV_INTRA_ALL;
}
intra_sf->disable_smooth_intra = 0;
- intra_sf->disable_filter_intra = 0;
+ intra_sf->prune_filter_intra_level = 0;
intra_sf->prune_chroma_modes_using_luma_winner = 0;
+ intra_sf->cfl_search_range = 3;
+ intra_sf->top_intra_model_count_allowed = TOP_INTRA_MODEL_COUNT;
}
static AOM_INLINE void init_tx_sf(TX_SPEED_FEATURES *tx_sf) {
@@ -1650,9 +1779,11 @@ void av1_set_speed_features_framesize_dependent(AV1_COMP *cpi, int speed) {
break;
}
- if (!cpi->seq_params_locked) {
- cpi->common.seq_params.enable_masked_compound &=
+ if (!cpi->ppi->seq_params_locked) {
+ cpi->common.seq_params->enable_masked_compound &=
!sf->inter_sf.disable_masked_comp;
+ cpi->common.seq_params->enable_interintra_compound &=
+ (sf->inter_sf.disable_interintra_wedge_var_thresh != UINT_MAX);
}
// This is only used in motion vector unit test.
@@ -1662,9 +1793,9 @@ void av1_set_speed_features_framesize_dependent(AV1_COMP *cpi, int speed) {
cpi->mv_search_params.find_fractional_mv_step = av1_return_min_sub_pixel_mv;
if ((cpi->oxcf.row_mt == 1) && (cpi->oxcf.max_threads > 1)) {
- if (sf->inter_sf.mv_cost_upd_level > 1) {
+ if (sf->inter_sf.mv_cost_upd_level < INTERNAL_COST_UPD_SBROW) {
// Set mv_cost_upd_level to use row level update.
- sf->inter_sf.mv_cost_upd_level = 1;
+ sf->inter_sf.mv_cost_upd_level = INTERNAL_COST_UPD_SBROW;
}
}
}
@@ -1676,6 +1807,7 @@ void av1_set_speed_features_framesize_independent(AV1_COMP *cpi, int speed) {
int i;
init_hl_sf(&sf->hl_sf);
+ init_fp_sf(&sf->fp_sf);
init_tpl_sf(&sf->tpl_sf);
init_gm_sf(&sf->gm_sf);
init_part_sf(&sf->part_sf);
@@ -1701,12 +1833,12 @@ void av1_set_speed_features_framesize_independent(AV1_COMP *cpi, int speed) {
break;
}
- if (!cpi->seq_params_locked) {
- cpi->common.seq_params.enable_dual_filter &=
+ if (!cpi->ppi->seq_params_locked) {
+ cpi->common.seq_params->enable_dual_filter &=
!sf->interp_sf.disable_dual_filter;
- cpi->common.seq_params.enable_restoration &= !sf->lpf_sf.disable_lr_filter;
+ cpi->common.seq_params->enable_restoration &= !sf->lpf_sf.disable_lr_filter;
- cpi->common.seq_params.enable_interintra_compound &=
+ cpi->common.seq_params->enable_interintra_compound &=
(sf->inter_sf.disable_interintra_wedge_var_thresh != UINT_MAX);
}
@@ -1821,10 +1953,11 @@ void av1_set_speed_features_qindex_dependent(AV1_COMP *cpi, int speed) {
SPEED_FEATURES *const sf = &cpi->sf;
WinnerModeParams *const winner_mode_params = &cpi->winner_mode_params;
const int boosted = frame_is_boosted(cpi);
+ const int is_480p_or_larger = AOMMIN(cm->width, cm->height) >= 480;
const int is_720p_or_larger = AOMMIN(cm->width, cm->height) >= 720;
const int is_1080p_or_larger = AOMMIN(cm->width, cm->height) >= 1080;
const int is_arf2_bwd_type =
- cpi->gf_group.update_type[cpi->gf_group.index] == INTNL_ARF_UPDATE;
+ cpi->ppi->gf_group.update_type[cpi->gf_frame_index] == INTNL_ARF_UPDATE;
if (cpi->oxcf.mode == REALTIME) return;
@@ -1832,7 +1965,6 @@ void av1_set_speed_features_qindex_dependent(AV1_COMP *cpi, int speed) {
// qindex_thresh for resolution < 720p
const int qindex_thresh = boosted ? 70 : (is_arf2_bwd_type ? 110 : 140);
if (!is_720p_or_larger && cm->quant_params.base_qindex <= qindex_thresh) {
- sf->inter_sf.skip_repeated_newmv = 1;
sf->part_sf.simple_motion_search_split =
cm->features.allow_screen_content_tools ? 1 : 2;
sf->part_sf.simple_motion_search_early_term_none = 1;
@@ -1849,7 +1981,6 @@ void av1_set_speed_features_qindex_dependent(AV1_COMP *cpi, int speed) {
sf->tx_sf.inter_tx_size_search_init_depth_rect = 1;
sf->tx_sf.inter_tx_size_search_init_depth_sqr = 1;
sf->tx_sf.intra_tx_size_search_init_depth_rect = 1;
- sf->inter_sf.skip_repeated_newmv = 1;
sf->tx_sf.model_based_prune_tx_search_level = 0;
if (is_1080p_or_larger && cm->quant_params.base_qindex <= 108) {
@@ -1866,28 +1997,25 @@ void av1_set_speed_features_qindex_dependent(AV1_COMP *cpi, int speed) {
}
}
- if (speed >= 3) {
- // Disable extended partitions for lower quantizers
- const int qindex_thresh =
- cm->features.allow_screen_content_tools ? 50 : 100;
- if (cm->quant_params.base_qindex <= qindex_thresh && !boosted) {
- sf->part_sf.ext_partition_eval_thresh = BLOCK_128X128;
- }
- }
-
- if (speed >= 4) {
+ if (speed >= 2) {
// Disable extended partitions for lower quantizers
- const int qindex_thresh = boosted ? 80 : 120;
- if (cm->quant_params.base_qindex <= qindex_thresh &&
- !frame_is_intra_only(&cpi->common)) {
- sf->part_sf.ext_partition_eval_thresh = BLOCK_128X128;
+ const int aggr = AOMMIN(3, speed - 2);
+ const int qindex_thresh1[4] = { 50, 50, 80, 100 };
+ const int qindex_thresh2[4] = { 80, 100, 120, 160 };
+ int qindex_thresh;
+ int disable_ext_part;
+ if (aggr <= 1) {
+ const int qthresh2 =
+ (!aggr && !is_480p_or_larger) ? 70 : qindex_thresh2[aggr];
+ qindex_thresh = cm->features.allow_screen_content_tools
+ ? qindex_thresh1[aggr]
+ : qthresh2;
+ disable_ext_part = !boosted;
+ } else {
+ qindex_thresh = boosted ? qindex_thresh1[aggr] : qindex_thresh2[aggr];
+ disable_ext_part = !frame_is_intra_only(cm);
}
- }
-
- if (speed >= 5) {
- const int qindex_thresh = boosted ? 100 : 160;
- if (cm->quant_params.base_qindex <= qindex_thresh &&
- !frame_is_intra_only(&cpi->common)) {
+ if (cm->quant_params.base_qindex <= qindex_thresh && disable_ext_part) {
sf->part_sf.ext_partition_eval_thresh = BLOCK_128X128;
}
}
diff --git a/third_party/libaom/source/libaom/av1/encoder/speed_features.h b/third_party/libaom/source/libaom/av1/encoder/speed_features.h
index 90765febfb..3cf4c3d10b 100644
--- a/third_party/libaom/source/libaom/av1/encoder/speed_features.h
+++ b/third_party/libaom/source/libaom/av1/encoder/speed_features.h
@@ -287,17 +287,30 @@ enum {
SUPERRES_AUTO_DUAL, // Tries no superres and q-based superres ratios
SUPERRES_AUTO_SOLO, // Only apply the q-based superres ratio
} UENUM1BYTE(SUPERRES_AUTO_SEARCH_TYPE);
-
/*!\endcond */
+
+/*!\enum INTERNAL_COST_UPDATE_TYPE
+ * \brief This enum decides internally how often to update the entropy costs
+ *
+ * INTERNAL_COST_UPD_TYPE is similar to \ref COST_UPDATE_TYPE but has slightly
+ * more flexibility in update frequency. This enum is separate from \ref
+ * COST_UPDATE_TYPE because although \ref COST_UPDATE_TYPE is not exposed, its
+ * values are public so it cannot be modified without breaking public API.
+ */
+typedef enum {
+ INTERNAL_COST_UPD_OFF, /*!< Turn off cost updates. */
+ INTERNAL_COST_UPD_SBROW_SET, /*!< Update every row_set of height 256 pixs. */
+ INTERNAL_COST_UPD_SBROW, /*!< Update every sb rows inside a tile. */
+ INTERNAL_COST_UPD_SB, /*!< Update every sb. */
+} INTERNAL_COST_UPDATE_TYPE;
+
/*!
* \brief Sequence/frame level speed vs quality features
*/
typedef struct HIGH_LEVEL_SPEED_FEATURES {
- /*!\cond */
- // Frame level coding parameter update
+ /*! Frame level coding parameter update. */
int frame_parameter_update;
- /*!\endcond */
/*!
* Cases and frame types for which the recode loop is enabled.
*/
@@ -309,25 +322,27 @@ typedef struct HIGH_LEVEL_SPEED_FEATURES {
*/
int recode_tolerance;
- /*!\cond */
- // Determine how motion vector precision is chosen. The possibilities are:
- // LAST_MV_DATA: use the mv data from the last coded frame
- // CURRENT_Q: use the current q as a threshold
- // QTR_ONLY: use quarter pel precision only.
+ /*!
+ * Determine how motion vector precision is chosen. The possibilities are:
+ * LAST_MV_DATA: use the mv data from the last coded frame
+ * CURRENT_Q: use the current q as a threshold
+ * QTR_ONLY: use quarter pel precision only.
+ */
MV_PREC_LOGIC high_precision_mv_usage;
- // Always set to 0. If on it enables 0 cost background transmission
- // (except for the initial transmission of the segmentation). The feature is
- // disabled because the addition of very large block sizes make the
- // backgrounds very to cheap to encode, and the segmentation we have
- // adds overhead.
+ /*!
+ * Always set to 0. If on it enables 0 cost background transmission
+ * (except for the initial transmission of the segmentation). The feature is
+ * disabled because the addition of very large block sizes make the
+   * backgrounds very cheap to encode, and the segmentation we have
+ * adds overhead.
+ */
int static_segmentation;
/*!
* Superres-auto mode search type:
*/
SUPERRES_AUTO_SEARCH_TYPE superres_auto_search_type;
- /*!\endcond */
/*!
* Enable/disable extra screen content test by encoding key frame twice.
@@ -340,10 +355,39 @@ typedef struct HIGH_LEVEL_SPEED_FEATURES {
int second_alt_ref_filtering;
} HIGH_LEVEL_SPEED_FEATURES;
+/*!
+ * Speed features for the first pass.
+ */
+typedef struct FIRST_PASS_SPEED_FEATURES {
+ /*!
+ * \brief Reduces the mv search window.
+ * By default, the initial search window is around
+ * MIN(MIN(dims), MAX_FULL_PEL_VAL) = MIN(MIN(dims), 1023).
+ * Each step reduction decrease the window size by about a factor of 2.
+ */
+ int reduce_mv_step_param;
+
+ /*!
+ * \brief Skips the motion search when the zero mv has small sse.
+ */
+ int skip_motion_search_threshold;
+
+ /*!
+ * \brief Skips reconstruction by using source buffers for prediction
+ */
+ int disable_recon;
+} FIRST_PASS_SPEED_FEATURES;
+
/*!\cond */
typedef struct TPL_SPEED_FEATURES {
- // Enable/disable GOP length adaptive decision.
- int disable_gop_length_decision;
+ // GOP length adaptive decision.
+ // If set to 0, tpl model decides whether a shorter gf interval is better.
+ // If set to 1, tpl stats of ARFs from base layer, (base+1) layer and
+ // (base+2) layer decide whether a shorter gf interval is better.
+ // If set to 2, tpl stats of ARFs from base layer, (base+1) layer and GF boost
+ // decide whether a shorter gf interval is better.
+ // If set to 3, gop length adaptive decision is disabled.
+ int gop_length_decision_method;
// Prune the intra modes search by tpl.
// If set to 0, we will search all intra modes from DC_PRED to PAETH_PRED.
// If set to 1, we only search DC_PRED, V_PRED, and H_PRED.
@@ -387,6 +431,10 @@ typedef struct GLOBAL_MOTION_SPEED_FEATURES {
// given direction(past/future), if the evaluated ref_frame in that direction
// yields gm_type as INVALID/TRANSLATION/IDENTITY
int prune_ref_frame_for_gm_search;
+
+ // When the current GM type is set to ZEROMV, prune ZEROMV if its performance
+ // is worse than NEWMV under SSE metric.
+ int prune_zero_mv_with_sse;
} GLOBAL_MOTION_SPEED_FEATURES;
typedef struct PARTITION_SPEED_FEATURES {
@@ -511,6 +559,53 @@ typedef struct PARTITION_SPEED_FEATURES {
// Prune rectangular split based on simple motion search split/no_split score.
// 0: disable pruning, 1: enable pruning
int simple_motion_search_rect_split;
+
+ // The current encoder adopts a DFS search for block partitions.
+ // Therefore the mode selection and associated rdcost is ready for smaller
+ // blocks before the mode selection for some partition types.
+ // AB partition could use previous rd information and skip mode search.
+ // An example is:
+ //
+ // current block
+ // +---+---+
+ // | |
+ // + +
+ // | |
+ // +-------+
+ //
+ // SPLIT partition has been searched first before trying HORZ_A
+ // +---+---+
+ // | R | R |
+ // +---+---+
+ // | R | R |
+ // +---+---+
+ //
+ // HORZ_A
+ // +---+---+
+ // | | |
+ // +---+---+
+ // | |
+ // +-------+
+ //
+ // With this speed feature, the top two sub blocks can directly use rdcost
+ // searched in split partition, and the mode info is also copied from
+ // saved info. Similarly, the bottom rectangular block can also use
+ // the available information from previous rectangular search.
+ int reuse_prev_rd_results_for_part_ab;
+
+ // Reuse the best prediction modes found in PARTITION_SPLIT and PARTITION_RECT
+ // when encoding PARTITION_AB.
+ int reuse_best_prediction_for_part_ab;
+
+ // The current partition search records the best rdcost so far and uses it
+ // in mode search and transform search to early skip when some criteria is
+ // met. For example, when the current rdcost is larger than the best rdcost,
+ // or the model rdcost is larger than the best rdcost times some thresholds.
+ // By default, this feature is turned on to speed up the encoder partition
+ // search.
+ // If disabling it, at speed 0, 30 frames, we could get
+ // about -0.25% quality gain (psnr, ssim, vmaf), with about 13% slowdown.
+ int use_best_rd_for_pruning;
} PARTITION_SPEED_FEATURES;
typedef struct MV_SPEED_FEATURES {
@@ -621,16 +716,19 @@ typedef struct INTER_MODE_SPEED_FEATURES {
int alt_ref_search_fp;
- // flag to skip NEWMV mode in drl if the motion search result is the same
- int skip_repeated_newmv;
-
- // Skip the current ref_mv in NEW_MV mode if we have already encountered
- // another ref_mv in the drl such that:
- // 1. The other drl has the same fullpel_mv during the SIMPLE_TRANSLATION
- // search process as the current fullpel_mv.
- // 2. The rate needed to encode the current fullpel_mv is larger than that
- // for the other ref_mv.
- int skip_repeated_full_newmv;
+ // Skip the current ref_mv in NEW_MV mode based on mv, rate cost, etc.
+ // This speed feature equaling 0 means no skipping.
+ // If the speed feature equals 1 or 2, skip the current ref_mv in NEW_MV mode
+  // if we have already encountered another ref_mv in the drl such that:
+ // 1. The other drl has the same mv during the SIMPLE_TRANSLATION search
+ // process as the current mv.
+ // 2. The rate needed to encode the current mv is larger than that for the
+ // other ref_mv.
+ // The speed feature equaling 1 means using subpel mv in the comparison.
+ // The speed feature equaling 2 means using fullpel mv in the comparison.
+ // If the speed feature >= 3, skip the current ref_mv in NEW_MV mode based on
+ // known full_mv bestsme and drl cost.
+ int skip_newmv_in_drl;
// This speed feature checks duplicate ref MVs among NEARESTMV, NEARMV,
// GLOBALMV and skips NEARMV or GLOBALMV (in order) if a duplicate is found
@@ -677,12 +775,14 @@ typedef struct INTER_MODE_SPEED_FEATURES {
// the single reference modes, it is one of the two best performers.
int prune_compound_using_single_ref;
- // Skip extended compound mode using ref frames of above and left neighbor
+ // Skip extended compound mode (NEAREST_NEWMV, NEW_NEARESTMV, NEAR_NEWMV,
+ // NEW_NEARMV) using ref frames of above and left neighbor
// blocks.
// 0 : no pruning
- // 1 : prune extended compound mode (less aggressiveness)
- // 2 : prune extended compound mode (high aggressiveness)
- int prune_compound_using_neighbors;
+ // 1 : prune ext compound modes using neighbor blocks (less aggressiveness)
+ // 2 : prune ext compound modes using neighbor blocks (high aggressiveness)
+ // 3 : prune ext compound modes unconditionally (highest aggressiveness)
+ int prune_ext_comp_using_neighbors;
// Skip extended compound mode when ref frame corresponding to NEWMV does not
// have NEWMV as single mode winner.
@@ -722,12 +822,15 @@ typedef struct INTER_MODE_SPEED_FEATURES {
// Decide when and how to use joint_comp.
DIST_WTD_COMP_FLAG use_dist_wtd_comp_flag;
- // To skip cost update for mv.
- // mv_cost_upd_level indicates the aggressiveness of skipping.
- // 0: update happens at each sb level.
- // 1: update happens once for each sb row.
- // 2: update happens once for a set of rows.
- int mv_cost_upd_level;
+ // Clip the frequency of updating the mv cost.
+ INTERNAL_COST_UPDATE_TYPE mv_cost_upd_level;
+
+ // Clip the frequency of updating the coeff cost.
+ INTERNAL_COST_UPDATE_TYPE coeff_cost_upd_level;
+
+ // Clip the frequency of updating the mode cost.
+ INTERNAL_COST_UPDATE_TYPE mode_cost_upd_level;
+
// Prune inter modes based on tpl stats
// 0 : no pruning
// 1 - 3 indicate increasing aggressiveness in order.
@@ -750,15 +853,17 @@ typedef struct INTER_MODE_SPEED_FEATURES {
// Enable/disable masked compound.
int disable_masked_comp;
- // Reuse the best prediction modes found in PARTITION_SPLIT and PARTITION_RECT
- // when encoding PARTITION_AB.
- int reuse_best_prediction_for_part_ab;
-
// Enable/disable the fast compound mode search.
int enable_fast_compound_mode_search;
// Reuse masked compound type search results
int reuse_mask_search_results;
+
+ // Enable/disable fast search for wedge masks
+ int enable_fast_wedge_mask_search;
+
+ // Early breakout from transform search of inter modes
+ int inter_mode_txfm_breakout;
} INTER_MODE_SPEED_FEATURES;
typedef struct INTERP_FILTER_SPEED_FEATURES {
@@ -808,8 +913,11 @@ typedef struct INTRA_MODE_SPEED_FEATURES {
// Enable/disable smooth intra modes.
int disable_smooth_intra;
- // Enable/disable filter intra modes.
- int disable_filter_intra;
+ // Prune filter intra modes in intra frames.
+ // 0 : No pruning
+ // 1 : Evaluate applicable filter intra modes based on best intra mode so far
+ // 2 : Do not evaluate filter intra modes
+ int prune_filter_intra_level;
// prune palette search
// 0: No pruning
@@ -825,6 +933,27 @@ typedef struct INTRA_MODE_SPEED_FEATURES {
// 1: Prune chroma intra modes other than UV_DC_PRED, UV_SMOOTH_PRED,
// UV_CFL_PRED and the mode that corresponds to luma intra mode winner.
int prune_chroma_modes_using_luma_winner;
+
+ // Clip the frequency of updating the mv cost for intrabc.
+ INTERNAL_COST_UPDATE_TYPE dv_cost_upd_level;
+
+ // We use DCT_DCT transform followed by computing SATD (Sum of Absolute
+ // Transformed Differences) as an estimation of RD score to quickly find the
+ // best possible Chroma from Luma (CFL) parameter. Then we do a full RD search
+ // near the best possible parameter. The search range is set here.
+  // The range of cfl_search_range should be [1, 33], and the following are the
+ // recommended values.
+ // 1: Fastest mode.
+ // 3: Default mode that provides good speedup without losing compression
+ // performance at speed 0.
+ // 33: Exhaustive rd search (33 == CFL_MAGS_SIZE). This mode should only
+ // be used for debugging purpose.
+ int cfl_search_range;
+
+ // TOP_INTRA_MODEL_COUNT is 4 that is the number of top model rd to store in
+ // intra mode decision. Here, add a speed feature to reduce this number for
+ // higher speeds.
+ int top_intra_model_count_allowed;
} INTRA_MODE_SPEED_FEATURES;
typedef struct TX_SPEED_FEATURES {
@@ -1082,6 +1211,11 @@ typedef struct REAL_TIME_SPEED_FEATURES {
// Skips mode checks more agressively in nonRD mode
int nonrd_agressive_skip;
+
+ // Skip cdef on 64x64 blocks when NEWMV or INTRA is not picked or color
+ // sensitivity is off. When color sensitivity is on for a superblock, all
+ // 64x64 blocks within will not skip.
+ int skip_cdef_sb;
} REAL_TIME_SPEED_FEATURES;
/*!\endcond */
@@ -1096,6 +1230,11 @@ typedef struct SPEED_FEATURES {
HIGH_LEVEL_SPEED_FEATURES hl_sf;
/*!
+ * Speed features for the first pass.
+ */
+ FIRST_PASS_SPEED_FEATURES fp_sf;
+
+ /*!
* Speed features related to how tpl's searches are done.
*/
TPL_SPEED_FEATURES tpl_sf;
diff --git a/third_party/libaom/source/libaom/av1/encoder/superres_scale.c b/third_party/libaom/source/libaom/av1/encoder/superres_scale.c
index bcd3fefdfe..283faabe61 100644
--- a/third_party/libaom/source/libaom/av1/encoder/superres_scale.c
+++ b/third_party/libaom/source/libaom/av1/encoder/superres_scale.c
@@ -80,7 +80,7 @@ static uint8_t calculate_next_resize_scale(const AV1_COMP *cpi) {
if (is_stat_generation_stage(cpi)) return SCALE_NUMERATOR;
uint8_t new_denom = SCALE_NUMERATOR;
- if (cpi->common.seq_params.reduced_still_picture_hdr) return SCALE_NUMERATOR;
+ if (cpi->common.seq_params->reduced_still_picture_hdr) return SCALE_NUMERATOR;
switch (resize_cfg->resize_mode) {
case RESIZE_NONE: new_denom = SCALE_NUMERATOR; break;
case RESIZE_FIXED:
@@ -109,12 +109,13 @@ int av1_superres_in_recode_allowed(const AV1_COMP *const cpi) {
#define SUPERRES_ENERGY_BY_AC_THRESH 0.2
static double get_energy_by_q2_thresh(const GF_GROUP *gf_group,
- const RATE_CONTROL *rc) {
+ const RATE_CONTROL *rc,
+ int gf_frame_index) {
// TODO(now): Return keyframe thresh * factor based on frame type / pyramid
// level.
- if (gf_group->update_type[gf_group->index] == ARF_UPDATE) {
+ if (gf_group->update_type[gf_frame_index] == ARF_UPDATE) {
return SUPERRES_ENERGY_BY_Q2_THRESH_ARFFRAME;
- } else if (gf_group->update_type[gf_group->index] == KF_UPDATE) {
+ } else if (gf_group->update_type[gf_frame_index] == KF_UPDATE) {
if (rc->frames_to_key <= 1)
return SUPERRES_ENERGY_BY_Q2_THRESH_KEYFRAME_SOLO;
else
@@ -142,15 +143,15 @@ static uint8_t get_superres_denom_from_qindex_energy(int qindex, double *energy,
static uint8_t get_superres_denom_for_qindex(const AV1_COMP *cpi, int qindex,
int sr_kf, int sr_arf) {
// Use superres for Key-frames and Alt-ref frames only.
- const GF_GROUP *gf_group = &cpi->gf_group;
- if (gf_group->update_type[gf_group->index] != KF_UPDATE &&
- gf_group->update_type[gf_group->index] != ARF_UPDATE) {
+ const GF_GROUP *gf_group = &cpi->ppi->gf_group;
+ if (gf_group->update_type[cpi->gf_frame_index] != KF_UPDATE &&
+ gf_group->update_type[cpi->gf_frame_index] != ARF_UPDATE) {
return SCALE_NUMERATOR;
}
- if (gf_group->update_type[gf_group->index] == KF_UPDATE && !sr_kf) {
+ if (gf_group->update_type[cpi->gf_frame_index] == KF_UPDATE && !sr_kf) {
return SCALE_NUMERATOR;
}
- if (gf_group->update_type[gf_group->index] == ARF_UPDATE && !sr_arf) {
+ if (gf_group->update_type[cpi->gf_frame_index] == ARF_UPDATE && !sr_arf) {
return SCALE_NUMERATOR;
}
@@ -158,7 +159,7 @@ static uint8_t get_superres_denom_for_qindex(const AV1_COMP *cpi, int qindex,
analyze_hor_freq(cpi, energy);
const double energy_by_q2_thresh =
- get_energy_by_q2_thresh(gf_group, &cpi->rc);
+ get_energy_by_q2_thresh(gf_group, &cpi->rc, cpi->gf_frame_index);
int denom = get_superres_denom_from_qindex_energy(
qindex, energy, energy_by_q2_thresh, SUPERRES_ENERGY_BY_AC_THRESH);
/*
@@ -166,8 +167,8 @@ static uint8_t get_superres_denom_for_qindex(const AV1_COMP *cpi, int qindex,
for (int k = 1; k < 16; ++k) printf("%f, ", energy[k]);
printf("]\n");
printf("boost = %d\n",
- (gf_group->update_type[gf_group->index] == KF_UPDATE)
- ? cpi->rc.kf_boost
+ (gf_group->update_type[cpi->gf_frame_index] == KF_UPDATE)
+ ? cpi->ppi->p_rc.kf_boost
: cpi->rc.gfu_boost);
printf("denom = %d\n", denom);
*/
@@ -194,8 +195,8 @@ static uint8_t calculate_next_superres_scale(AV1_COMP *cpi) {
// Make sure that superres mode of the frame is consistent with the
// sequence-level flag.
assert(IMPLIES(superres_cfg->superres_mode != AOM_SUPERRES_NONE,
- cpi->common.seq_params.enable_superres));
- assert(IMPLIES(!cpi->common.seq_params.enable_superres,
+ cpi->common.seq_params->enable_superres));
+ assert(IMPLIES(!cpi->common.seq_params->enable_superres,
superres_cfg->superres_mode == AOM_SUPERRES_NONE));
// Make sure that superres mode for current encoding is consistent with user
// provided superres mode.
@@ -222,8 +223,8 @@ static uint8_t calculate_next_superres_scale(AV1_COMP *cpi) {
// Now decide the use of superres based on 'q'.
int bottom_index, top_index;
const int q = av1_rc_pick_q_and_bounds(
- cpi, &cpi->rc, frm_dim_cfg->width, frm_dim_cfg->height,
- cpi->gf_group.index, &bottom_index, &top_index);
+ cpi, frm_dim_cfg->width, frm_dim_cfg->height, cpi->gf_frame_index,
+ &bottom_index, &top_index);
const int qthresh = (frame_is_intra_only(&cpi->common))
? superres_cfg->superres_kf_qthresh
@@ -243,8 +244,8 @@ static uint8_t calculate_next_superres_scale(AV1_COMP *cpi) {
// Now decide the use of superres based on 'q'.
int bottom_index, top_index;
const int q = av1_rc_pick_q_and_bounds(
- cpi, &cpi->rc, frm_dim_cfg->width, frm_dim_cfg->height,
- cpi->gf_group.index, &bottom_index, &top_index);
+ cpi, frm_dim_cfg->width, frm_dim_cfg->height, cpi->gf_frame_index,
+ &bottom_index, &top_index);
const SUPERRES_AUTO_SEARCH_TYPE sr_search_type =
cpi->sf.hl_sf.superres_auto_search_type;
@@ -345,7 +346,7 @@ static size_params_type calculate_next_size_params(AV1_COMP *cpi) {
size_params_type rsz = { frm_dim_cfg->width, frm_dim_cfg->height,
SCALE_NUMERATOR };
int resize_denom = SCALE_NUMERATOR;
- if (has_no_stats_stage(cpi) && cpi->use_svc &&
+ if (has_no_stats_stage(cpi) && cpi->ppi->use_svc &&
cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1) {
rsz.resize_width = cpi->common.width;
rsz.resize_height = cpi->common.height;
diff --git a/third_party/libaom/source/libaom/av1/encoder/svc_layercontext.c b/third_party/libaom/source/libaom/av1/encoder/svc_layercontext.c
index 17109201e6..5cff958a85 100644
--- a/third_party/libaom/source/libaom/av1/encoder/svc_layercontext.c
+++ b/third_party/libaom/source/libaom/av1/encoder/svc_layercontext.c
@@ -30,6 +30,7 @@ void av1_init_layer_context(AV1_COMP *const cpi) {
svc->current_superframe = 0;
svc->force_zero_mode_spatial_ref = 1;
svc->num_encoded_top_layer = 0;
+ svc->use_flexible_mode = 0;
for (int sl = 0; sl < svc->number_spatial_layers; ++sl) {
for (int tl = 0; tl < svc->number_temporal_layers; ++tl) {
@@ -90,6 +91,7 @@ void av1_init_layer_context(AV1_COMP *const cpi) {
void av1_update_layer_context_change_config(AV1_COMP *const cpi,
const int64_t target_bandwidth) {
const RATE_CONTROL *const rc = &cpi->rc;
+ const PRIMARY_RATE_CONTROL *const p_rc = &cpi->ppi->p_rc;
SVC *const svc = &cpi->svc;
int layer = 0;
int64_t spatial_layer_target = 0;
@@ -106,17 +108,18 @@ void av1_update_layer_context_change_config(AV1_COMP *const cpi,
LAYER_CONTEXT *const lc =
&svc->layer_context[sl * svc->number_temporal_layers + tl];
RATE_CONTROL *const lrc = &lc->rc;
+ PRIMARY_RATE_CONTROL *const lp_rc = &lc->p_rc;
lc->spatial_layer_target_bandwidth = spatial_layer_target;
bitrate_alloc = (float)lc->target_bandwidth / target_bandwidth;
- lrc->starting_buffer_level =
- (int64_t)(rc->starting_buffer_level * bitrate_alloc);
- lrc->optimal_buffer_level =
- (int64_t)(rc->optimal_buffer_level * bitrate_alloc);
- lrc->maximum_buffer_size =
- (int64_t)(rc->maximum_buffer_size * bitrate_alloc);
+ lp_rc->starting_buffer_level =
+ (int64_t)(p_rc->starting_buffer_level * bitrate_alloc);
+ lp_rc->optimal_buffer_level =
+ (int64_t)(p_rc->optimal_buffer_level * bitrate_alloc);
+ lp_rc->maximum_buffer_size =
+ (int64_t)(p_rc->maximum_buffer_size * bitrate_alloc);
lrc->bits_off_target =
- AOMMIN(lrc->bits_off_target, lrc->maximum_buffer_size);
- lrc->buffer_level = AOMMIN(lrc->buffer_level, lrc->maximum_buffer_size);
+ AOMMIN(lrc->bits_off_target, lp_rc->maximum_buffer_size);
+ lrc->buffer_level = AOMMIN(lrc->buffer_level, lp_rc->maximum_buffer_size);
lc->framerate = cpi->framerate / lc->framerate_factor;
lrc->avg_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate);
lrc->max_frame_bandwidth = rc->max_frame_bandwidth;
@@ -164,7 +167,6 @@ void av1_update_temporal_layer_framerate(AV1_COMP *const cpi) {
}
void av1_restore_layer_context(AV1_COMP *const cpi) {
- GF_GROUP *const gf_group = &cpi->gf_group;
SVC *const svc = &cpi->svc;
const AV1_COMMON *const cm = &cpi->common;
LAYER_CONTEXT *const lc = get_layer_context(cpi);
@@ -172,8 +174,9 @@ void av1_restore_layer_context(AV1_COMP *const cpi) {
const int old_frame_to_key = cpi->rc.frames_to_key;
// Restore layer rate control.
cpi->rc = lc->rc;
+ cpi->ppi->p_rc = lc->p_rc;
cpi->oxcf.rc_cfg.target_bandwidth = lc->target_bandwidth;
- gf_group->index = 0;
+ cpi->gf_frame_index = 0;
cpi->mv_search_params.max_mv_magnitude = lc->max_mv_magnitude;
if (cpi->mv_search_params.max_mv_magnitude == 0)
cpi->mv_search_params.max_mv_magnitude = AOMMAX(cm->width, cm->height);
@@ -198,7 +201,7 @@ void av1_restore_layer_context(AV1_COMP *const cpi) {
// This is to skip searching mv for that reference if it was last
// refreshed (i.e., buffer slot holding that reference was refreshed) on the
// previous spatial layer(s) at the same time (current_superframe).
- if (svc->external_ref_frame_config && svc->force_zero_mode_spatial_ref) {
+ if (svc->set_ref_frame_config && svc->force_zero_mode_spatial_ref) {
int ref_frame_idx = svc->ref_idx[LAST_FRAME - 1];
if (svc->buffer_time_index[ref_frame_idx] == svc->current_superframe &&
svc->buffer_spatial_layer[ref_frame_idx] <= svc->spatial_layer_id - 1)
@@ -211,13 +214,13 @@ void av1_restore_layer_context(AV1_COMP *const cpi) {
}
void av1_save_layer_context(AV1_COMP *const cpi) {
- GF_GROUP *const gf_group = &cpi->gf_group;
SVC *const svc = &cpi->svc;
const AV1_COMMON *const cm = &cpi->common;
LAYER_CONTEXT *lc = get_layer_context(cpi);
lc->rc = cpi->rc;
+ lc->p_rc = cpi->ppi->p_rc;
lc->target_bandwidth = (int)cpi->oxcf.rc_cfg.target_bandwidth;
- lc->group_index = gf_group->index;
+ lc->group_index = cpi->gf_frame_index;
lc->max_mv_magnitude = cpi->mv_search_params.max_mv_magnitude;
if (svc->spatial_layer_id == 0) svc->base_framerate = cpi->framerate;
// For spatial-svc, allow cyclic-refresh to be applied on the spatial layers,
@@ -243,7 +246,7 @@ void av1_save_layer_context(AV1_COMP *const cpi) {
svc->buffer_time_index[i] = svc->current_superframe;
svc->buffer_spatial_layer[i] = svc->spatial_layer_id;
}
- } else if (cpi->svc.external_ref_frame_config) {
+ } else if (cpi->svc.set_ref_frame_config) {
for (unsigned int i = 0; i < INTER_REFS_PER_FRAME; i++) {
int ref_frame_map_idx = svc->ref_idx[i];
if (cpi->svc.refresh[ref_frame_map_idx]) {
@@ -342,3 +345,171 @@ void av1_one_pass_cbr_svc_start_layer(AV1_COMP *const cpi) {
cpi->common.height = height;
av1_update_frame_size(cpi);
}
+
+enum {
+ SVC_LAST_FRAME = 0,
+ SVC_LAST2_FRAME,
+ SVC_LAST3_FRAME,
+ SVC_GOLDEN_FRAME,
+ SVC_BWDREF_FRAME,
+ SVC_ALTREF2_FRAME,
+ SVC_ALTREF_FRAME
+};
+
+// For fixed svc mode: fixed pattern is set based on the number of
+// spatial and temporal layers, and the ksvc_fixed_mode.
+void av1_set_svc_fixed_mode(AV1_COMP *const cpi) {
+ SVC *const svc = &cpi->svc;
+ int i;
+ assert(svc->use_flexible_mode == 0);
+ // Fixed SVC mode only supports at most 3 spatial or temporal layers.
+ assert(svc->number_spatial_layers >= 1 && svc->number_spatial_layers <= 3 &&
+ svc->number_temporal_layers >= 1 && svc->number_temporal_layers <= 3);
+ svc->set_ref_frame_config = 1;
+ int superframe_cnt = svc->current_superframe;
+ // Set the reference map buffer idx for the 7 references:
+ // LAST_FRAME (0), LAST2_FRAME(1), LAST3_FRAME(2), GOLDEN_FRAME(3),
+ // BWDREF_FRAME(4), ALTREF2_FRAME(5), ALTREF_FRAME(6).
+ for (i = 0; i < INTER_REFS_PER_FRAME; i++) svc->ref_idx[i] = i;
+ for (i = 0; i < INTER_REFS_PER_FRAME; i++) svc->reference[i] = 0;
+ for (i = 0; i < REF_FRAMES; i++) svc->refresh[i] = 0;
+ // Always reference LAST, and reference GOLDEN on SL > 0.
+ // For KSVC: GOLDEN reference will be removed on INTER_FRAMES later
+ // when frame_type is set.
+ svc->reference[SVC_LAST_FRAME] = 1;
+ if (svc->spatial_layer_id > 0) svc->reference[SVC_GOLDEN_FRAME] = 1;
+ if (svc->temporal_layer_id == 0) {
+ // Base temporal layer.
+ if (svc->spatial_layer_id == 0) {
+ // Set all buffer_idx to 0. Update slot 0 (LAST).
+ for (i = 0; i < INTER_REFS_PER_FRAME; i++) svc->ref_idx[i] = 0;
+ svc->refresh[0] = 1;
+ } else if (svc->spatial_layer_id == 1) {
+ // Set buffer_idx for LAST to slot 1, GOLDEN (and all other refs) to
+ // slot 0. Update slot 1 (LAST).
+ for (i = 0; i < INTER_REFS_PER_FRAME; i++) svc->ref_idx[i] = 0;
+ svc->ref_idx[SVC_LAST_FRAME] = 1;
+ svc->refresh[1] = 1;
+ } else if (svc->spatial_layer_id == 2) {
+ // Set buffer_idx for LAST to slot 2, GOLDEN (and all other refs) to
+ // slot 1. Update slot 2 (LAST).
+ for (i = 0; i < INTER_REFS_PER_FRAME; i++) svc->ref_idx[i] = 1;
+ svc->ref_idx[SVC_LAST_FRAME] = 2;
+ svc->refresh[2] = 1;
+ }
+ } else if (svc->temporal_layer_id == 2 && (superframe_cnt - 1) % 4 == 0) {
+ // First top temporal enhancement layer.
+ if (svc->spatial_layer_id == 0) {
+ // Reference LAST (slot 0).
+ // Set GOLDEN to slot 3 and update slot 3.
+ // Set all other buffer_idx to slot 0.
+ for (i = 0; i < INTER_REFS_PER_FRAME; i++) svc->ref_idx[i] = 0;
+ if (svc->spatial_layer_id < svc->number_spatial_layers - 1) {
+ svc->ref_idx[SVC_GOLDEN_FRAME] = 3;
+ svc->refresh[3] = 1;
+ }
+ } else if (svc->spatial_layer_id == 1) {
+ // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
+ // GOLDEN (and all other refs) to slot 3.
+ // Set LAST2 to slot 4 and Update slot 4.
+ for (i = 0; i < INTER_REFS_PER_FRAME; i++) svc->ref_idx[i] = 3;
+ svc->ref_idx[SVC_LAST_FRAME] = 1;
+ if (svc->spatial_layer_id < svc->number_spatial_layers - 1) {
+ svc->ref_idx[SVC_LAST2_FRAME] = 4;
+ svc->refresh[4] = 1;
+ }
+ } else if (svc->spatial_layer_id == 2) {
+ // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
+ // GOLDEN (and all other refs) to slot 4.
+ // No update.
+ for (i = 0; i < INTER_REFS_PER_FRAME; i++) svc->ref_idx[i] = 4;
+ svc->ref_idx[SVC_LAST_FRAME] = 2;
+ }
+ } else if (svc->temporal_layer_id == 1) {
+ // Middle temporal enhancement layer.
+ if (svc->spatial_layer_id == 0) {
+ // Reference LAST.
+ // Set all buffer_idx to 0.
+ // Set GOLDEN to slot 5 and update slot 5.
+ for (i = 0; i < INTER_REFS_PER_FRAME; i++) svc->ref_idx[i] = 0;
+ if (svc->temporal_layer_id < svc->number_temporal_layers - 1) {
+ svc->ref_idx[SVC_GOLDEN_FRAME] = 5;
+ svc->refresh[5] = 1;
+ }
+ } else if (svc->spatial_layer_id == 1) {
+ // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
+ // GOLDEN (and all other refs) to slot 5.
+ // Set LAST3 to slot 6 and update slot 6.
+ for (i = 0; i < INTER_REFS_PER_FRAME; i++) svc->ref_idx[i] = 5;
+ svc->ref_idx[SVC_LAST_FRAME] = 1;
+ if (svc->temporal_layer_id < svc->number_temporal_layers - 1) {
+ svc->ref_idx[SVC_LAST3_FRAME] = 6;
+ svc->refresh[6] = 1;
+ }
+ } else if (svc->spatial_layer_id == 2) {
+ // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 2,
+ // GOLDEN (and all other refs) to slot 6.
+ // Set LAST3 to slot 7 and update slot 7.
+ for (i = 0; i < INTER_REFS_PER_FRAME; i++) svc->ref_idx[i] = 6;
+ svc->ref_idx[SVC_LAST_FRAME] = 2;
+ if (svc->temporal_layer_id < svc->number_temporal_layers - 1) {
+ svc->ref_idx[SVC_LAST3_FRAME] = 7;
+ svc->refresh[7] = 1;
+ }
+ }
+ } else if (svc->temporal_layer_id == 2 && (superframe_cnt - 3) % 4 == 0) {
+ // Second top temporal enhancement layer.
+ if (svc->spatial_layer_id == 0) {
+ // Set LAST to slot 5 and reference LAST.
+ // Set GOLDEN to slot 3 and update slot 3.
+ // Set all other buffer_idx to 0.
+ for (i = 0; i < INTER_REFS_PER_FRAME; i++) svc->ref_idx[i] = 0;
+ svc->ref_idx[SVC_LAST_FRAME] = 5;
+ if (svc->spatial_layer_id < svc->number_spatial_layers - 1) {
+ svc->ref_idx[SVC_GOLDEN_FRAME] = 3;
+ svc->refresh[3] = 1;
+ }
+ } else if (svc->spatial_layer_id == 1) {
+ // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 6,
+ // GOLDEN to slot 3. Set LAST2 to slot 4 and update slot 4.
+ for (i = 0; i < INTER_REFS_PER_FRAME; i++) svc->ref_idx[i] = 0;
+ svc->ref_idx[SVC_LAST_FRAME] = 6;
+ svc->ref_idx[SVC_GOLDEN_FRAME] = 3;
+ if (svc->spatial_layer_id < svc->number_spatial_layers - 1) {
+ svc->ref_idx[SVC_LAST2_FRAME] = 4;
+ svc->refresh[4] = 1;
+ }
+ } else if (svc->spatial_layer_id == 2) {
+ // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 7,
+ // GOLDEN to slot 4. No update.
+ for (i = 0; i < INTER_REFS_PER_FRAME; i++) svc->ref_idx[i] = 0;
+ svc->ref_idx[SVC_LAST_FRAME] = 7;
+ svc->ref_idx[SVC_GOLDEN_FRAME] = 4;
+ }
+ }
+}
+
+void av1_svc_check_reset_layer_rc_flag(AV1_COMP *const cpi) {
+ SVC *const svc = &cpi->svc;
+ for (int sl = 0; sl < svc->number_spatial_layers; ++sl) {
+ // Check for reset based on avg_frame_bandwidth for spatial layer sl.
+ int layer = LAYER_IDS_TO_IDX(sl, svc->number_temporal_layers - 1,
+ svc->number_temporal_layers);
+ LAYER_CONTEXT *lc = &svc->layer_context[layer];
+ RATE_CONTROL *lrc = &lc->rc;
+ if (lrc->avg_frame_bandwidth > (3 * lrc->prev_avg_frame_bandwidth >> 1) ||
+ lrc->avg_frame_bandwidth < (lrc->prev_avg_frame_bandwidth >> 1)) {
+ // Reset for all temporal layers with spatial layer sl.
+ for (int tl = 0; tl < svc->number_temporal_layers; ++tl) {
+ int layer2 = LAYER_IDS_TO_IDX(sl, tl, svc->number_temporal_layers);
+ LAYER_CONTEXT *lc2 = &svc->layer_context[layer2];
+ RATE_CONTROL *lrc2 = &lc2->rc;
+ PRIMARY_RATE_CONTROL *const lp_rc = &lc2->p_rc;
+ lrc2->rc_1_frame = 0;
+ lrc2->rc_2_frame = 0;
+ lrc2->bits_off_target = lp_rc->optimal_buffer_level;
+ lrc2->buffer_level = lp_rc->optimal_buffer_level;
+ }
+ }
+ }
+}
diff --git a/third_party/libaom/source/libaom/av1/encoder/svc_layercontext.h b/third_party/libaom/source/libaom/av1/encoder/svc_layercontext.h
index 1eeba5e273..817e3620b0 100644
--- a/third_party/libaom/source/libaom/av1/encoder/svc_layercontext.h
+++ b/third_party/libaom/source/libaom/av1/encoder/svc_layercontext.h
@@ -26,6 +26,7 @@ extern "C" {
typedef struct {
/*!\cond */
RATE_CONTROL rc;
+ PRIMARY_RATE_CONTROL p_rc;
int framerate_factor;
int64_t layer_target_bitrate;
int scaling_factor_num;
@@ -94,8 +95,10 @@ typedef struct SVC {
int temporal_layer_id;
int number_spatial_layers;
int number_temporal_layers;
- int external_ref_frame_config;
+ int set_ref_frame_config;
int non_reference_frame;
+ int use_flexible_mode;
+ int ksvc_fixed_mode;
/*!\endcond */
/*!
@@ -271,6 +274,11 @@ int av1_svc_primary_ref_frame(const struct AV1_COMP *const cpi);
void av1_get_layer_resolution(const int width_org, const int height_org,
const int num, const int den, int *width_out,
int *height_out);
+
+void av1_set_svc_fixed_mode(struct AV1_COMP *const cpi);
+
+void av1_svc_check_reset_layer_rc_flag(struct AV1_COMP *const cpi);
+
#ifdef __cplusplus
} // extern "C"
#endif
diff --git a/third_party/libaom/source/libaom/av1/encoder/temporal_filter.c b/third_party/libaom/source/libaom/av1/encoder/temporal_filter.c
index 676e110e60..6833ac8a40 100644
--- a/third_party/libaom/source/libaom/av1/encoder/temporal_filter.c
+++ b/third_party/libaom/source/libaom/av1/encoder/temporal_filter.c
@@ -155,7 +155,7 @@ static void tf_motion_search(AV1_COMP *cpi, MACROBLOCK *mb,
best_mv.as_mv.row = GET_MV_SUBPEL(mv_row);
best_mv.as_mv.col = GET_MV_SUBPEL(mv_col);
const int mv_offset = mv_row * y_stride + mv_col;
- error = cpi->fn_ptr[block_size].vf(
+ error = cpi->ppi->fn_ptr[block_size].vf(
ref_frame->y_buffer + y_offset + mv_offset, y_stride,
frame_to_filter->y_buffer + y_offset, y_stride, &sse);
block_mse = DIVIDE_AND_ROUND(error, mb_pels);
@@ -561,9 +561,16 @@ void av1_apply_temporal_filter_c(
(double)TF_WINDOW_BLOCK_BALANCE_WEIGHT * inv_factor;
// Decay factors for non-local mean approach.
double decay_factor[MAX_MB_PLANE] = { 0 };
- // Smaller q -> smaller filtering weight.
+ // Adjust filtering based on q.
+ // Larger q -> stronger filtering -> larger weight.
+ // Smaller q -> weaker filtering -> smaller weight.
double q_decay = pow((double)q_factor / TF_Q_DECAY_THRESHOLD, 2);
q_decay = CLIP(q_decay, 1e-5, 1);
+ if (q_factor >= TF_QINDEX_CUTOFF) {
+ // Max q_factor is 255, therefore the upper bound of q_decay is 8.
+ // We do not need a clip here.
+ q_decay = 0.5 * pow((double)q_factor / 64, 2);
+ }
// Smaller strength -> smaller filtering weight.
double s_decay = pow((double)filter_strength / TF_STRENGTH_THRESHOLD, 2);
s_decay = CLIP(s_decay, 1e-5, 1);
@@ -745,10 +752,19 @@ static void tf_normalize_filtered_frame(
}
int av1_get_q(const AV1_COMP *cpi) {
- const GF_GROUP *gf_group = &cpi->gf_group;
- const FRAME_TYPE frame_type = gf_group->frame_type[gf_group->index];
- const int q = (int)av1_convert_qindex_to_q(
- cpi->rc.avg_frame_qindex[frame_type], cpi->common.seq_params.bit_depth);
+ const GF_GROUP *gf_group = &cpi->ppi->gf_group;
+ const FRAME_TYPE frame_type = gf_group->frame_type[cpi->gf_frame_index];
+ int avg_frame_qindex;
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ avg_frame_qindex =
+ (cpi->ppi->gf_group.frame_parallel_level[cpi->gf_frame_index] > 0)
+ ? cpi->ppi->temp_avg_frame_qindex[frame_type]
+ : cpi->rc.avg_frame_qindex[frame_type];
+#else
+ avg_frame_qindex = cpi->rc.avg_frame_qindex[frame_type];
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
+ const int q = (int)av1_convert_qindex_to_q(avg_frame_qindex,
+ cpi->common.seq_params->bit_depth);
return q;
}
@@ -855,23 +871,24 @@ void av1_tf_do_filtering_row(AV1_COMP *cpi, ThreadData *td, int mb_row) {
}
}
tf_normalize_filtered_frame(mbd, block_size, mb_row, mb_col, num_planes,
- accum, count, &cpi->alt_ref_buffer);
+ accum, count, &cpi->ppi->alt_ref_buffer);
if (check_show_existing) {
const int y_height = mb_height >> mbd->plane[0].subsampling_y;
const int y_width = mb_width >> mbd->plane[0].subsampling_x;
const int source_y_stride = frame_to_filter->y_stride;
- const int filter_y_stride = cpi->alt_ref_buffer.y_stride;
+ const int filter_y_stride = cpi->ppi->alt_ref_buffer.y_stride;
const int source_offset =
mb_row * y_height * source_y_stride + mb_col * y_width;
const int filter_offset =
mb_row * y_height * filter_y_stride + mb_col * y_width;
unsigned int sse = 0;
- cpi->fn_ptr[block_size].vf(
+ cpi->ppi->fn_ptr[block_size].vf(
frame_to_filter->y_buffer + source_offset, source_y_stride,
- cpi->alt_ref_buffer.y_buffer + filter_offset, filter_y_stride, &sse);
+ cpi->ppi->alt_ref_buffer.y_buffer + filter_offset, filter_y_stride,
+ &sse);
diff->sum += sse;
- diff->sse += sse * sse;
+ diff->sse += sse * (int64_t)sse;
}
}
}
@@ -939,8 +956,9 @@ static void tf_setup_filtering_buffer(AV1_COMP *cpi,
const int lookahead_depth =
av1_lookahead_depth(cpi->ppi->lookahead, cpi->compressor_stage);
- int arf_src_offset = cpi->gf_group.arf_src_offset[cpi->gf_group.index];
- const FRAME_TYPE frame_type = cpi->gf_group.frame_type[cpi->gf_group.index];
+ int arf_src_offset = cpi->ppi->gf_group.arf_src_offset[cpi->gf_frame_index];
+ const FRAME_TYPE frame_type =
+ cpi->ppi->gf_group.frame_type[cpi->gf_frame_index];
// Temporal filtering should not go beyond key frames
const int key_to_curframe =
@@ -949,10 +967,10 @@ static void tf_setup_filtering_buffer(AV1_COMP *cpi,
AOMMAX(cpi->rc.frames_to_key - arf_src_offset - 1, 0);
// Number of buffered frames before the to-filter frame.
- const int max_before = AOMMIN(filter_frame_lookahead_idx, key_to_curframe);
+ int max_before = AOMMIN(filter_frame_lookahead_idx, key_to_curframe);
// Number of buffered frames after the to-filter frame.
- const int max_after =
+ int max_after =
AOMMIN(lookahead_depth - filter_frame_lookahead_idx - 1, curframe_to_key);
// Estimate noises for each plane.
@@ -964,26 +982,34 @@ static void tf_setup_filtering_buffer(AV1_COMP *cpi,
double *noise_levels = tf_ctx->noise_levels;
for (int plane = 0; plane < num_planes; ++plane) {
noise_levels[plane] = av1_estimate_noise_from_single_plane(
- to_filter_frame, plane, cpi->common.seq_params.bit_depth);
+ to_filter_frame, plane, cpi->common.seq_params->bit_depth);
}
// Get quantization factor.
const int q = av1_get_q(cpi);
- // Get correlation estimates from first-pass
- RATE_CONTROL *rc = &cpi->rc;
- const double *coeff = rc->cor_coeff;
- const int offset = rc->regions_offset;
- int cur_frame_idx =
- filter_frame_lookahead_idx + rc->frames_since_key - offset;
-
+ // Get correlation estimates from first-pass;
+ const FIRSTPASS_STATS *stats =
+ cpi->ppi->twopass.stats_in - (cpi->rc.frames_since_key == 0);
double accu_coeff0 = 1.0, accu_coeff1 = 1.0;
for (int i = 1; i <= max_after; i++) {
- accu_coeff1 *= coeff[cur_frame_idx + i];
+ if (stats + filter_frame_lookahead_idx + i >=
+ cpi->ppi->twopass.stats_buf_ctx->stats_in_end) {
+ max_after = i - 1;
+ break;
+ }
+ accu_coeff1 *=
+ AOMMAX(stats[filter_frame_lookahead_idx + i].cor_coeff, 0.001);
}
if (max_after >= 1) {
accu_coeff1 = pow(accu_coeff1, 1.0 / (double)max_after);
}
for (int i = 1; i <= max_before; i++) {
- accu_coeff0 *= coeff[cur_frame_idx - i + 1];
+ if (stats + filter_frame_lookahead_idx - i + 1 <=
+ cpi->ppi->twopass.stats_buf_ctx->stats_in_start) {
+ max_before = i - 1;
+ break;
+ }
+ accu_coeff0 *=
+ AOMMAX(stats[filter_frame_lookahead_idx - i + 1].cor_coeff, 0.001);
}
if (max_before >= 1) {
accu_coeff0 = pow(accu_coeff0, 1.0 / (double)max_before);
@@ -1008,7 +1034,7 @@ static void tf_setup_filtering_buffer(AV1_COMP *cpi,
num_before = AOMMIN(num_frames - 1, max_before);
num_after = 0;
} else {
- num_frames = AOMMIN(num_frames, cpi->rc.gfu_boost / 150);
+ num_frames = AOMMIN(num_frames, cpi->ppi->p_rc.gfu_boost / 150);
num_frames += !(num_frames & 1); // Make the number odd.
// Only use 2 neighbours for the second ARF.
if (is_second_arf) num_frames = AOMMIN(num_frames, 3);
@@ -1051,10 +1077,10 @@ static void tf_setup_filtering_buffer(AV1_COMP *cpi,
assert(frames[tf_ctx->filter_frame_idx] == to_filter_frame);
av1_setup_src_planes(&cpi->td.mb, &to_filter_buf->img, 0, 0, num_planes,
- cpi->common.seq_params.sb_size);
+ cpi->common.seq_params->sb_size);
av1_setup_block_planes(&cpi->td.mb.e_mbd,
- cpi->common.seq_params.subsampling_x,
- cpi->common.seq_params.subsampling_y, num_planes);
+ cpi->common.seq_params->subsampling_x,
+ cpi->common.seq_params->subsampling_y, num_planes);
}
/*!\cond */
@@ -1174,8 +1200,8 @@ int av1_temporal_filter(AV1_COMP *cpi, const int filter_frame_lookahead_idx,
int *show_existing_arf) {
MultiThreadInfo *const mt_info = &cpi->mt_info;
// Basic informaton of the current frame.
- const GF_GROUP *const gf_group = &cpi->gf_group;
- const uint8_t group_idx = gf_group->index;
+ const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
+ const uint8_t group_idx = cpi->gf_frame_index;
TemporalFilterCtx *tf_ctx = &cpi->tf_ctx;
TemporalFilterData *tf_data = &cpi->td.tf_data;
// Filter one more ARF if the lookahead index is leq 7 (w.r.t. 9-th frame).
@@ -1236,9 +1262,9 @@ int av1_temporal_filter(AV1_COMP *cpi, const int filter_frame_lookahead_idx,
int top_index = 0;
int bottom_index = 0;
const int q = av1_rc_pick_q_and_bounds(
- cpi, &cpi->rc, cpi->oxcf.frm_dim_cfg.width,
- cpi->oxcf.frm_dim_cfg.height, group_idx, &bottom_index, &top_index);
- const int ac_q = av1_ac_quant_QTX(q, 0, cpi->common.seq_params.bit_depth);
+ cpi, cpi->oxcf.frm_dim_cfg.width, cpi->oxcf.frm_dim_cfg.height,
+ group_idx, &bottom_index, &top_index);
+ const int ac_q = av1_ac_quant_QTX(q, 0, cpi->common.seq_params->bit_depth);
const float threshold = 0.7f * ac_q * ac_q;
if (!is_second_arf) {
diff --git a/third_party/libaom/source/libaom/av1/encoder/temporal_filter.h b/third_party/libaom/source/libaom/av1/encoder/temporal_filter.h
index 2ae7dd4bda..3b9563755c 100644
--- a/third_party/libaom/source/libaom/av1/encoder/temporal_filter.h
+++ b/third_party/libaom/source/libaom/av1/encoder/temporal_filter.h
@@ -64,6 +64,14 @@ struct ThreadData;
// then the actual threshold will be 720 * 0.1 = 72. Similarly, the threshold
// for 360p videos will be 360 * 0.1 = 36.
#define TF_SEARCH_DISTANCE_THRESHOLD 0.1
+// 6. Threshold to identify if the q is in a relative high range.
+// Above this cutoff q, a stronger filtering is applied.
+// For a high q, the quantization throws away more information, and thus a
+// stronger filtering is less likely to distort the encoded quality, while a
+// stronger filtering could reduce bit rates.
+// Ror a low q, more details are expected to be retained. Filtering is thus
+// more conservative.
+#define TF_QINDEX_CUTOFF 128
#define NOISE_ESTIMATION_EDGE_THRESHOLD 50
@@ -276,11 +284,6 @@ static AOM_INLINE void tf_dealloc_data(TemporalFilterData *tf_data,
aom_free(tf_data->pred);
}
-// Helper function to compute number of blocks on either side of the frame.
-static INLINE int get_num_blocks(const int frame_length, const int mb_length) {
- return (frame_length + mb_length - 1) / mb_length;
-}
-
// Saves the state prior to temporal filter process.
// Inputs:
// mbd: Pointer to the block for filtering.
diff --git a/third_party/libaom/source/libaom/av1/encoder/tokenize.c b/third_party/libaom/source/libaom/av1/encoder/tokenize.c
index bc63cc00ae..7e16b29a9a 100644
--- a/third_party/libaom/source/libaom/av1/encoder/tokenize.c
+++ b/third_party/libaom/source/libaom/av1/encoder/tokenize.c
@@ -155,16 +155,18 @@ static void tokenize_vartx(ThreadData *td, TX_SIZE tx_size,
const int bsw = tx_size_wide_unit[sub_txs];
const int bsh = tx_size_high_unit[sub_txs];
const int step = bsw * bsh;
+ const int row_end =
+ AOMMIN(tx_size_high_unit[tx_size], max_blocks_high - blk_row);
+ const int col_end =
+ AOMMIN(tx_size_wide_unit[tx_size], max_blocks_wide - blk_col);
assert(bsw > 0 && bsh > 0);
- for (int row = 0; row < tx_size_high_unit[tx_size]; row += bsh) {
- for (int col = 0; col < tx_size_wide_unit[tx_size]; col += bsw) {
- const int offsetr = blk_row + row;
+ for (int row = 0; row < row_end; row += bsh) {
+ const int offsetr = blk_row + row;
+ for (int col = 0; col < col_end; col += bsw) {
const int offsetc = blk_col + col;
- if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
-
tokenize_vartx(td, sub_txs, plane_bsize, offsetr, offsetc, block, plane,
arg);
block += step;
diff --git a/third_party/libaom/source/libaom/av1/encoder/tokenize.h b/third_party/libaom/source/libaom/av1/encoder/tokenize.h
index 51eb28cee6..f31dc96958 100644
--- a/third_party/libaom/source/libaom/av1/encoder/tokenize.h
+++ b/third_party/libaom/source/libaom/av1/encoder/tokenize.h
@@ -119,8 +119,8 @@ static INLINE unsigned int get_token_alloc(int mb_rows, int mb_cols,
// Allocate memory for token related info.
static AOM_INLINE void alloc_token_info(AV1_COMMON *cm, TokenInfo *token_info) {
int mi_rows_aligned_to_sb =
- ALIGN_POWER_OF_TWO(cm->mi_params.mi_rows, cm->seq_params.mib_size_log2);
- int sb_rows = mi_rows_aligned_to_sb >> cm->seq_params.mib_size_log2;
+ ALIGN_POWER_OF_TWO(cm->mi_params.mi_rows, cm->seq_params->mib_size_log2);
+ int sb_rows = mi_rows_aligned_to_sb >> cm->seq_params->mib_size_log2;
const int num_planes = av1_num_planes(cm);
unsigned int tokens =
get_token_alloc(cm->mi_params.mb_rows, cm->mi_params.mb_cols,
diff --git a/third_party/libaom/source/libaom/av1/encoder/tpl_model.c b/third_party/libaom/source/libaom/av1/encoder/tpl_model.c
index 6ae957d4e5..e07ab3e311 100644
--- a/third_party/libaom/source/libaom/av1/encoder/tpl_model.c
+++ b/third_party/libaom/source/libaom/av1/encoder/tpl_model.c
@@ -35,38 +35,48 @@
#include "av1/encoder/reconinter_enc.h"
#include "av1/encoder/tpl_model.h"
-static AOM_INLINE int tpl_use_multithread(const AV1_COMP *cpi) {
- return cpi->mt_info.num_workers > 1 && !cpi->sf.tpl_sf.allow_compound_pred;
+static INLINE double exp_bounded(double v) {
+ // When v > 700 or <-700, the exp function will be close to overflow
+ // For details, see the "Notes" in the following link.
+ // https://en.cppreference.com/w/c/numeric/math/exp
+ if (v > 700) {
+ return DBL_MAX;
+ } else if (v < -700) {
+ return 0;
+ }
+ return exp(v);
}
-static AOM_INLINE void tpl_stats_record_txfm_block(TplDepFrame *tpl_frame,
- const tran_low_t *coeff) {
- aom_clear_system_state();
- // For transform larger than 16x16, the scale of coeff need to be adjusted.
- // It's not LOSSLESS_Q_STEP.
- assert(tpl_frame->coeff_num <= 256);
- for (int i = 0; i < tpl_frame->coeff_num; ++i) {
- tpl_frame->abs_coeff_sum[i] += abs(coeff[i]) / (double)LOSSLESS_Q_STEP;
+void av1_init_tpl_txfm_stats(TplTxfmStats *tpl_txfm_stats) {
+ tpl_txfm_stats->coeff_num = 256;
+ tpl_txfm_stats->txfm_block_count = 0;
+ memset(tpl_txfm_stats->abs_coeff_sum, 0,
+ sizeof(tpl_txfm_stats->abs_coeff_sum[0]) * tpl_txfm_stats->coeff_num);
+}
+
+void av1_accumulate_tpl_txfm_stats(const TplTxfmStats *sub_stats,
+ TplTxfmStats *accumulated_stats) {
+ accumulated_stats->txfm_block_count += sub_stats->txfm_block_count;
+ for (int i = 0; i < accumulated_stats->coeff_num; ++i) {
+ accumulated_stats->abs_coeff_sum[i] += sub_stats->abs_coeff_sum[i];
}
- ++tpl_frame->txfm_block_count;
}
-static AOM_INLINE void tpl_stats_update_abs_coeff_mean(TplDepFrame *tpl_frame) {
- aom_clear_system_state();
- for (int i = 0; i < tpl_frame->coeff_num; ++i) {
- tpl_frame->abs_coeff_mean[i] =
- tpl_frame->abs_coeff_sum[i] / tpl_frame->txfm_block_count;
+void av1_record_tpl_txfm_block(TplTxfmStats *tpl_txfm_stats,
+ const tran_low_t *coeff) {
+ // For transform larger than 16x16, the scale of coeff need to be adjusted.
+ // It's not LOSSLESS_Q_STEP.
+ assert(tpl_txfm_stats->coeff_num <= 256);
+ for (int i = 0; i < tpl_txfm_stats->coeff_num; ++i) {
+ tpl_txfm_stats->abs_coeff_sum[i] += abs(coeff[i]) / (double)LOSSLESS_Q_STEP;
}
+ ++tpl_txfm_stats->txfm_block_count;
}
-void av1_tpl_stats_init_txfm_stats(TplDepFrame *tpl_frame, int tpl_bsize_1d) {
- aom_clear_system_state();
- tpl_frame->txfm_block_count = 0;
- tpl_frame->coeff_num = tpl_bsize_1d * tpl_bsize_1d;
- memset(tpl_frame->abs_coeff_sum, 0, sizeof(tpl_frame->abs_coeff_sum));
- assert(sizeof(tpl_frame->abs_coeff_mean) /
- sizeof(tpl_frame->abs_coeff_mean[0]) ==
- tpl_frame->coeff_num);
+static AOM_INLINE void av1_tpl_store_txfm_stats(
+ TplParams *tpl_data, const TplTxfmStats *tpl_txfm_stats,
+ const int frame_index) {
+ tpl_data->txfm_stats_list[frame_index] = *tpl_txfm_stats;
}
static AOM_INLINE void get_quantize_error(const MACROBLOCK *x, int plane,
@@ -118,9 +128,11 @@ static AOM_INLINE void set_tpl_stats_block_size(uint8_t *block_mis_log2,
assert(*tpl_bsize_1d >= 16);
}
-void av1_setup_tpl_buffers(AV1_COMMON *const cm, TplParams *const tpl_data,
- int lag_in_frames) {
- CommonModeInfoParams *const mi_params = &cm->mi_params;
+void av1_setup_tpl_buffers(AV1_PRIMARY *const ppi,
+ CommonModeInfoParams *const mi_params, int width,
+ int height, int byte_alignment, int lag_in_frames) {
+ SequenceHeader *const seq_params = &ppi->seq_params;
+ TplParams *const tpl_data = &ppi->tpl_data;
set_tpl_stats_block_size(&tpl_data->tpl_stats_block_mis_log2,
&tpl_data->tpl_bsize_1d);
const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2;
@@ -139,7 +151,6 @@ void av1_setup_tpl_buffers(AV1_COMMON *const cm, TplParams *const tpl_data,
tpl_frame->stride = tpl_data->tpl_stats_buffer[frame].width;
tpl_frame->mi_rows = mi_params->mi_rows;
tpl_frame->mi_cols = mi_params->mi_cols;
- av1_tpl_stats_init_txfm_stats(tpl_frame, tpl_data->tpl_bsize_1d);
}
tpl_data->tpl_frame = &tpl_data->tpl_stats_buffer[REF_FRAMES + 1];
@@ -150,47 +161,33 @@ void av1_setup_tpl_buffers(AV1_COMMON *const cm, TplParams *const tpl_data,
// TODO(aomedia:2873): Explore the allocation of tpl buffers based on
// lag_in_frames.
for (int frame = 0; frame < MAX_LAG_BUFFERS; ++frame) {
- CHECK_MEM_ERROR(
- cm, tpl_data->tpl_stats_pool[frame],
+ AOM_CHECK_MEM_ERROR(
+ &ppi->error, tpl_data->tpl_stats_pool[frame],
aom_calloc(tpl_data->tpl_stats_buffer[frame].width *
tpl_data->tpl_stats_buffer[frame].height,
sizeof(*tpl_data->tpl_stats_buffer[frame].tpl_stats_ptr)));
- if (aom_alloc_frame_buffer(
- &tpl_data->tpl_rec_pool[frame], cm->width, cm->height,
- cm->seq_params.subsampling_x, cm->seq_params.subsampling_y,
- cm->seq_params.use_highbitdepth, tpl_data->border_in_pixels,
- cm->features.byte_alignment))
- aom_internal_error(&cm->error, AOM_CODEC_MEM_ERROR,
+
+ if (aom_alloc_frame_buffer(&tpl_data->tpl_rec_pool[frame], width, height,
+ seq_params->subsampling_x,
+ seq_params->subsampling_y,
+ seq_params->use_highbitdepth,
+ tpl_data->border_in_pixels, byte_alignment))
+ aom_internal_error(&ppi->error, AOM_CODEC_MEM_ERROR,
"Failed to allocate frame buffer");
}
}
-static AOM_INLINE void tpl_fwd_txfm(const int16_t *src_diff, int bw,
- tran_low_t *coeff, TX_SIZE tx_size,
- int bit_depth, int is_hbd) {
- TxfmParam txfm_param;
- txfm_param.tx_type = DCT_DCT;
- txfm_param.tx_size = tx_size;
- txfm_param.lossless = 0;
- txfm_param.tx_set_type = EXT_TX_SET_ALL16;
-
- txfm_param.bd = bit_depth;
- txfm_param.is_hbd = is_hbd;
- av1_fwd_txfm(src_diff, coeff, bw, &txfm_param);
-}
-
-static AOM_INLINE int64_t tpl_get_satd_cost(const MACROBLOCK *x,
+static AOM_INLINE int64_t tpl_get_satd_cost(BitDepthInfo bd_info,
int16_t *src_diff, int diff_stride,
const uint8_t *src, int src_stride,
const uint8_t *dst, int dst_stride,
tran_low_t *coeff, int bw, int bh,
TX_SIZE tx_size) {
- const MACROBLOCKD *xd = &x->e_mbd;
const int pix_num = bw * bh;
- av1_subtract_block(xd, bh, bw, src_diff, diff_stride, src, src_stride, dst,
- dst_stride);
- tpl_fwd_txfm(src_diff, bw, coeff, tx_size, xd->bd, is_cur_buf_hbd(xd));
+ av1_subtract_block(bd_info, bh, bw, src_diff, diff_stride, src, src_stride,
+ dst, dst_stride);
+ av1_quick_txfm(/*use_hadamard=*/0, tx_size, bd_info, src_diff, bw, coeff);
return aom_satd(coeff, pix_num);
}
@@ -198,7 +195,6 @@ static int rate_estimator(const tran_low_t *qcoeff, int eob, TX_SIZE tx_size) {
const SCAN_ORDER *const scan_order = &av1_scan_orders[tx_size][DCT_DCT];
assert((1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]]) >= eob);
- aom_clear_system_state();
int rate_cost = 1;
for (int idx = 0; idx < eob; ++idx) {
@@ -215,11 +211,11 @@ static AOM_INLINE void txfm_quant_rdcost(
tran_low_t *qcoeff, tran_low_t *dqcoeff, int bw, int bh, TX_SIZE tx_size,
int *rate_cost, int64_t *recon_error, int64_t *sse) {
const MACROBLOCKD *xd = &x->e_mbd;
+ const BitDepthInfo bd_info = get_bit_depth_info(xd);
uint16_t eob;
- av1_subtract_block(xd, bh, bw, src_diff, diff_stride, src, src_stride, dst,
- dst_stride);
- tpl_fwd_txfm(src_diff, diff_stride, coeff, tx_size, xd->bd,
- is_cur_buf_hbd(xd));
+ av1_subtract_block(bd_info, bh, bw, src_diff, diff_stride, src, src_stride,
+ dst, dst_stride);
+ av1_quick_txfm(/*use_hadamard=*/0, tx_size, bd_info, src_diff, bw, coeff);
get_quantize_error(x, 0, coeff, qcoeff, dqcoeff, tx_size, &eob, recon_error,
sse);
@@ -316,13 +312,16 @@ static int is_alike_mv(int_mv candidate_mv, center_mv_t *center_mvs,
}
static void get_rate_distortion(
- int *rate_cost, int64_t *recon_error, int16_t *src_diff, tran_low_t *coeff,
- tran_low_t *qcoeff, tran_low_t *dqcoeff, AV1_COMMON *cm, MACROBLOCK *x,
+ int *rate_cost, int64_t *recon_error, int64_t *pred_error,
+ int16_t *src_diff, tran_low_t *coeff, tran_low_t *qcoeff,
+ tran_low_t *dqcoeff, AV1_COMMON *cm, MACROBLOCK *x,
const YV12_BUFFER_CONFIG *ref_frame_ptr[2], uint8_t *rec_buffer_pool[3],
const int rec_stride_pool[3], TX_SIZE tx_size, PREDICTION_MODE best_mode,
int mi_row, int mi_col, int use_y_only_rate_distortion) {
+ const SequenceHeader *seq_params = cm->seq_params;
*rate_cost = 0;
*recon_error = 1;
+ *pred_error = 1;
MACROBLOCKD *xd = &x->e_mbd;
int is_compound = (best_mode == NEW_NEWMV);
@@ -356,7 +355,8 @@ static void get_rate_distortion(
for (int ref = 0; ref < 1 + is_compound; ++ref) {
if (!is_inter_mode(best_mode)) {
av1_predict_intra_block(
- cm, xd, block_size_wide[bsize_plane], block_size_high[bsize_plane],
+ xd, seq_params->sb_size, seq_params->enable_intra_edge_filter,
+ block_size_wide[bsize_plane], block_size_high[bsize_plane],
max_txsize_rect_lookup[bsize_plane], best_mode, 0, 0,
FILTER_INTRA_MODES, dst_buffer, dst_buffer_stride, dst_buffer,
dst_buffer_stride, 0, 0, plane);
@@ -405,21 +405,24 @@ static void get_rate_distortion(
&this_rate, &this_recon_error, &sse);
*recon_error += this_recon_error;
+ *pred_error += sse;
*rate_cost += this_rate;
}
}
-static AOM_INLINE void mode_estimation(AV1_COMP *cpi, MACROBLOCK *x, int mi_row,
- int mi_col, BLOCK_SIZE bsize,
- TX_SIZE tx_size,
+static AOM_INLINE void mode_estimation(AV1_COMP *cpi,
+ TplTxfmStats *tpl_txfm_stats,
+ MACROBLOCK *x, int mi_row, int mi_col,
+ BLOCK_SIZE bsize, TX_SIZE tx_size,
TplDepStats *tpl_stats) {
AV1_COMMON *cm = &cpi->common;
- const GF_GROUP *gf_group = &cpi->gf_group;
+ const GF_GROUP *gf_group = &cpi->ppi->gf_group;
(void)gf_group;
MACROBLOCKD *xd = &x->e_mbd;
- TplParams *tpl_data = &cpi->tpl_data;
+ const BitDepthInfo bd_info = get_bit_depth_info(xd);
+ TplParams *tpl_data = &cpi->ppi->tpl_data;
TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_data->frame_idx];
const uint8_t block_mis_log2 = tpl_data->tpl_stats_block_mis_log2;
@@ -471,6 +474,7 @@ static AOM_INLINE void mode_estimation(AV1_COMP *cpi, MACROBLOCK *x, int mi_row,
uint8_t *predictor =
is_cur_buf_hbd(xd) ? CONVERT_TO_BYTEPTR(predictor8) : predictor8;
int64_t recon_error = 1;
+ int64_t pred_error = 1;
memset(tpl_stats, 0, sizeof(*tpl_stats));
tpl_stats->ref_frame_index[0] = -1;
@@ -493,7 +497,6 @@ static AOM_INLINE void mode_estimation(AV1_COMP *cpi, MACROBLOCK *x, int mi_row,
// Pre-load the bottom left line.
if (xd->left_available &&
mi_row + tx_size_high_unit[tx_size] < xd->tile.mi_row_end) {
-#if CONFIG_AV1_HIGHBITDEPTH
if (is_cur_buf_hbd(xd)) {
uint16_t *dst = CONVERT_TO_SHORTPTR(dst_buffer);
for (int i = 0; i < bw; ++i)
@@ -504,26 +507,24 @@ static AOM_INLINE void mode_estimation(AV1_COMP *cpi, MACROBLOCK *x, int mi_row,
dst_buffer[(bw + i) * dst_buffer_stride - 1] =
dst_buffer[(bw - 1) * dst_buffer_stride - 1];
}
-#else
- for (int i = 0; i < bw; ++i)
- dst_buffer[(bw + i) * dst_buffer_stride - 1] =
- dst_buffer[(bw - 1) * dst_buffer_stride - 1];
-#endif
}
// if cpi->sf.tpl_sf.prune_intra_modes is on, then search only DC_PRED,
// H_PRED, and V_PRED
const PREDICTION_MODE last_intra_mode =
cpi->sf.tpl_sf.prune_intra_modes ? D45_PRED : INTRA_MODE_END;
+ const SequenceHeader *seq_params = cm->seq_params;
for (PREDICTION_MODE mode = INTRA_MODE_START; mode < last_intra_mode;
++mode) {
- av1_predict_intra_block(cm, xd, block_size_wide[bsize],
- block_size_high[bsize], tx_size, mode, 0, 0,
- FILTER_INTRA_MODES, dst_buffer, dst_buffer_stride,
- predictor, bw, 0, 0, 0);
+ av1_predict_intra_block(xd, seq_params->sb_size,
+ seq_params->enable_intra_edge_filter,
+ block_size_wide[bsize], block_size_high[bsize],
+ tx_size, mode, 0, 0, FILTER_INTRA_MODES, dst_buffer,
+ dst_buffer_stride, predictor, bw, 0, 0, 0);
- intra_cost = tpl_get_satd_cost(x, src_diff, bw, src_mb_buffer, src_stride,
- predictor, bw, coeff, bw, bh, tx_size);
+ intra_cost =
+ tpl_get_satd_cost(bd_info, src_diff, bw, src_mb_buffer, src_stride,
+ predictor, bw, coeff, bw, bh, tx_size);
if (intra_cost < best_intra_cost) {
best_intra_cost = intra_cost;
@@ -607,7 +608,7 @@ static AOM_INLINE void mode_estimation(AV1_COMP *cpi, MACROBLOCK *x, int mi_row,
for (idx = 0; idx < refmv_count; ++idx) {
FULLPEL_MV mv = get_fullmv_from_mv(&center_mvs[idx].mv.as_mv);
clamp_fullmv(&mv, &x->mv_limits);
- center_mvs[idx].sad = (int)cpi->fn_ptr[bsize].sdf(
+ center_mvs[idx].sad = (int)cpi->ppi->fn_ptr[bsize].sdf(
src_mb_buffer, src_stride, &ref_mb[mv.row * ref_stride + mv.col],
ref_stride);
}
@@ -653,8 +654,9 @@ static AOM_INLINE void mode_estimation(AV1_COMP *cpi, MACROBLOCK *x, int mi_row,
av1_enc_build_one_inter_predictor(predictor, bw, &best_rfidx_mv.as_mv,
&inter_pred_params);
- inter_cost = tpl_get_satd_cost(x, src_diff, bw, src_mb_buffer, src_stride,
- predictor, bw, coeff, bw, bh, tx_size);
+ inter_cost =
+ tpl_get_satd_cost(bd_info, src_diff, bw, src_mb_buffer, src_stride,
+ predictor, bw, coeff, bw, bh, tx_size);
// Store inter cost for each ref frame
tpl_stats->pred_error[rf_idx] = AOMMAX(1, inter_cost);
@@ -732,8 +734,9 @@ static AOM_INLINE void mode_estimation(AV1_COMP *cpi, MACROBLOCK *x, int mi_row,
av1_enc_build_one_inter_predictor(predictor, bw, &tmp_mv[ref].as_mv,
&inter_pred_params);
}
- inter_cost = tpl_get_satd_cost(x, src_diff, bw, src_mb_buffer, src_stride,
- predictor, bw, coeff, bw, bh, tx_size);
+ inter_cost =
+ tpl_get_satd_cost(bd_info, src_diff, bw, src_mb_buffer, src_stride,
+ predictor, bw, coeff, bw, bh, tx_size);
if (inter_cost < best_inter_cost) {
best_cmp_rf_idx = cmp_rf_idx;
best_inter_cost = inter_cost;
@@ -760,8 +763,8 @@ static AOM_INLINE void mode_estimation(AV1_COMP *cpi, MACROBLOCK *x, int mi_row,
: NULL,
};
int rate_cost = 1;
- get_rate_distortion(&rate_cost, &recon_error, src_diff, coeff, qcoeff,
- dqcoeff, cm, x, ref_frame_ptr, rec_buffer_pool,
+ get_rate_distortion(&rate_cost, &recon_error, &pred_error, src_diff, coeff,
+ qcoeff, dqcoeff, cm, x, ref_frame_ptr, rec_buffer_pool,
rec_stride_pool, tx_size, best_mode, mi_row, mi_col,
use_y_only_rate_distortion);
tpl_stats->srcrf_rate = rate_cost << TPL_DEP_COST_SCALE_LOG2;
@@ -772,7 +775,8 @@ static AOM_INLINE void mode_estimation(AV1_COMP *cpi, MACROBLOCK *x, int mi_row,
tpl_stats->inter_cost = best_inter_cost << TPL_DEP_COST_SCALE_LOG2;
tpl_stats->intra_cost = best_intra_cost << TPL_DEP_COST_SCALE_LOG2;
- tpl_stats->srcrf_dist = recon_error << (TPL_DEP_COST_SCALE_LOG2);
+ tpl_stats->srcrf_dist = recon_error << TPL_DEP_COST_SCALE_LOG2;
+ tpl_stats->srcrf_sse = pred_error << TPL_DEP_COST_SCALE_LOG2;
// Final encode
int rate_cost = 0;
@@ -786,21 +790,19 @@ static AOM_INLINE void mode_estimation(AV1_COMP *cpi, MACROBLOCK *x, int mi_row,
best_mode == NEW_NEWMV
? tpl_data->ref_frame[comp_ref_frames[best_cmp_rf_idx][1]]
: NULL;
- get_rate_distortion(&rate_cost, &recon_error, src_diff, coeff, qcoeff,
- dqcoeff, cm, x, ref_frame_ptr, rec_buffer_pool,
+ get_rate_distortion(&rate_cost, &recon_error, &pred_error, src_diff, coeff,
+ qcoeff, dqcoeff, cm, x, ref_frame_ptr, rec_buffer_pool,
rec_stride_pool, tx_size, best_mode, mi_row, mi_col,
use_y_only_rate_distortion);
- if (!tpl_use_multithread(cpi)) {
- // TODO(angiebird): make this work for multithread
- tpl_stats_record_txfm_block(tpl_frame, coeff);
- }
+ av1_record_tpl_txfm_block(tpl_txfm_stats, coeff);
tpl_stats->recrf_dist = recon_error << (TPL_DEP_COST_SCALE_LOG2);
tpl_stats->recrf_rate = rate_cost << TPL_DEP_COST_SCALE_LOG2;
if (!is_inter_mode(best_mode)) {
tpl_stats->srcrf_dist = recon_error << (TPL_DEP_COST_SCALE_LOG2);
tpl_stats->srcrf_rate = rate_cost << TPL_DEP_COST_SCALE_LOG2;
+ tpl_stats->srcrf_sse = pred_error << TPL_DEP_COST_SCALE_LOG2;
}
tpl_stats->recrf_dist = AOMMAX(tpl_stats->srcrf_dist, tpl_stats->recrf_dist);
@@ -810,8 +812,8 @@ static AOM_INLINE void mode_estimation(AV1_COMP *cpi, MACROBLOCK *x, int mi_row,
ref_frame_ptr[0] = tpl_data->ref_frame[comp_ref_frames[best_cmp_rf_idx][0]];
ref_frame_ptr[1] =
tpl_data->src_ref_frame[comp_ref_frames[best_cmp_rf_idx][1]];
- get_rate_distortion(&rate_cost, &recon_error, src_diff, coeff, qcoeff,
- dqcoeff, cm, x, ref_frame_ptr, rec_buffer_pool,
+ get_rate_distortion(&rate_cost, &recon_error, &pred_error, src_diff, coeff,
+ qcoeff, dqcoeff, cm, x, ref_frame_ptr, rec_buffer_pool,
rec_stride_pool, tx_size, best_mode, mi_row, mi_col,
use_y_only_rate_distortion);
tpl_stats->cmp_recrf_dist[0] = recon_error << TPL_DEP_COST_SCALE_LOG2;
@@ -831,8 +833,8 @@ static AOM_INLINE void mode_estimation(AV1_COMP *cpi, MACROBLOCK *x, int mi_row,
ref_frame_ptr[0] =
tpl_data->src_ref_frame[comp_ref_frames[best_cmp_rf_idx][0]];
ref_frame_ptr[1] = tpl_data->ref_frame[comp_ref_frames[best_cmp_rf_idx][1]];
- get_rate_distortion(&rate_cost, &recon_error, src_diff, coeff, qcoeff,
- dqcoeff, cm, x, ref_frame_ptr, rec_buffer_pool,
+ get_rate_distortion(&rate_cost, &recon_error, &pred_error, src_diff, coeff,
+ qcoeff, dqcoeff, cm, x, ref_frame_ptr, rec_buffer_pool,
rec_stride_pool, tx_size, best_mode, mi_row, mi_col,
use_y_only_rate_distortion);
tpl_stats->cmp_recrf_dist[1] = recon_error << TPL_DEP_COST_SCALE_LOG2;
@@ -887,41 +889,24 @@ static int round_floor(int ref_pos, int bsize_pix) {
return round;
}
-static int get_overlap_area(int grid_pos_row, int grid_pos_col, int ref_pos_row,
- int ref_pos_col, int block, BLOCK_SIZE bsize) {
- int width = 0, height = 0;
- int bw = 4 << mi_size_wide_log2[bsize];
- int bh = 4 << mi_size_high_log2[bsize];
-
- switch (block) {
- case 0:
- width = grid_pos_col + bw - ref_pos_col;
- height = grid_pos_row + bh - ref_pos_row;
- break;
- case 1:
- width = ref_pos_col + bw - grid_pos_col;
- height = grid_pos_row + bh - ref_pos_row;
- break;
- case 2:
- width = grid_pos_col + bw - ref_pos_col;
- height = ref_pos_row + bh - grid_pos_row;
- break;
- case 3:
- width = ref_pos_col + bw - grid_pos_col;
- height = ref_pos_row + bh - grid_pos_row;
- break;
- default: assert(0);
+int av1_get_overlap_area(int row_a, int col_a, int row_b, int col_b, int width,
+ int height) {
+ int min_row = AOMMAX(row_a, row_b);
+ int max_row = AOMMIN(row_a + height, row_b + height);
+ int min_col = AOMMAX(col_a, col_b);
+ int max_col = AOMMIN(col_a + width, col_b + width);
+ if (min_row < max_row && min_col < max_col) {
+ return (max_row - min_row) * (max_col - min_col);
}
- int overlap_area = width * height;
- return overlap_area;
+ return 0;
}
int av1_tpl_ptr_pos(int mi_row, int mi_col, int stride, uint8_t right_shift) {
return (mi_row >> right_shift) * stride + (mi_col >> right_shift);
}
-static int64_t delta_rate_cost(int64_t delta_rate, int64_t recrf_dist,
- int64_t srcrf_dist, int pix_num) {
+int64_t av1_delta_rate_cost(int64_t delta_rate, int64_t recrf_dist,
+ int64_t srcrf_dist, int pix_num) {
double beta = (double)srcrf_dist / recrf_dist;
int64_t rate_cost = delta_rate;
@@ -952,7 +937,6 @@ static int64_t delta_rate_cost(int64_t delta_rate, int64_t recrf_dist,
static AOM_INLINE void tpl_model_update_b(TplParams *const tpl_data, int mi_row,
int mi_col, const BLOCK_SIZE bsize,
int frame_idx, int ref) {
- aom_clear_system_state();
TplDepFrame *tpl_frame_ptr = &tpl_data->tpl_frame[frame_idx];
TplDepStats *tpl_ptr = tpl_frame_ptr->tpl_stats_ptr;
TplDepFrame *tpl_frame = tpl_data->tpl_frame;
@@ -998,8 +982,8 @@ static AOM_INLINE void tpl_model_update_b(TplParams *const tpl_data, int mi_row,
tpl_stats_ptr->recrf_dist));
int64_t delta_rate = tpl_stats_ptr->recrf_rate - srcrf_rate;
int64_t mc_dep_rate =
- delta_rate_cost(tpl_stats_ptr->mc_dep_rate, tpl_stats_ptr->recrf_dist,
- srcrf_dist, pix_num);
+ av1_delta_rate_cost(tpl_stats_ptr->mc_dep_rate, tpl_stats_ptr->recrf_dist,
+ srcrf_dist, pix_num);
for (block = 0; block < 4; ++block) {
int grid_pos_row = grid_pos_row_base + bh * (block >> 1);
@@ -1007,8 +991,8 @@ static AOM_INLINE void tpl_model_update_b(TplParams *const tpl_data, int mi_row,
if (grid_pos_row >= 0 && grid_pos_row < ref_tpl_frame->mi_rows * MI_SIZE &&
grid_pos_col >= 0 && grid_pos_col < ref_tpl_frame->mi_cols * MI_SIZE) {
- int overlap_area = get_overlap_area(
- grid_pos_row, grid_pos_col, ref_pos_row, ref_pos_col, block, bsize);
+ int overlap_area = av1_get_overlap_area(grid_pos_row, grid_pos_col,
+ ref_pos_row, ref_pos_col, bw, bh);
int ref_mi_row = round_floor(grid_pos_row, bh) * mi_height;
int ref_mi_col = round_floor(grid_pos_col, bw) * mi_width;
assert((1 << block_mis_log2) == mi_height);
@@ -1043,6 +1027,7 @@ static AOM_INLINE void tpl_model_store(TplDepStats *tpl_stats_ptr, int mi_row,
tpl_ptr->intra_cost = AOMMAX(1, tpl_ptr->intra_cost);
tpl_ptr->inter_cost = AOMMAX(1, tpl_ptr->inter_cost);
tpl_ptr->srcrf_dist = AOMMAX(1, tpl_ptr->srcrf_dist);
+ tpl_ptr->srcrf_sse = AOMMAX(1, tpl_ptr->srcrf_sse);
tpl_ptr->recrf_dist = AOMMAX(1, tpl_ptr->recrf_dist);
tpl_ptr->srcrf_rate = AOMMAX(1, tpl_ptr->srcrf_rate);
tpl_ptr->recrf_rate = AOMMAX(1, tpl_ptr->recrf_rate);
@@ -1068,12 +1053,12 @@ static AOM_INLINE int get_gop_length(const GF_GROUP *gf_group) {
// Initialize the mc_flow parameters used in computing tpl data.
static AOM_INLINE void init_mc_flow_dispenser(AV1_COMP *cpi, int frame_idx,
int pframe_qindex) {
- TplParams *const tpl_data = &cpi->tpl_data;
+ TplParams *const tpl_data = &cpi->ppi->tpl_data;
TplDepFrame *tpl_frame = &tpl_data->tpl_frame[frame_idx];
const YV12_BUFFER_CONFIG *this_frame = tpl_frame->gf_picture;
const YV12_BUFFER_CONFIG *ref_frames_ordered[INTER_REFS_PER_FRAME];
uint32_t ref_frame_display_indices[INTER_REFS_PER_FRAME];
- GF_GROUP *gf_group = &cpi->gf_group;
+ GF_GROUP *gf_group = &cpi->ppi->gf_group;
int ref_pruning_enabled = is_frame_eligible_for_ref_pruning(
gf_group, cpi->sf.inter_sf.selective_ref_frame,
cpi->sf.tpl_sf.prune_ref_frames_in_tpl, frame_idx);
@@ -1084,6 +1069,7 @@ static AOM_INLINE void init_mc_flow_dispenser(AV1_COMP *cpi, int frame_idx,
ThreadData *td = &cpi->td;
MACROBLOCK *x = &td->mb;
MACROBLOCKD *xd = &x->e_mbd;
+ TplTxfmStats *tpl_txfm_stats = &td->tpl_txfm_stats;
tpl_data->frame_idx = frame_idx;
tpl_reset_src_ref_frames(tpl_data);
av1_tile_init(&xd->tile, cm, 0, 0);
@@ -1161,18 +1147,21 @@ static AOM_INLINE void init_mc_flow_dispenser(AV1_COMP *cpi, int frame_idx,
tpl_frame->base_rdmult =
av1_compute_rd_mult_based_on_qindex(cpi, pframe_qindex) / 6;
+
+ av1_init_tpl_txfm_stats(tpl_txfm_stats);
}
// This function stores the motion estimation dependencies of all the blocks in
// a row
-void av1_mc_flow_dispenser_row(AV1_COMP *cpi, MACROBLOCK *x, int mi_row,
- BLOCK_SIZE bsize, TX_SIZE tx_size) {
+void av1_mc_flow_dispenser_row(AV1_COMP *cpi, TplTxfmStats *tpl_txfm_stats,
+ MACROBLOCK *x, int mi_row, BLOCK_SIZE bsize,
+ TX_SIZE tx_size) {
AV1_COMMON *const cm = &cpi->common;
MultiThreadInfo *const mt_info = &cpi->mt_info;
AV1TplRowMultiThreadInfo *const tpl_row_mt = &mt_info->tpl_row_mt;
const CommonModeInfoParams *const mi_params = &cm->mi_params;
const int mi_width = mi_size_wide[bsize];
- TplParams *const tpl_data = &cpi->tpl_data;
+ TplParams *const tpl_data = &cpi->ppi->tpl_data;
TplDepFrame *tpl_frame = &tpl_data->tpl_frame[tpl_data->frame_idx];
MACROBLOCKD *xd = &x->e_mbd;
@@ -1194,7 +1183,8 @@ void av1_mc_flow_dispenser_row(AV1_COMP *cpi, MACROBLOCK *x, int mi_row,
xd->mb_to_left_edge = -GET_MV_SUBPEL(mi_col * MI_SIZE);
xd->mb_to_right_edge =
GET_MV_SUBPEL(mi_params->mi_cols - mi_width - mi_col);
- mode_estimation(cpi, x, mi_row, mi_col, bsize, tx_size, &tpl_stats);
+ mode_estimation(cpi, tpl_txfm_stats, x, mi_row, mi_col, bsize, tx_size,
+ &tpl_stats);
// Motion flow dependency dispenser.
tpl_model_store(tpl_frame->tpl_stats_ptr, mi_row, mi_col, tpl_frame->stride,
@@ -1210,40 +1200,36 @@ static AOM_INLINE void mc_flow_dispenser(AV1_COMP *cpi) {
ThreadData *td = &cpi->td;
MACROBLOCK *x = &td->mb;
MACROBLOCKD *xd = &x->e_mbd;
- const BLOCK_SIZE bsize = convert_length_to_bsize(cpi->tpl_data.tpl_bsize_1d);
+ const BLOCK_SIZE bsize =
+ convert_length_to_bsize(cpi->ppi->tpl_data.tpl_bsize_1d);
const TX_SIZE tx_size = max_txsize_lookup[bsize];
const int mi_height = mi_size_high[bsize];
for (int mi_row = 0; mi_row < mi_params->mi_rows; mi_row += mi_height) {
// Motion estimation row boundary
av1_set_mv_row_limits(mi_params, &x->mv_limits, mi_row, mi_height,
- cpi->tpl_data.border_in_pixels);
+ cpi->ppi->tpl_data.border_in_pixels);
xd->mb_to_top_edge = -GET_MV_SUBPEL(mi_row * MI_SIZE);
xd->mb_to_bottom_edge =
GET_MV_SUBPEL((mi_params->mi_rows - mi_height - mi_row) * MI_SIZE);
- av1_mc_flow_dispenser_row(cpi, x, mi_row, bsize, tx_size);
- }
- if (!tpl_use_multithread(cpi)) {
- // TODO(angiebird): make this work for multithread
- TplDepFrame *tpl_frame = &cpi->tpl_data.tpl_frame[cpi->tpl_data.frame_idx];
- tpl_stats_update_abs_coeff_mean(tpl_frame);
+ av1_mc_flow_dispenser_row(cpi, &td->tpl_txfm_stats, x, mi_row, bsize,
+ tx_size);
}
}
-static void mc_flow_synthesizer(AV1_COMP *cpi, int frame_idx) {
- AV1_COMMON *cm = &cpi->common;
- TplParams *const tpl_data = &cpi->tpl_data;
-
+static void mc_flow_synthesizer(TplParams *tpl_data, int frame_idx, int mi_rows,
+ int mi_cols) {
+ if (!frame_idx) {
+ return;
+ }
const BLOCK_SIZE bsize = convert_length_to_bsize(tpl_data->tpl_bsize_1d);
const int mi_height = mi_size_high[bsize];
const int mi_width = mi_size_wide[bsize];
assert(mi_height == (1 << tpl_data->tpl_stats_block_mis_log2));
assert(mi_width == (1 << tpl_data->tpl_stats_block_mis_log2));
- for (int mi_row = 0; mi_row < cm->mi_params.mi_rows; mi_row += mi_height) {
- for (int mi_col = 0; mi_col < cm->mi_params.mi_cols; mi_col += mi_width) {
- if (frame_idx) {
- tpl_model_update(tpl_data, mi_row, mi_col, frame_idx);
- }
+ for (int mi_row = 0; mi_row < mi_rows; mi_row += mi_height) {
+ for (int mi_col = 0; mi_col < mi_cols; mi_col += mi_width) {
+ tpl_model_update(tpl_data, mi_row, mi_col, frame_idx);
}
}
}
@@ -1253,12 +1239,17 @@ static AOM_INLINE void init_gop_frames_for_tpl(
GF_GROUP *gf_group, int gop_eval, int *tpl_group_frames,
const EncodeFrameInput *const frame_input, int *pframe_qindex) {
AV1_COMMON *cm = &cpi->common;
- int cur_frame_idx = gf_group->index;
+ int cur_frame_idx = cpi->gf_frame_index;
*pframe_qindex = 0;
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ RefFrameMapPair ref_frame_map_pairs[REF_FRAMES];
+ init_ref_map_pair(cpi, ref_frame_map_pairs);
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
+
RefBufferStack ref_buffer_stack = cpi->ref_buffer_stack;
EncodeFrameParams frame_params = *init_frame_params;
- TplParams *const tpl_data = &cpi->tpl_data;
+ TplParams *const tpl_data = &cpi->ppi->tpl_data;
int ref_picture_map[REF_FRAMES];
@@ -1288,7 +1279,7 @@ static AOM_INLINE void init_gop_frames_for_tpl(
TplDepFrame *tpl_frame = &tpl_data->tpl_frame[gf_index];
FRAME_UPDATE_TYPE frame_update_type = gf_group->update_type[gf_index];
int frame_display_index = gf_index == gf_group->size
- ? cpi->rc.baseline_gf_interval
+ ? cpi->ppi->p_rc.baseline_gf_interval
: gf_group->cur_frame_idx[gf_index] +
gf_group->arf_src_offset[gf_index];
@@ -1317,7 +1308,7 @@ static AOM_INLINE void init_gop_frames_for_tpl(
}
if (gop_eval && cpi->rc.frames_since_key > 0 &&
gf_group->arf_index == gf_index)
- tpl_frame->gf_picture = &cpi->alt_ref_buffer;
+ tpl_frame->gf_picture = &cpi->ppi->alt_ref_buffer;
// 'cm->current_frame.frame_number' is the display number
// of the current frame.
@@ -1338,15 +1329,45 @@ static AOM_INLINE void init_gop_frames_for_tpl(
tpl_frame->tpl_stats_ptr = tpl_data->tpl_stats_pool[process_frame_count];
++process_frame_count;
}
-
- av1_get_ref_frames(cpi, &ref_buffer_stack);
- int refresh_mask = av1_get_refresh_frame_flags(
- cpi, &frame_params, frame_update_type, &ref_buffer_stack);
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ const int true_disp = (int)(tpl_frame->frame_display_index);
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
+
+ av1_get_ref_frames(&ref_buffer_stack,
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ cpi, ref_frame_map_pairs, true_disp,
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
+ cm->remapped_ref_idx);
+
+ int refresh_mask =
+ av1_get_refresh_frame_flags(cpi, &frame_params, frame_update_type,
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ true_disp, ref_frame_map_pairs,
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
+ &ref_buffer_stack);
+
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ // Make the frames marked as is_frame_non_ref to non-reference frames.
+ if (cpi->ppi->gf_group.is_frame_non_ref[gf_index]) refresh_mask = 0;
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
int refresh_frame_map_index = av1_get_refresh_ref_frame_map(refresh_mask);
+#if !CONFIG_FRAME_PARALLEL_ENCODE
av1_update_ref_frame_map(cpi, frame_update_type, frame_params.frame_type,
frame_params.show_existing_frame,
refresh_frame_map_index, &ref_buffer_stack);
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
+
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ if (refresh_frame_map_index < REF_FRAMES &&
+ refresh_frame_map_index != INVALID_IDX) {
+ ref_frame_map_pairs[refresh_frame_map_index].disp_order =
+ AOMMAX(0, true_disp);
+ ref_frame_map_pairs[refresh_frame_map_index].pyr_level =
+ get_true_pyr_level(gf_group->layer_depth[gf_index], true_disp,
+ cpi->ppi->gf_group.max_layer_depth);
+ }
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
for (int i = LAST_FRAME; i <= ALTREF_FRAME; ++i)
tpl_frame->ref_map_index[i - LAST_FRAME] =
@@ -1360,8 +1381,9 @@ static AOM_INLINE void init_gop_frames_for_tpl(
if (cpi->rc.frames_since_key == 0) return;
int extend_frame_count = 0;
- int extend_frame_length = AOMMIN(
- MAX_TPL_EXTEND, cpi->rc.frames_to_key - cpi->rc.baseline_gf_interval);
+ int extend_frame_length =
+ AOMMIN(MAX_TPL_EXTEND,
+ cpi->rc.frames_to_key - cpi->ppi->p_rc.baseline_gf_interval);
int frame_display_index = gf_group->cur_frame_idx[gop_length - 1] +
gf_group->arf_src_offset[gop_length - 1] + 1;
@@ -1400,14 +1422,37 @@ static AOM_INLINE void init_gop_frames_for_tpl(
gf_group->update_type[gf_index] = LF_UPDATE;
gf_group->q_val[gf_index] = *pframe_qindex;
-
- av1_get_ref_frames(cpi, &ref_buffer_stack);
- int refresh_mask = av1_get_refresh_frame_flags(
- cpi, &frame_params, frame_update_type, &ref_buffer_stack);
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ const int true_disp = (int)(tpl_frame->frame_display_index);
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
+ av1_get_ref_frames(&ref_buffer_stack,
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ cpi, ref_frame_map_pairs, true_disp,
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
+ cm->remapped_ref_idx);
+ int refresh_mask =
+ av1_get_refresh_frame_flags(cpi, &frame_params, frame_update_type,
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ true_disp, ref_frame_map_pairs,
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
+ &ref_buffer_stack);
int refresh_frame_map_index = av1_get_refresh_ref_frame_map(refresh_mask);
+#if !CONFIG_FRAME_PARALLEL_ENCODE
av1_update_ref_frame_map(cpi, frame_update_type, frame_params.frame_type,
frame_params.show_existing_frame,
refresh_frame_map_index, &ref_buffer_stack);
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
+
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ if (refresh_frame_map_index < REF_FRAMES &&
+ refresh_frame_map_index != INVALID_IDX) {
+ ref_frame_map_pairs[refresh_frame_map_index].disp_order =
+ AOMMAX(0, true_disp);
+ ref_frame_map_pairs[refresh_frame_map_index].pyr_level =
+ get_true_pyr_level(gf_group->layer_depth[gf_index], true_disp,
+ cpi->ppi->gf_group.max_layer_depth);
+ }
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
for (int i = LAST_FRAME; i <= ALTREF_FRAME; ++i)
tpl_frame->ref_map_index[i - LAST_FRAME] =
@@ -1424,8 +1469,16 @@ static AOM_INLINE void init_gop_frames_for_tpl(
++extend_frame_count;
++frame_display_index;
}
-
- av1_get_ref_frames(cpi, &cpi->ref_buffer_stack);
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ TplDepFrame *tpl_frame = &tpl_data->tpl_frame[cur_frame_idx];
+ const int true_disp = (int)(tpl_frame->frame_display_index);
+ init_ref_map_pair(cpi, ref_frame_map_pairs);
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
+ av1_get_ref_frames(&cpi->ref_buffer_stack,
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ cpi, ref_frame_map_pairs, true_disp,
+#endif // CONFIG_FRAME_PARALLEL_ENCODE
+ cm->remapped_ref_idx);
}
void av1_init_tpl_stats(TplParams *const tpl_data) {
@@ -1440,9 +1493,47 @@ void av1_init_tpl_stats(TplParams *const tpl_data) {
sizeof(*tpl_frame->tpl_stats_ptr));
tpl_frame->is_valid = 0;
}
- for (frame_idx = 0; frame_idx < MAX_LENGTH_TPL_FRAME_STATS; ++frame_idx) {
- TplDepFrame *tpl_frame = &tpl_data->tpl_stats_buffer[frame_idx];
- av1_tpl_stats_init_txfm_stats(tpl_frame, tpl_data->tpl_bsize_1d);
+#if CONFIG_BITRATE_ACCURACY
+ tpl_data->estimated_gop_bitrate = 0;
+ tpl_data->actual_gop_bitrate = 0;
+#endif
+}
+
+static AOM_INLINE int eval_gop_length(double *beta, int gop_eval) {
+ switch (gop_eval) {
+ case 1:
+ // Allow larger GOP size if the base layer ARF has higher dependency
+ // factor than the intermediate ARF and both ARFs have reasonably high
+ // dependency factors.
+ return (beta[0] >= beta[1] + 0.7) && beta[0] > 8.0;
+ case 2:
+ if ((beta[0] >= beta[1] + 0.4) && beta[0] > 1.6)
+ return 1; // Don't shorten the gf interval
+ else if ((beta[0] < beta[1] + 0.1) || beta[0] <= 1.4)
+ return 0; // Shorten the gf interval
+ else
+ return 2; // Cannot decide the gf interval, so redo the
+ // tpl stats calculation.
+ case 3: return beta[0] > 1.1;
+ default: return 2;
+ }
+}
+
+// TODO(jingning): Restructure av1_rc_pick_q_and_bounds() to narrow down
+// the scope of input arguments.
+void av1_tpl_preload_rc_estimate(AV1_COMP *cpi,
+ const EncodeFrameParams *const frame_params) {
+ AV1_COMMON *cm = &cpi->common;
+ GF_GROUP *gf_group = &cpi->ppi->gf_group;
+ int bottom_index, top_index;
+ cm->current_frame.frame_type = frame_params->frame_type;
+ for (int gf_index = cpi->gf_frame_index; gf_index < gf_group->size;
+ ++gf_index) {
+ cm->current_frame.frame_type = gf_group->frame_type[gf_index];
+ cm->show_frame = gf_group->update_type[gf_index] != ARF_UPDATE &&
+ gf_group->update_type[gf_index] != INTNL_ARF_UPDATE;
+ gf_group->q_val[gf_index] = av1_rc_pick_q_and_bounds(
+ cpi, cm->width, cm->height, gf_index, &bottom_index, &top_index);
}
}
@@ -1455,10 +1546,17 @@ int av1_tpl_setup_stats(AV1_COMP *cpi, int gop_eval,
AV1_COMMON *cm = &cpi->common;
MultiThreadInfo *const mt_info = &cpi->mt_info;
AV1TplRowMultiThreadInfo *const tpl_row_mt = &mt_info->tpl_row_mt;
- GF_GROUP *gf_group = &cpi->gf_group;
- int bottom_index, top_index;
+ GF_GROUP *gf_group = &cpi->ppi->gf_group;
EncodeFrameParams this_frame_params = *frame_params;
- TplParams *const tpl_data = &cpi->tpl_data;
+ TplParams *const tpl_data = &cpi->ppi->tpl_data;
+ int approx_gop_eval = (gop_eval > 1);
+ int num_arf_layers = MAX_ARF_LAYERS;
+
+ // When gop_eval is set to 2, tpl stats calculation is done for ARFs from base
+ // layer, (base+1) layer and (base+2) layer. When gop_eval is set to 3,
+ // tpl stats calculation is limited to ARFs from base layer and (base+1)
+ // layer.
+ if (approx_gop_eval) num_arf_layers = (gop_eval == 2) ? 3 : 2;
if (cpi->superres_mode != AOM_SUPERRES_NONE) {
assert(cpi->superres_mode != AOM_SUPERRES_AUTO);
@@ -1467,7 +1565,8 @@ int av1_tpl_setup_stats(AV1_COMP *cpi, int gop_eval,
}
cm->current_frame.frame_type = frame_params->frame_type;
- for (int gf_index = gf_group->index; gf_index < gf_group->size; ++gf_index) {
+ for (int gf_index = cpi->gf_frame_index; gf_index < gf_group->size;
+ ++gf_index) {
cm->current_frame.frame_type = gf_group->frame_type[gf_index];
av1_configure_buffer_updates(cpi, &this_frame_params.refresh_frame,
gf_group->update_type[gf_index],
@@ -1475,13 +1574,6 @@ int av1_tpl_setup_stats(AV1_COMP *cpi, int gop_eval,
memcpy(&cpi->refresh_frame, &this_frame_params.refresh_frame,
sizeof(cpi->refresh_frame));
-
- cm->show_frame = gf_group->update_type[gf_index] != ARF_UPDATE &&
- gf_group->update_type[gf_index] != INTNL_ARF_UPDATE;
-
- gf_group->q_val[gf_index] =
- av1_rc_pick_q_and_bounds(cpi, &cpi->rc, cm->width, cm->height, gf_index,
- &bottom_index, &top_index);
}
int pframe_qindex;
@@ -1489,7 +1581,7 @@ int av1_tpl_setup_stats(AV1_COMP *cpi, int gop_eval,
init_gop_frames_for_tpl(cpi, frame_params, gf_group, gop_eval,
&tpl_gf_group_frames, frame_input, &pframe_qindex);
- cpi->rc.base_layer_qp = pframe_qindex;
+ cpi->ppi->p_rc.base_layer_qp = pframe_qindex;
av1_init_tpl_stats(tpl_data);
@@ -1505,37 +1597,59 @@ int av1_tpl_setup_stats(AV1_COMP *cpi, int gop_eval,
av1_fill_mv_costs(&cm->fc->nmvc, cm->features.cur_frame_force_integer_mv,
cm->features.allow_high_precision_mv, cpi->td.mb.mv_costs);
+ const int gop_length = get_gop_length(gf_group);
// Backward propagation from tpl_group_frames to 1.
- for (int frame_idx = gf_group->index; frame_idx < tpl_gf_group_frames;
+ for (int frame_idx = cpi->gf_frame_index; frame_idx < tpl_gf_group_frames;
++frame_idx) {
if (gf_group->update_type[frame_idx] == INTNL_OVERLAY_UPDATE ||
gf_group->update_type[frame_idx] == OVERLAY_UPDATE)
continue;
+ // When approx_gop_eval = 1, skip tpl stats calculation for higher layer
+ // frames and for frames beyond gop length.
+ if (approx_gop_eval && (gf_group->layer_depth[frame_idx] > num_arf_layers ||
+ frame_idx >= gop_length))
+ continue;
+
init_mc_flow_dispenser(cpi, frame_idx, pframe_qindex);
- if (tpl_use_multithread(cpi)) {
+ if (mt_info->num_workers > 1) {
tpl_row_mt->sync_read_ptr = av1_tpl_row_mt_sync_read;
tpl_row_mt->sync_write_ptr = av1_tpl_row_mt_sync_write;
av1_mc_flow_dispenser_mt(cpi);
} else {
mc_flow_dispenser(cpi);
}
+ av1_tpl_store_txfm_stats(tpl_data, &cpi->td.tpl_txfm_stats, frame_idx);
aom_extend_frame_borders(tpl_data->tpl_frame[frame_idx].rec_picture,
av1_num_planes(cm));
}
- for (int frame_idx = tpl_gf_group_frames - 1; frame_idx >= gf_group->index;
- --frame_idx) {
+#if CONFIG_BITRATE_ACCURACY
+ tpl_data->estimated_gop_bitrate = av1_estimate_gop_bitrate(
+ gf_group->q_val, gf_group->size, tpl_data->txfm_stats_list);
+ if (gf_group->update_type[cpi->gf_frame_index] == ARF_UPDATE &&
+ gop_eval == 0) {
+ printf("\nestimated bitrate: %f\n", tpl_data->estimated_gop_bitrate);
+ }
+#endif
+
+ for (int frame_idx = tpl_gf_group_frames - 1;
+ frame_idx >= cpi->gf_frame_index; --frame_idx) {
if (gf_group->update_type[frame_idx] == INTNL_OVERLAY_UPDATE ||
gf_group->update_type[frame_idx] == OVERLAY_UPDATE)
continue;
- mc_flow_synthesizer(cpi, frame_idx);
+ if (approx_gop_eval && (gf_group->layer_depth[frame_idx] > num_arf_layers ||
+ frame_idx >= gop_length))
+ continue;
+
+ mc_flow_synthesizer(tpl_data, frame_idx, cm->mi_params.mi_rows,
+ cm->mi_params.mi_cols);
}
av1_configure_buffer_updates(cpi, &this_frame_params.refresh_frame,
- gf_group->update_type[gf_group->index],
+ gf_group->update_type[cpi->gf_frame_index],
frame_params->frame_type, 0);
cm->current_frame.frame_type = frame_params->frame_type;
cm->show_frame = frame_params->show_frame;
@@ -1592,21 +1706,17 @@ int av1_tpl_setup_stats(AV1_COMP *cpi, int gop_eval,
#if CONFIG_COLLECT_COMPONENT_TIMING
end_timing(cpi, av1_tpl_setup_stats_time);
#endif
-
- // Allow larger GOP size if the base layer ARF has higher dependency factor
- // than the intermediate ARF and both ARFs have reasonably high dependency
- // factors.
- return (beta[0] >= beta[1] + 0.7) && beta[0] > 8.0;
+ return eval_gop_length(beta, gop_eval);
}
void av1_tpl_rdmult_setup(AV1_COMP *cpi) {
const AV1_COMMON *const cm = &cpi->common;
- const GF_GROUP *const gf_group = &cpi->gf_group;
- const int tpl_idx = gf_group->index;
+ const int tpl_idx = cpi->gf_frame_index;
- assert(IMPLIES(gf_group->size > 0, tpl_idx < gf_group->size));
+ assert(
+ IMPLIES(cpi->ppi->gf_group.size > 0, tpl_idx < cpi->ppi->gf_group.size));
- TplParams *const tpl_data = &cpi->tpl_data;
+ TplParams *const tpl_data = &cpi->ppi->tpl_data;
const TplDepFrame *const tpl_frame = &tpl_data->tpl_frame[tpl_idx];
if (!tpl_frame->is_valid) return;
@@ -1623,8 +1733,6 @@ void av1_tpl_rdmult_setup(AV1_COMP *cpi) {
const double c = 1.2;
const int step = 1 << tpl_data->tpl_stats_block_mis_log2;
- aom_clear_system_state();
-
// Loop through each 'block_size' X 'block_size' block.
for (int row = 0; row < num_rows; row++) {
for (int col = 0; col < num_cols; col++) {
@@ -1647,24 +1755,23 @@ void av1_tpl_rdmult_setup(AV1_COMP *cpi) {
}
const double rk = intra_cost / mc_dep_cost;
const int index = row * num_cols + col;
- cpi->tpl_rdmult_scaling_factors[index] = rk / cpi->rd.r0 + c;
+ cpi->ppi->tpl_rdmult_scaling_factors[index] = rk / cpi->rd.r0 + c;
}
}
- aom_clear_system_state();
}
void av1_tpl_rdmult_setup_sb(AV1_COMP *cpi, MACROBLOCK *const x,
BLOCK_SIZE sb_size, int mi_row, int mi_col) {
AV1_COMMON *const cm = &cpi->common;
- GF_GROUP *gf_group = &cpi->gf_group;
- assert(IMPLIES(cpi->gf_group.size > 0,
- cpi->gf_group.index < cpi->gf_group.size));
- const int tpl_idx = cpi->gf_group.index;
- TplDepFrame *tpl_frame = &cpi->tpl_data.tpl_frame[tpl_idx];
-
- if (tpl_frame->is_valid == 0) return;
- if (!is_frame_tpl_eligible(gf_group, gf_group->index)) return;
+ GF_GROUP *gf_group = &cpi->ppi->gf_group;
+ assert(IMPLIES(cpi->ppi->gf_group.size > 0,
+ cpi->gf_frame_index < cpi->ppi->gf_group.size));
+ const int tpl_idx = cpi->gf_frame_index;
+
if (tpl_idx >= MAX_TPL_FRAME_IDX) return;
+ TplDepFrame *tpl_frame = &cpi->ppi->tpl_data.tpl_frame[tpl_idx];
+ if (!tpl_frame->is_valid) return;
+ if (!is_frame_tpl_eligible(gf_group, cpi->gf_frame_index)) return;
if (cpi->oxcf.q_cfg.aq_mode != NO_AQ) return;
const int mi_col_sr =
@@ -1685,13 +1792,12 @@ void av1_tpl_rdmult_setup_sb(AV1_COMP *cpi, MACROBLOCK *const x,
double base_block_count = 0.0;
double log_sum = 0.0;
- aom_clear_system_state();
for (row = mi_row / num_mi_w;
row < num_rows && row < mi_row / num_mi_w + num_brows; ++row) {
for (col = mi_col_sr / num_mi_h;
col < num_cols && col < mi_col_sr / num_mi_h + num_bcols; ++col) {
const int index = row * num_cols + col;
- log_sum += log(cpi->tpl_rdmult_scaling_factors[index]);
+ log_sum += log(cpi->ppi->tpl_rdmult_scaling_factors[index]);
base_block_count += 1.0;
}
}
@@ -1705,33 +1811,30 @@ void av1_tpl_rdmult_setup_sb(AV1_COMP *cpi, MACROBLOCK *const x,
const double scaling_factor = (double)new_rdmult / (double)orig_rdmult;
double scale_adj = log(scaling_factor) - log_sum / base_block_count;
- scale_adj = exp(scale_adj);
+ scale_adj = exp_bounded(scale_adj);
for (row = mi_row / num_mi_w;
row < num_rows && row < mi_row / num_mi_w + num_brows; ++row) {
for (col = mi_col_sr / num_mi_h;
col < num_cols && col < mi_col_sr / num_mi_h + num_bcols; ++col) {
const int index = row * num_cols + col;
- cpi->tpl_sb_rdmult_scaling_factors[index] =
- scale_adj * cpi->tpl_rdmult_scaling_factors[index];
+ cpi->ppi->tpl_sb_rdmult_scaling_factors[index] =
+ scale_adj * cpi->ppi->tpl_rdmult_scaling_factors[index];
}
}
- aom_clear_system_state();
}
-#define EPSILON (0.0000001)
-
double av1_exponential_entropy(double q_step, double b) {
- aom_clear_system_state();
- double z = fmax(exp(-q_step / b), EPSILON);
+ b = AOMMAX(b, TPL_EPSILON);
+ double z = fmax(exp_bounded(-q_step / b), TPL_EPSILON);
return -log2(1 - z) - z * log2(z) / (1 - z);
}
double av1_laplace_entropy(double q_step, double b, double zero_bin_ratio) {
- aom_clear_system_state();
// zero bin's size is zero_bin_ratio * q_step
// non-zero bin's size is q_step
- double z = fmax(exp(-zero_bin_ratio / 2 * q_step / b), EPSILON);
+ b = AOMMAX(b, TPL_EPSILON);
+ double z = fmax(exp_bounded(-zero_bin_ratio / 2 * q_step / b), TPL_EPSILON);
double h = av1_exponential_entropy(q_step, b);
double r = -(1 - z) * log2(1 - z) - z * log2(z) + z * (h + 1);
return r;
@@ -1740,7 +1843,6 @@ double av1_laplace_entropy(double q_step, double b, double zero_bin_ratio) {
double av1_laplace_estimate_frame_rate(int q_index, int block_count,
const double *abs_coeff_mean,
int coeff_num) {
- aom_clear_system_state();
double zero_bin_ratio = 2;
double dc_q_step = av1_dc_quant_QTX(q_index, 0, AOM_BITS_8) / 4.;
double ac_q_step = av1_ac_quant_QTX(q_index, 0, AOM_BITS_8) / 4.;
@@ -1755,3 +1857,58 @@ double av1_laplace_estimate_frame_rate(int q_index, int block_count,
est_rate *= block_count;
return est_rate;
}
+
+double av1_estimate_gop_bitrate(const unsigned char *q_index_list,
+ const int frame_count,
+ const TplTxfmStats *stats_list) {
+ double gop_bitrate = 0;
+ for (int frame_index = 0; frame_index < frame_count; frame_index++) {
+ int q_index = q_index_list[frame_index];
+ TplTxfmStats frame_stats = stats_list[frame_index];
+
+ /* Convert to mean absolute deviation */
+ double abs_coeff_mean[256] = { 0 };
+ for (int i = 0; i < 256; i++) {
+ abs_coeff_mean[i] =
+ frame_stats.abs_coeff_sum[i] / frame_stats.txfm_block_count;
+ }
+
+ double frame_bitrate = av1_laplace_estimate_frame_rate(
+ q_index, frame_stats.txfm_block_count, abs_coeff_mean, 256);
+ gop_bitrate += frame_bitrate;
+ }
+ return gop_bitrate;
+}
+
+double av1_estimate_coeff_entropy(double q_step, double b,
+ double zero_bin_ratio, int qcoeff) {
+ b = AOMMAX(b, TPL_EPSILON);
+ int abs_qcoeff = abs(qcoeff);
+ double z0 = fmax(exp_bounded(-zero_bin_ratio / 2 * q_step / b), TPL_EPSILON);
+ if (abs_qcoeff == 0) {
+ double r = -log2(1 - z0);
+ return r;
+ } else {
+ double z = fmax(exp_bounded(-q_step / b), TPL_EPSILON);
+ double r = 1 - log2(z0) - log2(1 - z) - (abs_qcoeff - 1) * log2(z);
+ return r;
+ }
+}
+
+double av1_estimate_txfm_block_entropy(int q_index,
+ const double *abs_coeff_mean,
+ int *qcoeff_arr, int coeff_num) {
+ double zero_bin_ratio = 2;
+ double dc_q_step = av1_dc_quant_QTX(q_index, 0, AOM_BITS_8) / 4.;
+ double ac_q_step = av1_ac_quant_QTX(q_index, 0, AOM_BITS_8) / 4.;
+ double est_rate = 0;
+ // dc coeff
+ est_rate += av1_estimate_coeff_entropy(dc_q_step, abs_coeff_mean[0],
+ zero_bin_ratio, qcoeff_arr[0]);
+ // ac coeff
+ for (int i = 1; i < coeff_num; ++i) {
+ est_rate += av1_estimate_coeff_entropy(ac_q_step, abs_coeff_mean[i],
+ zero_bin_ratio, qcoeff_arr[i]);
+ }
+ return est_rate;
+}
diff --git a/third_party/libaom/source/libaom/av1/encoder/tpl_model.h b/third_party/libaom/source/libaom/av1/encoder/tpl_model.h
index 4b85740f3e..c764d92239 100644
--- a/third_party/libaom/source/libaom/av1/encoder/tpl_model.h
+++ b/third_party/libaom/source/libaom/av1/encoder/tpl_model.h
@@ -18,11 +18,20 @@ extern "C" {
/*!\cond */
+struct AV1_PRIMARY;
struct AV1_COMP;
+struct AV1_SEQ_CODING_TOOLS;
struct EncodeFrameParams;
struct EncodeFrameInput;
-#include "av1/encoder/encoder.h"
+#include "config/aom_config.h"
+
+#include "aom_scale/yv12config.h"
+
+#include "av1/common/mv.h"
+#include "av1/common/scale.h"
+#include "av1/encoder/block.h"
+#include "av1/encoder/lookahead.h"
static INLINE BLOCK_SIZE convert_length_to_bsize(int length) {
switch (length) {
@@ -82,6 +91,14 @@ typedef struct AV1TplRowMultiThreadInfo {
#define MAX_TPL_EXTEND (MAX_LAG_BUFFERS - MAX_GF_INTERVAL)
#define TPL_DEP_COST_SCALE_LOG2 4
+#define TPL_EPSILON 0.0000001
+
+typedef struct TplTxfmStats {
+ double abs_coeff_sum[256]; // Assume we are using 16x16 transform block
+ int txfm_block_count;
+ int coeff_num;
+} TplTxfmStats;
+
typedef struct TplDepStats {
int64_t intra_cost;
int64_t inter_cost;
@@ -90,6 +107,7 @@ typedef struct TplDepStats {
int64_t cmp_recrf_dist[2];
int64_t srcrf_rate;
int64_t recrf_rate;
+ int64_t srcrf_sse;
int64_t cmp_recrf_rate[2];
int64_t mc_dep_rate;
int64_t mc_dep_dist;
@@ -111,10 +129,6 @@ typedef struct TplDepFrame {
int mi_cols;
int base_rdmult;
uint32_t frame_display_index;
- double abs_coeff_sum[256]; // Assume we are using 16x16 transform block
- double abs_coeff_mean[256];
- int coeff_num; // number of coefficients in a transform block
- int txfm_block_count;
} TplDepFrame;
/*!\endcond */
@@ -147,6 +161,12 @@ typedef struct TplParams {
TplDepStats *tpl_stats_pool[MAX_LAG_BUFFERS];
/*!
+ * Buffer to store tpl transform stats per frame.
+ * txfm_stats_list[i] stores the TplTxfmStats of the ith frame in a gf group.
+ */
+ TplTxfmStats txfm_stats_list[MAX_LENGTH_TPL_FRAME_STATS];
+
+ /*!
* Buffer to store tpl reconstructed frame.
* tpl_rec_pool[i] stores the reconstructed frame of ith frame in a gf group.
*/
@@ -192,10 +212,13 @@ typedef struct TplParams {
*/
int border_in_pixels;
- /*!
- * Skip tpl setup when tpl data from gop length decision can be reused.
+#if CONFIG_BITRATE_ACCURACY
+ /*
+ * Estimated and actual GOP bitrate.
*/
- int skip_tpl_setup_stats;
+ double estimated_gop_bitrate;
+ double actual_gop_bitrate;
+#endif
} TplParams;
/*!\brief Allocate buffers used by tpl model
@@ -206,8 +229,9 @@ typedef struct TplParams {
* \param[out] tpl_data tpl data structure
*/
-void av1_setup_tpl_buffers(AV1_COMMON *const cm, TplParams *const tpl_data,
- int lag_in_frames);
+void av1_setup_tpl_buffers(struct AV1_PRIMARY *const ppi,
+ CommonModeInfoParams *const mi_params, int width,
+ int height, int byte_alignment, int lag_in_frames);
/*!\brief Implements temporal dependency modelling for a GOP (GF/ARF
* group) and selects between 16 and 32 frame GOP structure.
@@ -227,6 +251,9 @@ int av1_tpl_setup_stats(struct AV1_COMP *cpi, int gop_eval,
/*!\cond */
+void av1_tpl_preload_rc_estimate(
+ struct AV1_COMP *cpi, const struct EncodeFrameParams *const frame_params);
+
int av1_tpl_ptr_pos(int mi_row, int mi_col, int stride, uint8_t right_shift);
void av1_init_tpl_stats(TplParams *const tpl_data);
@@ -236,8 +263,9 @@ void av1_tpl_rdmult_setup(struct AV1_COMP *cpi);
void av1_tpl_rdmult_setup_sb(struct AV1_COMP *cpi, MACROBLOCK *const x,
BLOCK_SIZE sb_size, int mi_row, int mi_col);
-void av1_mc_flow_dispenser_row(struct AV1_COMP *cpi, MACROBLOCK *x, int mi_row,
- BLOCK_SIZE bsize, TX_SIZE tx_size);
+void av1_mc_flow_dispenser_row(struct AV1_COMP *cpi,
+ TplTxfmStats *tpl_txfm_stats, MACROBLOCK *x,
+ int mi_row, BLOCK_SIZE bsize, TX_SIZE tx_size);
/*!\brief Compute the entropy of an exponential probability distribution
* function (pdf) subjected to uniform quantization.
@@ -271,7 +299,7 @@ double av1_laplace_entropy(double q_step, double b, double zero_bin_ratio);
/*!\brief Compute the frame rate using transform block stats
*
* Assume each position i in the transform block is of Laplace distribution
- * with maximum absolute deviation abs_coeff_mean[i]
+ * with mean absolute deviation abs_coeff_mean[i]
*
* Then we can use av1_laplace_entropy() to compute the expected frame
* rate.
@@ -280,7 +308,7 @@ double av1_laplace_entropy(double q_step, double b, double zero_bin_ratio);
*
* \param[in] q_index quantizer index
* \param[in] block_count number of transform blocks
- * \param[in] abs_coeff_mean array of maximum absolute deviation
+ * \param[in] abs_coeff_mean array of mean absolute deviation
* \param[in] coeff_num number of coefficients per transform block
*
* \return expected frame rate
@@ -289,15 +317,104 @@ double av1_laplace_estimate_frame_rate(int q_index, int block_count,
const double *abs_coeff_mean,
int coeff_num);
-/*!\brief Init data structure storing transform stats
+/*!
+ *\brief Compute the number of bits needed to encode a GOP
+ *
+ * \param[in] q_index_list array of q_index, one per frame
+ * \param[in] frame_count number of frames in the GOP
+ * \param[in] stats array of transform stats, one per frame
+ *
+ */
+double av1_estimate_gop_bitrate(const unsigned char *q_index_list,
+ const int frame_count,
+ const TplTxfmStats *stats);
+
+/*!
+ *\brief Init TplTxfmStats
+ *
+ * \param[in] tpl_txfm_stats a structure for storing transform stats
+ *
+ *
+ */
+void av1_init_tpl_txfm_stats(TplTxfmStats *tpl_txfm_stats);
+
+/*!
+ *\brief Accumulate TplTxfmStats
+ *
+ * \param[in] sub_stats a structure for storing sub transform stats
+ * \param[out] accumulated_stats a structure for storing accumulated transform
+ *stats
+ *
+ */
+void av1_accumulate_tpl_txfm_stats(const TplTxfmStats *sub_stats,
+ TplTxfmStats *accumulated_stats);
+
+/*!
+ *\brief Record a transform block into TplTxfmStats
+ *
+ * \param[in] tpl_txfm_stats A structure for storing transform stats
+ * \param[out] coeff An array of transform coefficients. Its size
+ * should equal to tpl_txfm_stats.coeff_num.
+ *
+ */
+void av1_record_tpl_txfm_block(TplTxfmStats *tpl_txfm_stats,
+ const tran_low_t *coeff);
+
+/*!\brief Estimate coefficient entropy using Laplace distribution
*
*\ingroup tpl_modelling
*
- * \param[in] tpl_frame pointer of tpl frame data structure
+ * This function is equivalent to -log2(laplace_prob()), where laplace_prob() is
+ * defined in tpl_model_test.cc
+ *
+ * \param[in] q_step quantizer step size without any scaling
+ * \param[in] b mean absolute deviation of Laplace distribution
+ * \param[in] zero_bin_ratio zero bin's size is zero_bin_ratio * q_step
+ * \param[in] qcoeff quantized coefficient
+ *
+ * \return estimated coefficient entropy
+ *
+ */
+double av1_estimate_coeff_entropy(double q_step, double b,
+ double zero_bin_ratio, int qcoeff);
+
+/*!\brief Estimate entropy of a transform block using Laplace distribution
+ *
+ *\ingroup tpl_modelling
+ *
+ * \param[in] q_index quantizer index
+ * \param[in] abs_coeff_mean array of mean absolute deviations
+ * \param[in] qcoeff_arr array of quantized coefficients
* \param[in] coeff_num number of coefficients per transform block
*
+ * \return estimated transform block entropy
+ *
+ */
+double av1_estimate_txfm_block_entropy(int q_index,
+ const double *abs_coeff_mean,
+ int *qcoeff_arr, int coeff_num);
+
+// TODO(angiebird): Add doxygen description here.
+int64_t av1_delta_rate_cost(int64_t delta_rate, int64_t recrf_dist,
+ int64_t srcrf_dist, int pix_num);
+
+/*!\brief Compute the overlap area between two blocks with the same size
+ *
+ *\ingroup tpl_modelling
+ *
+ * If there is no overlap, this function should return zero.
+ *
+ * \param[in] row_a row position of the first block
+ * \param[in] col_a column position of the first block
+ * \param[in] row_b row position of the second block
+ * \param[in] col_b column position of the second block
+ * \param[in] width width shared by the two blocks
+ * \param[in] height height shared by the two blocks
+ *
+ * \return overlap area of the two blocks
*/
-void av1_tpl_stats_init_txfm_stats(TplDepFrame *tpl_frame, int coeff_num);
+int av1_get_overlap_area(int row_a, int col_a, int row_b, int col_b, int width,
+ int height);
/*!\endcond */
#ifdef __cplusplus
diff --git a/third_party/libaom/source/libaom/av1/encoder/tune_butteraugli.c b/third_party/libaom/source/libaom/av1/encoder/tune_butteraugli.c
index 39940e8aa6..f82e910595 100644
--- a/third_party/libaom/source/libaom/av1/encoder/tune_butteraugli.c
+++ b/third_party/libaom/source/libaom/av1/encoder/tune_butteraugli.c
@@ -15,24 +15,34 @@
#include "aom_dsp/butteraugli.h"
#include "aom_ports/system_state.h"
-#include "av1/encoder/rdopt.h"
+#include "av1/encoder/encodeframe.h"
+#include "av1/encoder/encoder_utils.h"
#include "av1/encoder/extend.h"
+#include "av1/encoder/var_based_part.h"
static const int resize_factor = 2;
-void set_mb_butteraugli_rdmult_scaling(AV1_COMP *cpi,
- const YV12_BUFFER_CONFIG *source,
- const YV12_BUFFER_CONFIG *recon) {
+static void set_mb_butteraugli_rdmult_scaling(AV1_COMP *cpi,
+ const YV12_BUFFER_CONFIG *source,
+ const YV12_BUFFER_CONFIG *recon,
+ const double K) {
AV1_COMMON *const cm = &cpi->common;
+ SequenceHeader *const seq_params = cm->seq_params;
const CommonModeInfoParams *const mi_params = &cm->mi_params;
+ const aom_color_range_t color_range =
+ seq_params->color_range != 0 ? AOM_CR_FULL_RANGE : AOM_CR_STUDIO_RANGE;
const int bit_depth = cpi->td.mb.e_mbd.bd;
const int width = source->y_crop_width;
const int height = source->y_crop_height;
+ const int ss_x = source->subsampling_x;
+ const int ss_y = source->subsampling_y;
float *diffmap;
CHECK_MEM_ERROR(cm, diffmap, aom_malloc(width * height * sizeof(*diffmap)));
- if (!aom_calc_butteraugli(source, recon, bit_depth, diffmap)) {
- aom_internal_error(&cm->error, AOM_CODEC_ERROR,
+ if (!aom_calc_butteraugli(source, recon, bit_depth,
+ seq_params->matrix_coefficients, color_range,
+ diffmap)) {
+ aom_internal_error(cm->error, AOM_CODEC_ERROR,
"Failed to calculate Butteraugli distances.");
}
@@ -55,6 +65,7 @@ void set_mb_butteraugli_rdmult_scaling(AV1_COMP *cpi,
const int x_start = col * block_w;
float dbutteraugli = 0.0f;
float dmse = 0.0f;
+ float px_count = 0.0f;
// Loop through each pixel.
for (int y = y_start; y < y_start + block_h && y < height; y++) {
@@ -63,25 +74,28 @@ void set_mb_butteraugli_rdmult_scaling(AV1_COMP *cpi,
float px_diff = source->y_buffer[y * source->y_stride + x] -
recon->y_buffer[y * recon->y_stride + x];
dmse += px_diff * px_diff;
+ px_count += 1.0f;
}
}
- for (int y = y_start; y < y_start + block_h && y < height; y += 2) {
- for (int x = x_start; x < x_start + block_w && x < width; x += 2) {
- const int src_px_index = y / 2 * source->uv_stride + x / 2;
- const int recon_px_index = y / 2 * recon->uv_stride + x / 2;
+ const int y_end = AOMMIN((y_start >> ss_y) + (block_h >> ss_y),
+ (height + ss_y) >> ss_y);
+ for (int y = y_start >> ss_y; y < y_end; y++) {
+ const int x_end = AOMMIN((x_start >> ss_x) + (block_w >> ss_x),
+ (width + ss_x) >> ss_x);
+ for (int x = x_start >> ss_x; x < x_end; x++) {
+ const int src_px_index = y * source->uv_stride + x;
+ const int recon_px_index = y * recon->uv_stride + x;
const float px_diff_u = (float)(source->u_buffer[src_px_index] -
recon->u_buffer[recon_px_index]);
const float px_diff_v = (float)(source->v_buffer[src_px_index] -
recon->v_buffer[recon_px_index]);
dmse += px_diff_u * px_diff_u + px_diff_v * px_diff_v;
+ px_count += 2.0f;
}
}
dbutteraugli = powf(dbutteraugli, 1.0f / 12.0f);
- dmse = dmse / (2.0f * (float)block_w * (float)block_h);
- // 'K' is used to balance the rate-distortion distribution between PSNR
- // and Butteraugli.
- const double K = 0.4;
+ dmse = dmse / px_count;
const float eps = 0.01f;
double weight;
if (dbutteraugli < eps || dmse < eps) {
@@ -166,10 +180,12 @@ static void copy_img(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst,
int width, int height) {
copy_plane(src->y_buffer, src->y_stride, dst->y_buffer, dst->y_stride, width,
height);
+ const int width_uv = (width + src->subsampling_x) >> src->subsampling_x;
+ const int height_uv = (height + src->subsampling_y) >> src->subsampling_y;
copy_plane(src->u_buffer, src->uv_stride, dst->u_buffer, dst->uv_stride,
- width / 2, height / 2);
+ width_uv, height_uv);
copy_plane(src->v_buffer, src->uv_stride, dst->v_buffer, dst->uv_stride,
- width / 2, height / 2);
+ width_uv, height_uv);
}
static void zero_plane(uint8_t *dst, int dst_stride, int h) {
@@ -192,9 +208,11 @@ void av1_setup_butteraugli_source(AV1_COMP *cpi) {
const int width = cpi->source->y_crop_width;
const int height = cpi->source->y_crop_height;
const int bit_depth = cpi->td.mb.e_mbd.bd;
+ const int ss_x = cpi->source->subsampling_x;
+ const int ss_y = cpi->source->subsampling_y;
if (dst->buffer_alloc_sz == 0) {
aom_alloc_frame_buffer(
- dst, width, height, 1, 1, cm->seq_params.use_highbitdepth,
+ dst, width, height, ss_x, ss_y, cm->seq_params->use_highbitdepth,
cpi->oxcf.border_in_pixels, cm->features.byte_alignment);
}
av1_copy_and_extend_frame(cpi->source, dst);
@@ -202,8 +220,8 @@ void av1_setup_butteraugli_source(AV1_COMP *cpi) {
YV12_BUFFER_CONFIG *const resized_dst = &cpi->butteraugli_info.resized_source;
if (resized_dst->buffer_alloc_sz == 0) {
aom_alloc_frame_buffer(
- resized_dst, width / resize_factor, height / resize_factor, 1, 1,
- cm->seq_params.use_highbitdepth, cpi->oxcf.border_in_pixels,
+ resized_dst, width / resize_factor, height / resize_factor, ss_x, ss_y,
+ cm->seq_params->use_highbitdepth, cpi->oxcf.border_in_pixels,
cm->features.byte_alignment);
}
av1_resize_and_extend_frame_nonnormative(cpi->source, resized_dst, bit_depth,
@@ -215,25 +233,86 @@ void av1_setup_butteraugli_source(AV1_COMP *cpi) {
aom_clear_system_state();
}
-void av1_restore_butteraugli_source(AV1_COMP *cpi) {
+void av1_setup_butteraugli_rdmult_and_restore_source(AV1_COMP *cpi, double K) {
aom_clear_system_state();
av1_copy_and_extend_frame(&cpi->butteraugli_info.source, cpi->source);
AV1_COMMON *const cm = &cpi->common;
const int width = cpi->source->y_crop_width;
const int height = cpi->source->y_crop_height;
+ const int ss_x = cpi->source->subsampling_x;
+ const int ss_y = cpi->source->subsampling_y;
YV12_BUFFER_CONFIG resized_recon;
memset(&resized_recon, 0, sizeof(resized_recon));
aom_alloc_frame_buffer(
- &resized_recon, width / resize_factor, height / resize_factor, 1, 1,
- cm->seq_params.use_highbitdepth, cpi->oxcf.border_in_pixels,
+ &resized_recon, width / resize_factor, height / resize_factor, ss_x, ss_y,
+ cm->seq_params->use_highbitdepth, cpi->oxcf.border_in_pixels,
cm->features.byte_alignment);
copy_img(&cpi->common.cur_frame->buf, &resized_recon, width / resize_factor,
height / resize_factor);
set_mb_butteraugli_rdmult_scaling(cpi, &cpi->butteraugli_info.resized_source,
- &resized_recon);
+ &resized_recon, K);
cpi->butteraugli_info.recon_set = true;
aom_free_frame_buffer(&resized_recon);
aom_clear_system_state();
}
+
+void av1_setup_butteraugli_rdmult(AV1_COMP *cpi) {
+ AV1_COMMON *const cm = &cpi->common;
+ const AV1EncoderConfig *const oxcf = &cpi->oxcf;
+ const QuantizationCfg *const q_cfg = &oxcf->q_cfg;
+ const int q_index = 96;
+ aom_clear_system_state();
+
+ // Setup necessary params for encoding, including frame source, etc.
+ if (cm->current_frame.frame_type == KEY_FRAME) copy_frame_prob_info(cpi);
+ av1_set_frame_size(cpi, cm->superres_upscaled_width,
+ cm->superres_upscaled_height);
+
+ cpi->source =
+ av1_scale_if_required(cm, cpi->unscaled_source, &cpi->scaled_source,
+ cm->features.interp_filter, 0, false, false);
+ if (cpi->unscaled_last_source != NULL) {
+ cpi->last_source = av1_scale_if_required(
+ cm, cpi->unscaled_last_source, &cpi->scaled_last_source,
+ cm->features.interp_filter, 0, false, false);
+ }
+
+ av1_setup_butteraugli_source(cpi);
+ av1_setup_frame(cpi);
+
+ if (cm->seg.enabled) {
+ if (!cm->seg.update_data && cm->prev_frame) {
+ segfeatures_copy(&cm->seg, &cm->prev_frame->seg);
+ cm->seg.enabled = cm->prev_frame->seg.enabled;
+ } else {
+ av1_calculate_segdata(&cm->seg);
+ }
+ } else {
+ memset(&cm->seg, 0, sizeof(cm->seg));
+ }
+ segfeatures_copy(&cm->cur_frame->seg, &cm->seg);
+ cm->cur_frame->seg.enabled = cm->seg.enabled;
+
+ const PARTITION_SEARCH_TYPE partition_search_type =
+ cpi->sf.part_sf.partition_search_type;
+ const BLOCK_SIZE fixed_partition_size = cpi->sf.part_sf.fixed_partition_size;
+ // Enable a quicker pass by uncommenting the following lines:
+ // cpi->sf.part_sf.partition_search_type = FIXED_PARTITION;
+ // cpi->sf.part_sf.fixed_partition_size = BLOCK_32X32;
+
+ av1_set_quantizer(cm, q_cfg->qm_minlevel, q_cfg->qm_maxlevel, q_index,
+ q_cfg->enable_chroma_deltaq);
+ av1_set_speed_features_qindex_dependent(cpi, oxcf->speed);
+ if (q_cfg->deltaq_mode != NO_DELTA_Q || q_cfg->enable_chroma_deltaq)
+ av1_init_quantizer(&cpi->enc_quant_dequant_params, &cm->quant_params,
+ cm->seq_params->bit_depth);
+
+ av1_set_variance_partition_thresholds(cpi, q_index, 0);
+ av1_encode_frame(cpi);
+
+ av1_setup_butteraugli_rdmult_and_restore_source(cpi, 0.3);
+ cpi->sf.part_sf.partition_search_type = partition_search_type;
+ cpi->sf.part_sf.fixed_partition_size = fixed_partition_size;
+}
diff --git a/third_party/libaom/source/libaom/av1/encoder/tune_butteraugli.h b/third_party/libaom/source/libaom/av1/encoder/tune_butteraugli.h
index a4af31c718..7b7b0b64d3 100644
--- a/third_party/libaom/source/libaom/av1/encoder/tune_butteraugli.h
+++ b/third_party/libaom/source/libaom/av1/encoder/tune_butteraugli.h
@@ -38,6 +38,10 @@ void av1_setup_butteraugli_recon(AV1_COMP *cpi,
void av1_setup_butteraugli_source(AV1_COMP *cpi);
-void av1_restore_butteraugli_source(AV1_COMP *cpi);
+// 'K' is used to balance the rate-distortion distribution between PSNR
+// and Butteraugli.
+void av1_setup_butteraugli_rdmult_and_restore_source(AV1_COMP *cpi, double K);
+
+void av1_setup_butteraugli_rdmult(AV1_COMP *cpi);
#endif // AOM_AV1_ENCODER_TUNE_BUTTERAUGLI_H_
diff --git a/third_party/libaom/source/libaom/av1/encoder/tune_vmaf.c b/third_party/libaom/source/libaom/av1/encoder/tune_vmaf.c
index f5b6129407..0c28cebefa 100644
--- a/third_party/libaom/source/libaom/av1/encoder/tune_vmaf.c
+++ b/third_party/libaom/source/libaom/av1/encoder/tune_vmaf.c
@@ -15,9 +15,7 @@
#include "aom_ports/system_state.h"
#include "av1/encoder/extend.h"
#include "av1/encoder/rdopt.h"
-#if CONFIG_USE_VMAF_RC
#include "config/aom_scale_rtcd.h"
-#endif
static const double kBaselineVmaf = 97.42773;
@@ -89,9 +87,9 @@ static unsigned int residual_variance(const AV1_COMP *cpi,
assert(y_stride == ref->y_stride);
const int y_offset = mb_row * mb_height * y_stride + mb_col * mb_width;
const int mv_offset = ref_mv.row * y_stride + ref_mv.col;
- const unsigned int var =
- cpi->fn_ptr[block_size].vf(ref->y_buffer + y_offset + mv_offset, y_stride,
- src->y_buffer + y_offset, y_stride, sse);
+ const unsigned int var = cpi->ppi->fn_ptr[block_size].vf(
+ ref->y_buffer + y_offset + mv_offset, y_stride, src->y_buffer + y_offset,
+ y_stride, sse);
return var;
}
@@ -117,7 +115,7 @@ static double frame_average_variance(const AV1_COMP *const cpi,
buf.buf = (uint8_t *)y_buffer + row_offset_y * y_stride + col_offset_y;
buf.stride = y_stride;
- if (cpi->common.seq_params.use_highbitdepth) {
+ if (cpi->common.seq_params->use_highbitdepth) {
assert(frame->flags & YV12_FLAG_HIGHBITDEPTH);
var += av1_high_get_sby_perpixel_variance(cpi, &buf, block_size,
bit_depth);
@@ -234,7 +232,7 @@ static AOM_INLINE void unsharp(const AV1_COMP *const cpi,
const YV12_BUFFER_CONFIG *blurred,
const YV12_BUFFER_CONFIG *dst, double amount) {
const int bit_depth = cpi->td.mb.e_mbd.bd;
- if (cpi->common.seq_params.use_highbitdepth) {
+ if (cpi->common.seq_params->use_highbitdepth) {
assert(source->flags & YV12_FLAG_HIGHBITDEPTH);
assert(blurred->flags & YV12_FLAG_HIGHBITDEPTH);
assert(dst->flags & YV12_FLAG_HIGHBITDEPTH);
@@ -294,38 +292,27 @@ static AOM_INLINE void gaussian_blur(const int bit_depth,
}
static AOM_INLINE double cal_approx_vmaf(const AV1_COMP *const cpi,
-#if CONFIG_USE_VMAF_RC
- VmafContext *vmaf_context,
- int *vmaf_cal_index,
-#endif
double source_variance,
YV12_BUFFER_CONFIG *const source,
YV12_BUFFER_CONFIG *const sharpened) {
const int bit_depth = cpi->td.mb.e_mbd.bd;
+ const bool cal_vmaf_neg =
+ cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN;
double new_vmaf;
-#if CONFIG_USE_VMAF_RC
- aom_calc_vmaf_at_index_rc(vmaf_context, cpi->vmaf_info.vmaf_model, source,
- sharpened, bit_depth, *vmaf_cal_index, &new_vmaf);
- (*vmaf_cal_index)++;
-#else
- aom_calc_vmaf(cpi->oxcf.tune_cfg.vmaf_model_path, source, sharpened,
- bit_depth, &new_vmaf);
-#endif
+ aom_calc_vmaf(cpi->vmaf_info.vmaf_model, source, sharpened, bit_depth,
+ cal_vmaf_neg, &new_vmaf);
const double sharpened_var = frame_average_variance(cpi, sharpened);
return source_variance / sharpened_var * (new_vmaf - kBaselineVmaf);
}
static double find_best_frame_unsharp_amount_loop(
- const AV1_COMP *const cpi,
-#if CONFIG_USE_VMAF_RC
- VmafContext *vmaf_context, int *vmaf_cal_index,
-#endif
- YV12_BUFFER_CONFIG *const source, YV12_BUFFER_CONFIG *const blurred,
- YV12_BUFFER_CONFIG *const sharpened, double best_vmaf,
- const double baseline_variance, const double unsharp_amount_start,
- const double step_size, const int max_loop_count, const double max_amount) {
+ const AV1_COMP *const cpi, YV12_BUFFER_CONFIG *const source,
+ YV12_BUFFER_CONFIG *const blurred, YV12_BUFFER_CONFIG *const sharpened,
+ double best_vmaf, const double baseline_variance,
+ const double unsharp_amount_start, const double step_size,
+ const int max_loop_count, const double max_amount) {
const double min_amount = 0.0;
int loop_count = 0;
double approx_vmaf = best_vmaf;
@@ -335,11 +322,7 @@ static double find_best_frame_unsharp_amount_loop(
unsharp_amount += step_size;
if (unsharp_amount > max_amount || unsharp_amount < min_amount) break;
unsharp(cpi, source, blurred, sharpened, unsharp_amount);
- approx_vmaf = cal_approx_vmaf(cpi,
-#if CONFIG_USE_VMAF_RC
- vmaf_context, vmaf_cal_index,
-#endif
- baseline_variance, source, sharpened);
+ approx_vmaf = cal_approx_vmaf(cpi, baseline_variance, source, sharpened);
loop_count++;
} while (approx_vmaf > best_vmaf && loop_count < max_loop_count);
@@ -358,73 +341,43 @@ static double find_best_frame_unsharp_amount(const AV1_COMP *const cpi,
const AV1_COMMON *const cm = &cpi->common;
const int width = source->y_width;
const int height = source->y_height;
-#if CONFIG_USE_VMAF_RC
- VmafContext *vmaf_context;
- aom_init_vmaf_context_rc(
- &vmaf_context, cpi->vmaf_info.vmaf_model,
- cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN);
- int vmaf_cal_index = 0;
-#endif
YV12_BUFFER_CONFIG sharpened;
memset(&sharpened, 0, sizeof(sharpened));
aom_alloc_frame_buffer(
- &sharpened, width, height, 1, 1, cm->seq_params.use_highbitdepth,
- cpi->oxcf.border_in_pixels, cm->features.byte_alignment);
+ &sharpened, width, height, source->subsampling_x, source->subsampling_y,
+ cm->seq_params->use_highbitdepth, cpi->oxcf.border_in_pixels,
+ cm->features.byte_alignment);
const double baseline_variance = frame_average_variance(cpi, source);
double unsharp_amount;
if (unsharp_amount_start <= step_size) {
unsharp_amount = find_best_frame_unsharp_amount_loop(
- cpi,
-#if CONFIG_USE_VMAF_RC
- vmaf_context, &vmaf_cal_index,
-#endif
- source, blurred, &sharpened, 0.0, baseline_variance, 0.0, step_size,
- max_loop_count, max_filter_amount);
+ cpi, source, blurred, &sharpened, 0.0, baseline_variance, 0.0,
+ step_size, max_loop_count, max_filter_amount);
} else {
double a0 = unsharp_amount_start - step_size, a1 = unsharp_amount_start;
double v0, v1;
unsharp(cpi, source, blurred, &sharpened, a0);
- v0 = cal_approx_vmaf(cpi,
-#if CONFIG_USE_VMAF_RC
- vmaf_context, &vmaf_cal_index,
-#endif
- baseline_variance, source, &sharpened);
+ v0 = cal_approx_vmaf(cpi, baseline_variance, source, &sharpened);
unsharp(cpi, source, blurred, &sharpened, a1);
- v1 = cal_approx_vmaf(cpi,
-#if CONFIG_USE_VMAF_RC
- vmaf_context, &vmaf_cal_index,
-#endif
- baseline_variance, source, &sharpened);
+ v1 = cal_approx_vmaf(cpi, baseline_variance, source, &sharpened);
if (fabs(v0 - v1) < 0.01) {
unsharp_amount = a0;
} else if (v0 > v1) {
unsharp_amount = find_best_frame_unsharp_amount_loop(
- cpi,
-#if CONFIG_USE_VMAF_RC
- vmaf_context, &vmaf_cal_index,
-#endif
- source, blurred, &sharpened, v0, baseline_variance, a0, -step_size,
- max_loop_count, max_filter_amount);
+ cpi, source, blurred, &sharpened, v0, baseline_variance, a0,
+ -step_size, max_loop_count, max_filter_amount);
} else {
unsharp_amount = find_best_frame_unsharp_amount_loop(
- cpi,
-#if CONFIG_USE_VMAF_RC
- vmaf_context, &vmaf_cal_index,
-#endif
- source, blurred, &sharpened, v1, baseline_variance, a1, step_size,
- max_loop_count, max_filter_amount);
+ cpi, source, blurred, &sharpened, v1, baseline_variance, a1,
+ step_size, max_loop_count, max_filter_amount);
}
}
aom_free_frame_buffer(&sharpened);
-#if CONFIG_USE_VMAF_RC
- aom_close_vmaf_context_rc(vmaf_context);
-#endif
return unsharp_amount;
}
-#if CONFIG_USE_VMAF_RC
void av1_vmaf_neg_preprocessing(AV1_COMP *const cpi,
YV12_BUFFER_CONFIG *const source) {
aom_clear_system_state();
@@ -433,9 +386,9 @@ void av1_vmaf_neg_preprocessing(AV1_COMP *const cpi,
const int width = source->y_width;
const int height = source->y_height;
- const GF_GROUP *const gf_group = &cpi->gf_group;
+ const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
const int layer_depth =
- AOMMIN(gf_group->layer_depth[gf_group->index], MAX_ARF_LAYERS - 1);
+ AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], MAX_ARF_LAYERS - 1);
const double best_frame_unsharp_amount =
get_layer_value(cpi->vmaf_info.last_frame_unsharp_amount, layer_depth);
@@ -444,15 +397,15 @@ void av1_vmaf_neg_preprocessing(AV1_COMP *const cpi,
YV12_BUFFER_CONFIG blurred;
memset(&blurred, 0, sizeof(blurred));
aom_alloc_frame_buffer(
- &blurred, width, height, 1, 1, cm->seq_params.use_highbitdepth,
- cpi->oxcf.border_in_pixels, cm->features.byte_alignment);
+ &blurred, width, height, source->subsampling_x, source->subsampling_y,
+ cm->seq_params->use_highbitdepth, cpi->oxcf.border_in_pixels,
+ cm->features.byte_alignment);
gaussian_blur(bit_depth, source, &blurred);
unsharp(cpi, source, &blurred, source, best_frame_unsharp_amount);
aom_free_frame_buffer(&blurred);
aom_clear_system_state();
}
-#endif
void av1_vmaf_frame_preprocessing(AV1_COMP *const cpi,
YV12_BUFFER_CONFIG *const source) {
@@ -466,19 +419,21 @@ void av1_vmaf_frame_preprocessing(AV1_COMP *const cpi,
memset(&source_extended, 0, sizeof(source_extended));
memset(&blurred, 0, sizeof(blurred));
aom_alloc_frame_buffer(
- &source_extended, width, height, 1, 1, cm->seq_params.use_highbitdepth,
+ &source_extended, width, height, source->subsampling_x,
+ source->subsampling_y, cm->seq_params->use_highbitdepth,
cpi->oxcf.border_in_pixels, cm->features.byte_alignment);
aom_alloc_frame_buffer(
- &blurred, width, height, 1, 1, cm->seq_params.use_highbitdepth,
- cpi->oxcf.border_in_pixels, cm->features.byte_alignment);
+ &blurred, width, height, source->subsampling_x, source->subsampling_y,
+ cm->seq_params->use_highbitdepth, cpi->oxcf.border_in_pixels,
+ cm->features.byte_alignment);
av1_copy_and_extend_frame(source, &source_extended);
gaussian_blur(bit_depth, &source_extended, &blurred);
aom_free_frame_buffer(&source_extended);
- const GF_GROUP *const gf_group = &cpi->gf_group;
+ const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
const int layer_depth =
- AOMMIN(gf_group->layer_depth[gf_group->index], MAX_ARF_LAYERS - 1);
+ AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], MAX_ARF_LAYERS - 1);
const double last_frame_unsharp_amount =
get_layer_value(cpi->vmaf_info.last_frame_unsharp_amount, layer_depth);
@@ -500,24 +455,27 @@ void av1_vmaf_blk_preprocessing(AV1_COMP *const cpi,
const int width = source->y_width;
const int height = source->y_height;
const int bit_depth = cpi->td.mb.e_mbd.bd;
+ const int ss_x = source->subsampling_x;
+ const int ss_y = source->subsampling_y;
YV12_BUFFER_CONFIG source_extended, blurred;
memset(&blurred, 0, sizeof(blurred));
memset(&source_extended, 0, sizeof(source_extended));
aom_alloc_frame_buffer(
- &blurred, width, height, 1, 1, cm->seq_params.use_highbitdepth,
- cpi->oxcf.border_in_pixels, cm->features.byte_alignment);
- aom_alloc_frame_buffer(
- &source_extended, width, height, 1, 1, cm->seq_params.use_highbitdepth,
+ &blurred, width, height, ss_x, ss_y, cm->seq_params->use_highbitdepth,
cpi->oxcf.border_in_pixels, cm->features.byte_alignment);
+ aom_alloc_frame_buffer(&source_extended, width, height, ss_x, ss_y,
+ cm->seq_params->use_highbitdepth,
+ cpi->oxcf.border_in_pixels,
+ cm->features.byte_alignment);
av1_copy_and_extend_frame(source, &source_extended);
gaussian_blur(bit_depth, &source_extended, &blurred);
aom_free_frame_buffer(&source_extended);
- const GF_GROUP *const gf_group = &cpi->gf_group;
+ const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
const int layer_depth =
- AOMMIN(gf_group->layer_depth[gf_group->index], MAX_ARF_LAYERS - 1);
+ AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], MAX_ARF_LAYERS - 1);
const double last_frame_unsharp_amount =
get_layer_value(cpi->vmaf_info.last_frame_unsharp_amount, layer_depth);
@@ -540,12 +498,14 @@ void av1_vmaf_blk_preprocessing(AV1_COMP *const cpi,
YV12_BUFFER_CONFIG source_block, blurred_block;
memset(&source_block, 0, sizeof(source_block));
memset(&blurred_block, 0, sizeof(blurred_block));
- aom_alloc_frame_buffer(
- &source_block, block_w, block_h, 1, 1, cm->seq_params.use_highbitdepth,
- cpi->oxcf.border_in_pixels, cm->features.byte_alignment);
- aom_alloc_frame_buffer(
- &blurred_block, block_w, block_h, 1, 1, cm->seq_params.use_highbitdepth,
- cpi->oxcf.border_in_pixels, cm->features.byte_alignment);
+ aom_alloc_frame_buffer(&source_block, block_w, block_h, ss_x, ss_y,
+ cm->seq_params->use_highbitdepth,
+ cpi->oxcf.border_in_pixels,
+ cm->features.byte_alignment);
+ aom_alloc_frame_buffer(&blurred_block, block_w, block_h, ss_x, ss_y,
+ cm->seq_params->use_highbitdepth,
+ cpi->oxcf.border_in_pixels,
+ cm->features.byte_alignment);
for (int row = 0; row < num_rows; ++row) {
for (int col = 0; col < num_cols; ++col) {
@@ -555,7 +515,7 @@ void av1_vmaf_blk_preprocessing(AV1_COMP *const cpi,
const int block_height = AOMMIN(height - row_offset_y, block_h);
const int index = col + row * num_cols;
- if (cm->seq_params.use_highbitdepth) {
+ if (cm->seq_params->use_highbitdepth) {
assert(source->flags & YV12_FLAG_HIGHBITDEPTH);
assert(blurred.flags & YV12_FLAG_HIGHBITDEPTH);
uint16_t *frame_src_buf = CONVERT_TO_SHORTPTR(source->y_buffer) +
@@ -624,7 +584,7 @@ void av1_vmaf_blk_preprocessing(AV1_COMP *const cpi,
const int block_height = AOMMIN(source->y_height - row_offset_y, block_h);
const int index = col + row * num_cols;
- if (cm->seq_params.use_highbitdepth) {
+ if (cm->seq_params->use_highbitdepth) {
assert(source->flags & YV12_FLAG_HIGHBITDEPTH);
assert(blurred.flags & YV12_FLAG_HIGHBITDEPTH);
uint16_t *src_buf = CONVERT_TO_SHORTPTR(source->y_buffer) +
@@ -654,93 +614,6 @@ void av1_vmaf_blk_preprocessing(AV1_COMP *const cpi,
aom_clear_system_state();
}
-#if !CONFIG_USE_VMAF_RC
-typedef struct FrameData {
- const YV12_BUFFER_CONFIG *source, *blurred;
- int block_w, block_h, num_rows, num_cols, row, col, bit_depth;
-} FrameData;
-
-// A callback function used to pass data to VMAF.
-// Returns 0 after reading a frame.
-// Returns 2 when there is no more frame to read.
-static int update_frame(float *ref_data, float *main_data, float *temp_data,
- int stride, void *user_data) {
- FrameData *frames = (FrameData *)user_data;
- const int width = frames->source->y_width;
- const int height = frames->source->y_height;
- const int row = frames->row;
- const int col = frames->col;
- const int num_rows = frames->num_rows;
- const int num_cols = frames->num_cols;
- const int block_w = frames->block_w;
- const int block_h = frames->block_h;
- const YV12_BUFFER_CONFIG *source = frames->source;
- const YV12_BUFFER_CONFIG *blurred = frames->blurred;
- const int bit_depth = frames->bit_depth;
- const float scale_factor = 1.0f / (float)(1 << (bit_depth - 8));
- (void)temp_data;
- stride /= (int)sizeof(*ref_data);
-
- for (int i = 0; i < height; ++i) {
- float *ref, *main;
- ref = ref_data + i * stride;
- main = main_data + i * stride;
- if (source->flags & YV12_FLAG_HIGHBITDEPTH) {
- uint16_t *src;
- src = CONVERT_TO_SHORTPTR(source->y_buffer) + i * source->y_stride;
- for (int j = 0; j < width; ++j) {
- ref[j] = main[j] = scale_factor * (float)src[j];
- }
- } else {
- uint8_t *src;
- src = source->y_buffer + i * source->y_stride;
- for (int j = 0; j < width; ++j) {
- ref[j] = main[j] = (float)src[j];
- }
- }
- }
- if (row < num_rows && col < num_cols) {
- // Set current block
- const int row_offset = row * block_h;
- const int col_offset = col * block_w;
- const int block_width = AOMMIN(width - col_offset, block_w);
- const int block_height = AOMMIN(height - row_offset, block_h);
-
- float *main_buf = main_data + col_offset + row_offset * stride;
- if (source->flags & YV12_FLAG_HIGHBITDEPTH) {
- uint16_t *blurred_buf = CONVERT_TO_SHORTPTR(blurred->y_buffer) +
- row_offset * blurred->y_stride + col_offset;
- for (int i = 0; i < block_height; ++i) {
- for (int j = 0; j < block_width; ++j) {
- main_buf[j] = scale_factor * (float)blurred_buf[j];
- }
- main_buf += stride;
- blurred_buf += blurred->y_stride;
- }
- } else {
- uint8_t *blurred_buf =
- blurred->y_buffer + row_offset * blurred->y_stride + col_offset;
- for (int i = 0; i < block_height; ++i) {
- for (int j = 0; j < block_width; ++j) {
- main_buf[j] = (float)blurred_buf[j];
- }
- main_buf += stride;
- blurred_buf += blurred->y_stride;
- }
- }
-
- frames->col++;
- if (frames->col >= num_cols) {
- frames->col = 0;
- frames->row++;
- }
- return 0;
- } else {
- return 2;
- }
-}
-#endif
-
void av1_set_mb_vmaf_rdmult_scaling(AV1_COMP *cpi) {
AV1_COMMON *cm = &cpi->common;
const int y_width = cpi->source->y_width;
@@ -748,13 +621,15 @@ void av1_set_mb_vmaf_rdmult_scaling(AV1_COMP *cpi) {
const int resized_block_size = BLOCK_32X32;
const int resize_factor = 2;
const int bit_depth = cpi->td.mb.e_mbd.bd;
+ const int ss_x = cpi->source->subsampling_x;
+ const int ss_y = cpi->source->subsampling_y;
aom_clear_system_state();
YV12_BUFFER_CONFIG resized_source;
memset(&resized_source, 0, sizeof(resized_source));
aom_alloc_frame_buffer(
- &resized_source, y_width / resize_factor, y_height / resize_factor, 1, 1,
- cm->seq_params.use_highbitdepth, cpi->oxcf.border_in_pixels,
+ &resized_source, y_width / resize_factor, y_height / resize_factor, ss_x,
+ ss_y, cm->seq_params->use_highbitdepth, cpi->oxcf.border_in_pixels,
cm->features.byte_alignment);
av1_resize_and_extend_frame_nonnormative(cpi->source, &resized_source,
bit_depth, av1_num_planes(cm));
@@ -770,42 +645,26 @@ void av1_set_mb_vmaf_rdmult_scaling(AV1_COMP *cpi) {
YV12_BUFFER_CONFIG blurred;
memset(&blurred, 0, sizeof(blurred));
- aom_alloc_frame_buffer(&blurred, resized_y_width, resized_y_height, 1, 1,
- cm->seq_params.use_highbitdepth,
+ aom_alloc_frame_buffer(&blurred, resized_y_width, resized_y_height, ss_x,
+ ss_y, cm->seq_params->use_highbitdepth,
cpi->oxcf.border_in_pixels,
cm->features.byte_alignment);
gaussian_blur(bit_depth, &resized_source, &blurred);
-#if CONFIG_USE_VMAF_RC
YV12_BUFFER_CONFIG recon;
memset(&recon, 0, sizeof(recon));
- aom_alloc_frame_buffer(&recon, resized_y_width, resized_y_height, 1, 1,
- cm->seq_params.use_highbitdepth,
+ aom_alloc_frame_buffer(&recon, resized_y_width, resized_y_height, ss_x, ss_y,
+ cm->seq_params->use_highbitdepth,
cpi->oxcf.border_in_pixels,
cm->features.byte_alignment);
aom_yv12_copy_frame(&resized_source, &recon, 1);
VmafContext *vmaf_context;
- aom_init_vmaf_context_rc(
- &vmaf_context, cpi->vmaf_info.vmaf_model,
- cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN);
-#else
- double *scores = aom_malloc(sizeof(*scores) * (num_rows * num_cols));
- memset(scores, 0, sizeof(*scores) * (num_rows * num_cols));
- FrameData frame_data;
- frame_data.source = &resized_source;
- frame_data.blurred = &blurred;
- frame_data.block_w = resized_block_w;
- frame_data.block_h = resized_block_h;
- frame_data.num_rows = num_rows;
- frame_data.num_cols = num_cols;
- frame_data.row = 0;
- frame_data.col = 0;
- frame_data.bit_depth = bit_depth;
- aom_calc_vmaf_multi_frame(&frame_data, cpi->oxcf.tune_cfg.vmaf_model_path,
- update_frame, resized_y_width, resized_y_height,
- bit_depth, scores);
-#endif
+ const bool cal_vmaf_neg =
+ cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN;
+ aom_init_vmaf_context(&vmaf_context, cpi->vmaf_info.vmaf_model, cal_vmaf_neg);
+ unsigned int *sses = aom_malloc(sizeof(*sses) * (num_rows * num_cols));
+ memset(sses, 0, sizeof(*sses) * (num_rows * num_cols));
// Loop through each 'block_size' block.
for (int row = 0; row < num_rows; ++row) {
@@ -820,15 +679,14 @@ void av1_set_mb_vmaf_rdmult_scaling(AV1_COMP *cpi) {
uint8_t *const blurred_buf =
blurred.y_buffer + row_offset_y * blurred.y_stride + col_offset_y;
- unsigned int sse;
- cpi->fn_ptr[resized_block_size].vf(orig_buf, resized_source.y_stride,
- blurred_buf, blurred.y_stride, &sse);
+ cpi->ppi->fn_ptr[resized_block_size].vf(orig_buf, resized_source.y_stride,
+ blurred_buf, blurred.y_stride,
+ &sses[index]);
-#if CONFIG_USE_VMAF_RC
uint8_t *const recon_buf =
recon.y_buffer + row_offset_y * recon.y_stride + col_offset_y;
// Set recon buf
- if (cpi->common.seq_params.use_highbitdepth) {
+ if (cpi->common.seq_params->use_highbitdepth) {
highbd_unsharp_rect(CONVERT_TO_SHORTPTR(blurred_buf), blurred.y_stride,
CONVERT_TO_SHORTPTR(blurred_buf), blurred.y_stride,
CONVERT_TO_SHORTPTR(recon_buf), recon.y_stride,
@@ -839,13 +697,11 @@ void av1_set_mb_vmaf_rdmult_scaling(AV1_COMP *cpi) {
resized_block_w, resized_block_h, 0.0);
}
- double vmaf;
- aom_calc_vmaf_at_index_rc(vmaf_context, cpi->vmaf_info.vmaf_model,
- &resized_source, &recon, bit_depth, index,
- &vmaf);
+ aom_read_vmaf_image(vmaf_context, &resized_source, &recon, bit_depth,
+ index);
// Restore recon buf
- if (cpi->common.seq_params.use_highbitdepth) {
+ if (cpi->common.seq_params->use_highbitdepth) {
highbd_unsharp_rect(
CONVERT_TO_SHORTPTR(orig_buf), resized_source.y_stride,
CONVERT_TO_SHORTPTR(orig_buf), resized_source.y_stride,
@@ -856,13 +712,18 @@ void av1_set_mb_vmaf_rdmult_scaling(AV1_COMP *cpi) {
resized_source.y_stride, recon_buf, recon.y_stride,
resized_block_w, resized_block_h, 0.0);
}
-#else
- const double vmaf = scores[index];
-#endif
+ }
+ }
+ aom_flush_vmaf_context(vmaf_context);
+ for (int row = 0; row < num_rows; ++row) {
+ for (int col = 0; col < num_cols; ++col) {
+ const int index = row * num_cols + col;
+ const double vmaf = aom_calc_vmaf_at_index(
+ vmaf_context, cpi->vmaf_info.vmaf_model, index);
const double dvmaf = kBaselineVmaf - vmaf;
const double mse =
- (double)sse / (double)(resized_y_width * resized_y_height);
+ (double)sses[index] / (double)(resized_y_width * resized_y_height);
double weight;
const double eps = 0.01 / (num_rows * num_cols);
if (dvmaf < eps || mse < eps) {
@@ -879,11 +740,8 @@ void av1_set_mb_vmaf_rdmult_scaling(AV1_COMP *cpi) {
aom_free_frame_buffer(&resized_source);
aom_free_frame_buffer(&blurred);
-#if CONFIG_USE_VMAF_RC
- aom_close_vmaf_context_rc(vmaf_context);
-#else
- aom_free(scores);
-#endif
+ aom_close_vmaf_context(vmaf_context);
+ aom_free(sses);
aom_clear_system_state();
}
@@ -967,27 +825,32 @@ static double calc_vmaf_motion_score(const AV1_COMP *const cpi,
const int y_height = cur->y_height;
YV12_BUFFER_CONFIG blurred_cur, blurred_last, blurred_next;
const int bit_depth = cpi->td.mb.e_mbd.bd;
+ const int ss_x = cur->subsampling_x;
+ const int ss_y = cur->subsampling_y;
memset(&blurred_cur, 0, sizeof(blurred_cur));
memset(&blurred_last, 0, sizeof(blurred_last));
memset(&blurred_next, 0, sizeof(blurred_next));
- aom_alloc_frame_buffer(
- &blurred_cur, y_width, y_height, 1, 1, cm->seq_params.use_highbitdepth,
- cpi->oxcf.border_in_pixels, cm->features.byte_alignment);
- aom_alloc_frame_buffer(
- &blurred_last, y_width, y_height, 1, 1, cm->seq_params.use_highbitdepth,
- cpi->oxcf.border_in_pixels, cm->features.byte_alignment);
- aom_alloc_frame_buffer(
- &blurred_next, y_width, y_height, 1, 1, cm->seq_params.use_highbitdepth,
- cpi->oxcf.border_in_pixels, cm->features.byte_alignment);
+ aom_alloc_frame_buffer(&blurred_cur, y_width, y_height, ss_x, ss_y,
+ cm->seq_params->use_highbitdepth,
+ cpi->oxcf.border_in_pixels,
+ cm->features.byte_alignment);
+ aom_alloc_frame_buffer(&blurred_last, y_width, y_height, ss_x, ss_y,
+ cm->seq_params->use_highbitdepth,
+ cpi->oxcf.border_in_pixels,
+ cm->features.byte_alignment);
+ aom_alloc_frame_buffer(&blurred_next, y_width, y_height, ss_x, ss_y,
+ cm->seq_params->use_highbitdepth,
+ cpi->oxcf.border_in_pixels,
+ cm->features.byte_alignment);
gaussian_blur(bit_depth, cur, &blurred_cur);
gaussian_blur(bit_depth, last, &blurred_last);
if (next) gaussian_blur(bit_depth, next, &blurred_next);
double motion1, motion2 = 65536.0;
- if (cm->seq_params.use_highbitdepth) {
+ if (cm->seq_params->use_highbitdepth) {
assert(blurred_cur.flags & YV12_FLAG_HIGHBITDEPTH);
assert(blurred_last.flags & YV12_FLAG_HIGHBITDEPTH);
const float scale_factor = 1.0f / (float)(1 << (bit_depth - 8));
@@ -1026,9 +889,9 @@ static AOM_INLINE void get_neighbor_frames(const AV1_COMP *const cpi,
YV12_BUFFER_CONFIG **last,
YV12_BUFFER_CONFIG **next) {
const AV1_COMMON *const cm = &cpi->common;
- const GF_GROUP *gf_group = &cpi->gf_group;
+ const GF_GROUP *gf_group = &cpi->ppi->gf_group;
const int src_index =
- cm->show_frame != 0 ? 0 : gf_group->arf_src_offset[gf_group->index];
+ cm->show_frame != 0 ? 0 : gf_group->arf_src_offset[cpi->gf_frame_index];
struct lookahead_entry *last_entry = av1_lookahead_peek(
cpi->ppi->lookahead, src_index - 1, cpi->compressor_stage);
struct lookahead_entry *next_entry = av1_lookahead_peek(
@@ -1046,9 +909,9 @@ int av1_get_vmaf_base_qindex(const AV1_COMP *const cpi, int current_qindex) {
return current_qindex;
}
aom_clear_system_state();
- const GF_GROUP *const gf_group = &cpi->gf_group;
+ const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
const int layer_depth =
- AOMMIN(gf_group->layer_depth[gf_group->index], MAX_ARF_LAYERS - 1);
+ AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], MAX_ARF_LAYERS - 1);
const double last_frame_ysse =
get_layer_value(cpi->vmaf_info.last_frame_ysse, layer_depth);
const double last_frame_vmaf =
@@ -1065,7 +928,7 @@ int av1_get_vmaf_base_qindex(const AV1_COMP *const cpi, int current_qindex) {
}
YV12_BUFFER_CONFIG *cur_buf = cpi->source;
if (cm->show_frame == 0) {
- const int src_index = gf_group->arf_src_offset[gf_group->index];
+ const int src_index = gf_group->arf_src_offset[cpi->gf_frame_index];
struct lookahead_entry *cur_entry = av1_lookahead_peek(
cpi->ppi->lookahead, src_index, cpi->compressor_stage);
cur_buf = &cur_entry->img;
@@ -1084,7 +947,8 @@ int av1_get_vmaf_base_qindex(const AV1_COMP *const cpi, int current_qindex) {
const double dsse = dvmaf * approx_sse / approx_dvmaf;
const double beta = approx_sse / (dsse + approx_sse);
- const int offset = av1_get_deltaq_offset(cpi, current_qindex, beta);
+ const int offset =
+ av1_get_deltaq_offset(cm->seq_params->bit_depth, current_qindex, beta);
int qindex = current_qindex + offset;
qindex = AOMMIN(qindex, MAXQ);
@@ -1094,23 +958,23 @@ int av1_get_vmaf_base_qindex(const AV1_COMP *const cpi, int current_qindex) {
return qindex;
}
-#if CONFIG_USE_VMAF_RC
static AOM_INLINE double cal_approx_score(
- AV1_COMP *const cpi, VmafContext *vmaf_context, int vmaf_cal_index,
- double src_variance, double new_variance, double src_score,
- YV12_BUFFER_CONFIG *const src, YV12_BUFFER_CONFIG *const recon_sharpened) {
+ AV1_COMP *const cpi, double src_variance, double new_variance,
+ double src_score, YV12_BUFFER_CONFIG *const src,
+ YV12_BUFFER_CONFIG *const recon_sharpened) {
double score;
const uint32_t bit_depth = cpi->td.mb.e_mbd.bd;
- aom_calc_vmaf_at_index_rc(vmaf_context, cpi->vmaf_info.vmaf_model, src,
- recon_sharpened, bit_depth, vmaf_cal_index, &score);
+ const bool cal_vmaf_neg =
+ cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN;
+ aom_calc_vmaf(cpi->vmaf_info.vmaf_model, src, recon_sharpened, bit_depth,
+ cal_vmaf_neg, &score);
return src_variance / new_variance * (score - src_score);
}
static double find_best_frame_unsharp_amount_loop_neg(
- AV1_COMP *const cpi, VmafContext *vmaf_context, double src_variance,
- double base_score, YV12_BUFFER_CONFIG *const src,
- YV12_BUFFER_CONFIG *const recon, YV12_BUFFER_CONFIG *const ref,
- YV12_BUFFER_CONFIG *const src_blurred,
+ AV1_COMP *const cpi, double src_variance, double base_score,
+ YV12_BUFFER_CONFIG *const src, YV12_BUFFER_CONFIG *const recon,
+ YV12_BUFFER_CONFIG *const ref, YV12_BUFFER_CONFIG *const src_blurred,
YV12_BUFFER_CONFIG *const recon_blurred,
YV12_BUFFER_CONFIG *const src_sharpened,
YV12_BUFFER_CONFIG *const recon_sharpened, FULLPEL_MV *mvs,
@@ -1120,7 +984,6 @@ static double find_best_frame_unsharp_amount_loop_neg(
int loop_count = 0;
double approx_score = best_score;
double unsharp_amount = unsharp_amount_start;
- int vmaf_cal_index = 3;
do {
best_score = approx_score;
@@ -1130,9 +993,8 @@ static double find_best_frame_unsharp_amount_loop_neg(
unsharp(cpi, src, src_blurred, src_sharpened, unsharp_amount);
const double new_variance =
residual_frame_average_variance(cpi, src_sharpened, ref, mvs);
- approx_score =
- cal_approx_score(cpi, vmaf_context, vmaf_cal_index++, src_variance,
- new_variance, base_score, src, recon_sharpened);
+ approx_score = cal_approx_score(cpi, src_variance, new_variance, base_score,
+ src, recon_sharpened);
loop_count++;
} while (approx_score > best_score && loop_count < max_loop_count);
@@ -1143,11 +1005,11 @@ static double find_best_frame_unsharp_amount_loop_neg(
}
static double find_best_frame_unsharp_amount_neg(
- AV1_COMP *const cpi, VmafContext *vmaf_context,
- YV12_BUFFER_CONFIG *const src, YV12_BUFFER_CONFIG *const recon,
- YV12_BUFFER_CONFIG *const ref, double base_score,
- const double unsharp_amount_start, const double step_size,
- const int max_loop_count, const double max_filter_amount) {
+ AV1_COMP *const cpi, YV12_BUFFER_CONFIG *const src,
+ YV12_BUFFER_CONFIG *const recon, YV12_BUFFER_CONFIG *const ref,
+ double base_score, const double unsharp_amount_start,
+ const double step_size, const int max_loop_count,
+ const double max_filter_amount) {
FULLPEL_MV *mvs = NULL;
const double src_variance =
residual_frame_average_variance(cpi, src, ref, mvs);
@@ -1156,22 +1018,28 @@ static double find_best_frame_unsharp_amount_neg(
const int width = recon->y_width;
const int height = recon->y_height;
const int bit_depth = cpi->td.mb.e_mbd.bd;
+ const int ss_x = recon->subsampling_x;
+ const int ss_y = recon->subsampling_y;
+
YV12_BUFFER_CONFIG src_blurred, recon_blurred, src_sharpened, recon_sharpened;
memset(&recon_sharpened, 0, sizeof(recon_sharpened));
memset(&src_sharpened, 0, sizeof(src_sharpened));
memset(&recon_blurred, 0, sizeof(recon_blurred));
memset(&src_blurred, 0, sizeof(src_blurred));
+ aom_alloc_frame_buffer(&recon_sharpened, width, height, ss_x, ss_y,
+ cm->seq_params->use_highbitdepth,
+ cpi->oxcf.border_in_pixels,
+ cm->features.byte_alignment);
+ aom_alloc_frame_buffer(&src_sharpened, width, height, ss_x, ss_y,
+ cm->seq_params->use_highbitdepth,
+ cpi->oxcf.border_in_pixels,
+ cm->features.byte_alignment);
+ aom_alloc_frame_buffer(&recon_blurred, width, height, ss_x, ss_y,
+ cm->seq_params->use_highbitdepth,
+ cpi->oxcf.border_in_pixels,
+ cm->features.byte_alignment);
aom_alloc_frame_buffer(
- &recon_sharpened, width, height, 1, 1, cm->seq_params.use_highbitdepth,
- cpi->oxcf.border_in_pixels, cm->features.byte_alignment);
- aom_alloc_frame_buffer(
- &src_sharpened, width, height, 1, 1, cm->seq_params.use_highbitdepth,
- cpi->oxcf.border_in_pixels, cm->features.byte_alignment);
- aom_alloc_frame_buffer(
- &recon_blurred, width, height, 1, 1, cm->seq_params.use_highbitdepth,
- cpi->oxcf.border_in_pixels, cm->features.byte_alignment);
- aom_alloc_frame_buffer(
- &src_blurred, width, height, 1, 1, cm->seq_params.use_highbitdepth,
+ &src_blurred, width, height, ss_x, ss_y, cm->seq_params->use_highbitdepth,
cpi->oxcf.border_in_pixels, cm->features.byte_alignment);
gaussian_blur(bit_depth, recon, &recon_blurred);
@@ -1181,32 +1049,28 @@ static double find_best_frame_unsharp_amount_neg(
unsharp(cpi, src, &src_blurred, &src_sharpened, unsharp_amount_start);
const double variance_start =
residual_frame_average_variance(cpi, &src_sharpened, ref, mvs);
- const double score_start =
- cal_approx_score(cpi, vmaf_context, 1, src_variance, variance_start,
- base_score, src, &recon_sharpened);
+ const double score_start = cal_approx_score(
+ cpi, src_variance, variance_start, base_score, src, &recon_sharpened);
const double unsharp_amount_next = unsharp_amount_start + step_size;
unsharp(cpi, recon, &recon_blurred, &recon_sharpened, unsharp_amount_next);
unsharp(cpi, src, &src_blurred, &src_sharpened, unsharp_amount_next);
const double variance_next =
residual_frame_average_variance(cpi, &src_sharpened, ref, mvs);
- const double score_next =
- cal_approx_score(cpi, vmaf_context, 2, src_variance, variance_next,
- base_score, src, &recon_sharpened);
+ const double score_next = cal_approx_score(cpi, src_variance, variance_next,
+ base_score, src, &recon_sharpened);
double unsharp_amount;
if (score_next > score_start) {
unsharp_amount = find_best_frame_unsharp_amount_loop_neg(
- cpi, vmaf_context, src_variance, base_score, src, recon, ref,
- &src_blurred, &recon_blurred, &src_sharpened, &recon_sharpened, mvs,
- score_next, unsharp_amount_next, step_size, max_loop_count,
- max_filter_amount);
+ cpi, src_variance, base_score, src, recon, ref, &src_blurred,
+ &recon_blurred, &src_sharpened, &recon_sharpened, mvs, score_next,
+ unsharp_amount_next, step_size, max_loop_count, max_filter_amount);
} else {
unsharp_amount = find_best_frame_unsharp_amount_loop_neg(
- cpi, vmaf_context, src_variance, base_score, src, recon, ref,
- &src_blurred, &recon_blurred, &src_sharpened, &recon_sharpened, mvs,
- score_start, unsharp_amount_start, -step_size, max_loop_count,
- max_filter_amount);
+ cpi, src_variance, base_score, src, recon, ref, &src_blurred,
+ &recon_blurred, &src_sharpened, &recon_sharpened, mvs, score_start,
+ unsharp_amount_start, -step_size, max_loop_count, max_filter_amount);
}
aom_free_frame_buffer(&recon_sharpened);
@@ -1216,29 +1080,21 @@ static double find_best_frame_unsharp_amount_neg(
aom_free(mvs);
return unsharp_amount;
}
-#endif // CONFIG_USE_VMAF_RC
void av1_update_vmaf_curve(AV1_COMP *cpi) {
YV12_BUFFER_CONFIG *source = cpi->source;
YV12_BUFFER_CONFIG *recon = &cpi->common.cur_frame->buf;
const int bit_depth = cpi->td.mb.e_mbd.bd;
- const GF_GROUP *const gf_group = &cpi->gf_group;
+ const GF_GROUP *const gf_group = &cpi->ppi->gf_group;
const int layer_depth =
- AOMMIN(gf_group->layer_depth[gf_group->index], MAX_ARF_LAYERS - 1);
-#if CONFIG_USE_VMAF_RC
+ AOMMIN(gf_group->layer_depth[cpi->gf_frame_index], MAX_ARF_LAYERS - 1);
double base_score;
- VmafContext *vmaf_context;
- aom_init_vmaf_context_rc(
- &vmaf_context, cpi->vmaf_info.vmaf_model,
- cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN);
- aom_calc_vmaf_at_index_rc(vmaf_context, cpi->vmaf_info.vmaf_model, source,
- recon, bit_depth, 0, &base_score);
+ const bool cal_vmaf_neg =
+ cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN;
+ aom_calc_vmaf(cpi->vmaf_info.vmaf_model, source, recon, bit_depth,
+ cal_vmaf_neg, &base_score);
cpi->vmaf_info.last_frame_vmaf[layer_depth] = base_score;
-#else
- aom_calc_vmaf(cpi->oxcf.tune_cfg.vmaf_model_path, source, recon, bit_depth,
- &cpi->vmaf_info.last_frame_vmaf[layer_depth]);
-#endif // CONFIG_USE_VMAF_RC
- if (cpi->common.seq_params.use_highbitdepth) {
+ if (cpi->common.seq_params->use_highbitdepth) {
assert(source->flags & YV12_FLAG_HIGHBITDEPTH);
assert(recon->flags & YV12_FLAG_HIGHBITDEPTH);
cpi->vmaf_info.last_frame_ysse[layer_depth] =
@@ -1248,7 +1104,6 @@ void av1_update_vmaf_curve(AV1_COMP *cpi) {
(double)aom_get_y_sse(source, recon);
}
-#if CONFIG_USE_VMAF_RC
if (cpi->oxcf.tune_cfg.tuning == AOM_TUNE_VMAF_NEG_MAX_GAIN) {
YV12_BUFFER_CONFIG *last, *next;
get_neighbor_frames(cpi, &last, &next);
@@ -1256,10 +1111,8 @@ void av1_update_vmaf_curve(AV1_COMP *cpi) {
get_layer_value(cpi->vmaf_info.last_frame_unsharp_amount, layer_depth);
const int max_loop_count = 5;
cpi->vmaf_info.last_frame_unsharp_amount[layer_depth] =
- find_best_frame_unsharp_amount_neg(
- cpi, vmaf_context, source, recon, last, base_score,
- best_unsharp_amount_start, 0.025, max_loop_count, 1.01);
+ find_best_frame_unsharp_amount_neg(cpi, source, recon, last, base_score,
+ best_unsharp_amount_start, 0.025,
+ max_loop_count, 1.01);
}
- aom_close_vmaf_context_rc(vmaf_context);
-#endif // CONFIG_USE_VMAF_RC
}
diff --git a/third_party/libaom/source/libaom/av1/encoder/tune_vmaf.h b/third_party/libaom/source/libaom/av1/encoder/tune_vmaf.h
index 01c3068bf0..4625fb9061 100644
--- a/third_party/libaom/source/libaom/av1/encoder/tune_vmaf.h
+++ b/third_party/libaom/source/libaom/av1/encoder/tune_vmaf.h
@@ -36,10 +36,8 @@ typedef struct {
// Stores the origial qindex before scaling.
int original_qindex;
-#if CONFIG_USE_VMAF_RC
// VMAF model used in VMAF caculations.
VmafModel *vmaf_model;
-#endif
} TuneVMAFInfo;
typedef struct AV1_COMP AV1_COMP;
@@ -48,9 +46,7 @@ void av1_vmaf_blk_preprocessing(AV1_COMP *cpi, YV12_BUFFER_CONFIG *source);
void av1_vmaf_frame_preprocessing(AV1_COMP *cpi, YV12_BUFFER_CONFIG *source);
-#ifdef CONFIG_USE_VMAF_RC
void av1_vmaf_neg_preprocessing(AV1_COMP *cpi, YV12_BUFFER_CONFIG *source);
-#endif
void av1_set_mb_vmaf_rdmult_scaling(AV1_COMP *cpi);
diff --git a/third_party/libaom/source/libaom/av1/encoder/tx_search.c b/third_party/libaom/source/libaom/av1/encoder/tx_search.c
index 30aac0a349..e65b70f788 100644
--- a/third_party/libaom/source/libaom/av1/encoder/tx_search.c
+++ b/third_party/libaom/source/libaom/av1/encoder/tx_search.c
@@ -618,7 +618,7 @@ static AOM_INLINE void get_energy_distribution_fine(
assert(bw <= 32);
assert(bh <= 32);
assert(((bw - 1) >> w_shift) + (((bh - 1) >> h_shift) << 2) == 15);
- if (cpi->common.seq_params.use_highbitdepth) {
+ if (cpi->common.seq_params->use_highbitdepth) {
const uint16_t *src16 = CONVERT_TO_SHORTPTR(src);
const uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst);
for (int i = 0; i < bh; ++i)
@@ -643,43 +643,43 @@ static AOM_INLINE void get_energy_distribution_fine(
const BLOCK_SIZE subsize = (BLOCK_SIZE)f_index;
assert(block_size_wide[bsize] == 4 * block_size_wide[subsize]);
assert(block_size_high[bsize] == 4 * block_size_high[subsize]);
- cpi->fn_ptr[subsize].vf(src, src_stride, dst, dst_stride, &esq[0]);
- cpi->fn_ptr[subsize].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
- &esq[1]);
- cpi->fn_ptr[subsize].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
- &esq[2]);
- cpi->fn_ptr[subsize].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
- dst_stride, &esq[3]);
+ cpi->ppi->fn_ptr[subsize].vf(src, src_stride, dst, dst_stride, &esq[0]);
+ cpi->ppi->fn_ptr[subsize].vf(src + bw / 4, src_stride, dst + bw / 4,
+ dst_stride, &esq[1]);
+ cpi->ppi->fn_ptr[subsize].vf(src + bw / 2, src_stride, dst + bw / 2,
+ dst_stride, &esq[2]);
+ cpi->ppi->fn_ptr[subsize].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
+ dst_stride, &esq[3]);
src += bh / 4 * src_stride;
dst += bh / 4 * dst_stride;
- cpi->fn_ptr[subsize].vf(src, src_stride, dst, dst_stride, &esq[4]);
- cpi->fn_ptr[subsize].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
- &esq[5]);
- cpi->fn_ptr[subsize].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
- &esq[6]);
- cpi->fn_ptr[subsize].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
- dst_stride, &esq[7]);
+ cpi->ppi->fn_ptr[subsize].vf(src, src_stride, dst, dst_stride, &esq[4]);
+ cpi->ppi->fn_ptr[subsize].vf(src + bw / 4, src_stride, dst + bw / 4,
+ dst_stride, &esq[5]);
+ cpi->ppi->fn_ptr[subsize].vf(src + bw / 2, src_stride, dst + bw / 2,
+ dst_stride, &esq[6]);
+ cpi->ppi->fn_ptr[subsize].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
+ dst_stride, &esq[7]);
src += bh / 4 * src_stride;
dst += bh / 4 * dst_stride;
- cpi->fn_ptr[subsize].vf(src, src_stride, dst, dst_stride, &esq[8]);
- cpi->fn_ptr[subsize].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
- &esq[9]);
- cpi->fn_ptr[subsize].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
- &esq[10]);
- cpi->fn_ptr[subsize].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
- dst_stride, &esq[11]);
+ cpi->ppi->fn_ptr[subsize].vf(src, src_stride, dst, dst_stride, &esq[8]);
+ cpi->ppi->fn_ptr[subsize].vf(src + bw / 4, src_stride, dst + bw / 4,
+ dst_stride, &esq[9]);
+ cpi->ppi->fn_ptr[subsize].vf(src + bw / 2, src_stride, dst + bw / 2,
+ dst_stride, &esq[10]);
+ cpi->ppi->fn_ptr[subsize].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
+ dst_stride, &esq[11]);
src += bh / 4 * src_stride;
dst += bh / 4 * dst_stride;
- cpi->fn_ptr[subsize].vf(src, src_stride, dst, dst_stride, &esq[12]);
- cpi->fn_ptr[subsize].vf(src + bw / 4, src_stride, dst + bw / 4, dst_stride,
- &esq[13]);
- cpi->fn_ptr[subsize].vf(src + bw / 2, src_stride, dst + bw / 2, dst_stride,
- &esq[14]);
- cpi->fn_ptr[subsize].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
- dst_stride, &esq[15]);
+ cpi->ppi->fn_ptr[subsize].vf(src, src_stride, dst, dst_stride, &esq[12]);
+ cpi->ppi->fn_ptr[subsize].vf(src + bw / 4, src_stride, dst + bw / 4,
+ dst_stride, &esq[13]);
+ cpi->ppi->fn_ptr[subsize].vf(src + bw / 2, src_stride, dst + bw / 2,
+ dst_stride, &esq[14]);
+ cpi->ppi->fn_ptr[subsize].vf(src + 3 * bw / 4, src_stride, dst + 3 * bw / 4,
+ dst_stride, &esq[15]);
}
double total = (double)esq[0] + esq[1] + esq[2] + esq[3] + esq[4] + esq[5] +
@@ -769,13 +769,13 @@ static AOM_INLINE void get_2x2_normalized_sses_and_sads(
if (sse_norm_arr) {
unsigned int this_sse;
- cpi->fn_ptr[tx_bsize_half].vf(this_src, src_stride, this_dst,
- dst_stride, &this_sse);
+ cpi->ppi->fn_ptr[tx_bsize_half].vf(this_src, src_stride, this_dst,
+ dst_stride, &this_sse);
sse_norm_arr[row * 2 + col] = (double)this_sse / num_samples_half;
}
if (sad_norm_arr) {
- const unsigned int this_sad = cpi->fn_ptr[tx_bsize_half].sdf(
+ const unsigned int this_sad = cpi->ppi->fn_ptr[tx_bsize_half].sdf(
this_src, src_stride, this_dst, dst_stride);
sad_norm_arr[row * 2 + col] = (double)this_sad / num_samples_half;
}
@@ -832,11 +832,11 @@ static AOM_INLINE void PrintTransformUnitStats(
const uint8_t *const dst =
&pd->dst.buf[(blk_row * dst_stride + blk_col) << MI_SIZE_LOG2];
unsigned int sse;
- cpi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &sse);
+ cpi->ppi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &sse);
const double sse_norm = (double)sse / num_samples;
const unsigned int sad =
- cpi->fn_ptr[tx_bsize].sdf(src, src_stride, dst, dst_stride);
+ cpi->ppi->fn_ptr[tx_bsize].sdf(src, src_stride, dst, dst_stride);
const double sad_norm = (double)sad / num_samples;
fprintf(fout, " %g %g", sse_norm, sad_norm);
@@ -905,8 +905,8 @@ static int64_t get_sse(const AV1_COMP *cpi, const MACROBLOCK *x) {
if (x->skip_chroma_rd && plane) continue;
- cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride,
- &sse);
+ cpi->ppi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf,
+ pd->dst.stride, &sse);
total_sse += sse;
}
total_sse <<= 4;
@@ -1030,7 +1030,7 @@ static AOM_INLINE void PrintPredictionUnitStats(const AV1_COMP *const cpi,
const double sse_norm = (double)sse / num_samples;
const unsigned int sad =
- cpi->fn_ptr[plane_bsize].sdf(src, src_stride, dst, dst_stride);
+ cpi->ppi->fn_ptr[plane_bsize].sdf(src, src_stride, dst, dst_stride);
const double sad_norm =
(double)sad / (1 << num_pels_log2_lookup[plane_bsize]);
@@ -1183,7 +1183,7 @@ static unsigned pixel_dist_visible_only(
unsigned sse;
if (txb_rows == visible_rows && txb_cols == visible_cols) {
- cpi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &sse);
+ cpi->ppi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &sse);
return sse;
}
@@ -2024,9 +2024,15 @@ get_tx_mask(const AV1_COMP *cpi, MACROBLOCK *x, int plane, int block,
assert(plane == 0);
allowed_tx_mask = ext_tx_used_flag;
int num_allowed = 0;
- const FRAME_UPDATE_TYPE update_type = get_frame_update_type(&cpi->gf_group);
- const int *tx_type_probs =
- cpi->frame_probs.tx_type_probs[update_type][tx_size];
+ const FRAME_UPDATE_TYPE update_type =
+ get_frame_update_type(&cpi->ppi->gf_group, cpi->gf_frame_index);
+ int *tx_type_probs;
+#if CONFIG_FRAME_PARALLEL_ENCODE
+ tx_type_probs =
+ (int *)cpi->ppi->temp_frame_probs.tx_type_probs[update_type][tx_size];
+#else
+ tx_type_probs = (int *)cpi->frame_probs.tx_type_probs[update_type][tx_size];
+#endif
int i;
if (cpi->sf.tx_sf.tx_type_search.prune_tx_type_using_stats) {
@@ -2097,25 +2103,8 @@ get_tx_mask(const AV1_COMP *cpi, MACROBLOCK *x, int plane, int block,
#if CONFIG_RD_DEBUG
static INLINE void update_txb_coeff_cost(RD_STATS *rd_stats, int plane,
- TX_SIZE tx_size, int blk_row,
- int blk_col, int txb_coeff_cost) {
- (void)blk_row;
- (void)blk_col;
- (void)tx_size;
+ int txb_coeff_cost) {
rd_stats->txb_coeff_cost[plane] += txb_coeff_cost;
-
- {
- const int txb_h = tx_size_high_unit[tx_size];
- const int txb_w = tx_size_wide_unit[tx_size];
- int idx, idy;
- for (idy = 0; idy < txb_h; ++idy)
- for (idx = 0; idx < txb_w; ++idx)
- rd_stats->txb_coeff_cost_map[plane][blk_row + idy][blk_col + idx] = 0;
-
- rd_stats->txb_coeff_cost_map[plane][blk_row][blk_col] = txb_coeff_cost;
- }
- assert(blk_row < TXB_COEFF_COST_MAP_SIZE);
- assert(blk_col < TXB_COEFF_COST_MAP_SIZE);
}
#endif
@@ -2674,8 +2663,7 @@ static AOM_INLINE void try_tx_block_no_split(
RDCOST(x->rdmult, zero_blk_rate, rd_stats->sse));
if (pick_skip_txfm) {
#if CONFIG_RD_DEBUG
- update_txb_coeff_cost(rd_stats, 0, tx_size, blk_row, blk_col,
- zero_blk_rate - rd_stats->rate);
+ update_txb_coeff_cost(rd_stats, 0, zero_blk_rate - rd_stats->rate);
#endif // CONFIG_RD_DEBUG
rd_stats->rate = zero_blk_rate;
rd_stats->dist = rd_stats->sse;
@@ -2720,11 +2708,12 @@ static AOM_INLINE void try_tx_block_split(
x->mode_costs.txfm_partition_cost[txfm_partition_ctx][1];
for (int r = 0, blk_idx = 0; r < txb_height; r += sub_txb_height) {
+ const int offsetr = blk_row + r;
+ if (offsetr >= max_blocks_high) break;
for (int c = 0; c < txb_width; c += sub_txb_width, ++blk_idx) {
assert(blk_idx < 4);
- const int offsetr = blk_row + r;
const int offsetc = blk_col + c;
- if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
+ if (offsetc >= max_blocks_wide) continue;
RD_STATS this_rd_stats;
int this_cost_valid = 1;
@@ -3173,8 +3162,7 @@ static AOM_INLINE void block_rd_txfm(int plane, int block, int blk_row,
}
#if CONFIG_RD_DEBUG
- update_txb_coeff_cost(&this_rd_stats, plane, tx_size, blk_row, blk_col,
- this_rd_stats.rate);
+ update_txb_coeff_cost(&this_rd_stats, plane, this_rd_stats.rate);
#endif // CONFIG_RD_DEBUG
av1_set_txb_context(x, plane, block, tx_size, a, l);
@@ -3452,15 +3440,18 @@ static AOM_INLINE void tx_block_yrd(
const int txb_width = tx_size_wide_unit[sub_txs];
const int txb_height = tx_size_high_unit[sub_txs];
const int step = txb_height * txb_width;
+ const int row_end =
+ AOMMIN(tx_size_high_unit[tx_size], max_blocks_high - blk_row);
+ const int col_end =
+ AOMMIN(tx_size_wide_unit[tx_size], max_blocks_wide - blk_col);
RD_STATS pn_rd_stats;
int64_t this_rd = 0;
assert(txb_width > 0 && txb_height > 0);
- for (int row = 0; row < tx_size_high_unit[tx_size]; row += txb_height) {
- for (int col = 0; col < tx_size_wide_unit[tx_size]; col += txb_width) {
- const int offsetr = blk_row + row;
+ for (int row = 0; row < row_end; row += txb_height) {
+ const int offsetr = blk_row + row;
+ for (int col = 0; col < col_end; col += txb_width) {
const int offsetc = blk_col + col;
- if (offsetr >= max_blocks_high || offsetc >= max_blocks_wide) continue;
av1_init_rd_stats(&pn_rd_stats);
tx_block_yrd(cpi, x, offsetr, offsetc, block, sub_txs, plane_bsize,
diff --git a/third_party/libaom/source/libaom/av1/encoder/txb_rdopt.c b/third_party/libaom/source/libaom/av1/encoder/txb_rdopt.c
index 31b86abe64..884d0a9e8b 100644
--- a/third_party/libaom/source/libaom/av1/encoder/txb_rdopt.c
+++ b/third_party/libaom/source/libaom/av1/encoder/txb_rdopt.c
@@ -327,16 +327,8 @@ int av1_optimize_txb(const struct AV1_COMP *cpi, MACROBLOCK *x, int plane,
const LV_MAP_EOB_COST *txb_eob_costs =
&coeff_costs->eob_costs[eob_multi_size][plane_type];
- const int rshift =
- (sharpness +
- (cpi->oxcf.q_cfg.aq_mode == VARIANCE_AQ && mbmi->segment_id < 4
- ? 7 - mbmi->segment_id
- : 2) +
- (cpi->oxcf.q_cfg.aq_mode != VARIANCE_AQ &&
- cpi->oxcf.q_cfg.deltaq_mode == DELTA_Q_PERCEPTUAL &&
- cm->delta_q_info.delta_q_present_flag && x->sb_energy_level < 0
- ? (3 - x->sb_energy_level)
- : 0));
+ const int rshift = sharpness + 2;
+
const int64_t rdmult =
(((int64_t)x->rdmult *
(plane_rd_mult[is_inter][plane_type] << (2 * (xd->bd - 8)))) +
diff --git a/third_party/libaom/source/libaom/av1/encoder/txb_rdopt.h b/third_party/libaom/source/libaom/av1/encoder/txb_rdopt.h
index e86caaa06e..70b322a2e1 100644
--- a/third_party/libaom/source/libaom/av1/encoder/txb_rdopt.h
+++ b/third_party/libaom/source/libaom/av1/encoder/txb_rdopt.h
@@ -44,11 +44,11 @@ extern "C" {
* skip flag (tx_skip) and the sign of DC coefficient (dc_sign).
* \param[out] rate_cost The entropy cost of coding the transform block
* after adjustment of coefficients.
- * \param[in] sharpness When sharpness == 1, the function will be less
- * aggressive toward lowering the magnitude of coefficients.
+ * \param[in] sharpness When sharpness > 0, the function will be less
+ * aggressive towards lowering the magnitude of coefficients.
* In this way, the transform block will contain more high-frequency
- coefficients
- * and therefore preserve the sharpness of the reconstructed block.
+ * coefficients and therefore will preserve the sharpness of the reconstructed
+ * block.
*/
int av1_optimize_txb(const struct AV1_COMP *cpi, MACROBLOCK *x, int plane,
int block, TX_SIZE tx_size, TX_TYPE tx_type,
diff --git a/third_party/libaom/source/libaom/av1/encoder/var_based_part.c b/third_party/libaom/source/libaom/av1/encoder/var_based_part.c
index a42be4553f..8907d0d0ba 100644
--- a/third_party/libaom/source/libaom/av1/encoder/var_based_part.c
+++ b/third_party/libaom/source/libaom/av1/encoder/var_based_part.c
@@ -341,7 +341,7 @@ static int64_t scale_part_thresh_content(int64_t threshold_base, int speed,
static AOM_INLINE void set_vbp_thresholds(AV1_COMP *cpi, int64_t thresholds[],
int q, int content_lowsumdiff,
- int segment_id) {
+ int source_sad, int segment_id) {
AV1_COMMON *const cm = &cpi->common;
const int is_key_frame = frame_is_intra_only(cm);
const int threshold_multiplier = is_key_frame ? 120 : 1;
@@ -394,7 +394,6 @@ static AOM_INLINE void set_vbp_thresholds(AV1_COMP *cpi, int64_t thresholds[],
scale_part_thresh_content(threshold_base, cpi->oxcf.speed, cm->width,
cm->height, cpi->svc.non_reference_frame);
#endif
-
thresholds[0] = threshold_base >> 1;
thresholds[1] = threshold_base;
thresholds[3] = threshold_base << cpi->oxcf.speed;
@@ -436,20 +435,45 @@ static AOM_INLINE void set_vbp_thresholds(AV1_COMP *cpi, int64_t thresholds[],
thresholds[2] = (5 * threshold_base) >> 1;
}
if (cpi->sf.rt_sf.force_large_partition_blocks) {
+ double weight;
+ const int win = 20;
+ if (current_qindex < QINDEX_LARGE_BLOCK_THR - win)
+ weight = 1.0;
+ else if (current_qindex > QINDEX_LARGE_BLOCK_THR + win)
+ weight = 0.0;
+ else
+ weight =
+ 1.0 - (current_qindex - QINDEX_LARGE_BLOCK_THR + win) / (2 * win);
+ if (cm->width * cm->height > 640 * 480) {
+ for (int i = 0; i < 4; i++) {
+ thresholds[i] <<= 1;
+ }
+ }
if (cm->width * cm->height <= 352 * 288) {
thresholds[1] <<= 2;
thresholds[2] <<= 5;
thresholds[3] = INT32_MAX;
- } else if (cm->width * cm->height > 640 * 480 && segment_id == 0) {
+ // Condition the increase of partition thresholds on the segment
+ // and the content. Avoid the increase for superblocks which have
+ // high source sad, unless the whole frame has very high motion
+ // (i.e, cpi->rc.avg_source_sad is very large, in which case all blocks
+ // have high source sad).
+ } else if (cm->width * cm->height > 640 * 480 && segment_id == 0 &&
+ (source_sad != kHighSad || cpi->rc.avg_source_sad > 50000)) {
thresholds[0] = (3 * thresholds[0]) >> 1;
thresholds[3] = INT32_MAX;
- if (current_qindex >= QINDEX_LARGE_BLOCK_THR) {
- thresholds[1] <<= 1;
- thresholds[2] <<= 1;
+ if (current_qindex > QINDEX_LARGE_BLOCK_THR) {
+ thresholds[1] = (int)((1 - weight) * (thresholds[1] << 1) +
+ weight * thresholds[1]);
+ thresholds[2] = (int)((1 - weight) * (thresholds[2] << 1) +
+ weight * thresholds[2]);
}
- } else if (current_qindex > QINDEX_LARGE_BLOCK_THR && segment_id == 0) {
- thresholds[1] <<= 2;
- thresholds[2] <<= 5;
+ } else if (current_qindex > QINDEX_LARGE_BLOCK_THR && segment_id == 0 &&
+ (source_sad != kHighSad || cpi->rc.avg_source_sad > 50000)) {
+ thresholds[1] =
+ (int)((1 - weight) * (thresholds[1] << 2) + weight * thresholds[1]);
+ thresholds[2] =
+ (int)((1 - weight) * (thresholds[2] << 4) + weight * thresholds[2]);
thresholds[3] = INT32_MAX;
}
}
@@ -605,7 +629,7 @@ static AOM_INLINE void set_low_temp_var_flag(
xd->mi[0]->mv[0].as_mv.col > -mv_thr &&
xd->mi[0]->mv[0].as_mv.row < mv_thr &&
xd->mi[0]->mv[0].as_mv.row > -mv_thr))) {
- const int is_small_sb = (cm->seq_params.sb_size == BLOCK_64X64);
+ const int is_small_sb = (cm->seq_params->sb_size == BLOCK_64X64);
if (is_small_sb)
set_low_temp_var_flag_64x64(&cm->mi_params, part_info, xd,
&(vt->split[0]), thresholds, mi_col, mi_row);
@@ -621,7 +645,8 @@ void av1_set_variance_partition_thresholds(AV1_COMP *cpi, int q,
if (sf->part_sf.partition_search_type != VAR_BASED_PARTITION) {
return;
} else {
- set_vbp_thresholds(cpi, cpi->vbp_info.thresholds, q, content_lowsumdiff, 0);
+ set_vbp_thresholds(cpi, cpi->vbp_info.thresholds, q, content_lowsumdiff, 0,
+ 0);
// The threshold below is not changed locally.
cpi->vbp_info.threshold_minmax = 15 + (q >> 3);
}
@@ -643,10 +668,17 @@ static AOM_INLINE void chroma_check(AV1_COMP *cpi, MACROBLOCK *x,
get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
if (bs != BLOCK_INVALID)
- uv_sad = cpi->fn_ptr[bs].sdf(p->src.buf, p->src.stride, pd->dst.buf,
- pd->dst.stride);
-
- x->color_sensitivity[i - 1] = uv_sad > (y_sad >> 2);
+ uv_sad = cpi->ppi->fn_ptr[bs].sdf(p->src.buf, p->src.stride, pd->dst.buf,
+ pd->dst.stride);
+
+ if (uv_sad > (y_sad >> 1))
+ x->color_sensitivity_sb[i - 1] = 1;
+ else if (uv_sad < (y_sad >> 3))
+ x->color_sensitivity_sb[i - 1] = 0;
+ // Borderline case: to be refined at coding block level in nonrd_pickmode,
+ // for coding block size < sb_size.
+ else
+ x->color_sensitivity_sb[i - 1] = 2;
}
}
@@ -658,7 +690,7 @@ static void fill_variance_tree_leaves(
AV1_COMMON *cm = &cpi->common;
MACROBLOCKD *xd = &x->e_mbd;
const int is_key_frame = frame_is_intra_only(cm);
- const int is_small_sb = (cm->seq_params.sb_size == BLOCK_64X64);
+ const int is_small_sb = (cm->seq_params->sb_size == BLOCK_64X64);
const int num_64x64_blocks = is_small_sb ? 1 : 4;
// TODO(kyslov) Bring back compute_minmax_variance with content type detection
const int compute_minmax_variance = 0;
@@ -772,7 +804,7 @@ static void setup_planes(AV1_COMP *cpi, MACROBLOCK *x, unsigned int *y_sad,
AV1_COMMON *const cm = &cpi->common;
MACROBLOCKD *xd = &x->e_mbd;
const int num_planes = av1_num_planes(cm);
- const int is_small_sb = (cm->seq_params.sb_size == BLOCK_64X64);
+ const int is_small_sb = (cm->seq_params->sb_size == BLOCK_64X64);
BLOCK_SIZE bsize = is_small_sb ? BLOCK_64X64 : BLOCK_128X128;
// TODO(kyslov): we are assuming that the ref is LAST_FRAME! Check if it
// is!!
@@ -783,13 +815,13 @@ static void setup_planes(AV1_COMP *cpi, MACROBLOCK *x, unsigned int *y_sad,
// For non-SVC GOLDEN is another temporal reference. Check if it should be
// used as reference for partitioning.
- if (!cpi->use_svc && (cpi->ref_frame_flags & AOM_GOLD_FLAG) &&
+ if (!cpi->ppi->use_svc && (cpi->ref_frame_flags & AOM_GOLD_FLAG) &&
cpi->sf.rt_sf.use_nonrd_pick_mode) {
yv12_g = get_ref_frame_yv12_buf(cm, GOLDEN_FRAME);
if (yv12_g && yv12_g != yv12) {
av1_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
get_ref_scale_factors(cm, GOLDEN_FRAME), num_planes);
- *y_sad_g = cpi->fn_ptr[bsize].sdf(
+ *y_sad_g = cpi->ppi->fn_ptr[bsize].sdf(
x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf,
xd->plane[0].pre[0].stride);
}
@@ -799,20 +831,20 @@ static void setup_planes(AV1_COMP *cpi, MACROBLOCK *x, unsigned int *y_sad,
get_ref_scale_factors(cm, LAST_FRAME), num_planes);
mi->ref_frame[0] = LAST_FRAME;
mi->ref_frame[1] = NONE_FRAME;
- mi->bsize = cm->seq_params.sb_size;
+ mi->bsize = cm->seq_params->sb_size;
mi->mv[0].as_int = 0;
mi->interp_filters = av1_broadcast_interp_filter(BILINEAR);
if (cpi->sf.rt_sf.estimate_motion_for_var_based_partition) {
if (xd->mb_to_right_edge >= 0 && xd->mb_to_bottom_edge >= 0) {
const MV dummy_mv = { 0, 0 };
- *y_sad = av1_int_pro_motion_estimation(cpi, x, cm->seq_params.sb_size,
+ *y_sad = av1_int_pro_motion_estimation(cpi, x, cm->seq_params->sb_size,
mi_row, mi_col, &dummy_mv);
}
}
if (*y_sad == UINT_MAX) {
- *y_sad = cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf, x->plane[0].src.stride,
- xd->plane[0].pre[0].buf,
- xd->plane[0].pre[0].stride);
+ *y_sad = cpi->ppi->fn_ptr[bsize].sdf(
+ x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf,
+ xd->plane[0].pre[0].stride);
}
// Pick the ref frame for partitioning, use golden frame only if its
@@ -834,7 +866,7 @@ static void setup_planes(AV1_COMP *cpi, MACROBLOCK *x, unsigned int *y_sad,
set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL,
- cm->seq_params.sb_size, AOM_PLANE_Y,
+ cm->seq_params->sb_size, AOM_PLANE_Y,
AOM_PLANE_Y);
}
@@ -869,12 +901,12 @@ int av1_choose_var_based_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
int is_key_frame =
(frame_is_intra_only(cm) ||
- (cpi->use_svc &&
+ (cpi->ppi->use_svc &&
cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame));
- assert(cm->seq_params.sb_size == BLOCK_64X64 ||
- cm->seq_params.sb_size == BLOCK_128X128);
- const int is_small_sb = (cm->seq_params.sb_size == BLOCK_64X64);
+ assert(cm->seq_params->sb_size == BLOCK_64X64 ||
+ cm->seq_params->sb_size == BLOCK_128X128);
+ const int is_small_sb = (cm->seq_params->sb_size == BLOCK_64X64);
const int num_64x64_blocks = is_small_sb ? 1 : 4;
unsigned int y_sad = UINT_MAX;
@@ -900,10 +932,12 @@ int av1_choose_var_based_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
cyclic_refresh_segment_id_boosted(segment_id) &&
cpi->sf.rt_sf.use_nonrd_pick_mode) {
int q = av1_get_qindex(&cm->seg, segment_id, cm->quant_params.base_qindex);
- set_vbp_thresholds(cpi, thresholds, q, x->content_state_sb.low_sumdiff, 1);
+ set_vbp_thresholds(cpi, thresholds, q, x->content_state_sb.low_sumdiff,
+ x->content_state_sb.source_sad, 1);
} else {
set_vbp_thresholds(cpi, thresholds, cm->quant_params.base_qindex,
- x->content_state_sb.low_sumdiff, 0);
+ x->content_state_sb.low_sumdiff,
+ x->content_state_sb.source_sad, 0);
}
// For non keyframes, disable 4x4 average for low resolution when speed = 8
@@ -1025,7 +1059,7 @@ int av1_choose_var_based_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
if (!is_key_frame &&
(max_var_32x32[m] - min_var_32x32[m]) > 3 * (thresholds[1] >> 3) &&
max_var_32x32[m] > thresholds[1] >> 1 &&
- (noise_level >= kMedium || cpi->use_svc ||
+ (noise_level >= kMedium || cpi->ppi->use_svc ||
cpi->sf.rt_sf.force_large_partition_blocks ||
!cpi->sf.rt_sf.use_nonrd_pick_mode)) {
force_split[1 + m] = 1;
diff --git a/third_party/libaom/source/libaom/av1/encoder/x86/highbd_temporal_filter_avx2.c b/third_party/libaom/source/libaom/av1/encoder/x86/highbd_temporal_filter_avx2.c
index b5477ec9ba..68509fa106 100644
--- a/third_party/libaom/source/libaom/av1/encoder/x86/highbd_temporal_filter_avx2.c
+++ b/third_party/libaom/source/libaom/av1/encoder/x86/highbd_temporal_filter_avx2.c
@@ -352,10 +352,16 @@ void av1_highbd_apply_temporal_filter_avx2(
TF_SEARCH_ERROR_NORM_WEIGHT);
const double weight_factor =
(double)TF_WINDOW_BLOCK_BALANCE_WEIGHT * inv_factor;
- // Decay factors for non-local mean approach.
- // Smaller q -> smaller filtering weight.
+ // Adjust filtering based on q.
+ // Larger q -> stronger filtering -> larger weight.
+ // Smaller q -> weaker filtering -> smaller weight.
double q_decay = pow((double)q_factor / TF_Q_DECAY_THRESHOLD, 2);
q_decay = CLIP(q_decay, 1e-5, 1);
+ if (q_factor >= TF_QINDEX_CUTOFF) {
+ // Max q_factor is 255, therefore the upper bound of q_decay is 8.
+ // We do not need a clip here.
+ q_decay = 0.5 * pow((double)q_factor / 64, 2);
+ }
// Smaller strength -> smaller filtering weight.
double s_decay = pow((double)filter_strength / TF_STRENGTH_THRESHOLD, 2);
s_decay = CLIP(s_decay, 1e-5, 1);
@@ -393,6 +399,7 @@ void av1_highbd_apply_temporal_filter_avx2(
const double inv_num_ref_pixels = 1.0 / num_ref_pixels;
// Larger noise -> larger filtering weight.
const double n_decay = 0.5 + log(2 * noise_levels[plane] + 5.0);
+ // Decay factors for non-local mean approach.
const double decay_factor = 1 / (n_decay * q_decay * s_decay);
// Filter U-plane and V-plane using Y-plane. This is because motion
diff --git a/third_party/libaom/source/libaom/av1/encoder/x86/highbd_temporal_filter_sse2.c b/third_party/libaom/source/libaom/av1/encoder/x86/highbd_temporal_filter_sse2.c
index bbb3771543..1bfdaf72e1 100644
--- a/third_party/libaom/source/libaom/av1/encoder/x86/highbd_temporal_filter_sse2.c
+++ b/third_party/libaom/source/libaom/av1/encoder/x86/highbd_temporal_filter_sse2.c
@@ -227,10 +227,16 @@ void av1_highbd_apply_temporal_filter_sse2(
TF_SEARCH_ERROR_NORM_WEIGHT);
const double weight_factor =
(double)TF_WINDOW_BLOCK_BALANCE_WEIGHT * inv_factor;
- // Decay factors for non-local mean approach.
- // Smaller q -> smaller filtering weight.
+ // Adjust filtering based on q.
+ // Larger q -> stronger filtering -> larger weight.
+ // Smaller q -> weaker filtering -> smaller weight.
double q_decay = pow((double)q_factor / TF_Q_DECAY_THRESHOLD, 2);
q_decay = CLIP(q_decay, 1e-5, 1);
+ if (q_factor >= TF_QINDEX_CUTOFF) {
+ // Max q_factor is 255, therefore the upper bound of q_decay is 8.
+ // We do not need a clip here.
+ q_decay = 0.5 * pow((double)q_factor / 64, 2);
+ }
// Smaller strength -> smaller filtering weight.
double s_decay = pow((double)filter_strength / TF_STRENGTH_THRESHOLD, 2);
s_decay = CLIP(s_decay, 1e-5, 1);
@@ -268,6 +274,7 @@ void av1_highbd_apply_temporal_filter_sse2(
const double inv_num_ref_pixels = 1.0 / num_ref_pixels;
// Larger noise -> larger filtering weight.
const double n_decay = 0.5 + log(2 * noise_levels[plane] + 5.0);
+ // Decay factors for non-local mean approach.
const double decay_factor = 1 / (n_decay * q_decay * s_decay);
// Filter U-plane and V-plane using Y-plane. This is because motion
diff --git a/third_party/libaom/source/libaom/av1/encoder/x86/temporal_filter_avx2.c b/third_party/libaom/source/libaom/av1/encoder/x86/temporal_filter_avx2.c
index 72914e1781..8aa07641aa 100644
--- a/third_party/libaom/source/libaom/av1/encoder/x86/temporal_filter_avx2.c
+++ b/third_party/libaom/source/libaom/av1/encoder/x86/temporal_filter_avx2.c
@@ -238,10 +238,16 @@ void av1_apply_temporal_filter_avx2(
TF_SEARCH_ERROR_NORM_WEIGHT);
const double weight_factor =
(double)TF_WINDOW_BLOCK_BALANCE_WEIGHT * inv_factor;
- // Decay factors for non-local mean approach.
- // Smaller q -> smaller filtering weight.
+ // Adjust filtering based on q.
+ // Larger q -> stronger filtering -> larger weight.
+ // Smaller q -> weaker filtering -> smaller weight.
double q_decay = pow((double)q_factor / TF_Q_DECAY_THRESHOLD, 2);
q_decay = CLIP(q_decay, 1e-5, 1);
+ if (q_factor >= TF_QINDEX_CUTOFF) {
+ // Max q_factor is 255, therefore the upper bound of q_decay is 8.
+ // We do not need a clip here.
+ q_decay = 0.5 * pow((double)q_factor / 64, 2);
+ }
// Smaller strength -> smaller filtering weight.
double s_decay = pow((double)filter_strength / TF_STRENGTH_THRESHOLD, 2);
s_decay = CLIP(s_decay, 1e-5, 1);
@@ -277,6 +283,7 @@ void av1_apply_temporal_filter_avx2(
const double inv_num_ref_pixels = 1.0 / num_ref_pixels;
// Larger noise -> larger filtering weight.
const double n_decay = 0.5 + log(2 * noise_levels[plane] + 5.0);
+ // Decay factors for non-local mean approach.
const double decay_factor = 1 / (n_decay * q_decay * s_decay);
// Filter U-plane and V-plane using Y-plane. This is because motion
diff --git a/third_party/libaom/source/libaom/av1/encoder/x86/temporal_filter_sse2.c b/third_party/libaom/source/libaom/av1/encoder/x86/temporal_filter_sse2.c
index d70792c644..26c3926dca 100644
--- a/third_party/libaom/source/libaom/av1/encoder/x86/temporal_filter_sse2.c
+++ b/third_party/libaom/source/libaom/av1/encoder/x86/temporal_filter_sse2.c
@@ -215,10 +215,16 @@ void av1_apply_temporal_filter_sse2(
TF_SEARCH_ERROR_NORM_WEIGHT);
const double weight_factor =
(double)TF_WINDOW_BLOCK_BALANCE_WEIGHT * inv_factor;
- // Decay factors for non-local mean approach.
- // Smaller q -> smaller filtering weight.
+ // Adjust filtering based on q.
+ // Larger q -> stronger filtering -> larger weight.
+ // Smaller q -> weaker filtering -> smaller weight.
double q_decay = pow((double)q_factor / TF_Q_DECAY_THRESHOLD, 2);
q_decay = CLIP(q_decay, 1e-5, 1);
+ if (q_factor >= TF_QINDEX_CUTOFF) {
+ // Max q_factor is 255, therefore the upper bound of q_decay is 8.
+ // We do not need a clip here.
+ q_decay = 0.5 * pow((double)q_factor / 64, 2);
+ }
// Smaller strength -> smaller filtering weight.
double s_decay = pow((double)filter_strength / TF_STRENGTH_THRESHOLD, 2);
s_decay = CLIP(s_decay, 1e-5, 1);
@@ -254,6 +260,7 @@ void av1_apply_temporal_filter_sse2(
const double inv_num_ref_pixels = 1.0 / num_ref_pixels;
// Larger noise -> larger filtering weight.
const double n_decay = 0.5 + log(2 * noise_levels[plane] + 5.0);
+ // Decay factors for non-local mean approach.
const double decay_factor = 1 / (n_decay * q_decay * s_decay);
// Filter U-plane and V-plane using Y-plane. This is because motion