diff options
author | Erwin Jansen <jansene@google.com> | 2021-06-30 07:29:26 +0000 |
---|---|---|
committer | Gerrit Code Review <noreply-gerritcodereview@google.com> | 2021-06-30 07:29:26 +0000 |
commit | 059cdc5996938f5f6b5343b6c969c12098275587 (patch) | |
tree | 6eacaffe4bebf8e00c290c1e1839e084b0c52e88 /third_party/libaom/source/libaom/av1/encoder/speed_features.c | |
parent | 97e54a7e73c7b24e464ef06ef3c3b3716f21bb15 (diff) | |
parent | 16be34ae72cdb525c88c2b31b21b976f35fe36d8 (diff) | |
download | webrtc-059cdc5996938f5f6b5343b6c969c12098275587.tar.gz |
Merge "Merge upstream-master and enable ARM64" into emu-master-dev (refs: emu-31-stable-release, emu-31-release)
Diffstat (limited to 'third_party/libaom/source/libaom/av1/encoder/speed_features.c')
-rw-r--r-- | third_party/libaom/source/libaom/av1/encoder/speed_features.c | 274 |
1 files changed, 201 insertions, 73 deletions
diff --git a/third_party/libaom/source/libaom/av1/encoder/speed_features.c b/third_party/libaom/source/libaom/av1/encoder/speed_features.c index 2244aaae91..916a818513 100644 --- a/third_party/libaom/source/libaom/av1/encoder/speed_features.c +++ b/third_party/libaom/source/libaom/av1/encoder/speed_features.c @@ -274,6 +274,20 @@ static void set_allintra_speed_feature_framesize_dependent( sf->part_sf.use_square_partition_only_threshold = BLOCK_16X16; } + + if (speed >= 7) { + if (!is_480p_or_larger) { + sf->rt_sf.nonrd_check_partition_merge_mode = 2; + } + } + + if (speed >= 8) { + // TODO(kyslov): add more speed features to control speed/quality + } + + if (speed >= 9) { + // TODO(kyslov): add more speed features to control speed/quality + } } static void set_allintra_speed_features_framesize_independent( @@ -289,8 +303,11 @@ static void set_allintra_speed_features_framesize_independent( sf->part_sf.prune_part4_search = 2; sf->part_sf.simple_motion_search_prune_rect = 1; sf->part_sf.ml_predict_breakout_level = use_hbd ? 1 : 3; + sf->part_sf.reuse_prev_rd_results_for_part_ab = 1; + sf->part_sf.use_best_rd_for_pruning = 1; sf->intra_sf.intra_pruning_with_hog = 1; + sf->intra_sf.dv_cost_upd_level = INTERNAL_COST_UPD_OFF; sf->tx_sf.adaptive_txb_search_level = 1; sf->tx_sf.intra_tx_size_search_init_depth_sqr = 1; @@ -300,7 +317,7 @@ static void set_allintra_speed_features_framesize_independent( sf->rt_sf.use_nonrd_pick_mode = 0; sf->rt_sf.use_real_time_ref_set = 0; - if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION || + if (cpi->ppi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION || cpi->use_screen_content_tools) { sf->mv_sf.exhaustive_searches_thresh = (1 << 20); } else { @@ -318,10 +335,12 @@ static void set_allintra_speed_features_framesize_independent( // speed feature accordingly sf->part_sf.simple_motion_search_split = allow_screen_content_tools ? 1 : 2; sf->part_sf.ml_predict_breakout_level = use_hbd ? 
2 : 3; + sf->part_sf.reuse_best_prediction_for_part_ab = 1; sf->mv_sf.exhaustive_searches_thresh <<= 1; sf->intra_sf.prune_palette_search_level = 1; + sf->intra_sf.top_intra_model_count_allowed = 3; sf->tx_sf.adaptive_txb_search_level = 2; sf->tx_sf.inter_tx_size_search_init_depth_rect = 1; @@ -348,6 +367,7 @@ static void set_allintra_speed_features_framesize_independent( sf->intra_sf.disable_smooth_intra = 1; sf->intra_sf.intra_pruning_with_hog = 2; + sf->intra_sf.prune_filter_intra_level = 1; sf->rd_sf.perform_coeff_opt = 3; @@ -397,9 +417,6 @@ static void set_allintra_speed_features_framesize_independent( sf->intra_sf.intra_uv_mode_mask[TX_16X16] = UV_INTRA_DC_H_V_CFL; sf->intra_sf.intra_uv_mode_mask[TX_32X32] = UV_INTRA_DC_H_V_CFL; sf->intra_sf.intra_uv_mode_mask[TX_64X64] = UV_INTRA_DC_H_V_CFL; - sf->intra_sf.intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; - sf->intra_sf.intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_sf.intra_y_mode_mask[TX_64X64] = INTRA_DC_H_V; sf->intra_sf.prune_chroma_modes_using_luma_winner = 1; sf->mv_sf.simple_motion_subpel_force_stop = HALF_PEL; @@ -408,7 +425,7 @@ static void set_allintra_speed_features_framesize_independent( sf->tpl_sf.subpel_force_stop = HALF_PEL; sf->tpl_sf.search_method = FAST_BIGDIA; - sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 1; + sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 2; sf->tx_sf.tx_type_search.fast_intra_tx_type_search = 1; sf->tx_sf.tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_3; sf->tx_sf.tx_type_search.prune_tx_type_est_rd = 1; @@ -443,9 +460,10 @@ static void set_allintra_speed_features_framesize_independent( } if (speed >= 6) { - sf->intra_sf.disable_filter_intra = 1; + sf->intra_sf.prune_filter_intra_level = 2; sf->intra_sf.chroma_intra_pruning_with_hog = 4; sf->intra_sf.intra_pruning_with_hog = 4; + sf->intra_sf.cfl_search_range = 1; sf->part_sf.prune_rectangular_split_based_on_qidx = allow_screen_content_tools ? 
0 : 1; @@ -458,7 +476,7 @@ static void set_allintra_speed_features_framesize_independent( sf->mv_sf.use_bsize_dependent_search_method = 1; - sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 2; + sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 3; sf->tx_sf.tx_type_search.prune_tx_type_est_rd = 0; // Use largest txfm block size for square coding blocks. sf->tx_sf.intra_tx_size_search_init_depth_sqr = 2; @@ -466,10 +484,39 @@ static void set_allintra_speed_features_framesize_independent( sf->rd_sf.perform_coeff_opt = 6; sf->lpf_sf.cdef_pick_method = CDEF_FAST_SEARCH_LVL4; + sf->lpf_sf.lpf_pick = LPF_PICK_FROM_Q; sf->winner_mode_sf.multi_winner_mode_type = MULTI_WINNER_MODE_OFF; } + if (speed >= 7) { + sf->part_sf.default_min_partition_size = BLOCK_8X8; + sf->part_sf.partition_search_type = VAR_BASED_PARTITION; + + sf->lpf_sf.cdef_pick_method = CDEF_PICK_FROM_Q; + + sf->rt_sf.mode_search_skip_flags |= FLAG_SKIP_INTRA_DIRMISMATCH; + sf->rt_sf.use_nonrd_pick_mode = 1; + sf->rt_sf.nonrd_check_partition_merge_mode = 1; + sf->rt_sf.nonrd_check_partition_split = 0; + sf->rt_sf.skip_intra_pred_if_tx_skip = 1; + // Set mask for intra modes. + for (int i = 0; i < BLOCK_SIZES; ++i) + if (i >= BLOCK_32X32) + sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC; + else + // Use DC, H, V intra mode for block sizes < 32X32. + sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC_H_V; + } + + if (speed >= 8) { + // TODO(kyslov): add more speed features to control speed/quality + } + + if (speed >= 9) { + // TODO(kyslov): add more speed features to control speed/quality + } + // Intra txb hash is currently not compatible with multi-winner mode as the // hashes got reset during multi-winner mode processing. 
assert(IMPLIES( @@ -480,6 +527,7 @@ static void set_allintra_speed_features_framesize_independent( static void set_good_speed_feature_framesize_dependent( const AV1_COMP *const cpi, SPEED_FEATURES *const sf, int speed) { const AV1_COMMON *const cm = &cpi->common; + const int is_480p_or_lesser = AOMMIN(cm->width, cm->height) <= 480; const int is_480p_or_larger = AOMMIN(cm->width, cm->height) >= 480; const int is_720p_or_larger = AOMMIN(cm->width, cm->height) >= 720; const int is_1080p_or_larger = AOMMIN(cm->width, cm->height) >= 1080; @@ -518,7 +566,16 @@ static void set_good_speed_feature_framesize_dependent( sf->mv_sf.use_downsampled_sad = 1; } + if (!is_720p_or_larger) { + const RateControlCfg *const rc_cfg = &cpi->oxcf.rc_cfg; + const int rate_tolerance = + AOMMIN(rc_cfg->under_shoot_pct, rc_cfg->over_shoot_pct); + sf->hl_sf.recode_tolerance = 25 + (rate_tolerance >> 2); + } + if (speed >= 1) { + if (is_480p_or_lesser) sf->inter_sf.skip_newmv_in_drl = 1; + if (is_720p_or_larger) { sf->part_sf.use_square_partition_only_threshold = BLOCK_128X128; } else if (is_480p_or_larger) { @@ -561,6 +618,12 @@ static void set_good_speed_feature_framesize_dependent( } if (is_480p_or_larger) { + sf->inter_sf.disable_interintra_wedge_var_thresh = 100; + } else { + sf->inter_sf.disable_interintra_wedge_var_thresh = UINT_MAX; + } + + if (is_480p_or_larger) { sf->tx_sf.tx_type_search.prune_tx_type_using_stats = 1; if (use_hbd) sf->tx_sf.prune_tx_size_level = 2; } else { @@ -573,6 +636,8 @@ static void set_good_speed_feature_framesize_dependent( } if (speed >= 3) { + sf->inter_sf.skip_newmv_in_drl = 2; + sf->part_sf.ml_early_term_after_part_split_level = 0; if (is_720p_or_larger) { @@ -584,6 +649,10 @@ static void set_good_speed_feature_framesize_dependent( sf->part_sf.partition_search_breakout_rate_thr = 120; } if (use_hbd) sf->tx_sf.prune_tx_size_level = 3; + + if (is_480p_or_larger) sf->intra_sf.top_intra_model_count_allowed = 2; + + 
sf->inter_sf.disable_interintra_wedge_var_thresh = UINT_MAX; } if (speed >= 4) { @@ -598,11 +667,14 @@ static void set_good_speed_feature_framesize_dependent( } sf->inter_sf.prune_obmc_prob_thresh = INT_MAX; + if (is_480p_or_lesser) sf->inter_sf.skip_newmv_in_drl = 3; if (is_720p_or_larger) sf->hl_sf.recode_tolerance = 32; else sf->hl_sf.recode_tolerance = 55; + + sf->intra_sf.top_intra_model_count_allowed = 2; } if (speed >= 5) { @@ -612,6 +684,8 @@ static void set_good_speed_feature_framesize_dependent( sf->inter_sf.prune_warped_prob_thresh = 8; } if (is_720p_or_larger) sf->hl_sf.recode_tolerance = 40; + + sf->inter_sf.skip_newmv_in_drl = 4; } if (speed >= 6) { @@ -630,7 +704,9 @@ static void set_good_speed_feature_framesize_dependent( } if (!is_720p_or_larger) { - sf->inter_sf.mv_cost_upd_level = 2; + sf->inter_sf.mv_cost_upd_level = INTERNAL_COST_UPD_SBROW_SET; + sf->inter_sf.coeff_cost_upd_level = INTERNAL_COST_UPD_SBROW; + sf->inter_sf.mode_cost_upd_level = INTERNAL_COST_UPD_SBROW; } if (is_720p_or_larger) { @@ -650,10 +726,10 @@ static void set_good_speed_feature_framesize_dependent( static void set_good_speed_features_framesize_independent( const AV1_COMP *const cpi, SPEED_FEATURES *const sf, int speed) { const AV1_COMMON *const cm = &cpi->common; - const GF_GROUP *const gf_group = &cpi->gf_group; + const GF_GROUP *const gf_group = &cpi->ppi->gf_group; const int boosted = frame_is_boosted(cpi); const int is_boosted_arf2_bwd_type = - boosted || gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE; + boosted || gf_group->update_type[cpi->gf_frame_index] == INTNL_ARF_UPDATE; const int allow_screen_content_tools = cm->features.allow_screen_content_tools; const int use_hbd = cpi->oxcf.use_highbitdepth; @@ -670,6 +746,8 @@ static void set_good_speed_features_framesize_independent( sf->part_sf.prune_part4_search = 2; sf->part_sf.simple_motion_search_prune_rect = 1; sf->part_sf.ml_predict_breakout_level = use_hbd ? 
1 : 3; + sf->part_sf.reuse_prev_rd_results_for_part_ab = 1; + sf->part_sf.use_best_rd_for_pruning = 1; // TODO(debargha): Test, tweak and turn on either 1 or 2 sf->inter_sf.inter_mode_rd_model_estimation = 1; @@ -698,7 +776,7 @@ static void set_good_speed_features_framesize_independent( sf->rt_sf.use_nonrd_pick_mode = 0; sf->rt_sf.use_real_time_ref_set = 0; - if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION || + if (cpi->ppi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION || cpi->use_screen_content_tools) { sf->mv_sf.exhaustive_searches_thresh = (1 << 20); } else { @@ -725,7 +803,6 @@ static void set_good_speed_features_framesize_independent( sf->mv_sf.use_accurate_subpel_search = USE_4_TAPS; sf->mv_sf.disable_extensive_joint_motion_search = 1; - sf->inter_sf.disable_interinter_wedge_newmv_search = boosted ? 0 : 1; sf->inter_sf.prune_comp_search_by_single_result = boosted ? 2 : 1; sf->inter_sf.prune_comp_type_by_comp_avg = 1; sf->inter_sf.prune_comp_type_by_model_rd = boosted ? 0 : 1; @@ -736,7 +813,6 @@ static void set_good_speed_features_framesize_independent( sf->inter_sf.reduce_inter_modes = boosted ? 1 : 3; sf->inter_sf.reuse_inter_intra_mode = 1; sf->inter_sf.selective_ref_frame = 2; - sf->inter_sf.skip_repeated_newmv = 1; sf->interp_sf.use_interp_filter = 1; @@ -766,7 +842,11 @@ static void set_good_speed_features_framesize_independent( if (speed >= 2) { sf->hl_sf.recode_loop = ALLOW_RECODE_KFARFGF; + sf->fp_sf.skip_motion_search_threshold = 25; + sf->part_sf.allow_partition_search_skip = 1; + sf->part_sf.reuse_best_prediction_for_part_ab = + !frame_is_intra_only(&cpi->common); sf->mv_sf.auto_mv_step_size = 1; sf->mv_sf.simple_motion_subpel_force_stop = QUARTER_PEL; @@ -778,20 +858,21 @@ static void set_good_speed_features_framesize_independent( // bit more closely to figure out why. 
sf->inter_sf.adaptive_rd_thresh = 1; sf->inter_sf.comp_inter_joint_search_thresh = BLOCK_SIZES_ALL; - sf->inter_sf.disable_interintra_wedge_var_thresh = 100; sf->inter_sf.disable_interinter_wedge_var_thresh = 100; sf->inter_sf.fast_interintra_wedge_search = 1; sf->inter_sf.prune_comp_search_by_single_result = boosted ? 4 : 1; - sf->inter_sf.prune_compound_using_neighbors = 1; + sf->inter_sf.prune_ext_comp_using_neighbors = 1; sf->inter_sf.prune_comp_using_best_single_mode_ref = 2; sf->inter_sf.prune_comp_type_by_comp_avg = 2; - sf->inter_sf.reuse_best_prediction_for_part_ab = 1; sf->inter_sf.selective_ref_frame = 3; sf->inter_sf.use_dist_wtd_comp_flag = DIST_WTD_COMP_DISABLED; // Enable fast search only for COMPOUND_DIFFWTD type. sf->inter_sf.enable_fast_compound_mode_search = 1; sf->inter_sf.reuse_mask_search_results = 1; sf->inter_sf.txfm_rd_gate_level = boosted ? 0 : 1; + sf->inter_sf.disable_interinter_wedge_newmv_search = + is_boosted_arf2_bwd_type ? 0 : 1; + sf->inter_sf.inter_mode_txfm_breakout = boosted ? 0 : 1; // TODO(Sachin): Enable/Enhance this speed feature for speed 2 & 3 sf->interp_sf.adaptive_interp_filter_search = 1; @@ -831,7 +912,8 @@ static void set_good_speed_features_framesize_independent( sf->mv_sf.search_method = DIAMOND; sf->mv_sf.disable_second_mv = 2; - sf->inter_sf.mv_cost_upd_level = 1; + sf->inter_sf.disable_interinter_wedge_newmv_search = boosted ? 0 : 1; + sf->inter_sf.mv_cost_upd_level = INTERNAL_COST_UPD_SBROW; sf->inter_sf.disable_onesided_comp = 1; // TODO(yunqing): evaluate this speed feature for speed 1 & 2, and combine // it with cpi->sf.disable_wedge_search_var_thresh. @@ -843,10 +925,11 @@ static void set_good_speed_features_framesize_independent( sf->inter_sf.prune_comp_search_by_single_result = boosted ? 4 : 2; sf->inter_sf.selective_ref_frame = 5; sf->inter_sf.skip_repeated_ref_mv = 1; - sf->inter_sf.skip_repeated_full_newmv = 1; sf->inter_sf.reuse_compound_type_decision = 1; sf->inter_sf.txfm_rd_gate_level = boosted ? 
0 : (is_boosted_arf2_bwd_type ? 1 : 2); + sf->inter_sf.enable_fast_wedge_mask_search = 1; + sf->inter_sf.inter_mode_txfm_breakout = boosted ? 0 : 2; sf->interp_sf.adaptive_interp_filter_search = 2; @@ -865,6 +948,8 @@ static void set_good_speed_features_framesize_independent( sf->tx_sf.adaptive_txb_search_level = boosted ? 2 : 3; sf->tx_sf.tx_type_search.use_skip_flag_prediction = 2; sf->tx_sf.use_intra_txb_hash = 1; + sf->tx_sf.tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_3; + sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 1; // TODO(any): Refactor the code related to following winner mode speed // features @@ -874,10 +959,10 @@ static void set_good_speed_features_framesize_independent( frame_is_intra_only(&cpi->common) ? 0 : 1; sf->winner_mode_sf.enable_winner_mode_for_use_tx_domain_dist = 1; sf->winner_mode_sf.motion_mode_for_winner_cand = - boosted - ? 0 - : gf_group->update_type[gf_group->index] == INTNL_ARF_UPDATE ? 1 - : 2; + boosted ? 0 + : gf_group->update_type[cpi->gf_frame_index] == INTNL_ARF_UPDATE + ? 1 + : 2; // TODO(any): evaluate if these lpf features can be moved to speed 2. // For screen content, "prune_sgr_based_on_wiener = 2" cause large quality @@ -889,6 +974,8 @@ static void set_good_speed_features_framesize_independent( } if (speed >= 4) { + sf->gm_sf.prune_zero_mv_with_sse = 1; + sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE; sf->part_sf.simple_motion_search_prune_agg = 2; @@ -901,7 +988,7 @@ static void set_good_speed_features_framesize_independent( sf->inter_sf.txfm_rd_gate_level = boosted ? 0 : 3; sf->inter_sf.prune_inter_modes_based_on_tpl = boosted ? 
0 : 2; - sf->inter_sf.prune_compound_using_neighbors = 2; + sf->inter_sf.prune_ext_comp_using_neighbors = 2; sf->inter_sf.prune_obmc_prob_thresh = INT_MAX; sf->interp_sf.cb_pred_filter_search = 1; @@ -911,9 +998,10 @@ static void set_good_speed_features_framesize_independent( sf->intra_sf.intra_uv_mode_mask[TX_16X16] = UV_INTRA_DC_H_V_CFL; sf->intra_sf.intra_uv_mode_mask[TX_32X32] = UV_INTRA_DC_H_V_CFL; sf->intra_sf.intra_uv_mode_mask[TX_64X64] = UV_INTRA_DC_H_V_CFL; - sf->intra_sf.intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; - sf->intra_sf.intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; - sf->intra_sf.intra_y_mode_mask[TX_64X64] = INTRA_DC_H_V; + // TODO(any): "intra_y_mode_mask" doesn't help much at speed 4. + // sf->intra_sf.intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; + // sf->intra_sf.intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; + // sf->intra_sf.intra_y_mode_mask[TX_64X64] = INTRA_DC_H_V; // TODO(any): Experiment with this speed feature set to 2 for higher quality // presets as well sf->intra_sf.skip_intra_in_interframe = 2; @@ -923,10 +1011,10 @@ static void set_good_speed_features_framesize_independent( sf->tpl_sf.prune_starting_mv = 2; sf->tpl_sf.subpel_force_stop = HALF_PEL; sf->tpl_sf.search_method = FAST_BIGDIA; + sf->tpl_sf.gop_length_decision_method = 1; - sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 1; + sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 2; sf->tx_sf.tx_type_search.fast_intra_tx_type_search = 1; - sf->tx_sf.tx_type_search.prune_2d_txfm_mode = TX_TYPE_PRUNE_3; sf->tx_sf.tx_type_search.prune_tx_type_est_rd = 1; // TODO(any): Experiment with enabling of this speed feature as hash state // is reset during winner mode processing @@ -948,9 +1036,14 @@ static void set_good_speed_features_framesize_independent( } if (speed >= 5) { + sf->fp_sf.reduce_mv_step_param = 4; + sf->part_sf.simple_motion_search_prune_agg = 3; sf->part_sf.ext_partition_eval_thresh = allow_screen_content_tools ? 
BLOCK_8X8 : BLOCK_16X16; + sf->part_sf.prune_sub_8x8_partition_level = + (allow_screen_content_tools || frame_is_intra_only(&cpi->common)) ? 0 + : 2; sf->inter_sf.disable_interinter_wedge_var_thresh = UINT_MAX; sf->inter_sf.prune_inter_modes_if_skippable = 1; @@ -974,8 +1067,11 @@ static void set_good_speed_features_framesize_independent( sf->tpl_sf.prune_starting_mv = 3; sf->tpl_sf.use_y_only_rate_distortion = 1; sf->tpl_sf.subpel_force_stop = FULL_PEL; + sf->tpl_sf.gop_length_decision_method = 2; sf->winner_mode_sf.dc_blk_pred_level = 1; + + sf->fp_sf.disable_recon = 1; } if (speed >= 6) { @@ -986,9 +1082,14 @@ static void set_good_speed_features_framesize_independent( sf->inter_sf.prune_inter_modes_based_on_tpl = boosted ? 0 : 3; sf->inter_sf.prune_nearmv_using_neighbors = 1; sf->inter_sf.selective_ref_frame = 6; + sf->inter_sf.prune_ext_comp_using_neighbors = 3; sf->intra_sf.chroma_intra_pruning_with_hog = 4; sf->intra_sf.intra_pruning_with_hog = 4; + sf->intra_sf.intra_uv_mode_mask[TX_32X32] = UV_INTRA_DC; + sf->intra_sf.intra_uv_mode_mask[TX_64X64] = UV_INTRA_DC; + sf->intra_sf.intra_y_mode_mask[TX_32X32] = INTRA_DC; + sf->intra_sf.intra_y_mode_mask[TX_64X64] = INTRA_DC; sf->part_sf.prune_rectangular_split_based_on_qidx = boosted || allow_screen_content_tools ? 
0 : 1; @@ -1000,10 +1101,10 @@ static void set_good_speed_features_framesize_independent( sf->mv_sf.simple_motion_subpel_force_stop = FULL_PEL; sf->mv_sf.use_bsize_dependent_search_method = 1; - sf->tpl_sf.disable_gop_length_decision = 1; + sf->tpl_sf.gop_length_decision_method = 3; sf->tpl_sf.disable_filtered_key_tpl = 1; - sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 2; + sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 4; sf->tx_sf.use_intra_txb_hash = 1; sf->tx_sf.tx_type_search.prune_tx_type_est_rd = 0; @@ -1052,10 +1153,13 @@ static void set_rt_speed_feature_framesize_dependent(const AV1_COMP *const cpi, #endif } } else { - if (speed == 8 && !cpi->use_svc) { + if (speed == 8 && !cpi->ppi->use_svc) { sf->rt_sf.short_circuit_low_temp_var = 0; sf->rt_sf.use_nonrd_altref_frame = 1; } + if (speed >= 9) { + sf->rt_sf.skip_cdef_sb = 1; + } } if (!is_480p_or_larger) { if (speed == 7) { @@ -1088,6 +1192,8 @@ static void set_rt_speed_features_framesize_independent(AV1_COMP *cpi, sf->part_sf.less_rectangular_check_level = 1; sf->part_sf.ml_prune_partition = 1; sf->part_sf.prune_ext_partition_types_search_level = 1; + sf->part_sf.reuse_prev_rd_results_for_part_ab = 1; + sf->part_sf.use_best_rd_for_pruning = 1; // TODO(debargha): Test, tweak and turn on either 1 or 2 sf->inter_sf.inter_mode_rd_model_estimation = 0; @@ -1103,6 +1209,7 @@ static void set_rt_speed_features_framesize_independent(AV1_COMP *cpi, sf->interp_sf.use_fast_interpolation_filter_search = 1; + sf->intra_sf.dv_cost_upd_level = INTERNAL_COST_UPD_OFF; sf->intra_sf.intra_pruning_with_hog = 1; sf->mv_sf.full_pixel_search_level = 1; @@ -1140,7 +1247,6 @@ static void set_rt_speed_features_framesize_independent(AV1_COMP *cpi, sf->inter_sf.prune_comp_search_by_single_result = 1; sf->inter_sf.reuse_inter_intra_mode = 1; sf->inter_sf.selective_ref_frame = 2; - sf->inter_sf.skip_repeated_newmv = 1; sf->inter_sf.disable_interintra_wedge_var_thresh = 0; 
sf->inter_sf.disable_interinter_wedge_var_thresh = 0; sf->inter_sf.prune_comp_type_by_comp_avg = 1; @@ -1191,7 +1297,7 @@ static void set_rt_speed_features_framesize_independent(AV1_COMP *cpi, if (speed >= 3) { sf->hl_sf.recode_loop = ALLOW_RECODE_KFARFGF; - sf->gm_sf.gm_search_type = GM_DISABLE_SEARCH; + sf->gm_sf.gm_search_type = GM_REDUCED_REF_SEARCH_SKIP_L2_L3_ARF2; sf->part_sf.less_rectangular_check_level = 2; @@ -1202,7 +1308,7 @@ static void set_rt_speed_features_framesize_independent(AV1_COMP *cpi, // sf->mv_sf.adaptive_motion_search = 1; sf->inter_sf.adaptive_rd_thresh = 2; - sf->inter_sf.mv_cost_upd_level = 1; + sf->inter_sf.mv_cost_upd_level = INTERNAL_COST_UPD_SBROW; // TODO(yunqing): evaluate this speed feature for speed 1 & 2, and combine // it with cpi->sf.disable_wedge_search_var_thresh. sf->inter_sf.disable_interintra_wedge_var_thresh = UINT_MAX; @@ -1306,12 +1412,20 @@ static void set_rt_speed_features_framesize_independent(AV1_COMP *cpi, sf->part_sf.default_min_partition_size = BLOCK_8X8; sf->part_sf.partition_search_type = VAR_BASED_PARTITION; + sf->gm_sf.gm_search_type = GM_DISABLE_SEARCH; + sf->mv_sf.search_method = FAST_DIAMOND; sf->mv_sf.subpel_force_stop = QUARTER_PEL; sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED; sf->inter_sf.inter_mode_rd_model_estimation = 2; + // Disable intra_y_mode_mask pruning since the performance at speed 7 isn't + // good. May need more study. + for (int i = 0; i < TX_SIZES; ++i) { + sf->intra_sf.intra_y_mode_mask[i] = INTRA_ALL; + } + sf->lpf_sf.lpf_pick = LPF_PICK_FROM_Q; sf->rt_sf.mode_search_skip_flags |= FLAG_SKIP_INTRA_DIRMISMATCH; @@ -1348,7 +1462,7 @@ static void set_rt_speed_features_framesize_independent(AV1_COMP *cpi, // TODO(marpan): Look into why enabling skip_loopfilter_non_reference is // not bitexact on rtc testset, its very close (< ~0.01 bdrate), but not // always bitexact. 
- if (cpi->use_svc && cpi->svc.non_reference_frame && + if (cpi->ppi->use_svc && cpi->svc.non_reference_frame && sf->lpf_sf.cdef_pick_method == CDEF_PICK_FROM_Q && sf->lpf_sf.lpf_pick == LPF_PICK_FROM_Q) sf->rt_sf.skip_loopfilter_non_reference = 1; @@ -1398,8 +1512,14 @@ static AOM_INLINE void init_hl_sf(HIGH_LEVEL_SPEED_FEATURES *hl_sf) { hl_sf->second_alt_ref_filtering = 1; } +static AOM_INLINE void init_fp_sf(FIRST_PASS_SPEED_FEATURES *fp_sf) { + fp_sf->reduce_mv_step_param = 3; + fp_sf->skip_motion_search_threshold = 0; + fp_sf->disable_recon = 0; +} + static AOM_INLINE void init_tpl_sf(TPL_SPEED_FEATURES *tpl_sf) { - tpl_sf->disable_gop_length_decision = 0; + tpl_sf->gop_length_decision_method = 0; tpl_sf->prune_intra_modes = 0; tpl_sf->prune_starting_mv = 0; tpl_sf->reduce_first_step_size = 0; @@ -1415,6 +1535,7 @@ static AOM_INLINE void init_tpl_sf(TPL_SPEED_FEATURES *tpl_sf) { static AOM_INLINE void init_gm_sf(GLOBAL_MOTION_SPEED_FEATURES *gm_sf) { gm_sf->gm_search_type = GM_FULL_SEARCH; gm_sf->prune_ref_frame_for_gm_search = 0; + gm_sf->prune_zero_mv_with_sse = 0; } static AOM_INLINE void init_part_sf(PARTITION_SPEED_FEATURES *part_sf) { @@ -1454,6 +1575,9 @@ static AOM_INLINE void init_part_sf(PARTITION_SPEED_FEATURES *part_sf) { part_sf->ml_predict_breakout_level = 0; part_sf->prune_sub_8x8_partition_level = 0; part_sf->simple_motion_search_rect_split = 0; + part_sf->reuse_prev_rd_results_for_part_ab = 0; + part_sf->reuse_best_prediction_for_part_ab = 0; + part_sf->use_best_rd_for_pruning = 0; } static AOM_INLINE void init_mv_sf(MV_SPEED_FEATURES *mv_sf) { @@ -1487,16 +1611,17 @@ static AOM_INLINE void init_inter_sf(INTER_MODE_SPEED_FEATURES *inter_sf) { inter_sf->fast_wedge_sign_estimate = 0; inter_sf->use_dist_wtd_comp_flag = DIST_WTD_COMP_ENABLED; inter_sf->reuse_inter_intra_mode = 0; - inter_sf->mv_cost_upd_level = 0; + inter_sf->mv_cost_upd_level = INTERNAL_COST_UPD_SB; + inter_sf->coeff_cost_upd_level = INTERNAL_COST_UPD_SB; + 
inter_sf->mode_cost_upd_level = INTERNAL_COST_UPD_SB; inter_sf->prune_inter_modes_based_on_tpl = 0; inter_sf->prune_nearmv_using_neighbors = 0; inter_sf->prune_comp_search_by_single_result = 0; inter_sf->skip_repeated_ref_mv = 0; - inter_sf->skip_repeated_newmv = 0; - inter_sf->skip_repeated_full_newmv = 0; + inter_sf->skip_newmv_in_drl = 0; inter_sf->inter_mode_rd_model_estimation = 0; inter_sf->prune_compound_using_single_ref = 0; - inter_sf->prune_compound_using_neighbors = 0; + inter_sf->prune_ext_comp_using_neighbors = 0; inter_sf->prune_comp_using_best_single_mode_ref = 0; inter_sf->disable_onesided_comp = 0; inter_sf->prune_mode_search_simple_translation = 0; @@ -1514,9 +1639,10 @@ static AOM_INLINE void init_inter_sf(INTER_MODE_SPEED_FEATURES *inter_sf) { inter_sf->txfm_rd_gate_level = 0; inter_sf->prune_inter_modes_if_skippable = 0; inter_sf->disable_masked_comp = 0; - inter_sf->reuse_best_prediction_for_part_ab = 0; inter_sf->enable_fast_compound_mode_search = 0; inter_sf->reuse_mask_search_results = 0; + inter_sf->enable_fast_wedge_mask_search = 0; + inter_sf->inter_mode_txfm_breakout = 0; } static AOM_INLINE void init_interp_sf(INTERP_FILTER_SPEED_FEATURES *interp_sf) { @@ -1529,6 +1655,7 @@ static AOM_INLINE void init_interp_sf(INTERP_FILTER_SPEED_FEATURES *interp_sf) { } static AOM_INLINE void init_intra_sf(INTRA_MODE_SPEED_FEATURES *intra_sf) { + intra_sf->dv_cost_upd_level = INTERNAL_COST_UPD_SB; intra_sf->skip_intra_in_interframe = 1; intra_sf->intra_pruning_with_hog = 0; intra_sf->chroma_intra_pruning_with_hog = 0; @@ -1539,8 +1666,10 @@ static AOM_INLINE void init_intra_sf(INTRA_MODE_SPEED_FEATURES *intra_sf) { intra_sf->intra_uv_mode_mask[i] = UV_INTRA_ALL; } intra_sf->disable_smooth_intra = 0; - intra_sf->disable_filter_intra = 0; + intra_sf->prune_filter_intra_level = 0; intra_sf->prune_chroma_modes_using_luma_winner = 0; + intra_sf->cfl_search_range = 3; + intra_sf->top_intra_model_count_allowed = TOP_INTRA_MODEL_COUNT; } static AOM_INLINE 
void init_tx_sf(TX_SPEED_FEATURES *tx_sf) { @@ -1650,9 +1779,11 @@ void av1_set_speed_features_framesize_dependent(AV1_COMP *cpi, int speed) { break; } - if (!cpi->seq_params_locked) { - cpi->common.seq_params.enable_masked_compound &= + if (!cpi->ppi->seq_params_locked) { + cpi->common.seq_params->enable_masked_compound &= !sf->inter_sf.disable_masked_comp; + cpi->common.seq_params->enable_interintra_compound &= + (sf->inter_sf.disable_interintra_wedge_var_thresh != UINT_MAX); } // This is only used in motion vector unit test. @@ -1662,9 +1793,9 @@ void av1_set_speed_features_framesize_dependent(AV1_COMP *cpi, int speed) { cpi->mv_search_params.find_fractional_mv_step = av1_return_min_sub_pixel_mv; if ((cpi->oxcf.row_mt == 1) && (cpi->oxcf.max_threads > 1)) { - if (sf->inter_sf.mv_cost_upd_level > 1) { + if (sf->inter_sf.mv_cost_upd_level < INTERNAL_COST_UPD_SBROW) { // Set mv_cost_upd_level to use row level update. - sf->inter_sf.mv_cost_upd_level = 1; + sf->inter_sf.mv_cost_upd_level = INTERNAL_COST_UPD_SBROW; } } } @@ -1676,6 +1807,7 @@ void av1_set_speed_features_framesize_independent(AV1_COMP *cpi, int speed) { int i; init_hl_sf(&sf->hl_sf); + init_fp_sf(&sf->fp_sf); init_tpl_sf(&sf->tpl_sf); init_gm_sf(&sf->gm_sf); init_part_sf(&sf->part_sf); @@ -1701,12 +1833,12 @@ void av1_set_speed_features_framesize_independent(AV1_COMP *cpi, int speed) { break; } - if (!cpi->seq_params_locked) { - cpi->common.seq_params.enable_dual_filter &= + if (!cpi->ppi->seq_params_locked) { + cpi->common.seq_params->enable_dual_filter &= !sf->interp_sf.disable_dual_filter; - cpi->common.seq_params.enable_restoration &= !sf->lpf_sf.disable_lr_filter; + cpi->common.seq_params->enable_restoration &= !sf->lpf_sf.disable_lr_filter; - cpi->common.seq_params.enable_interintra_compound &= + cpi->common.seq_params->enable_interintra_compound &= (sf->inter_sf.disable_interintra_wedge_var_thresh != UINT_MAX); } @@ -1821,10 +1953,11 @@ void av1_set_speed_features_qindex_dependent(AV1_COMP 
*cpi, int speed) { SPEED_FEATURES *const sf = &cpi->sf; WinnerModeParams *const winner_mode_params = &cpi->winner_mode_params; const int boosted = frame_is_boosted(cpi); + const int is_480p_or_larger = AOMMIN(cm->width, cm->height) >= 480; const int is_720p_or_larger = AOMMIN(cm->width, cm->height) >= 720; const int is_1080p_or_larger = AOMMIN(cm->width, cm->height) >= 1080; const int is_arf2_bwd_type = - cpi->gf_group.update_type[cpi->gf_group.index] == INTNL_ARF_UPDATE; + cpi->ppi->gf_group.update_type[cpi->gf_frame_index] == INTNL_ARF_UPDATE; if (cpi->oxcf.mode == REALTIME) return; @@ -1832,7 +1965,6 @@ void av1_set_speed_features_qindex_dependent(AV1_COMP *cpi, int speed) { // qindex_thresh for resolution < 720p const int qindex_thresh = boosted ? 70 : (is_arf2_bwd_type ? 110 : 140); if (!is_720p_or_larger && cm->quant_params.base_qindex <= qindex_thresh) { - sf->inter_sf.skip_repeated_newmv = 1; sf->part_sf.simple_motion_search_split = cm->features.allow_screen_content_tools ? 1 : 2; sf->part_sf.simple_motion_search_early_term_none = 1; @@ -1849,7 +1981,6 @@ void av1_set_speed_features_qindex_dependent(AV1_COMP *cpi, int speed) { sf->tx_sf.inter_tx_size_search_init_depth_rect = 1; sf->tx_sf.inter_tx_size_search_init_depth_sqr = 1; sf->tx_sf.intra_tx_size_search_init_depth_rect = 1; - sf->inter_sf.skip_repeated_newmv = 1; sf->tx_sf.model_based_prune_tx_search_level = 0; if (is_1080p_or_larger && cm->quant_params.base_qindex <= 108) { @@ -1866,28 +1997,25 @@ void av1_set_speed_features_qindex_dependent(AV1_COMP *cpi, int speed) { } } - if (speed >= 3) { - // Disable extended partitions for lower quantizers - const int qindex_thresh = - cm->features.allow_screen_content_tools ? 50 : 100; - if (cm->quant_params.base_qindex <= qindex_thresh && !boosted) { - sf->part_sf.ext_partition_eval_thresh = BLOCK_128X128; - } - } - - if (speed >= 4) { + if (speed >= 2) { // Disable extended partitions for lower quantizers - const int qindex_thresh = boosted ? 
80 : 120; - if (cm->quant_params.base_qindex <= qindex_thresh && - !frame_is_intra_only(&cpi->common)) { - sf->part_sf.ext_partition_eval_thresh = BLOCK_128X128; + const int aggr = AOMMIN(3, speed - 2); + const int qindex_thresh1[4] = { 50, 50, 80, 100 }; + const int qindex_thresh2[4] = { 80, 100, 120, 160 }; + int qindex_thresh; + int disable_ext_part; + if (aggr <= 1) { + const int qthresh2 = + (!aggr && !is_480p_or_larger) ? 70 : qindex_thresh2[aggr]; + qindex_thresh = cm->features.allow_screen_content_tools + ? qindex_thresh1[aggr] + : qthresh2; + disable_ext_part = !boosted; + } else { + qindex_thresh = boosted ? qindex_thresh1[aggr] : qindex_thresh2[aggr]; + disable_ext_part = !frame_is_intra_only(cm); } - } - - if (speed >= 5) { - const int qindex_thresh = boosted ? 100 : 160; - if (cm->quant_params.base_qindex <= qindex_thresh && - !frame_is_intra_only(&cpi->common)) { + if (cm->quant_params.base_qindex <= qindex_thresh && disable_ext_part) { sf->part_sf.ext_partition_eval_thresh = BLOCK_128X128; } } |