diff options
author | Marco Paniconi <marpan@google.com> | 2022-11-15 21:45:07 +0000 |
---|---|---|
committer | Gerrit Code Review <noreply-gerritcodereview@google.com> | 2022-11-15 21:45:07 +0000 |
commit | 605350bd5b68ac47f595d60cc8ef346588e773c0 (patch) | |
tree | aefee729b4b55acc05f5fd16000f0e579b878828 | |
parent | c1406fc26783c8bf8ea3a6a516ddbf45f7f3b9fc (diff) | |
parent | 76e9bf7a184eb1caf979dd07e1107e3b74ac10b6 (diff) | |
download | libvpx-605350bd5b68ac47f595d60cc8ef346588e773c0.tar.gz |
Merge "vp9-svc: Fixes to make SVC work with VBR" into main
mode | file | lines changed |
---|---|---|
-rw-r--r-- | test/svc_datarate_test.cc | 31 |
-rw-r--r-- | vp9/encoder/vp9_aq_cyclicrefresh.c | 2 |
-rw-r--r-- | vp9/encoder/vp9_encodeframe.c | 6 |
-rw-r--r-- | vp9/encoder/vp9_encoder.c | 38 |
-rw-r--r-- | vp9/encoder/vp9_encoder.h | 2 |
-rw-r--r-- | vp9/encoder/vp9_ratectrl.c | 50 |
-rw-r--r-- | vp9/encoder/vp9_svc_layercontext.c | 6 |
-rw-r--r-- | vp9/encoder/vp9_svc_layercontext.h | 2 |
-rw-r--r-- | vp9/vp9_cx_iface.c | 5 |
9 files changed, 101 insertions, 41 deletions
diff --git a/test/svc_datarate_test.cc b/test/svc_datarate_test.cc index 010c27342..484252ca4 100644 --- a/test/svc_datarate_test.cc +++ b/test/svc_datarate_test.cc @@ -1203,6 +1203,37 @@ TEST_P(DatarateOnePassCbrSvcMultiBR, OnePassCbrSvc2SL3TL) { #endif } +// Check basic rate targeting for 1 pass VBR SVC: 2 spatial layers and +// 3 temporal layers. Run VGA clip with 1 thread. +TEST_P(DatarateOnePassCbrSvcMultiBR, OnePassVbrSvc2SL3TL) { + SetSvcConfig(2, 3); + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_min_quantizer = 2; + cfg_.rc_max_quantizer = 56; + cfg_.g_threads = 1; + cfg_.rc_dropframe_thresh = 30; + cfg_.kf_max_dist = 9999; + cfg_.rc_end_usage = VPX_VBR; + ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1, + 0, 400); + top_sl_width_ = 640; + top_sl_height_ = 480; + const int bitrates[3] = { 200, 400, 600 }; + cfg_.rc_target_bitrate = bitrates[GET_PARAM(2)]; + ResetModel(); + AssignLayerBitrates(); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + CheckLayerRateTargeting(number_spatial_layers_, number_temporal_layers_, 0.70, + 1.3); +#if CONFIG_VP9_DECODER + // The non-reference frames are expected to be mismatched frames as the + // encoder will avoid loopfilter on these frames. + EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames()); +#endif +} + // Params: speed setting, layer framedrop control and index for bitrate array. 
class DatarateOnePassCbrSvcFrameDropMultiBR : public DatarateOnePassCbrSvc, diff --git a/vp9/encoder/vp9_aq_cyclicrefresh.c b/vp9/encoder/vp9_aq_cyclicrefresh.c index 90792aebe..28ab10a13 100644 --- a/vp9/encoder/vp9_aq_cyclicrefresh.c +++ b/vp9/encoder/vp9_aq_cyclicrefresh.c @@ -558,7 +558,7 @@ void vp9_cyclic_refresh_update_parameters(VP9_COMP *const cpi) { cr->percent_refresh = 10; cr->rate_ratio_qdelta = 1.5; cr->rate_boost_fac = 10; - if (cpi->refresh_golden_frame == 1) { + if (cpi->refresh_golden_frame == 1 && !cpi->use_svc) { cr->percent_refresh = 0; cr->rate_ratio_qdelta = 1.0; } diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index a9f392bf5..a1ee9c678 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -1299,7 +1299,7 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile, // the reference (base layer frame) is key frame (i.e., is_key_frame == 1). int is_key_frame = (frame_is_intra_only(cm) || - (is_one_pass_cbr_svc(cpi) && + (is_one_pass_svc(cpi) && cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)); // Always use 4x4 partition for key frame. const int use_4x4_partition = frame_is_intra_only(cm); @@ -1406,7 +1406,7 @@ static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile, assert(yv12 != NULL); - if (!(is_one_pass_cbr_svc(cpi) && cpi->svc.spatial_layer_id) || + if (!(is_one_pass_svc(cpi) && cpi->svc.spatial_layer_id) || cpi->svc.use_gf_temporal_ref_current_layer) { // For now, GOLDEN will not be used for non-zero spatial layers, since // it may not be a temporal reference. 
@@ -5381,7 +5381,7 @@ static void get_estimated_pred(VP9_COMP *cpi, const TileInfo *const tile, assert(yv12 != NULL); - if (!(is_one_pass_cbr_svc(cpi) && cpi->svc.spatial_layer_id) || + if (!(is_one_pass_svc(cpi) && cpi->svc.spatial_layer_id) || cpi->svc.use_gf_temporal_ref_current_layer) { // For now, GOLDEN will not be used for non-zero spatial layers, since // it may not be a temporal reference. diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index ca3439d7c..87c5d7b67 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -1333,7 +1333,7 @@ static void alloc_util_frame_buffers(VP9_COMP *cpi) { // For 1 pass cbr: allocate scaled_frame that may be used as an intermediate // buffer for a 2 stage down-sampling: two stages of 1:2 down-sampling for a // target of 1/4x1/4. number_spatial_layers must be greater than 2. - if (is_one_pass_cbr_svc(cpi) && !cpi->svc.scaled_temp_is_alloc && + if (is_one_pass_svc(cpi) && !cpi->svc.scaled_temp_is_alloc && cpi->svc.number_spatial_layers > 2) { cpi->svc.scaled_temp_is_alloc = 1; if (vpx_realloc_frame_buffer( @@ -1511,7 +1511,7 @@ static void init_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) { // Temporal scalability. 
cpi->svc.number_temporal_layers = oxcf->ts_number_layers; - if ((cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) || + if ((cpi->svc.number_temporal_layers > 1) || ((cpi->svc.number_temporal_layers > 1 || cpi->svc.number_spatial_layers > 1) && cpi->oxcf.pass != 1)) { @@ -2077,7 +2077,7 @@ void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) { rc->rc_2_frame = 0; } - if ((cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) || + if ((cpi->svc.number_temporal_layers > 1) || ((cpi->svc.number_temporal_layers > 1 || cpi->svc.number_spatial_layers > 1) && cpi->oxcf.pass != 1)) { @@ -3263,7 +3263,7 @@ void vp9_update_reference_frames(VP9_COMP *cpi) { vp9_denoiser_update_ref_frame(cpi); #endif - if (is_one_pass_cbr_svc(cpi)) vp9_svc_update_ref_frame(cpi); + if (is_one_pass_svc(cpi)) vp9_svc_update_ref_frame(cpi); } static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) { @@ -3857,11 +3857,11 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size, int q = 0, bottom_index = 0, top_index = 0; int no_drop_scene_change = 0; const INTERP_FILTER filter_scaler = - (is_one_pass_cbr_svc(cpi)) + (is_one_pass_svc(cpi)) ? svc->downsample_filter_type[svc->spatial_layer_id] : EIGHTTAP; const int phase_scaler = - (is_one_pass_cbr_svc(cpi)) + (is_one_pass_svc(cpi)) ? 
svc->downsample_filter_phase[svc->spatial_layer_id] : 0; @@ -3882,7 +3882,7 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size, set_frame_size(cpi); - if (is_one_pass_cbr_svc(cpi) && + if (is_one_pass_svc(cpi) && cpi->un_scaled_source->y_width == cm->width << 2 && cpi->un_scaled_source->y_height == cm->height << 2 && svc->scaled_temp.y_width == cm->width << 1 && @@ -3896,7 +3896,7 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size, cm, cpi->un_scaled_source, &cpi->scaled_source, &svc->scaled_temp, filter_scaler, phase_scaler, filter_scaler2, phase_scaler2); svc->scaled_one_half = 1; - } else if (is_one_pass_cbr_svc(cpi) && + } else if (is_one_pass_svc(cpi) && cpi->un_scaled_source->y_width == cm->width << 1 && cpi->un_scaled_source->y_height == cm->height << 1 && svc->scaled_one_half) { @@ -3911,7 +3911,7 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size, } #ifdef OUTPUT_YUV_SVC_SRC // Write out at most 3 spatial layers. - if (is_one_pass_cbr_svc(cpi) && svc->spatial_layer_id < 3) { + if (is_one_pass_svc(cpi) && svc->spatial_layer_id < 3) { vpx_write_yuv_frame(yuv_svc_src[svc->spatial_layer_id], cpi->Source); } #endif @@ -4020,14 +4020,14 @@ static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size, if (vp9_rc_drop_frame(cpi)) return 0; } - // For 1 pass CBR SVC, only ZEROMV is allowed for spatial reference frame + // For 1 pass SVC, only ZEROMV is allowed for spatial reference frame // when svc->force_zero_mode_spatial_ref = 1. Under those conditions we can // avoid this frame-level upsampling (for non intra_only frames). // For SVC single_layer mode, dynamic resize is allowed and we need to // scale references for this case. 
if (frame_is_intra_only(cm) == 0 && ((svc->single_layer_svc && cpi->oxcf.resize_mode == RESIZE_DYNAMIC) || - !(is_one_pass_cbr_svc(cpi) && svc->force_zero_mode_spatial_ref))) { + !(is_one_pass_svc(cpi) && svc->force_zero_mode_spatial_ref))) { vp9_scale_references(cpi); } @@ -7613,8 +7613,8 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, const int gf_group_index = cpi->twopass.gf_group.index; int i; - if (is_one_pass_cbr_svc(cpi)) { - vp9_one_pass_cbr_svc_start_layer(cpi); + if (is_one_pass_svc(cpi)) { + vp9_one_pass_svc_start_layer(cpi); } vpx_usec_timer_start(&cmptimer); @@ -7634,7 +7634,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, // Normal defaults cm->reset_frame_context = 0; cm->refresh_frame_context = 1; - if (!is_one_pass_cbr_svc(cpi)) { + if (!is_one_pass_svc(cpi)) { cpi->refresh_last_frame = 1; cpi->refresh_golden_frame = 0; cpi->refresh_alt_ref_frame = 0; @@ -7767,7 +7767,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, adjust_frame_rate(cpi, source); } - if (is_one_pass_cbr_svc(cpi)) { + if (is_one_pass_svc(cpi)) { vp9_update_temporal_layer_framerate(cpi); vp9_restore_layer_context(cpi); } @@ -7901,9 +7901,9 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, } // Save layer specific state. 
- if (is_one_pass_cbr_svc(cpi) || ((cpi->svc.number_temporal_layers > 1 || - cpi->svc.number_spatial_layers > 1) && - oxcf->pass == 2)) { + if (is_one_pass_svc(cpi) || ((cpi->svc.number_temporal_layers > 1 || + cpi->svc.number_spatial_layers > 1) && + oxcf->pass == 2)) { vp9_save_layer_context(cpi); } @@ -8077,7 +8077,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, #endif - if (is_one_pass_cbr_svc(cpi)) { + if (is_one_pass_svc(cpi)) { if (cm->show_frame) { ++cpi->svc.spatial_layer_to_encode; if (cpi->svc.spatial_layer_to_encode >= cpi->svc.number_spatial_layers) diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h index 1d5894525..3e0b80677 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -1305,7 +1305,7 @@ YV12_BUFFER_CONFIG *vp9_scale_if_required( void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags); -static INLINE int is_one_pass_cbr_svc(const struct VP9_COMP *const cpi) { +static INLINE int is_one_pass_svc(const struct VP9_COMP *const cpi) { return (cpi->use_svc && cpi->oxcf.pass == 0); } diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index 1ddf64d41..d9207f7a2 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -327,7 +327,7 @@ static void update_buffer_level_postencode(VP9_COMP *cpi, rc->buffer_level = rc->bits_off_target; - if (is_one_pass_cbr_svc(cpi)) { + if (is_one_pass_svc(cpi)) { update_layer_buffer_level_postencode(&cpi->svc, encoded_frame_size); } } @@ -910,7 +910,7 @@ static int calc_active_worst_quality_one_pass_vbr(const VP9_COMP *cpi) { active_worst_quality = curr_frame == 0 ? 
rc->worst_quality : rc->last_q[KEY_FRAME] << 1; } else { - if (!rc->is_src_frame_alt_ref && + if (!rc->is_src_frame_alt_ref && !cpi->use_svc && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) { active_worst_quality = curr_frame == 1 @@ -1871,7 +1871,7 @@ void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) { } } } else { - if ((cpi->use_svc && oxcf->rc_mode == VPX_CBR) || + if ((cpi->use_svc) || (!rc->is_src_frame_alt_ref && !(cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame))) { rc->last_q[INTER_FRAME] = qindex; @@ -2021,6 +2021,11 @@ int vp9_calc_pframe_target_size_one_pass_vbr(const VP9_COMP *cpi) { (rc->baseline_gf_interval + af_ratio - 1) : ((int64_t)rc->avg_frame_bandwidth * rc->baseline_gf_interval) / (rc->baseline_gf_interval + af_ratio - 1); + // For SVC: refresh flags are used to define the pattern, so we can't + // use that for boosting the target size here. + // TODO(marpan): Consider adding internal boost on TL0 for VBR-SVC. + // For now just use the CBR logic for setting target size. + if (cpi->use_svc) target = vp9_calc_pframe_target_size_one_pass_cbr(cpi); if (target > INT_MAX) target = INT_MAX; return vp9_rc_clamp_pframe_target_size(cpi, (int)target); } @@ -2147,7 +2152,7 @@ int vp9_calc_pframe_target_size_one_pass_cbr(const VP9_COMP *cpi) { } else { target = rc->avg_frame_bandwidth; } - if (is_one_pass_cbr_svc(cpi)) { + if (is_one_pass_svc(cpi)) { // Note that for layers, avg_frame_bandwidth is the cumulative // per-frame-bandwidth. For the target size of this frame, use the // layer average frame size (i.e., non-cumulative per-frame-bw). 
@@ -2282,7 +2287,7 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) { (svc->spatial_layer_sync[0] == 1 && svc->spatial_layer_id == 0)) { cm->frame_type = KEY_FRAME; rc->source_alt_ref_active = 0; - if (is_one_pass_cbr_svc(cpi)) { + if (is_one_pass_svc(cpi)) { if (cm->current_video_frame > 0) vp9_svc_reset_temporal_layers(cpi, 1); layer = LAYER_IDS_TO_IDX(svc->spatial_layer_id, svc->temporal_layer_id, svc->number_temporal_layers); @@ -2290,11 +2295,14 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) { cpi->ref_frame_flags &= (~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG); // Assumption here is that LAST_FRAME is being updated for a keyframe. // Thus no change in update flags. - target = vp9_calc_iframe_target_size_one_pass_cbr(cpi); + if (cpi->oxcf.rc_mode == VPX_CBR) + target = vp9_calc_iframe_target_size_one_pass_cbr(cpi); + else + target = vp9_calc_iframe_target_size_one_pass_vbr(cpi); } } else { cm->frame_type = INTER_FRAME; - if (is_one_pass_cbr_svc(cpi)) { + if (is_one_pass_svc(cpi)) { LAYER_CONTEXT *lc = &svc->layer_context[layer]; // Add condition current_video_frame > 0 for the case where first frame // is intra only followed by overlay/copy frame. In this case we don't @@ -2303,7 +2311,23 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) { (svc->spatial_layer_id == 0 && cm->current_video_frame > 0) ? 0 : svc->layer_context[svc->temporal_layer_id].is_key_frame; - target = vp9_calc_pframe_target_size_one_pass_cbr(cpi); + if (cpi->oxcf.rc_mode == VPX_CBR) { + target = vp9_calc_pframe_target_size_one_pass_cbr(cpi); + } else { + double rate_err = 0.0; + rc->fac_active_worst_inter = 140; + rc->fac_active_worst_gf = 100; + if (rc->rolling_target_bits > 0) { + rate_err = + (double)rc->rolling_actual_bits / (double)rc->rolling_target_bits; + if (rate_err < 1.0) + rc->fac_active_worst_inter = 120; + else if (rate_err > 2.0) + // Increase active_worst faster if rate fluctuation is high. 
+ rc->fac_active_worst_inter = 160; + } + target = vp9_calc_pframe_target_size_one_pass_vbr(cpi); + } } } @@ -2312,7 +2336,10 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) { svc->layer_context[layer].is_key_frame == 1) { cm->frame_type = KEY_FRAME; cpi->ref_frame_flags &= (~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG); - target = vp9_calc_iframe_target_size_one_pass_cbr(cpi); + if (cpi->oxcf.rc_mode == VPX_CBR) + target = vp9_calc_iframe_target_size_one_pass_cbr(cpi); + else + target = vp9_calc_iframe_target_size_one_pass_vbr(cpi); } // Set the buffer idx and refresh flags for key frames in simulcast mode. // Note the buffer slot for long-term reference is set below (line 2255), @@ -2397,7 +2424,10 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) { } if (svc->set_intra_only_frame) { set_intra_only_frame(cpi); - target = vp9_calc_iframe_target_size_one_pass_cbr(cpi); + if (cpi->oxcf.rc_mode == VPX_CBR) + target = vp9_calc_iframe_target_size_one_pass_cbr(cpi); + else + target = vp9_calc_iframe_target_size_one_pass_vbr(cpi); } // Overlay frame predicts from LAST (intra-only) if (svc->previous_frame_is_intra_only) cpi->ref_frame_flags |= VP9_LAST_FLAG; diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c index a57a70ab1..518c00b34 100644 --- a/vp9/encoder/vp9_svc_layercontext.c +++ b/vp9/encoder/vp9_svc_layercontext.c @@ -290,7 +290,7 @@ void vp9_update_layer_context_change_config(VP9_COMP *const cpi, } static LAYER_CONTEXT *get_layer_context(VP9_COMP *const cpi) { - if (is_one_pass_cbr_svc(cpi)) + if (is_one_pass_svc(cpi)) return &cpi->svc.layer_context[cpi->svc.spatial_layer_id * cpi->svc.number_temporal_layers + cpi->svc.temporal_layer_id]; @@ -354,7 +354,7 @@ void vp9_restore_layer_context(VP9_COMP *const cpi) { cpi->alt_ref_source = lc->alt_ref_source; // Check if it is one_pass_cbr_svc mode and lc->speed > 0 (real-time mode // does not use speed = 0). 
- if (is_one_pass_cbr_svc(cpi) && lc->speed > 0) { + if (is_one_pass_svc(cpi) && lc->speed > 0) { cpi->oxcf.speed = lc->speed; } cpi->loopfilter_ctrl = lc->loopfilter_ctrl; @@ -754,7 +754,7 @@ void vp9_copy_flags_ref_update_idx(VP9_COMP *const cpi) { svc->reference_altref[sl] = (uint8_t)(cpi->ref_frame_flags & VP9_ALT_FLAG); } -int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) { +int vp9_one_pass_svc_start_layer(VP9_COMP *const cpi) { int width = 0, height = 0; SVC *const svc = &cpi->svc; LAYER_CONTEXT *lc = NULL; diff --git a/vp9/encoder/vp9_svc_layercontext.h b/vp9/encoder/vp9_svc_layercontext.h index b2d1d1b98..c7328cf57 100644 --- a/vp9/encoder/vp9_svc_layercontext.h +++ b/vp9/encoder/vp9_svc_layercontext.h @@ -255,7 +255,7 @@ int vp9_denoise_svc_non_key(struct VP9_COMP *const cpi); void vp9_copy_flags_ref_update_idx(struct VP9_COMP *const cpi); -int vp9_one_pass_cbr_svc_start_layer(struct VP9_COMP *const cpi); +int vp9_one_pass_svc_start_layer(struct VP9_COMP *const cpi); void vp9_free_svc_cyclic_refresh(struct VP9_COMP *const cpi); diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c index 02bd2e579..695774e73 100644 --- a/vp9/vp9_cx_iface.c +++ b/vp9/vp9_cx_iface.c @@ -1527,9 +1527,8 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx, cx_data += size; cx_data_sz -= size; - if (is_one_pass_cbr_svc(cpi) && - (cpi->svc.spatial_layer_id == - cpi->svc.number_spatial_layers - 1)) { + if (is_one_pass_svc(cpi) && (cpi->svc.spatial_layer_id == + cpi->svc.number_spatial_layers - 1)) { // Encoded all spatial layers; exit loop. break; } |