diff options
Diffstat (limited to 'third_party/libaom/source/libaom/examples/svc_encoder_rtc.c')
-rw-r--r-- | third_party/libaom/source/libaom/examples/svc_encoder_rtc.c | 191 |
1 files changed, 156 insertions, 35 deletions
diff --git a/third_party/libaom/source/libaom/examples/svc_encoder_rtc.c b/third_party/libaom/source/libaom/examples/svc_encoder_rtc.c index 87e3aa95f1..44bed38318 100644 --- a/third_party/libaom/source/libaom/examples/svc_encoder_rtc.c +++ b/third_party/libaom/source/libaom/examples/svc_encoder_rtc.c @@ -24,6 +24,7 @@ #include "common/args.h" #include "common/tools_common.h" #include "common/video_writer.h" +#include "examples/encoder_util.h" #include "aom_ports/aom_timer.h" #define OPTION_BUFFER_SIZE 1024 @@ -286,6 +287,9 @@ static void parse_command_line(int argc, const char **argv_, if (app_input->speed > 9) { warn("Mapping speed %d to speed 9.\n", app_input->speed); } + if (app_input->speed <= 6) { + die("Encoder speed setting should be in [7, 9].\n"); + } } else if (arg_match(&arg, &aqmode_arg, argi)) { app_input->aq_mode = arg_parse_uint(&arg); } else if (arg_match(&arg, &threads_arg, argi)) { @@ -567,7 +571,7 @@ static void set_layer_pattern(int layering_mode, int superframe_cnt, layer_id->spatial_layer_id = spatial_layer_id; int lag_index = 0; int base_count = superframe_cnt >> 2; - // Set the referende map buffer idx for the 7 references: + // Set the reference map buffer idx for the 7 references: // LAST_FRAME (0), LAST2_FRAME(1), LAST3_FRAME(2), GOLDEN_FRAME(3), // BWDREF_FRAME(4), ALTREF2_FRAME(5), ALTREF_FRAME(6). for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->ref_idx[i] = i; @@ -795,12 +799,10 @@ static void set_layer_pattern(int layering_mode, int superframe_cnt, } else if (layer_id->spatial_layer_id == 1) { // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1, // GOLDEN (and all other refs) to slot 3. - // Set LAST2 to slot 4 and Update slot 4. + // No update. for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->ref_idx[i] = 3; ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1; - ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 4; - ref_frame_config->refresh[4] = 1; } } else if ((superframe_cnt - 2) % 4 == 0) { // Middle temporal enhancement layer. @@ -837,13 +839,11 @@ static void set_layer_pattern(int layering_mode, int superframe_cnt, ref_frame_config->refresh[3] = 1; } else if (layer_id->spatial_layer_id == 1) { // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 6, - // GOLDEN to slot 3. Set LAST2 to slot 4 and update slot 4. + // GOLDEN to slot 3. No update. for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->ref_idx[i] = 0; ref_frame_config->ref_idx[SVC_LAST_FRAME] = 6 - shift; ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3; - ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 4; - ref_frame_config->refresh[4] = 1; } } if (layer_id->spatial_layer_id > 0 && !ksvc_mode) { @@ -998,6 +998,64 @@ static void set_layer_pattern(int layering_mode, int superframe_cnt, } } +#if CONFIG_AV1_DECODER +static void test_decode(aom_codec_ctx_t *encoder, aom_codec_ctx_t *decoder, + const int frames_out, int *mismatch_seen) { + aom_image_t enc_img, dec_img; + + if (*mismatch_seen) return; + + /* Get the internal reference frame */ + AOM_CODEC_CONTROL_TYPECHECKED(encoder, AV1_GET_NEW_FRAME_IMAGE, &enc_img); + AOM_CODEC_CONTROL_TYPECHECKED(decoder, AV1_GET_NEW_FRAME_IMAGE, &dec_img); + +#if CONFIG_AV1_HIGHBITDEPTH + if ((enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) != + (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH)) { + if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) { + aom_image_t enc_hbd_img; + aom_img_alloc(&enc_hbd_img, enc_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH, + enc_img.d_w, enc_img.d_h, 16); + aom_img_truncate_16_to_8(&enc_hbd_img, &enc_img); + enc_img = enc_hbd_img; + } + if (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) { + aom_image_t dec_hbd_img; + aom_img_alloc(&dec_hbd_img, dec_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH, + dec_img.d_w, dec_img.d_h, 16); + aom_img_truncate_16_to_8(&dec_hbd_img, &dec_img); + dec_img = dec_hbd_img; + } + } +#endif + + if (!aom_compare_img(&enc_img, &dec_img)) { + int y[4], u[4], v[4]; +#if CONFIG_AV1_HIGHBITDEPTH + if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) { + aom_find_mismatch_high(&enc_img, &dec_img, y, u, v); + } else { + aom_find_mismatch(&enc_img, &dec_img, y, u, v); + } +#else + aom_find_mismatch(&enc_img, &dec_img, y, u, v); +#endif + decoder->err = 1; + printf( + "Encode/decode mismatch on frame %d at" + " Y[%d, %d] {%d/%d}," + " U[%d, %d] {%d/%d}," + " V[%d, %d] {%d/%d}", + frames_out, y[0], y[1], y[2], y[3], u[0], u[1], u[2], u[3], v[0], v[1], + v[2], v[3]); + *mismatch_seen = frames_out; + } + + aom_img_free(&enc_img); + aom_img_free(&dec_img); +} +#endif // CONFIG_AV1_DECODER + int main(int argc, const char **argv) { AppInput app_input; AvxVideoWriter *outfile[AOM_MAX_LAYERS] = { NULL }; @@ -1017,6 +1075,17 @@ int main(int argc, const char **argv) { aom_svc_params_t svc_params; aom_svc_ref_frame_config_t ref_frame_config; +#if CONFIG_INTERNAL_STATS + FILE *stats_file = fopen("opsnr.stt", "a"); + if (stats_file == NULL) { + die("Cannot open opsnr.stt\n"); + } +#endif +#if CONFIG_AV1_DECODER + int mismatch_seen = 0; + aom_codec_ctx_t decoder; +#endif + struct RateControlMetrics rc; int64_t cx_time = 0; int64_t cx_time_sl[3]; // max number of spatial layers. @@ -1039,11 +1108,12 @@ int main(int argc, const char **argv) { app_input.input_ctx.framerate.denominator = 1; app_input.input_ctx.only_i420 = 1; app_input.input_ctx.bit_depth = 0; + app_input.speed = 7; exec_name = argv[0]; // start with default encoder configuration - aom_codec_err_t res = - aom_codec_enc_config_default(aom_codec_av1_cx(), &cfg, 0); + aom_codec_err_t res = aom_codec_enc_config_default(aom_codec_av1_cx(), &cfg, + AOM_USAGE_REALTIME); if (res) { die("Failed to get config: %s\n", aom_codec_err_to_string(res)); } @@ -1071,10 +1141,13 @@ int main(int argc, const char **argv) { unsigned int width = cfg.g_w; unsigned int height = cfg.g_h; - if (ts_number_layers != - mode_to_num_temporal_layers[app_input.layering_mode] || - ss_number_layers != mode_to_num_spatial_layers[app_input.layering_mode]) { - die("Number of layers doesn't match layering mode."); + if (app_input.layering_mode >= 0) { + if (ts_number_layers != + mode_to_num_temporal_layers[app_input.layering_mode] || + ss_number_layers != + mode_to_num_spatial_layers[app_input.layering_mode]) { + die("Number of layers doesn't match layering mode."); + } } // Y4M reader has its own allocation. @@ -1109,20 +1182,16 @@ int main(int argc, const char **argv) { svc_params.framerate_factor[2] = 1; } - framerate = cfg.g_timebase.den / cfg.g_timebase.num; - set_rate_control_metrics(&rc, framerate, ss_number_layers, ts_number_layers); - if (app_input.input_ctx.file_type == FILE_TYPE_Y4M) { - if (app_input.input_ctx.width != cfg.g_w || - app_input.input_ctx.height != cfg.g_h) { - die("Incorrect width or height: %d x %d", cfg.g_w, cfg.g_h); - } - if (app_input.input_ctx.framerate.numerator != cfg.g_timebase.den || - app_input.input_ctx.framerate.denominator != cfg.g_timebase.num) { - die("Incorrect framerate: numerator %d denominator %d", - cfg.g_timebase.num, cfg.g_timebase.den); - } + // Override these settings with the info from Y4M file. + cfg.g_w = app_input.input_ctx.width; + cfg.g_h = app_input.input_ctx.height; + // g_timebase is the reciprocal of frame rate. + cfg.g_timebase.num = app_input.input_ctx.framerate.denominator; + cfg.g_timebase.den = app_input.input_ctx.framerate.numerator; } + framerate = cfg.g_timebase.den / cfg.g_timebase.num; + set_rate_control_metrics(&rc, framerate, ss_number_layers, ts_number_layers); AvxVideoInfo info; info.codec_fourcc = get_fourcc_by_aom_encoder(encoder); @@ -1162,6 +1231,12 @@ int main(int argc, const char **argv) { if (aom_codec_enc_init(&codec, encoder, &cfg, 0)) die("Failed to initialize encoder"); +#if CONFIG_AV1_DECODER + if (aom_codec_dec_init(&decoder, get_aom_decoder_by_index(0), NULL, 0)) { + die("Failed to initialize decoder"); + } +#endif + aom_codec_control(&codec, AOME_SET_CPUUSED, app_input.speed); aom_codec_control(&codec, AV1E_SET_AQ_MODE, app_input.aq_mode ? 3 : 0); aom_codec_control(&codec, AV1E_SET_GF_CBR_BOOST_PCT, 0); @@ -1172,6 +1247,7 @@ int main(int argc, const char **argv) { aom_codec_control(&codec, AV1E_SET_COEFF_COST_UPD_FREQ, 3); aom_codec_control(&codec, AV1E_SET_MODE_COST_UPD_FREQ, 3); aom_codec_control(&codec, AV1E_SET_MV_COST_UPD_FREQ, 3); + aom_codec_control(&codec, AV1E_SET_DV_COST_UPD_FREQ, 3); aom_codec_control(&codec, AV1E_SET_CDF_UPDATE_MODE, 1); aom_codec_control(&codec, AV1E_SET_TILE_COLUMNS, cfg.g_threads ? get_msb(cfg.g_threads) : 0); @@ -1196,8 +1272,8 @@ int main(int argc, const char **argv) { svc_params.scaling_factor_num[1] = 1; svc_params.scaling_factor_den[1] = 2; } - aom_codec_control(&codec, AV1E_SET_SVC_PARAMS, &svc_params); + // TODO(aomedia:3032): Configure KSVC in fixed mode. // This controls the maximum target size of the key frame. // For generating smaller key frames, use a smaller max_intra_size_pct @@ -1220,15 +1296,34 @@ int main(int argc, const char **argv) { const aom_codec_cx_pkt_t *pkt; int layer = 0; - // Set the reference/update flags, layer_id, and reference_map - // buffer index. - set_layer_pattern(app_input.layering_mode, frame_cnt, &layer_id, - &ref_frame_config, &use_svc_control, slx, is_key_frame, - (app_input.layering_mode == 10)); - aom_codec_control(&codec, AV1E_SET_SVC_LAYER_ID, &layer_id); - if (use_svc_control) - aom_codec_control(&codec, AV1E_SET_SVC_REF_FRAME_CONFIG, - &ref_frame_config); + // For flexible mode: + if (app_input.layering_mode >= 0) { + // Set the reference/update flags, layer_id, and reference_map + // buffer index. + set_layer_pattern(app_input.layering_mode, frame_cnt, &layer_id, + &ref_frame_config, &use_svc_control, slx, + is_key_frame, (app_input.layering_mode == 10)); + aom_codec_control(&codec, AV1E_SET_SVC_LAYER_ID, &layer_id); + if (use_svc_control) + aom_codec_control(&codec, AV1E_SET_SVC_REF_FRAME_CONFIG, + &ref_frame_config); + } else { + // Only up to 3 temporal layers supported in fixed mode. + // Only need to set spatial and temporal layer_id: reference + // prediction, refresh, and buffer_idx are set internally. + layer_id.spatial_layer_id = slx; + layer_id.temporal_layer_id = 0; + if (ts_number_layers == 2) { + layer_id.temporal_layer_id = (frame_cnt % 2) != 0; + } else if (ts_number_layers == 3) { + if (frame_cnt % 2 != 0) + layer_id.temporal_layer_id = 2; + else if ((frame_cnt > 1) && ((frame_cnt - 2) % 4 == 0)) + layer_id.temporal_layer_id = 1; + } + aom_codec_control(&codec, AV1E_SET_SVC_LAYER_ID, &layer_id); + } + if (set_err_resil_frame) { // Set error_resilient per frame: off/0 for base layer and // on/1 for enhancement layer frames. @@ -1332,14 +1427,31 @@ int main(int argc, const char **argv) { sum_bitrate2 = 0.0; } } + +#if CONFIG_AV1_DECODER + if (aom_codec_decode(&decoder, pkt->data.frame.buf, + (unsigned int)pkt->data.frame.sz, NULL)) + die_codec(&decoder, "Failed to decode frame."); +#endif + break; default: break; } } +#if CONFIG_AV1_DECODER + // Don't look for mismatch on top spatial and top temporal layers as they + // are non reference frames. + if ((ss_number_layers > 1 || ts_number_layers > 1) && + !(layer_id.temporal_layer_id > 0 && + layer_id.temporal_layer_id == (int)ts_number_layers - 1)) { + test_decode(&codec, &decoder, frame_cnt, &mismatch_seen); + } +#endif } // loop over spatial layers ++frame_cnt; pts += frame_duration; } + close_input_file(&(app_input.input_ctx)); printout_rate_control_summary(&rc, frame_cnt, ss_number_layers, ts_number_layers); @@ -1358,6 +1470,15 @@ int main(int argc, const char **argv) { if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec"); +#if CONFIG_INTERNAL_STATS + if (mismatch_seen) { + fprintf(stats_file, "First mismatch occurred in frame %d\n", mismatch_seen); + } else { + fprintf(stats_file, "No mismatch detected in recon buffers\n"); + } + fclose(stats_file); +#endif + // Try to rewrite the output file headers with the actual frame count. for (i = 0; i < ss_number_layers * ts_number_layers; ++i) aom_video_writer_close(outfile[i]); |