1 files changed, 156 insertions, 35 deletions
diff --git a/third_party/libaom/source/libaom/examples/svc_encoder_rtc.c b/third_party/libaom/source/libaom/examples/svc_encoder_rtc.c
index 87e3aa95f1..44bed38318 100644
--- a/third_party/libaom/source/libaom/examples/svc_encoder_rtc.c
+++ b/third_party/libaom/source/libaom/examples/svc_encoder_rtc.c
@@ -24,6 +24,7 @@
 #include "common/args.h"
 #include "common/tools_common.h"
 #include "common/video_writer.h"
+#include "examples/encoder_util.h"
 #include "aom_ports/aom_timer.h"
 
 #define OPTION_BUFFER_SIZE 1024
@@ -286,6 +287,9 @@ static void parse_command_line(int argc, const char **argv_,
       if (app_input->speed > 9) {
         warn("Mapping speed %d to speed 9.\n", app_input->speed);
       }
+      if (app_input->speed <= 6) {
+        die("Encoder speed setting should be in [7, 9].\n");
+      }
     } else if (arg_match(&arg, &aqmode_arg, argi)) {
       app_input->aq_mode = arg_parse_uint(&arg);
     } else if (arg_match(&arg, &threads_arg, argi)) {
@@ -567,7 +571,7 @@ static void set_layer_pattern(int layering_mode, int superframe_cnt,
   layer_id->spatial_layer_id = spatial_layer_id;
   int lag_index = 0;
   int base_count = superframe_cnt >> 2;
-  // Set the referende map buffer idx for the 7 references:
+  // Set the reference map buffer idx for the 7 references:
   // LAST_FRAME (0), LAST2_FRAME(1), LAST3_FRAME(2), GOLDEN_FRAME(3),
   // BWDREF_FRAME(4), ALTREF2_FRAME(5), ALTREF_FRAME(6).
   for (i = 0; i < INTER_REFS_PER_FRAME; i++) ref_frame_config->ref_idx[i] = i;
@@ -795,12 +799,10 @@ static void set_layer_pattern(int layering_mode, int superframe_cnt,
         } else if (layer_id->spatial_layer_id == 1) {
           // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 1,
           // GOLDEN (and all other refs) to slot 3.
-          // Set LAST2 to slot 4 and Update slot 4.
+          // No update.
           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
             ref_frame_config->ref_idx[i] = 3;
           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 1;
-          ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 4;
-          ref_frame_config->refresh[4] = 1;
         }
       } else if ((superframe_cnt - 2) % 4 == 0) {
         // Middle temporal enhancement layer.
@@ -837,13 +839,11 @@ static void set_layer_pattern(int layering_mode, int superframe_cnt,
           ref_frame_config->refresh[3] = 1;
         } else if (layer_id->spatial_layer_id == 1) {
           // Reference LAST and GOLDEN. Set buffer_idx for LAST to slot 6,
-          // GOLDEN to slot 3. Set LAST2 to slot 4 and update slot 4.
+          // GOLDEN to slot 3. No update.
           for (i = 0; i < INTER_REFS_PER_FRAME; i++)
             ref_frame_config->ref_idx[i] = 0;
           ref_frame_config->ref_idx[SVC_LAST_FRAME] = 6 - shift;
           ref_frame_config->ref_idx[SVC_GOLDEN_FRAME] = 3;
-          ref_frame_config->ref_idx[SVC_LAST2_FRAME] = 4;
-          ref_frame_config->refresh[4] = 1;
         }
       }
       if (layer_id->spatial_layer_id > 0 && !ksvc_mode) {
@@ -998,6 +998,64 @@ static void set_layer_pattern(int layering_mode, int superframe_cnt,
   }
 }
 
+#if CONFIG_AV1_DECODER
+static void test_decode(aom_codec_ctx_t *encoder, aom_codec_ctx_t *decoder,
+                        const int frames_out, int *mismatch_seen) {
+  aom_image_t enc_img, dec_img;
+
+  if (*mismatch_seen) return;
+
+  /* Get the internal reference frame */
+  AOM_CODEC_CONTROL_TYPECHECKED(encoder, AV1_GET_NEW_FRAME_IMAGE, &enc_img);
+  AOM_CODEC_CONTROL_TYPECHECKED(decoder, AV1_GET_NEW_FRAME_IMAGE, &dec_img);
+
+#if CONFIG_AV1_HIGHBITDEPTH
+  if ((enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) !=
+      (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH)) {
+    if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
+      aom_image_t enc_hbd_img;
+      aom_img_alloc(&enc_hbd_img, enc_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH,
+                    enc_img.d_w, enc_img.d_h, 16);
+      aom_img_truncate_16_to_8(&enc_hbd_img, &enc_img);
+      enc_img = enc_hbd_img;
+    }
+    if (dec_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
+      aom_image_t dec_hbd_img;
+      aom_img_alloc(&dec_hbd_img, dec_img.fmt - AOM_IMG_FMT_HIGHBITDEPTH,
+                    dec_img.d_w, dec_img.d_h, 16);
+      aom_img_truncate_16_to_8(&dec_hbd_img, &dec_img);
+      dec_img = dec_hbd_img;
+    }
+  }
+#endif
+
+  if (!aom_compare_img(&enc_img, &dec_img)) {
+    int y[4], u[4], v[4];
+#if CONFIG_AV1_HIGHBITDEPTH
+    if (enc_img.fmt & AOM_IMG_FMT_HIGHBITDEPTH) {
+      aom_find_mismatch_high(&enc_img, &dec_img, y, u, v);
+    } else {
+      aom_find_mismatch(&enc_img, &dec_img, y, u, v);
+    }
+#else
+    aom_find_mismatch(&enc_img, &dec_img, y, u, v);
+#endif
+    decoder->err = 1;
+    printf(
+        "Encode/decode mismatch on frame %d at"
+        " Y[%d, %d] {%d/%d},"
+        " U[%d, %d] {%d/%d},"
+        " V[%d, %d] {%d/%d}",
+        frames_out, y[0], y[1], y[2], y[3], u[0], u[1], u[2], u[3], v[0], v[1],
+        v[2], v[3]);
+    *mismatch_seen = frames_out;
+  }
+
+  aom_img_free(&enc_img);
+  aom_img_free(&dec_img);
+}
+#endif  // CONFIG_AV1_DECODER
+
 int main(int argc, const char **argv) {
   AppInput app_input;
   AvxVideoWriter *outfile[AOM_MAX_LAYERS] = { NULL };
@@ -1017,6 +1075,17 @@ int main(int argc, const char **argv) {
   aom_svc_params_t svc_params;
   aom_svc_ref_frame_config_t ref_frame_config;
 
+#if CONFIG_INTERNAL_STATS
+  FILE *stats_file = fopen("opsnr.stt", "a");
+  if (stats_file == NULL) {
+    die("Cannot open opsnr.stt\n");
+  }
+#endif
+#if CONFIG_AV1_DECODER
+  int mismatch_seen = 0;
+  aom_codec_ctx_t decoder;
+#endif
+
   struct RateControlMetrics rc;
   int64_t cx_time = 0;
   int64_t cx_time_sl[3];  // max number of spatial layers.
@@ -1039,11 +1108,12 @@ int main(int argc, const char **argv) {
   app_input.input_ctx.framerate.denominator = 1;
   app_input.input_ctx.only_i420 = 1;
   app_input.input_ctx.bit_depth = 0;
+  app_input.speed = 7;
   exec_name = argv[0];
 
   // start with default encoder configuration
-  aom_codec_err_t res =
-      aom_codec_enc_config_default(aom_codec_av1_cx(), &cfg, 0);
+  aom_codec_err_t res = aom_codec_enc_config_default(aom_codec_av1_cx(), &cfg,
+                                                     AOM_USAGE_REALTIME);
   if (res) {
     die("Failed to get config: %s\n", aom_codec_err_to_string(res));
   }
@@ -1071,10 +1141,13 @@ int main(int argc, const char **argv) {
   unsigned int width = cfg.g_w;
   unsigned int height = cfg.g_h;
 
-  if (ts_number_layers !=
-          mode_to_num_temporal_layers[app_input.layering_mode] ||
-      ss_number_layers != mode_to_num_spatial_layers[app_input.layering_mode]) {
-    die("Number of layers doesn't match layering mode.");
+  if (app_input.layering_mode >= 0) {
+    if (ts_number_layers !=
+            mode_to_num_temporal_layers[app_input.layering_mode] ||
+        ss_number_layers !=
+            mode_to_num_spatial_layers[app_input.layering_mode]) {
+      die("Number of layers doesn't match layering mode.");
+    }
   }
 
   // Y4M reader has its own allocation.
@@ -1109,20 +1182,16 @@ int main(int argc, const char **argv) {
     svc_params.framerate_factor[2] = 1;
   }
 
-  framerate = cfg.g_timebase.den / cfg.g_timebase.num;
-  set_rate_control_metrics(&rc, framerate, ss_number_layers, ts_number_layers);
-
   if (app_input.input_ctx.file_type == FILE_TYPE_Y4M) {
-    if (app_input.input_ctx.width != cfg.g_w ||
-        app_input.input_ctx.height != cfg.g_h) {
-      die("Incorrect width or height: %d x %d", cfg.g_w, cfg.g_h);
-    }
-    if (app_input.input_ctx.framerate.numerator != cfg.g_timebase.den ||
-        app_input.input_ctx.framerate.denominator != cfg.g_timebase.num) {
-      die("Incorrect framerate: numerator %d denominator %d",
-          cfg.g_timebase.num, cfg.g_timebase.den);
-    }
+    // Override these settings with the info from Y4M file.
+    cfg.g_w = app_input.input_ctx.width;
+    cfg.g_h = app_input.input_ctx.height;
+    // g_timebase is the reciprocal of frame rate.
+    cfg.g_timebase.num = app_input.input_ctx.framerate.denominator;
+    cfg.g_timebase.den = app_input.input_ctx.framerate.numerator;
   }
+  framerate = cfg.g_timebase.den / cfg.g_timebase.num;
+  set_rate_control_metrics(&rc, framerate, ss_number_layers, ts_number_layers);
 
   AvxVideoInfo info;
   info.codec_fourcc = get_fourcc_by_aom_encoder(encoder);
@@ -1162,6 +1231,12 @@ int main(int argc, const char **argv) {
   if (aom_codec_enc_init(&codec, encoder, &cfg, 0))
     die("Failed to initialize encoder");
 
+#if CONFIG_AV1_DECODER
+  if (aom_codec_dec_init(&decoder, get_aom_decoder_by_index(0), NULL, 0)) {
+    die("Failed to initialize decoder");
+  }
+#endif
+
   aom_codec_control(&codec, AOME_SET_CPUUSED, app_input.speed);
   aom_codec_control(&codec, AV1E_SET_AQ_MODE, app_input.aq_mode ? 3 : 0);
   aom_codec_control(&codec, AV1E_SET_GF_CBR_BOOST_PCT, 0);
@@ -1172,6 +1247,7 @@ int main(int argc, const char **argv) {
   aom_codec_control(&codec, AV1E_SET_COEFF_COST_UPD_FREQ, 3);
   aom_codec_control(&codec, AV1E_SET_MODE_COST_UPD_FREQ, 3);
   aom_codec_control(&codec, AV1E_SET_MV_COST_UPD_FREQ, 3);
+  aom_codec_control(&codec, AV1E_SET_DV_COST_UPD_FREQ, 3);
   aom_codec_control(&codec, AV1E_SET_CDF_UPDATE_MODE, 1);
   aom_codec_control(&codec, AV1E_SET_TILE_COLUMNS,
                     cfg.g_threads ? get_msb(cfg.g_threads) : 0);
@@ -1196,8 +1272,8 @@ int main(int argc, const char **argv) {
     svc_params.scaling_factor_num[1] = 1;
     svc_params.scaling_factor_den[1] = 2;
   }
-
   aom_codec_control(&codec, AV1E_SET_SVC_PARAMS, &svc_params);
+  // TODO(aomedia:3032): Configure KSVC in fixed mode.
 
   // This controls the maximum target size of the key frame.
   // For generating smaller key frames, use a smaller max_intra_size_pct
@@ -1220,15 +1296,34 @@ int main(int argc, const char **argv) {
       const aom_codec_cx_pkt_t *pkt;
       int layer = 0;
 
-      // Set the reference/update flags, layer_id, and reference_map
-      // buffer index.
-      set_layer_pattern(app_input.layering_mode, frame_cnt, &layer_id,
-                        &ref_frame_config, &use_svc_control, slx, is_key_frame,
-                        (app_input.layering_mode == 10));
-      aom_codec_control(&codec, AV1E_SET_SVC_LAYER_ID, &layer_id);
-      if (use_svc_control)
-        aom_codec_control(&codec, AV1E_SET_SVC_REF_FRAME_CONFIG,
-                          &ref_frame_config);
+      // For flexible mode:
+      if (app_input.layering_mode >= 0) {
+        // Set the reference/update flags, layer_id, and reference_map
+        // buffer index.
+        set_layer_pattern(app_input.layering_mode, frame_cnt, &layer_id,
+                          &ref_frame_config, &use_svc_control, slx,
+                          is_key_frame, (app_input.layering_mode == 10));
+        aom_codec_control(&codec, AV1E_SET_SVC_LAYER_ID, &layer_id);
+        if (use_svc_control)
+          aom_codec_control(&codec, AV1E_SET_SVC_REF_FRAME_CONFIG,
+                            &ref_frame_config);
+      } else {
+        // Only up to 3 temporal layers supported in fixed mode.
+        // Only need to set spatial and temporal layer_id: reference
+        // prediction, refresh, and buffer_idx are set internally.
+        layer_id.spatial_layer_id = slx;
+        layer_id.temporal_layer_id = 0;
+        if (ts_number_layers == 2) {
+          layer_id.temporal_layer_id = (frame_cnt % 2) != 0;
+        } else if (ts_number_layers == 3) {
+          if (frame_cnt % 2 != 0)
+            layer_id.temporal_layer_id = 2;
+          else if ((frame_cnt > 1) && ((frame_cnt - 2) % 4 == 0))
+            layer_id.temporal_layer_id = 1;
+        }
+        aom_codec_control(&codec, AV1E_SET_SVC_LAYER_ID, &layer_id);
+      }
+
       if (set_err_resil_frame) {
         // Set error_resilient per frame: off/0 for base layer and
         // on/1 for enhancement layer frames.
@@ -1332,14 +1427,31 @@ int main(int argc, const char **argv) {
                 sum_bitrate2 = 0.0;
               }
             }
+
+#if CONFIG_AV1_DECODER
+            if (aom_codec_decode(&decoder, pkt->data.frame.buf,
+                                 (unsigned int)pkt->data.frame.sz, NULL))
+              die_codec(&decoder, "Failed to decode frame.");
+#endif
+
             break;
           default: break;
         }
       }
+#if CONFIG_AV1_DECODER
+      // Don't look for mismatch on top spatial and top temporal layers as they
+      // are non reference frames.
+      if ((ss_number_layers > 1 || ts_number_layers > 1) &&
+          !(layer_id.temporal_layer_id > 0 &&
+            layer_id.temporal_layer_id == (int)ts_number_layers - 1)) {
+        test_decode(&codec, &decoder, frame_cnt, &mismatch_seen);
+      }
+#endif
     }  // loop over spatial layers
     ++frame_cnt;
     pts += frame_duration;
   }
+
   close_input_file(&(app_input.input_ctx));
   printout_rate_control_summary(&rc, frame_cnt, ss_number_layers,
                                 ts_number_layers);
@@ -1358,6 +1470,15 @@ int main(int argc, const char **argv) {
 
   if (aom_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec");
 
+#if CONFIG_INTERNAL_STATS
+  if (mismatch_seen) {
+    fprintf(stats_file, "First mismatch occurred in frame %d\n", mismatch_seen);
+  } else {
+    fprintf(stats_file, "No mismatch detected in recon buffers\n");
+  }
+  fclose(stats_file);
+#endif
+
   // Try to rewrite the output file headers with the actual frame count.
   for (i = 0; i < ss_number_layers * ts_number_layers; ++i)
     aom_video_writer_close(outfile[i]);