1 files changed, 153 insertions, 1 deletions
diff --git a/call/rtp_payload_params.cc b/call/rtp_payload_params.cc
index 18b113852e..c6a56a389e 100644
--- a/call/rtp_payload_params.cc
+++ b/call/rtp_payload_params.cc
@@ -131,6 +131,9 @@ RtpPayloadParams::RtpPayloadParams(const uint32_t ssrc,
     : ssrc_(ssrc),
       generic_picture_id_experiment_(
           absl::StartsWith(trials.Lookup("WebRTC-GenericPictureId"),
+                           "Enabled")),
+      simulate_generic_vp9_(
+          absl::StartsWith(trials.Lookup("WebRTC-Vp9DependencyDescriptor"),
                            "Enabled")) {
   for (auto& spatial_layer : last_shared_frame_id_)
     spatial_layer.fill(-1);
@@ -277,8 +280,13 @@ void RtpPayloadParams::SetGeneric(const CodecSpecificInfo* codec_specific_info,
       }
       return;
     case VideoCodecType::kVideoCodecVP9:
+      if (simulate_generic_vp9_ && codec_specific_info != nullptr) {
+        Vp9ToGeneric(codec_specific_info->codecSpecific.VP9, frame_id,
+                     *rtp_video_header);
+      }
+      return;
     case VideoCodecType::kVideoCodecAV1:
-      // TODO(philipel): Implement VP9 and AV1 to generic descriptor.
+      // TODO(philipel): Implement AV1 to generic descriptor.
       return;
     case VideoCodecType::kVideoCodecH264:
       if (codec_specific_info) {
@@ -399,6 +407,150 @@ void RtpPayloadParams::Vp8ToGeneric(const CodecSpecificInfoVP8& vp8_info,
   }
 }
 
+FrameDependencyStructure RtpPayloadParams::MinimalisticVp9Structure(
+    const CodecSpecificInfoVP9& vp9) {
+  const int num_spatial_layers = vp9.num_spatial_layers;
+  const int num_temporal_layers = kMaxTemporalStreams;
+  FrameDependencyStructure structure;
+  structure.num_decode_targets = num_spatial_layers * num_temporal_layers;
+  structure.num_chains = num_spatial_layers;
+  structure.templates.reserve(num_spatial_layers * num_temporal_layers);
+  for (int sid = 0; sid < num_spatial_layers; ++sid) {
+    for (int tid = 0; tid < num_temporal_layers; ++tid) {
+      FrameDependencyTemplate a_template;
+      a_template.spatial_id = sid;
+      a_template.temporal_id = tid;
+      for (int s = 0; s < num_spatial_layers; ++s) {
+        for (int t = 0; t < num_temporal_layers; ++t) {
+          // Prefer kSwitch to indicate that the frame is part of the decode
+          // target because RtpPayloadParams::Vp9ToGeneric uses that indication
+          // more often than kRequired, increasing the chance that a custom dti
+          // need not use more bits in the dependency descriptor on the wire.
+          a_template.decode_target_indications.push_back(
+              sid <= s && tid <= t ? DecodeTargetIndication::kSwitch
+                                   : DecodeTargetIndication::kNotPresent);
+        }
+      }
+      a_template.frame_diffs.push_back(tid == 0 ? num_spatial_layers *
+                                                      num_temporal_layers
+                                                : num_spatial_layers);
+      a_template.chain_diffs.assign(structure.num_chains, 1);
+      structure.templates.push_back(a_template);
+
+      structure.decode_target_protected_by_chain.push_back(sid);
+    }
+    if (vp9.ss_data_available && vp9.spatial_layer_resolution_present) {
+      structure.resolutions.emplace_back(vp9.width[sid], vp9.height[sid]);
+    }
+  }
+  return structure;
+}
+
+void RtpPayloadParams::Vp9ToGeneric(const CodecSpecificInfoVP9& vp9_info,
+                                    int64_t shared_frame_id,
+                                    RTPVideoHeader& rtp_video_header) {
+  const auto& vp9_header =
+      absl::get<RTPVideoHeaderVP9>(rtp_video_header.video_type_header);
+  const int num_spatial_layers = vp9_header.num_spatial_layers;
+  const int num_temporal_layers = kMaxTemporalStreams;
+
+  int spatial_index =
+      vp9_header.spatial_idx != kNoSpatialIdx ? vp9_header.spatial_idx : 0;
+  int temporal_index =
+      vp9_header.temporal_idx != kNoTemporalIdx ? vp9_header.temporal_idx : 0;
+
+  if (spatial_index >= num_spatial_layers ||
+      temporal_index >= num_temporal_layers ||
+      num_spatial_layers > RtpGenericFrameDescriptor::kMaxSpatialLayers) {
+    // Prefer to generate no generic layering than an inconsistent one.
+    return;
+  }
+
+  RTPVideoHeader::GenericDescriptorInfo& result =
+      rtp_video_header.generic.emplace();
+
+  result.frame_id = shared_frame_id;
+  result.spatial_index = spatial_index;
+  result.temporal_index = temporal_index;
+
+  result.decode_target_indications.reserve(num_spatial_layers *
+                                           num_temporal_layers);
+  for (int sid = 0; sid < num_spatial_layers; ++sid) {
+    for (int tid = 0; tid < num_temporal_layers; ++tid) {
+      DecodeTargetIndication dti;
+      if (sid < spatial_index || tid < temporal_index) {
+        dti = DecodeTargetIndication::kNotPresent;
+      } else if (spatial_index != sid &&
+                 vp9_header.non_ref_for_inter_layer_pred) {
+        dti = DecodeTargetIndication::kNotPresent;
+      } else if (sid == spatial_index && tid == temporal_index) {
+        // Assume that if frame is decodable, all of its own layer is decodable.
+        dti = DecodeTargetIndication::kSwitch;
+      } else if (sid == spatial_index && vp9_header.temporal_up_switch) {
+        dti = DecodeTargetIndication::kSwitch;
+      } else if (!vp9_header.inter_pic_predicted) {
+        // Key frame or spatial upswitch
+        dti = DecodeTargetIndication::kSwitch;
+      } else {
+        // Make no other assumptions. That should be safe, though suboptimal.
+        // To provide a more accurate dti, the encoder wrapper should fill in
+        // CodecSpecificInfo::generic_frame_info.
+        dti = DecodeTargetIndication::kRequired;
+      }
+      result.decode_target_indications.push_back(dti);
+    }
+  }
+
+  // Calculate frame dependencies.
+  static constexpr int kPictureDiffLimit = 128;
+  if (last_vp9_frame_id_.empty()) {
+    // Create the array only if it is ever used.
+    last_vp9_frame_id_.resize(kPictureDiffLimit);
+  }
+  if (vp9_header.inter_layer_predicted && spatial_index > 0) {
+    result.dependencies.push_back(
+        last_vp9_frame_id_[vp9_header.picture_id % kPictureDiffLimit]
+                          [spatial_index - 1]);
+  }
+  if (vp9_header.inter_pic_predicted) {
+    for (size_t i = 0; i < vp9_header.num_ref_pics; ++i) {
+      // picture_id is a 15-bit number that wraps around. Though underflow may
+      // produce a picture id that exceeds 2^15, it is ok because this code
+      // block uses only the last 7 bits of the picture_id.
+      uint16_t depend_on = vp9_header.picture_id - vp9_header.pid_diff[i];
+      result.dependencies.push_back(
+          last_vp9_frame_id_[depend_on % kPictureDiffLimit][spatial_index]);
+    }
+  }
+  last_vp9_frame_id_[vp9_header.picture_id % kPictureDiffLimit][spatial_index] =
+      shared_frame_id;
+
+  // Calculate chains, assuming chain includes all frames with temporal_id = 0.
+  if (!vp9_header.inter_pic_predicted && !vp9_header.inter_layer_predicted) {
+    // Assume frames without dependencies also reset chains.
+    for (int sid = spatial_index; sid < num_spatial_layers; ++sid) {
+      chain_last_frame_id_[sid] = -1;
+    }
+  }
+  result.chain_diffs.resize(num_spatial_layers);
+  for (int sid = 0; sid < num_spatial_layers; ++sid) {
+    if (chain_last_frame_id_[sid] == -1) {
+      result.chain_diffs[sid] = 0;
+      continue;
+    }
+    result.chain_diffs[sid] = shared_frame_id - chain_last_frame_id_[sid];
+  }
+
+  if (temporal_index == 0) {
+    chain_last_frame_id_[spatial_index] = shared_frame_id;
+    if (!vp9_header.non_ref_for_inter_layer_pred) {
+      for (int sid = spatial_index + 1; sid < num_spatial_layers; ++sid) {
+        chain_last_frame_id_[sid] = shared_frame_id;
+      }
+    }
+  }
+}
+
 void RtpPayloadParams::SetDependenciesVp8Deprecated(
     const CodecSpecificInfoVP8& vp8_info,
     int64_t shared_frame_id,