diff options
author | asapersson <asapersson@webrtc.org> | 2015-07-28 04:02:54 -0700 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2015-07-28 11:02:58 +0000 |
commit | f38ea3caa39887c63e7d4862dcf420d4a35c1073 (patch) | |
tree | 46ba3a9ba0cb895d91a45d8df0863db4fe0dcfb5 | |
parent | 95b8718dc63362470dd4ebeb428d70cd6ed780e2 (diff) | |
download | webrtc-f38ea3caa39887c63e7d4862dcf420d4a35c1073.tar.gz |
Add support for VP9 packetization/depacketization.
RTP payload format for VP9:
https://www.ietf.org/id/draft-uberti-payload-vp9-01.txt
BUG=webrtc:4148, webrtc:4168, chromium:500602
TBR=mflodman
Review URL: https://codereview.webrtc.org/1232023006
Cr-Commit-Position: refs/heads/master@{#9649}
-rw-r--r-- | webrtc/modules/interface/module_common_types.h | 83 | ||||
-rw-r--r-- | webrtc/modules/modules.gyp | 3 | ||||
-rw-r--r-- | webrtc/modules/rtp_rtcp/BUILD.gn | 2 | ||||
-rw-r--r-- | webrtc/modules/rtp_rtcp/rtp_rtcp.gypi | 2 | ||||
-rw-r--r-- | webrtc/modules/rtp_rtcp/source/rtp_format.cc | 6 | ||||
-rw-r--r-- | webrtc/modules/rtp_rtcp/source/rtp_format_vp9.cc | 764 | ||||
-rw-r--r-- | webrtc/modules/rtp_rtcp/source/rtp_format_vp9.h | 108 | ||||
-rw-r--r-- | webrtc/modules/rtp_rtcp/source/rtp_format_vp9_unittest.cc | 664 | ||||
-rw-r--r-- | webrtc/modules/video_coding/main/source/packet.cc | 12 |
9 files changed, 1643 insertions, 1 deletions
diff --git a/webrtc/modules/interface/module_common_types.h b/webrtc/modules/interface/module_common_types.h index 1202eee043..62fe69481a 100644 --- a/webrtc/modules/interface/module_common_types.h +++ b/webrtc/modules/interface/module_common_types.h @@ -32,8 +32,15 @@ struct RTPAudioHeader { }; const int16_t kNoPictureId = -1; +const int16_t kMaxOneBytePictureId = 0x7F; // 7 bits +const int16_t kMaxTwoBytePictureId = 0x7FFF; // 15 bits const int16_t kNoTl0PicIdx = -1; const uint8_t kNoTemporalIdx = 0xFF; +const uint8_t kNoSpatialIdx = 0xFF; +const uint8_t kNoGofIdx = 0xFF; +const size_t kMaxVp9RefPics = 3; +const size_t kMaxVp9FramesInGof = 16; +const size_t kMaxVp9NumberOfSpatialLayers = 8; const int kNoKeyIdx = -1; struct RTPVideoHeaderVP8 { @@ -62,6 +69,80 @@ struct RTPVideoHeaderVP8 { // in a VP8 partition. Otherwise false }; +struct GofInfoVP9 { + void CopyGofInfoVP9(const GofInfoVP9& src) { + num_frames_in_gof = src.num_frames_in_gof; + for (size_t i = 0; i < num_frames_in_gof; ++i) { + temporal_idx[i] = src.temporal_idx[i]; + temporal_up_switch[i] = src.temporal_up_switch[i]; + num_ref_pics[i] = src.num_ref_pics[i]; + for (size_t r = 0; r < num_ref_pics[i]; ++r) { + pid_diff[i][r] = src.pid_diff[i][r]; + } + } + } + + size_t num_frames_in_gof; + uint8_t temporal_idx[kMaxVp9FramesInGof]; + bool temporal_up_switch[kMaxVp9FramesInGof]; + size_t num_ref_pics[kMaxVp9FramesInGof]; + int16_t pid_diff[kMaxVp9FramesInGof][kMaxVp9RefPics]; +}; + +struct RTPVideoHeaderVP9 { + void InitRTPVideoHeaderVP9() { + inter_pic_predicted = false; + flexible_mode = false; + beginning_of_frame = false; + end_of_frame = false; + ss_data_available = false; + picture_id = kNoPictureId; + max_picture_id = kMaxTwoBytePictureId; + tl0_pic_idx = kNoTl0PicIdx; + temporal_idx = kNoTemporalIdx; + spatial_idx = kNoSpatialIdx; + temporal_up_switch = false; + inter_layer_predicted = false; + gof_idx = kNoGofIdx; + num_ref_pics = 0; + } + + bool inter_pic_predicted; // This layer frame is 
dependent on previously + // coded frame(s). + bool flexible_mode; // This frame is in flexible mode. + bool beginning_of_frame; // True if this packet is the first in a VP9 layer + // frame. + bool end_of_frame; // True if this packet is the last in a VP9 layer frame. + bool ss_data_available; // True if SS data is available in this payload + // descriptor. + int16_t picture_id; // PictureID index, 15 bits; + // kNoPictureId if PictureID does not exist. + int16_t max_picture_id; // Maximum picture ID index; either 0x7F or 0x7FFF; + int16_t tl0_pic_idx; // TL0PIC_IDX, 8 bits; + // kNoTl0PicIdx means no value provided. + uint8_t temporal_idx; // Temporal layer index, or kNoTemporalIdx. + uint8_t spatial_idx; // Spatial layer index, or kNoSpatialIdx. + bool temporal_up_switch; // True if upswitch to higher frame rate is possible + // starting from this frame. + bool inter_layer_predicted; // Frame is dependent on directly lower spatial + // layer frame. + + uint8_t gof_idx; // Index to predefined temporal frame info in SS data. + + size_t num_ref_pics; // Number of reference pictures used by this layer + // frame. + int16_t pid_diff[kMaxVp9RefPics]; // P_DIFF signaled to derive the PictureID + // of the reference pictures. + int16_t ref_picture_id[kMaxVp9RefPics]; // PictureID of reference pictures. + + // SS data. + size_t num_spatial_layers; + bool spatial_layer_resolution_present; + uint16_t width[kMaxVp9NumberOfSpatialLayers]; + uint16_t height[kMaxVp9NumberOfSpatialLayers]; + GofInfoVP9 gof; +}; + // The packetization types that we support: single, aggregated, and fragmented. enum H264PacketizationTypes { kH264SingleNalu, // This packet contains a single NAL unit. 
@@ -85,6 +166,7 @@ struct RTPVideoHeaderH264 { union RTPVideoTypeHeader { RTPVideoHeaderVP8 VP8; + RTPVideoHeaderVP9 VP9; RTPVideoHeaderH264 H264; }; @@ -92,6 +174,7 @@ enum RtpVideoCodecTypes { kRtpVideoNone, kRtpVideoGeneric, kRtpVideoVp8, + kRtpVideoVp9, kRtpVideoH264 }; // Since RTPVideoHeader is used as a member of a union, it can't have a diff --git a/webrtc/modules/modules.gyp b/webrtc/modules/modules.gyp index 24d6d165a3..232ba319e2 100644 --- a/webrtc/modules/modules.gyp +++ b/webrtc/modules/modules.gyp @@ -243,9 +243,10 @@ 'rtp_rtcp/source/rtcp_utility_unittest.cc', 'rtp_rtcp/source/rtp_fec_unittest.cc', 'rtp_rtcp/source/rtp_format_h264_unittest.cc', - 'rtp_rtcp/source/rtp_format_vp8_unittest.cc', 'rtp_rtcp/source/rtp_format_vp8_test_helper.cc', 'rtp_rtcp/source/rtp_format_vp8_test_helper.h', + 'rtp_rtcp/source/rtp_format_vp8_unittest.cc', + 'rtp_rtcp/source/rtp_format_vp9_unittest.cc', 'rtp_rtcp/source/rtp_packet_history_unittest.cc', 'rtp_rtcp/source/rtp_payload_registry_unittest.cc', 'rtp_rtcp/source/rtp_rtcp_impl_unittest.cc', diff --git a/webrtc/modules/rtp_rtcp/BUILD.gn b/webrtc/modules/rtp_rtcp/BUILD.gn index ca4b812731..48c4921ca5 100644 --- a/webrtc/modules/rtp_rtcp/BUILD.gn +++ b/webrtc/modules/rtp_rtcp/BUILD.gn @@ -60,6 +60,8 @@ source_set("rtp_rtcp") { "source/rtp_format_video_generic.h", "source/rtp_format_vp8.cc", "source/rtp_format_vp8.h", + "source/rtp_format_vp9.cc", + "source/rtp_format_vp9.h", "source/rtp_header_extension.cc", "source/rtp_header_extension.h", "source/rtp_header_parser.cc", diff --git a/webrtc/modules/rtp_rtcp/rtp_rtcp.gypi b/webrtc/modules/rtp_rtcp/rtp_rtcp.gypi index dcd47dfd72..2846ed666b 100644 --- a/webrtc/modules/rtp_rtcp/rtp_rtcp.gypi +++ b/webrtc/modules/rtp_rtcp/rtp_rtcp.gypi @@ -96,6 +96,8 @@ 'source/rtp_format_h264.h', 'source/rtp_format_vp8.cc', 'source/rtp_format_vp8.h', + 'source/rtp_format_vp9.cc', + 'source/rtp_format_vp9.h', 'source/rtp_format_video_generic.cc', 'source/rtp_format_video_generic.h', 
'source/vp8_partition_aggregator.cc', diff --git a/webrtc/modules/rtp_rtcp/source/rtp_format.cc b/webrtc/modules/rtp_rtcp/source/rtp_format.cc index d03e38c387..cdb9c4920e 100644 --- a/webrtc/modules/rtp_rtcp/source/rtp_format.cc +++ b/webrtc/modules/rtp_rtcp/source/rtp_format.cc @@ -13,6 +13,7 @@ #include "webrtc/modules/rtp_rtcp/source/rtp_format_h264.h" #include "webrtc/modules/rtp_rtcp/source/rtp_format_video_generic.h" #include "webrtc/modules/rtp_rtcp/source/rtp_format_vp8.h" +#include "webrtc/modules/rtp_rtcp/source/rtp_format_vp9.h" namespace webrtc { RtpPacketizer* RtpPacketizer::Create(RtpVideoCodecTypes type, @@ -25,6 +26,9 @@ RtpPacketizer* RtpPacketizer::Create(RtpVideoCodecTypes type, case kRtpVideoVp8: assert(rtp_type_header != NULL); return new RtpPacketizerVp8(rtp_type_header->VP8, max_payload_len); + case kRtpVideoVp9: + assert(rtp_type_header != NULL); + return new RtpPacketizerVp9(rtp_type_header->VP9, max_payload_len); case kRtpVideoGeneric: return new RtpPacketizerGeneric(frame_type, max_payload_len); case kRtpVideoNone: @@ -39,6 +43,8 @@ RtpDepacketizer* RtpDepacketizer::Create(RtpVideoCodecTypes type) { return new RtpDepacketizerH264(); case kRtpVideoVp8: return new RtpDepacketizerVp8(); + case kRtpVideoVp9: + return new RtpDepacketizerVp9(); case kRtpVideoGeneric: return new RtpDepacketizerGeneric(); case kRtpVideoNone: diff --git a/webrtc/modules/rtp_rtcp/source/rtp_format_vp9.cc b/webrtc/modules/rtp_rtcp/source/rtp_format_vp9.cc new file mode 100644 index 0000000000..00b2f72db8 --- /dev/null +++ b/webrtc/modules/rtp_rtcp/source/rtp_format_vp9.cc @@ -0,0 +1,764 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/rtp_rtcp/source/rtp_format_vp9.h" + +#include <assert.h> +#include <string.h> + +#include <cmath> + +#include "webrtc/base/bitbuffer.h" +#include "webrtc/base/checks.h" +#include "webrtc/system_wrappers/interface/logging.h" + +#define RETURN_FALSE_ON_ERROR(x) \ + if (!(x)) { \ + return false; \ + } + +namespace webrtc { +namespace { +// Length of VP9 payload descriptors' fixed part. +const size_t kFixedPayloadDescriptorBytes = 1; + +// Packet fragmentation mode. If true, packets are split into (almost) equal +// sizes. Otherwise, as many bytes as possible are fit into one packet. +const bool kBalancedMode = true; + +const uint32_t kReservedBitValue0 = 0; + +uint8_t TemporalIdxField(const RTPVideoHeaderVP9& hdr, uint8_t def) { + return (hdr.temporal_idx == kNoTemporalIdx) ? def : hdr.temporal_idx; +} + +uint8_t SpatialIdxField(const RTPVideoHeaderVP9& hdr, uint8_t def) { + return (hdr.spatial_idx == kNoSpatialIdx) ? def : hdr.spatial_idx; +} + +int16_t Tl0PicIdxField(const RTPVideoHeaderVP9& hdr, uint8_t def) { + return (hdr.tl0_pic_idx == kNoTl0PicIdx) ? def : hdr.tl0_pic_idx; +} + +uint8_t GofIdxField(const RTPVideoHeaderVP9& hdr, uint8_t def) { + return (hdr.gof_idx == kNoGofIdx) ? def : hdr.gof_idx; +} + +// Picture ID: +// +// +-+-+-+-+-+-+-+-+ +// I: |M| PICTURE ID | M:0 => picture id is 7 bits. +// +-+-+-+-+-+-+-+-+ M:1 => picture id is 15 bits. +// M: | EXTENDED PID | +// +-+-+-+-+-+-+-+-+ +// +size_t PictureIdLength(const RTPVideoHeaderVP9& hdr) { + if (hdr.picture_id == kNoPictureId) + return 0; + return (hdr.max_picture_id == kMaxOneBytePictureId) ? 
1 : 2; +} + +bool PictureIdPresent(const RTPVideoHeaderVP9& hdr) { + return PictureIdLength(hdr) > 0; +} + +// Layer indices: +// +// Flexible mode (F=1): Non-flexible mode (F=0): +// +// +-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+ +// L: | T |U| S |D| |GOF_IDX| S |D| +// +-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+ +// | TL0PICIDX | +// +-+-+-+-+-+-+-+-+ +// +size_t LayerInfoLength(const RTPVideoHeaderVP9& hdr) { + if (hdr.flexible_mode) { + return (hdr.temporal_idx == kNoTemporalIdx && + hdr.spatial_idx == kNoSpatialIdx) ? 0 : 1; + } else { + return (hdr.gof_idx == kNoGofIdx && + hdr.spatial_idx == kNoSpatialIdx) ? 0 : 2; + } +} + +bool LayerInfoPresent(const RTPVideoHeaderVP9& hdr) { + return LayerInfoLength(hdr) > 0; +} + +// Reference indices: +// +// +-+-+-+-+-+-+-+-+ -| P=1,F=1: At least one reference index +// P,F: | P_DIFF |X|N| . has to be specified. +// +-+-+-+-+-+-+-+-+ . up to 3 times +// X: |EXTENDED P_DIFF| . X=1: Extended P_DIFF is used (14 +// +-+-+-+-+-+-+-+-+ -| bits). Else 6 bits are used. +// N=1: An additional P_DIFF follows +// current P_DIFF. +size_t RefIndicesLength(const RTPVideoHeaderVP9& hdr) { + if (!hdr.inter_pic_predicted || !hdr.flexible_mode) + return 0; + + DCHECK_GT(hdr.num_ref_pics, 0U); + DCHECK_LE(hdr.num_ref_pics, kMaxVp9RefPics); + size_t length = 0; + for (size_t i = 0; i < hdr.num_ref_pics; ++i) { + length += hdr.pid_diff[i] > 0x3F ? 2 : 1; // P_DIFF > 6 bits => extended + } + return length; +} + +// Scalability structure (SS). +// +// +-+-+-+-+-+-+-+-+ +// V: | N_S |Y| N_G | +// +-+-+-+-+-+-+-+-+ -| +// Y: | WIDTH | (OPTIONAL) . +// + + . +// | | (OPTIONAL) . +// +-+-+-+-+-+-+-+-+ . N_S + 1 times +// | HEIGHT | (OPTIONAL) . +// + + . +// | | (OPTIONAL) . +// +-+-+-+-+-+-+-+-+ -| -| +// N_G: | T |U| R |-|-| (OPTIONAL) . +// +-+-+-+-+-+-+-+-+ -| . N_G + 1 times +// | P_DIFF | (OPTIONAL) . R times . 
+// +-+-+-+-+-+-+-+-+ -| -| +// +size_t SsDataLength(const RTPVideoHeaderVP9& hdr) { + if (!hdr.ss_data_available) + return 0; + + DCHECK_GT(hdr.num_spatial_layers, 0U); + DCHECK_LE(hdr.num_spatial_layers, kMaxVp9NumberOfSpatialLayers); + DCHECK_GT(hdr.gof.num_frames_in_gof, 0U); + DCHECK_LE(hdr.gof.num_frames_in_gof, kMaxVp9FramesInGof); + size_t length = 1; // V + if (hdr.spatial_layer_resolution_present) { + length += 4 * hdr.num_spatial_layers; // Y + } + // N_G + length += hdr.gof.num_frames_in_gof; // T, U, R + for (size_t i = 0; i < hdr.gof.num_frames_in_gof; ++i) { + DCHECK_LE(hdr.gof.num_ref_pics[i], kMaxVp9RefPics); + length += hdr.gof.num_ref_pics[i]; // R times + } + return length; +} + +size_t PayloadDescriptorLengthMinusSsData(const RTPVideoHeaderVP9& hdr) { + return kFixedPayloadDescriptorBytes + PictureIdLength(hdr) + + LayerInfoLength(hdr) + RefIndicesLength(hdr); +} + +size_t PayloadDescriptorLength(const RTPVideoHeaderVP9& hdr) { + return PayloadDescriptorLengthMinusSsData(hdr) + SsDataLength(hdr); +} + +void QueuePacket(size_t start_pos, + size_t size, + bool layer_begin, + bool layer_end, + RtpPacketizerVp9::PacketInfoQueue* packets) { + RtpPacketizerVp9::PacketInfo packet_info; + packet_info.payload_start_pos = start_pos; + packet_info.size = size; + packet_info.layer_begin = layer_begin; + packet_info.layer_end = layer_end; + packets->push(packet_info); +} + +// Picture ID: +// +// +-+-+-+-+-+-+-+-+ +// I: |M| PICTURE ID | M:0 => picture id is 7 bits. +// +-+-+-+-+-+-+-+-+ M:1 => picture id is 15 bits. +// M: | EXTENDED PID | +// +-+-+-+-+-+-+-+-+ +// +bool WritePictureId(const RTPVideoHeaderVP9& vp9, + rtc::BitBufferWriter* writer) { + bool m_bit = (PictureIdLength(vp9) == 2); + RETURN_FALSE_ON_ERROR(writer->WriteBits(m_bit ? 1 : 0, 1)); + RETURN_FALSE_ON_ERROR(writer->WriteBits(vp9.picture_id, m_bit ? 
15 : 7)); + return true; +} + +// Layer indices: +// +// Flexible mode (F=1): +// +// +-+-+-+-+-+-+-+-+ +// L: | T |U| S |D| +// +-+-+-+-+-+-+-+-+ +// +bool WriteLayerInfoFlexibleMode(const RTPVideoHeaderVP9& vp9, + rtc::BitBufferWriter* writer) { + RETURN_FALSE_ON_ERROR(writer->WriteBits(TemporalIdxField(vp9, 0), 3)); + RETURN_FALSE_ON_ERROR(writer->WriteBits(vp9.temporal_up_switch ? 1 : 0, 1)); + RETURN_FALSE_ON_ERROR(writer->WriteBits(SpatialIdxField(vp9, 0), 3)); + RETURN_FALSE_ON_ERROR(writer->WriteBits(vp9.inter_layer_predicted ? 1: 0, 1)); + return true; +} + +// Non-flexible mode (F=0): +// +// +-+-+-+-+-+-+-+-+ +// L: |GOF_IDX| S |D| +// +-+-+-+-+-+-+-+-+ +// | TL0PICIDX | +// +-+-+-+-+-+-+-+-+ +// +bool WriteLayerInfoNonFlexibleMode(const RTPVideoHeaderVP9& vp9, + rtc::BitBufferWriter* writer) { + RETURN_FALSE_ON_ERROR(writer->WriteBits(GofIdxField(vp9, 0), 4)); + RETURN_FALSE_ON_ERROR(writer->WriteBits(SpatialIdxField(vp9, 0), 3)); + RETURN_FALSE_ON_ERROR(writer->WriteBits(vp9.inter_layer_predicted ? 1: 0, 1)); + RETURN_FALSE_ON_ERROR(writer->WriteUInt8(Tl0PicIdxField(vp9, 0))); + return true; +} + +bool WriteLayerInfo(const RTPVideoHeaderVP9& vp9, + rtc::BitBufferWriter* writer) { + if (vp9.flexible_mode) { + return WriteLayerInfoFlexibleMode(vp9, writer); + } else { + return WriteLayerInfoNonFlexibleMode(vp9, writer); + } +} + +// Reference indices: +// +// +-+-+-+-+-+-+-+-+ -| P=1,F=1: At least one reference index +// P,F: | P_DIFF |X|N| . has to be specified. +// +-+-+-+-+-+-+-+-+ . up to 3 times +// X: |EXTENDED P_DIFF| . X=1: Extended P_DIFF is used (14 +// +-+-+-+-+-+-+-+-+ -| bits). Else 6 bits are used. +// N=1: An additional P_DIFF follows +// current P_DIFF. 
+bool WriteRefIndices(const RTPVideoHeaderVP9& vp9, + rtc::BitBufferWriter* writer) { + if (!PictureIdPresent(vp9) || + vp9.num_ref_pics == 0 || vp9.num_ref_pics > kMaxVp9RefPics) { + return false; + } + for (size_t i = 0; i < vp9.num_ref_pics; ++i) { + bool x_bit = (vp9.pid_diff[i] > 0x3F); + bool n_bit = !(i == vp9.num_ref_pics - 1); + if (x_bit) { + RETURN_FALSE_ON_ERROR(writer->WriteBits(vp9.pid_diff[i] >> 8, 6)); + RETURN_FALSE_ON_ERROR(writer->WriteBits(x_bit ? 1 : 0, 1)); + RETURN_FALSE_ON_ERROR(writer->WriteBits(n_bit ? 1 : 0, 1)); + RETURN_FALSE_ON_ERROR(writer->WriteUInt8(vp9.pid_diff[i])); + } else { + RETURN_FALSE_ON_ERROR(writer->WriteBits(vp9.pid_diff[i], 6)); + RETURN_FALSE_ON_ERROR(writer->WriteBits(x_bit ? 1 : 0, 1)); + RETURN_FALSE_ON_ERROR(writer->WriteBits(n_bit ? 1 : 0, 1)); + } + } + return true; +} + +// Scalability structure (SS). +// +// +-+-+-+-+-+-+-+-+ +// V: | N_S |Y| N_G | +// +-+-+-+-+-+-+-+-+ -| +// Y: | WIDTH | (OPTIONAL) . +// + + . +// | | (OPTIONAL) . +// +-+-+-+-+-+-+-+-+ . N_S + 1 times +// | HEIGHT | (OPTIONAL) . +// + + . +// | | (OPTIONAL) . +// +-+-+-+-+-+-+-+-+ -| -| +// N_G: | T |U| R |-|-| (OPTIONAL) . +// +-+-+-+-+-+-+-+-+ -| . N_G + 1 times +// | P_DIFF | (OPTIONAL) . R times . +// +-+-+-+-+-+-+-+-+ -| -| +// +bool WriteSsData(const RTPVideoHeaderVP9& vp9, rtc::BitBufferWriter* writer) { + DCHECK_GT(vp9.num_spatial_layers, 0U); + DCHECK_LE(vp9.num_spatial_layers, kMaxVp9NumberOfSpatialLayers); + DCHECK_GT(vp9.gof.num_frames_in_gof, 0U); + DCHECK_LE(vp9.gof.num_frames_in_gof, kMaxVp9FramesInGof); + + RETURN_FALSE_ON_ERROR(writer->WriteBits(vp9.num_spatial_layers - 1, 3)); + RETURN_FALSE_ON_ERROR( + writer->WriteBits(vp9.spatial_layer_resolution_present ? 
1 : 0, 1)); + RETURN_FALSE_ON_ERROR(writer->WriteBits(vp9.gof.num_frames_in_gof - 1, 4)); + + if (vp9.spatial_layer_resolution_present) { + for (size_t i = 0; i < vp9.num_spatial_layers; ++i) { + RETURN_FALSE_ON_ERROR(writer->WriteUInt16(vp9.width[i])); + RETURN_FALSE_ON_ERROR(writer->WriteUInt16(vp9.height[i])); + } + } + for (size_t i = 0; i < vp9.gof.num_frames_in_gof; ++i) { + RETURN_FALSE_ON_ERROR(writer->WriteBits(vp9.gof.temporal_idx[i], 3)); + RETURN_FALSE_ON_ERROR( + writer->WriteBits(vp9.gof.temporal_up_switch[i] ? 1 : 0, 1)); + RETURN_FALSE_ON_ERROR(writer->WriteBits(vp9.gof.num_ref_pics[i], 2)); + RETURN_FALSE_ON_ERROR(writer->WriteBits(kReservedBitValue0, 2)); + for (size_t r = 0; r < vp9.gof.num_ref_pics[i]; ++r) { + RETURN_FALSE_ON_ERROR(writer->WriteUInt8(vp9.gof.pid_diff[i][r])); + } + } + return true; +} + +// Picture ID: +// +// +-+-+-+-+-+-+-+-+ +// I: |M| PICTURE ID | M:0 => picture id is 7 bits. +// +-+-+-+-+-+-+-+-+ M:1 => picture id is 15 bits. +// M: | EXTENDED PID | +// +-+-+-+-+-+-+-+-+ +// +bool ParsePictureId(rtc::BitBuffer* parser, RTPVideoHeaderVP9* vp9) { + uint32_t picture_id; + uint32_t m_bit; + RETURN_FALSE_ON_ERROR(parser->ReadBits(&m_bit, 1)); + if (m_bit) { + RETURN_FALSE_ON_ERROR(parser->ReadBits(&picture_id, 15)); + vp9->max_picture_id = kMaxTwoBytePictureId; + } else { + RETURN_FALSE_ON_ERROR(parser->ReadBits(&picture_id, 7)); + vp9->max_picture_id = kMaxOneBytePictureId; + } + vp9->picture_id = picture_id; + return true; +} + +// Layer indices (flexible mode): +// +// +-+-+-+-+-+-+-+-+ +// L: | T |U| S |D| +// +-+-+-+-+-+-+-+-+ +// +bool ParseLayerInfoFlexibleMode(rtc::BitBuffer* parser, + RTPVideoHeaderVP9* vp9) { + uint32_t t, u_bit, s, d_bit; + RETURN_FALSE_ON_ERROR(parser->ReadBits(&t, 3)); + RETURN_FALSE_ON_ERROR(parser->ReadBits(&u_bit, 1)); + RETURN_FALSE_ON_ERROR(parser->ReadBits(&s, 3)); + RETURN_FALSE_ON_ERROR(parser->ReadBits(&d_bit, 1)); + vp9->temporal_idx = t; + vp9->temporal_up_switch = u_bit ? 
true : false; + vp9->spatial_idx = s; + vp9->inter_layer_predicted = d_bit ? true : false; + return true; +} + +// Layer indices (non-flexible mode): +// +// +-+-+-+-+-+-+-+-+ +// L: |GOF_IDX| S |D| +// +-+-+-+-+-+-+-+-+ +// | TL0PICIDX | +// +-+-+-+-+-+-+-+-+ +// +bool ParseLayerInfoNonFlexibleMode(rtc::BitBuffer* parser, + RTPVideoHeaderVP9* vp9) { + uint32_t gof_idx, s, d_bit; + uint8_t tl0picidx; + RETURN_FALSE_ON_ERROR(parser->ReadBits(&gof_idx, 4)); + RETURN_FALSE_ON_ERROR(parser->ReadBits(&s, 3)); + RETURN_FALSE_ON_ERROR(parser->ReadBits(&d_bit, 1)); + RETURN_FALSE_ON_ERROR(parser->ReadUInt8(&tl0picidx)); + vp9->gof_idx = gof_idx; + vp9->spatial_idx = s; + vp9->inter_layer_predicted = d_bit ? true : false; + vp9->tl0_pic_idx = tl0picidx; + return true; +} + +bool ParseLayerInfo(rtc::BitBuffer* parser, RTPVideoHeaderVP9* vp9) { + if (vp9->flexible_mode) { + return ParseLayerInfoFlexibleMode(parser, vp9); + } else { + return ParseLayerInfoNonFlexibleMode(parser, vp9); + } +} + +// Reference indices: +// +// +-+-+-+-+-+-+-+-+ -| P=1,F=1: At least one reference index +// P,F: | P_DIFF |X|N| . has to be specified. +// +-+-+-+-+-+-+-+-+ . up to 3 times +// X: |EXTENDED P_DIFF| . X=1: Extended P_DIFF is used (14 +// +-+-+-+-+-+-+-+-+ -| bits). Else 6 bits are used. +// N=1: An additional P_DIFF follows +// current P_DIFF. +bool ParseRefIndices(rtc::BitBuffer* parser, RTPVideoHeaderVP9* vp9) { + if (vp9->picture_id == kNoPictureId) + return false; + + vp9->num_ref_pics = 0; + uint32_t n_bit; + do { + if (vp9->num_ref_pics == kMaxVp9RefPics) + return false; + + uint32_t p_diff, x_bit; + RETURN_FALSE_ON_ERROR(parser->ReadBits(&p_diff, 6)); + RETURN_FALSE_ON_ERROR(parser->ReadBits(&x_bit, 1)); + RETURN_FALSE_ON_ERROR(parser->ReadBits(&n_bit, 1)); + + if (x_bit) { + // P_DIFF is 14 bits. 
+ uint8_t ext_p_diff; + RETURN_FALSE_ON_ERROR(parser->ReadUInt8(&ext_p_diff)); + p_diff = (p_diff << 8) + ext_p_diff; + } + + vp9->pid_diff[vp9->num_ref_pics] = p_diff; + uint32_t scaled_pid = vp9->picture_id; + while (p_diff > scaled_pid) { + scaled_pid += vp9->max_picture_id + 1; + } + vp9->ref_picture_id[vp9->num_ref_pics++] = scaled_pid - p_diff; + } while (n_bit); + + return true; +} + +// Scalability structure (SS). +// +// +-+-+-+-+-+-+-+-+ +// V: | N_S |Y| N_G | +// +-+-+-+-+-+-+-+-+ -| +// Y: | WIDTH | (OPTIONAL) . +// + + . +// | | (OPTIONAL) . +// +-+-+-+-+-+-+-+-+ . N_S + 1 times +// | HEIGHT | (OPTIONAL) . +// + + . +// | | (OPTIONAL) . +// +-+-+-+-+-+-+-+-+ -| -| +// N_G: | T |U| R |-|-| (OPTIONAL) . +// +-+-+-+-+-+-+-+-+ -| . N_G + 1 times +// | P_DIFF | (OPTIONAL) . R times . +// +-+-+-+-+-+-+-+-+ -| -| +// +bool ParseSsData(rtc::BitBuffer* parser, RTPVideoHeaderVP9* vp9) { + uint32_t n_s, y_bit, n_g; + RETURN_FALSE_ON_ERROR(parser->ReadBits(&n_s, 3)); + RETURN_FALSE_ON_ERROR(parser->ReadBits(&y_bit, 1)); + RETURN_FALSE_ON_ERROR(parser->ReadBits(&n_g, 4)); + vp9->num_spatial_layers = n_s + 1; + vp9->spatial_layer_resolution_present = y_bit ? true : false; + vp9->gof.num_frames_in_gof = n_g + 1; + + if (y_bit) { + for (size_t i = 0; i < vp9->num_spatial_layers; ++i) { + RETURN_FALSE_ON_ERROR(parser->ReadUInt16(&vp9->width[i])); + RETURN_FALSE_ON_ERROR(parser->ReadUInt16(&vp9->height[i])); + } + } + for (size_t i = 0; i < vp9->gof.num_frames_in_gof; ++i) { + uint32_t t, u_bit, r; + RETURN_FALSE_ON_ERROR(parser->ReadBits(&t, 3)); + RETURN_FALSE_ON_ERROR(parser->ReadBits(&u_bit, 1)); + RETURN_FALSE_ON_ERROR(parser->ReadBits(&r, 2)); + RETURN_FALSE_ON_ERROR(parser->ConsumeBits(2)); + vp9->gof.temporal_idx[i] = t; + vp9->gof.temporal_up_switch[i] = u_bit ? 
true : false; + vp9->gof.num_ref_pics[i] = r; + + for (size_t p = 0; p < vp9->gof.num_ref_pics[i]; ++p) { + uint8_t p_diff; + RETURN_FALSE_ON_ERROR(parser->ReadUInt8(&p_diff)); + vp9->gof.pid_diff[i][p] = p_diff; + } + } + return true; +} + +// Gets the size of next payload chunk to send. Returns 0 on error. +size_t CalcNextSize(size_t max_length, size_t rem_bytes) { + if (max_length == 0 || rem_bytes == 0) { + return 0; + } + if (kBalancedMode) { + size_t num_frags = std::ceil(static_cast<double>(rem_bytes) / max_length); + return static_cast<size_t>( + static_cast<double>(rem_bytes) / num_frags + 0.5); + } + return max_length >= rem_bytes ? rem_bytes : max_length; +} +} // namespace + + +RtpPacketizerVp9::RtpPacketizerVp9(const RTPVideoHeaderVP9& hdr, + size_t max_payload_length) + : hdr_(hdr), + max_payload_length_(max_payload_length), + payload_(nullptr), + payload_size_(0) { +} + +RtpPacketizerVp9::~RtpPacketizerVp9() { +} + +ProtectionType RtpPacketizerVp9::GetProtectionType() { + bool protect = + hdr_.temporal_idx == 0 || hdr_.temporal_idx == kNoTemporalIdx; + return protect ? 
kProtectedPacket : kUnprotectedPacket; +} + +StorageType RtpPacketizerVp9::GetStorageType(uint32_t retransmission_settings) { + StorageType storage = kAllowRetransmission; + if (hdr_.temporal_idx == 0 && + !(retransmission_settings & kRetransmitBaseLayer)) { + storage = kDontRetransmit; + } else if (hdr_.temporal_idx != kNoTemporalIdx && hdr_.temporal_idx > 0 && + !(retransmission_settings & kRetransmitHigherLayers)) { + storage = kDontRetransmit; + } + return storage; +} + +std::string RtpPacketizerVp9::ToString() { + return "RtpPacketizerVp9"; +} + +void RtpPacketizerVp9::SetPayloadData( + const uint8_t* payload, + size_t payload_size, + const RTPFragmentationHeader* fragmentation) { + payload_ = payload; + payload_size_ = payload_size; + GeneratePackets(); +} + +void RtpPacketizerVp9::GeneratePackets() { + if (max_payload_length_ < PayloadDescriptorLength(hdr_) + 1) { + LOG(LS_ERROR) << "Payload header and one payload byte won't fit."; + return; + } + size_t bytes_processed = 0; + while (bytes_processed < payload_size_) { + size_t rem_bytes = payload_size_ - bytes_processed; + size_t rem_payload_len = max_payload_length_ - + (bytes_processed ? 
PayloadDescriptorLengthMinusSsData(hdr_) + : PayloadDescriptorLength(hdr_)); + + size_t packet_bytes = CalcNextSize(rem_payload_len, rem_bytes); + if (packet_bytes == 0) { + LOG(LS_ERROR) << "Failed to generate VP9 packets."; + while (!packets_.empty()) + packets_.pop(); + return; + } + QueuePacket(bytes_processed, packet_bytes, bytes_processed == 0, + rem_bytes == packet_bytes, &packets_); + bytes_processed += packet_bytes; + } + assert(bytes_processed == payload_size_); +} + +bool RtpPacketizerVp9::NextPacket(uint8_t* buffer, + size_t* bytes_to_send, + bool* last_packet) { + if (packets_.empty()) { + return false; + } + PacketInfo packet_info = packets_.front(); + packets_.pop(); + + if (!WriteHeaderAndPayload(packet_info, buffer, bytes_to_send)) { + return false; + } + *last_packet = packets_.empty(); + return true; +} + +// VP9 format: +// +// Payload descriptor for F = 1 (flexible mode) +// 0 1 2 3 4 5 6 7 +// +-+-+-+-+-+-+-+-+ +// |I|P|L|F|B|E|V|-| (REQUIRED) +// +-+-+-+-+-+-+-+-+ +// I: |M| PICTURE ID | (RECOMMENDED) +// +-+-+-+-+-+-+-+-+ +// M: | EXTENDED PID | (RECOMMENDED) +// +-+-+-+-+-+-+-+-+ +// L: | T |U| S |D| (CONDITIONALLY RECOMMENDED) +// +-+-+-+-+-+-+-+-+ -| +// P,F: | P_DIFF |X|N| (CONDITIONALLY RECOMMENDED) . +// +-+-+-+-+-+-+-+-+ . up to 3 times +// X: |EXTENDED P_DIFF| . +// +-+-+-+-+-+-+-+-+ -| +// V: | SS | +// | .. | +// +-+-+-+-+-+-+-+-+ +// +// Payload descriptor for F = 0 (non-flexible mode) +// 0 1 2 3 4 5 6 7 +// +-+-+-+-+-+-+-+-+ +// |I|P|L|F|B|E|V|-| (REQUIRED) +// +-+-+-+-+-+-+-+-+ +// I: |M| PICTURE ID | (RECOMMENDED) +// +-+-+-+-+-+-+-+-+ +// M: | EXTENDED PID | (RECOMMENDED) +// +-+-+-+-+-+-+-+-+ +// L: |GOF_IDX| S |D| (CONDITIONALLY RECOMMENDED) +// +-+-+-+-+-+-+-+-+ +// | TL0PICIDX | (CONDITIONALLY REQUIRED) +// +-+-+-+-+-+-+-+-+ +// V: | SS | +// | .. 
| +// +-+-+-+-+-+-+-+-+ + +bool RtpPacketizerVp9::WriteHeaderAndPayload(const PacketInfo& packet_info, + uint8_t* buffer, + size_t* bytes_to_send) const { + size_t header_length; + if (!WriteHeader(packet_info, buffer, &header_length)) + return false; + + // Copy payload data. + memcpy(&buffer[header_length], + &payload_[packet_info.payload_start_pos], packet_info.size); + + *bytes_to_send = header_length + packet_info.size; + return true; +} + +bool RtpPacketizerVp9::WriteHeader(const PacketInfo& packet_info, + uint8_t* buffer, + size_t* header_length) const { + // Required payload descriptor byte. + bool i_bit = PictureIdPresent(hdr_); + bool p_bit = hdr_.inter_pic_predicted; + bool l_bit = LayerInfoPresent(hdr_); + bool f_bit = hdr_.flexible_mode; + bool b_bit = hdr_.beginning_of_frame && packet_info.layer_begin; + bool e_bit = hdr_.end_of_frame && packet_info.layer_end; + bool v_bit = hdr_.ss_data_available && b_bit; + + rtc::BitBufferWriter writer(buffer, max_payload_length_); + RETURN_FALSE_ON_ERROR(writer.WriteBits(i_bit ? 1 : 0, 1)); + RETURN_FALSE_ON_ERROR(writer.WriteBits(p_bit ? 1 : 0, 1)); + RETURN_FALSE_ON_ERROR(writer.WriteBits(l_bit ? 1 : 0, 1)); + RETURN_FALSE_ON_ERROR(writer.WriteBits(f_bit ? 1 : 0, 1)); + RETURN_FALSE_ON_ERROR(writer.WriteBits(b_bit ? 1 : 0, 1)); + RETURN_FALSE_ON_ERROR(writer.WriteBits(e_bit ? 1 : 0, 1)); + RETURN_FALSE_ON_ERROR(writer.WriteBits(v_bit ? 1 : 0, 1)); + RETURN_FALSE_ON_ERROR(writer.WriteBits(kReservedBitValue0, 1)); + + // Add fields that are present. 
+ if (i_bit && !WritePictureId(hdr_, &writer)) { + LOG(LS_ERROR) << "Failed writing VP9 picture id."; + return false; + } + if (l_bit && !WriteLayerInfo(hdr_, &writer)) { + LOG(LS_ERROR) << "Failed writing VP9 layer info."; + return false; + } + if (p_bit && f_bit && !WriteRefIndices(hdr_, &writer)) { + LOG(LS_ERROR) << "Failed writing VP9 ref indices."; + return false; + } + if (v_bit && !WriteSsData(hdr_, &writer)) { + LOG(LS_ERROR) << "Failed writing VP9 SS data."; + return false; + } + + size_t offset_bytes = 0; + size_t offset_bits = 0; + writer.GetCurrentOffset(&offset_bytes, &offset_bits); + assert(offset_bits == 0); + + *header_length = offset_bytes; + return true; +} + +bool RtpDepacketizerVp9::Parse(ParsedPayload* parsed_payload, + const uint8_t* payload, + size_t payload_length) { + assert(parsed_payload != nullptr); + if (payload_length == 0) { + LOG(LS_ERROR) << "Payload length is zero."; + return false; + } + + // Parse mandatory first byte of payload descriptor. + rtc::BitBuffer parser(payload, payload_length); + uint32_t i_bit, p_bit, l_bit, f_bit, b_bit, e_bit, v_bit; + RETURN_FALSE_ON_ERROR(parser.ReadBits(&i_bit, 1)); + RETURN_FALSE_ON_ERROR(parser.ReadBits(&p_bit, 1)); + RETURN_FALSE_ON_ERROR(parser.ReadBits(&l_bit, 1)); + RETURN_FALSE_ON_ERROR(parser.ReadBits(&f_bit, 1)); + RETURN_FALSE_ON_ERROR(parser.ReadBits(&b_bit, 1)); + RETURN_FALSE_ON_ERROR(parser.ReadBits(&e_bit, 1)); + RETURN_FALSE_ON_ERROR(parser.ReadBits(&v_bit, 1)); + RETURN_FALSE_ON_ERROR(parser.ConsumeBits(1)); + + // Parsed payload. + parsed_payload->type.Video.width = 0; + parsed_payload->type.Video.height = 0; + parsed_payload->type.Video.simulcastIdx = 0; + parsed_payload->type.Video.codec = kRtpVideoVp9; + + parsed_payload->frame_type = p_bit ? kVideoFrameDelta : kVideoFrameKey; + + RTPVideoHeaderVP9* vp9 = &parsed_payload->type.Video.codecHeader.VP9; + vp9->InitRTPVideoHeaderVP9(); + vp9->inter_pic_predicted = p_bit ? true : false; + vp9->flexible_mode = f_bit ? 
true : false; + vp9->beginning_of_frame = b_bit ? true : false; + vp9->end_of_frame = e_bit ? true : false; + vp9->ss_data_available = v_bit ? true : false; + vp9->temporal_idx = 0; + vp9->spatial_idx = 0; + + // Parse fields that are present. + if (i_bit && !ParsePictureId(&parser, vp9)) { + LOG(LS_ERROR) << "Failed parsing VP9 picture id."; + return false; + } + if (l_bit && !ParseLayerInfo(&parser, vp9)) { + LOG(LS_ERROR) << "Failed parsing VP9 layer info."; + return false; + } + if (p_bit && f_bit && !ParseRefIndices(&parser, vp9)) { + LOG(LS_ERROR) << "Failed parsing VP9 ref indices."; + return false; + } + if (v_bit) { + if (!ParseSsData(&parser, vp9)) { + LOG(LS_ERROR) << "Failed parsing VP9 SS data."; + return false; + } + if (vp9->spatial_layer_resolution_present) { + // TODO(asapersson): Add support for spatial layers. + parsed_payload->type.Video.width = vp9->width[0]; + parsed_payload->type.Video.height = vp9->height[0]; + } + } + parsed_payload->type.Video.isFirstPacket = b_bit && (vp9->spatial_idx == 0); + + uint64_t rem_bits = parser.RemainingBitCount(); + assert(rem_bits % 8 == 0); + parsed_payload->payload_length = rem_bits / 8; + if (parsed_payload->payload_length == 0) { + LOG(LS_ERROR) << "Failed parsing VP9 payload data."; + return false; + } + parsed_payload->payload = + payload + payload_length - parsed_payload->payload_length; + + return true; +} +} // namespace webrtc diff --git a/webrtc/modules/rtp_rtcp/source/rtp_format_vp9.h b/webrtc/modules/rtp_rtcp/source/rtp_format_vp9.h new file mode 100644 index 0000000000..883fbce5c8 --- /dev/null +++ b/webrtc/modules/rtp_rtcp/source/rtp_format_vp9.h @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// +// This file contains the declaration of the VP9 packetizer class. +// A packetizer object is created for each encoded video frame. The payload +// data and size are set with SetPayloadData after construction. +// +// After creating the packetizer, the method NextPacket is called +// repeatedly to get all packets for the frame. The method sets +// |last_packet| to false as long as there are more packets left to fetch. +// + +#ifndef WEBRTC_MODULES_RTP_RTCP_SOURCE_RTP_FORMAT_VP9_H_ +#define WEBRTC_MODULES_RTP_RTCP_SOURCE_RTP_FORMAT_VP9_H_ + +#include <queue> +#include <string> + +#include "webrtc/base/constructormagic.h" +#include "webrtc/modules/interface/module_common_types.h" +#include "webrtc/modules/rtp_rtcp/source/rtp_format.h" +#include "webrtc/typedefs.h" + +namespace webrtc { + +class RtpPacketizerVp9 : public RtpPacketizer { + public: + RtpPacketizerVp9(const RTPVideoHeaderVP9& hdr, size_t max_payload_length); + + virtual ~RtpPacketizerVp9(); + + ProtectionType GetProtectionType() override; + + StorageType GetStorageType(uint32_t retransmission_settings) override; + + std::string ToString() override; + + // The payload data must be one encoded VP9 frame. + void SetPayloadData(const uint8_t* payload, + size_t payload_size, + const RTPFragmentationHeader* fragmentation) override; + + // Gets the next payload with VP9 payload header. + // |buffer| is a pointer to where the output will be written. + // |bytes_to_send| is an output variable that will contain number of bytes + // written to buffer. + // |last_packet| is true for the last packet of the frame, false otherwise + // (i.e. call the function again to get the next packet). + // Returns true on success, false otherwise. 
+ bool NextPacket(uint8_t* buffer, + size_t* bytes_to_send, + bool* last_packet) override; + + typedef struct { + size_t payload_start_pos; + size_t size; + bool layer_begin; + bool layer_end; + } PacketInfo; + typedef std::queue<PacketInfo> PacketInfoQueue; + + private: + // Calculates all packet sizes and loads info to packet queue. + void GeneratePackets(); + + // Writes the payload descriptor header and copies payload to the |buffer|. + // |packet_info| determines which part of the payload to write. + // |bytes_to_send| contains the number of written bytes to the buffer. + // Returns true on success, false otherwise. + bool WriteHeaderAndPayload(const PacketInfo& packet_info, + uint8_t* buffer, + size_t* bytes_to_send) const; + + // Writes payload descriptor header to |buffer|. + // Returns true on success, false otherwise. + bool WriteHeader(const PacketInfo& packet_info, + uint8_t* buffer, + size_t* header_length) const; + + const RTPVideoHeaderVP9 hdr_; + const size_t max_payload_length_; // The max length in bytes of one packet. + const uint8_t* payload_; // The payload data to be packetized. + size_t payload_size_; // The size in bytes of the payload data. + PacketInfoQueue packets_; + + DISALLOW_COPY_AND_ASSIGN(RtpPacketizerVp9); +}; + + +class RtpDepacketizerVp9 : public RtpDepacketizer { + public: + virtual ~RtpDepacketizerVp9() {} + + bool Parse(ParsedPayload* parsed_payload, + const uint8_t* payload, + size_t payload_length) override; +}; + +} // namespace webrtc +#endif // WEBRTC_MODULES_RTP_RTCP_SOURCE_RTP_FORMAT_VP9_H_ diff --git a/webrtc/modules/rtp_rtcp/source/rtp_format_vp9_unittest.cc b/webrtc/modules/rtp_rtcp/source/rtp_format_vp9_unittest.cc new file mode 100644 index 0000000000..a052e3eb28 --- /dev/null +++ b/webrtc/modules/rtp_rtcp/source/rtp_format_vp9_unittest.cc @@ -0,0 +1,664 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <vector> + +#include "testing/gmock/include/gmock/gmock.h" +#include "testing/gtest/include/gtest/gtest.h" +#include "webrtc/modules/rtp_rtcp/source/rtp_format_vp9.h" +#include "webrtc/typedefs.h" + +namespace webrtc { +namespace { +void VerifyHeader(const RTPVideoHeaderVP9& expected, + const RTPVideoHeaderVP9& actual) { + EXPECT_EQ(expected.inter_layer_predicted, actual.inter_layer_predicted); + EXPECT_EQ(expected.inter_pic_predicted, actual.inter_pic_predicted); + EXPECT_EQ(expected.flexible_mode, actual.flexible_mode); + EXPECT_EQ(expected.beginning_of_frame, actual.beginning_of_frame); + EXPECT_EQ(expected.end_of_frame, actual.end_of_frame); + EXPECT_EQ(expected.ss_data_available, actual.ss_data_available); + EXPECT_EQ(expected.picture_id, actual.picture_id); + EXPECT_EQ(expected.max_picture_id, actual.max_picture_id); + EXPECT_EQ(expected.temporal_idx == kNoTemporalIdx ? 0 : expected.temporal_idx, + actual.temporal_idx); + EXPECT_EQ(expected.spatial_idx == kNoSpatialIdx ? 
0 : expected.spatial_idx, + actual.spatial_idx); + EXPECT_EQ(expected.gof_idx, actual.gof_idx); + EXPECT_EQ(expected.tl0_pic_idx, actual.tl0_pic_idx); + EXPECT_EQ(expected.temporal_up_switch, actual.temporal_up_switch); + + EXPECT_EQ(expected.num_ref_pics, actual.num_ref_pics); + for (uint8_t i = 0; i < expected.num_ref_pics; ++i) { + EXPECT_EQ(expected.pid_diff[i], actual.pid_diff[i]); + EXPECT_EQ(expected.ref_picture_id[i], actual.ref_picture_id[i]); + } + if (expected.ss_data_available) { + EXPECT_EQ(expected.spatial_layer_resolution_present, + actual.spatial_layer_resolution_present); + EXPECT_EQ(expected.num_spatial_layers, actual.num_spatial_layers); + if (expected.spatial_layer_resolution_present) { + for (size_t i = 0; i < expected.num_spatial_layers; i++) { + EXPECT_EQ(expected.width[i], actual.width[i]); + EXPECT_EQ(expected.height[i], actual.height[i]); + } + } + EXPECT_EQ(expected.gof.num_frames_in_gof, actual.gof.num_frames_in_gof); + for (size_t i = 0; i < expected.gof.num_frames_in_gof; i++) { + EXPECT_EQ(expected.gof.temporal_up_switch[i], + actual.gof.temporal_up_switch[i]); + EXPECT_EQ(expected.gof.temporal_idx[i], actual.gof.temporal_idx[i]); + EXPECT_EQ(expected.gof.num_ref_pics[i], actual.gof.num_ref_pics[i]); + for (size_t j = 0; j < expected.gof.num_ref_pics[i]; j++) { + EXPECT_EQ(expected.gof.pid_diff[i][j], actual.gof.pid_diff[i][j]); + } + } + } +} + +void VerifyPayload(const RtpDepacketizer::ParsedPayload& parsed, + const uint8_t* payload, + size_t payload_length) { + EXPECT_EQ(payload, parsed.payload); + EXPECT_EQ(payload_length, parsed.payload_length); + EXPECT_THAT(std::vector<uint8_t>(parsed.payload, + parsed.payload + parsed.payload_length), + ::testing::ElementsAreArray(payload, payload_length)); +} + +void ParseAndCheckPacket(const uint8_t* packet, + const RTPVideoHeaderVP9& expected, + size_t expected_hdr_length, + size_t expected_length) { + rtc::scoped_ptr<RtpDepacketizer> depacketizer(new RtpDepacketizerVp9()); + 
RtpDepacketizer::ParsedPayload parsed; + ASSERT_TRUE(depacketizer->Parse(&parsed, packet, expected_length)); + EXPECT_EQ(kRtpVideoVp9, parsed.type.Video.codec); + VerifyHeader(expected, parsed.type.Video.codecHeader.VP9); + const size_t kExpectedPayloadLength = expected_length - expected_hdr_length; + VerifyPayload(parsed, packet + expected_hdr_length, kExpectedPayloadLength); +} +} // namespace + +// Payload descriptor for flexible mode +// 0 1 2 3 4 5 6 7 +// +-+-+-+-+-+-+-+-+ +// |I|P|L|F|B|E|V|-| (REQUIRED) +// +-+-+-+-+-+-+-+-+ +// I: |M| PICTURE ID | (RECOMMENDED) +// +-+-+-+-+-+-+-+-+ +// M: | EXTENDED PID | (RECOMMENDED) +// +-+-+-+-+-+-+-+-+ +// L: | T |U| S |D| (CONDITIONALLY RECOMMENDED) +// +-+-+-+-+-+-+-+-+ -| +// P,F: | P_DIFF |X|N| (CONDITIONALLY RECOMMENDED) . +// +-+-+-+-+-+-+-+-+ . up to 3 times +// X: |EXTENDED P_DIFF| (OPTIONAL) . +// +-+-+-+-+-+-+-+-+ -| +// V: | SS | +// | .. | +// +-+-+-+-+-+-+-+-+ +// +// Payload descriptor for non-flexible mode +// 0 1 2 3 4 5 6 7 +// +-+-+-+-+-+-+-+-+ +// |I|P|L|F|B|E|V|-| (REQUIRED) +// +-+-+-+-+-+-+-+-+ +// I: |M| PICTURE ID | (RECOMMENDED) +// +-+-+-+-+-+-+-+-+ +// M: | EXTENDED PID | (RECOMMENDED) +// +-+-+-+-+-+-+-+-+ +// L: |GOF_IDX| S |D| (CONDITIONALLY RECOMMENDED) +// +-+-+-+-+-+-+-+-+ +// | TL0PICIDX | (CONDITIONALLY REQUIRED) +// +-+-+-+-+-+-+-+-+ +// V: | SS | +// | .. | +// +-+-+-+-+-+-+-+-+ + +class RtpPacketizerVp9Test : public ::testing::Test { + protected: + RtpPacketizerVp9Test() {} + virtual void SetUp() { + expected_.InitRTPVideoHeaderVP9(); + // Always input one layer frame at a time. 
+ expected_.beginning_of_frame = true; + expected_.end_of_frame = true; + } + + rtc::scoped_ptr<uint8_t[]> packet_; + rtc::scoped_ptr<uint8_t[]> payload_; + size_t payload_size_; + size_t payload_pos_; + RTPVideoHeaderVP9 expected_; + rtc::scoped_ptr<RtpPacketizerVp9> packetizer_; + + void Init(size_t payload_size, size_t packet_size) { + payload_.reset(new uint8_t[payload_size]); + memset(payload_.get(), 7, payload_size); + payload_size_ = payload_size; + payload_pos_ = 0; + packetizer_.reset(new RtpPacketizerVp9(expected_, packet_size)); + packetizer_->SetPayloadData(payload_.get(), payload_size_, NULL); + + const int kMaxPayloadDescriptorLength = 100; + packet_.reset(new uint8_t[payload_size_ + kMaxPayloadDescriptorLength]); + } + + void CheckPayload(const uint8_t* packet, + size_t start_pos, + size_t end_pos, + bool last) { + for (size_t i = start_pos; i < end_pos; ++i) { + EXPECT_EQ(packet[i], payload_[payload_pos_++]); + } + EXPECT_EQ(last, payload_pos_ == payload_size_); + } + + void CreateParseAndCheckPackets(const size_t* expected_hdr_sizes, + const size_t* expected_sizes, + size_t expected_num_packets) { + ASSERT_TRUE(packetizer_.get() != NULL); + size_t length = 0; + bool last = false; + if (expected_num_packets == 0) { + EXPECT_FALSE(packetizer_->NextPacket(packet_.get(), &length, &last)); + return; + } + for (size_t i = 0; i < expected_num_packets; ++i) { + EXPECT_TRUE(packetizer_->NextPacket(packet_.get(), &length, &last)); + EXPECT_EQ(expected_sizes[i], length); + RTPVideoHeaderVP9 hdr = expected_; + hdr.beginning_of_frame = (i == 0); + hdr.end_of_frame = last; + ParseAndCheckPacket(packet_.get(), hdr, expected_hdr_sizes[i], length); + CheckPayload(packet_.get(), expected_hdr_sizes[i], length, last); + } + EXPECT_TRUE(last); + } +}; + +TEST_F(RtpPacketizerVp9Test, TestEqualSizedMode_OnePacket) { + const size_t kFrameSize = 25; + const size_t kPacketSize = 26; + Init(kFrameSize, kPacketSize); + + // One packet: + // I:0, P:0, L:0, F:0, B:1, E:1, V:0 
(1hdr + 25 payload) + const size_t kExpectedHdrSizes[] = {1}; + const size_t kExpectedSizes[] = {26}; + const size_t kExpectedNum = GTEST_ARRAY_SIZE_(kExpectedSizes); + CreateParseAndCheckPackets(kExpectedHdrSizes, kExpectedSizes, kExpectedNum); +} + +TEST_F(RtpPacketizerVp9Test, TestEqualSizedMode_TwoPackets) { + const size_t kFrameSize = 27; + const size_t kPacketSize = 27; + Init(kFrameSize, kPacketSize); + + // Two packets: + // I:0, P:0, L:0, F:0, B:1, E:0, V:0 (1hdr + 14 payload) + // I:0, P:0, L:0, F:0, B:0, E:1, V:0 (1hdr + 13 payload) + const size_t kExpectedHdrSizes[] = {1, 1}; + const size_t kExpectedSizes[] = {15, 14}; + const size_t kExpectedNum = GTEST_ARRAY_SIZE_(kExpectedSizes); + CreateParseAndCheckPackets(kExpectedHdrSizes, kExpectedSizes, kExpectedNum); +} + +TEST_F(RtpPacketizerVp9Test, TestTooShortBufferToFitPayload) { + const size_t kFrameSize = 1; + const size_t kPacketSize = 1; + Init(kFrameSize, kPacketSize); // 1hdr + 1 payload + + const size_t kExpectedNum = 0; + CreateParseAndCheckPackets(NULL, NULL, kExpectedNum); +} + +TEST_F(RtpPacketizerVp9Test, TestOneBytePictureId) { + const size_t kFrameSize = 30; + const size_t kPacketSize = 12; + + expected_.picture_id = kMaxOneBytePictureId; // 2 byte payload descriptor + expected_.max_picture_id = kMaxOneBytePictureId; + Init(kFrameSize, kPacketSize); + + // Three packets: + // I:1, P:0, L:0, F:0, B:1, E:0, V:0 (2hdr + 10 payload) + // I:1, P:0, L:0, F:0, B:0, E:0, V:0 (2hdr + 10 payload) + // I:1, P:0, L:0, F:0, B:0, E:1, V:0 (2hdr + 10 payload) + const size_t kExpectedHdrSizes[] = {2, 2, 2}; + const size_t kExpectedSizes[] = {12, 12, 12}; + const size_t kExpectedNum = GTEST_ARRAY_SIZE_(kExpectedSizes); + CreateParseAndCheckPackets(kExpectedHdrSizes, kExpectedSizes, kExpectedNum); +} + +TEST_F(RtpPacketizerVp9Test, TestTwoBytePictureId) { + const size_t kFrameSize = 31; + const size_t kPacketSize = 13; + + expected_.picture_id = kMaxTwoBytePictureId; // 3 byte payload descriptor + 
Init(kFrameSize, kPacketSize); + + // Four packets: + // I:1, P:0, L:0, F:0, B:1, E:0, V:0 (3hdr + 8 payload) + // I:1, P:0, L:0, F:0, B:0, E:0, V:0 (3hdr + 8 payload) + // I:1, P:0, L:0, F:0, B:0, E:0, V:0 (3hdr + 8 payload) + // I:1, P:0, L:0, F:0, B:0, E:1, V:0 (3hdr + 7 payload) + const size_t kExpectedHdrSizes[] = {3, 3, 3, 3}; + const size_t kExpectedSizes[] = {11, 11, 11, 10}; + const size_t kExpectedNum = GTEST_ARRAY_SIZE_(kExpectedSizes); + CreateParseAndCheckPackets(kExpectedHdrSizes, kExpectedSizes, kExpectedNum); +} + +TEST_F(RtpPacketizerVp9Test, TestLayerInfoWithNonFlexibleMode) { + const size_t kFrameSize = 30; + const size_t kPacketSize = 25; + + expected_.gof_idx = 3; + expected_.spatial_idx = 2; + expected_.inter_layer_predicted = true; // D + expected_.tl0_pic_idx = 117; + Init(kFrameSize, kPacketSize); + + // Two packets: + // | I:0, P:0, L:1, F:0, B:1, E:0, V:0 | (3hdr + 15 payload) + // L: | GOF_IDX:3, S:2, D:1 | TL0PICIDX:117 | + // | I:0, P:0, L:1, F:0, B:0, E:1, V:0 | (3hdr + 15 payload) + // L: | GOF_IDX:3, S:2, D:1 | TL0PICIDX:117 | + const size_t kExpectedHdrSizes[] = {3, 3}; + const size_t kExpectedSizes[] = {18, 18}; + const size_t kExpectedNum = GTEST_ARRAY_SIZE_(kExpectedSizes); + CreateParseAndCheckPackets(kExpectedHdrSizes, kExpectedSizes, kExpectedNum); +} + +TEST_F(RtpPacketizerVp9Test, TestLayerInfoWithFlexibleMode) { + const size_t kFrameSize = 21; + const size_t kPacketSize = 23; + + expected_.flexible_mode = true; + expected_.temporal_idx = 3; + expected_.temporal_up_switch = true; // U + expected_.spatial_idx = 2; + expected_.inter_layer_predicted = false; // D + Init(kFrameSize, kPacketSize); + + // One packet: + // I:0, P:0, L:1, F:1, B:1, E:1, V:0 (2hdr + 21 payload) + // L: T:3, U:1, S:2, D:0 + const size_t kExpectedHdrSizes[] = {2}; + const size_t kExpectedSizes[] = {23}; + const size_t kExpectedNum = GTEST_ARRAY_SIZE_(kExpectedSizes); + CreateParseAndCheckPackets(kExpectedHdrSizes, kExpectedSizes, kExpectedNum); +} + 
+TEST_F(RtpPacketizerVp9Test, TestRefIdx) { + const size_t kFrameSize = 16; + const size_t kPacketSize = 22; + + expected_.inter_pic_predicted = true; // P + expected_.flexible_mode = true; // F + expected_.picture_id = 100; + expected_.num_ref_pics = 2; + expected_.pid_diff[0] = 3; + expected_.pid_diff[1] = 1171; + expected_.ref_picture_id[0] = 97; // 100 - 3 = 97 + expected_.ref_picture_id[1] = 31697; // 0x7FFF + 1 + 100 - 1171 = 31697 + Init(kFrameSize, kPacketSize); + + // One packet: + // I:1, P:1, L:0, F:1, B:1, E:1, V:0 (6hdr + 16 payload) + // I: 100 (2 bytes) + // P,F: P_DIFF:3, X:0, N:1 + // P_DIFF:1171, X:1, N:0 (2 bytes) + const size_t kExpectedHdrSizes[] = {6}; + const size_t kExpectedSizes[] = {22}; + const size_t kExpectedNum = GTEST_ARRAY_SIZE_(kExpectedSizes); + CreateParseAndCheckPackets(kExpectedHdrSizes, kExpectedSizes, kExpectedNum); +} + +TEST_F(RtpPacketizerVp9Test, TestRefIdxFailsWithoutPictureId) { + const size_t kFrameSize = 16; + const size_t kPacketSize = 22; + + expected_.inter_pic_predicted = true; + expected_.flexible_mode = true; + expected_.num_ref_pics = 1; + expected_.pid_diff[0] = 3; + Init(kFrameSize, kPacketSize); + + const size_t kExpectedNum = 0; + CreateParseAndCheckPackets(NULL, NULL, kExpectedNum); +} + +TEST_F(RtpPacketizerVp9Test, TestSsDataWithoutSpatialResolutionPresent) { + const size_t kFrameSize = 21; + const size_t kPacketSize = 25; + + expected_.ss_data_available = true; + expected_.num_spatial_layers = 1; + expected_.spatial_layer_resolution_present = false; + expected_.gof.num_frames_in_gof = 1; + expected_.gof.temporal_idx[0] = 0; + expected_.gof.temporal_up_switch[0] = true; + expected_.gof.num_ref_pics[0] = 1; + expected_.gof.pid_diff[0][0] = 4; + Init(kFrameSize, kPacketSize); + + // One packet: + // I:0, P:0, L:0, F:0, B:1, E:1, V:1 (4hdr + 21 payload) + // N_S:0, Y:0, N_G:0 + // T:0, U:1, R:1 | P_DIFF[0][0]:4 + const size_t kExpectedHdrSizes[] = {4}; + const size_t kExpectedSizes[] = {25}; + const size_t 
kExpectedNum = GTEST_ARRAY_SIZE_(kExpectedSizes); + CreateParseAndCheckPackets(kExpectedHdrSizes, kExpectedSizes, kExpectedNum); +} + +TEST_F(RtpPacketizerVp9Test, TestSsData) { + const size_t kFrameSize = 21; + const size_t kPacketSize = 39; + + expected_.ss_data_available = true; + expected_.num_spatial_layers = 2; + expected_.spatial_layer_resolution_present = true; + expected_.width[0] = 640; + expected_.width[1] = 1280; + expected_.height[0] = 360; + expected_.height[1] = 720; + expected_.gof.num_frames_in_gof = 3; + expected_.gof.temporal_idx[0] = 0; + expected_.gof.temporal_idx[1] = 1; + expected_.gof.temporal_idx[2] = 2; + expected_.gof.temporal_up_switch[0] = true; + expected_.gof.temporal_up_switch[1] = true; + expected_.gof.temporal_up_switch[2] = false; + expected_.gof.num_ref_pics[0] = 0; + expected_.gof.num_ref_pics[1] = 3; + expected_.gof.num_ref_pics[2] = 2; + expected_.gof.pid_diff[1][0] = 5; + expected_.gof.pid_diff[1][1] = 6; + expected_.gof.pid_diff[1][2] = 7; + expected_.gof.pid_diff[2][0] = 8; + expected_.gof.pid_diff[2][1] = 9; + Init(kFrameSize, kPacketSize); + + // One packet: + // I:0, P:0, L:0, F:0, B:1, E:1, V:1 (18hdr + 21 payload) + // N_S:1, Y:1, N_G:2 + // WIDTH:640 // 2 bytes + // HEIGHT:360 // 2 bytes + // WIDTH:1280 // 2 bytes + // HEIGHT:720 // 2 bytes + // T:0, U:1, R:0 + // T:1, U:1, R:3 | P_DIFF[1][0]:5 | P_DIFF[1][1]:6 | P_DIFF[1][2]:7 + // T:2, U:0, R:2 | P_DIFF[2][0]:8 | P_DIFF[2][1]:9 + const size_t kExpectedHdrSizes[] = {18}; + const size_t kExpectedSizes[] = {39}; + const size_t kExpectedNum = GTEST_ARRAY_SIZE_(kExpectedSizes); + CreateParseAndCheckPackets(kExpectedHdrSizes, kExpectedSizes, kExpectedNum); +} + +TEST_F(RtpPacketizerVp9Test, TestBaseLayerProtectionAndStorageType) { + const size_t kFrameSize = 10; + const size_t kPacketSize = 12; + + // I:0, P:0, L:1, F:1, B:1, E:1, V:0 (2hdr + 10 payload) + // L: T:0, U:0, S:0, D:0 + expected_.flexible_mode = true; + expected_.temporal_idx = 0; + Init(kFrameSize, 
kPacketSize); + EXPECT_EQ(kProtectedPacket, packetizer_->GetProtectionType()); + EXPECT_EQ(kAllowRetransmission, + packetizer_->GetStorageType(kRetransmitBaseLayer)); + EXPECT_EQ(kDontRetransmit, packetizer_->GetStorageType(kRetransmitOff)); +} + +TEST_F(RtpPacketizerVp9Test, TestHigherLayerProtectionAndStorageType) { + const size_t kFrameSize = 10; + const size_t kPacketSize = 12; + + // I:0, P:0, L:1, F:1, B:1, E:1, V:0 (2hdr + 10 payload) + // L: T:1, U:0, S:0, D:0 + expected_.flexible_mode = true; + expected_.temporal_idx = 1; + Init(kFrameSize, kPacketSize); + EXPECT_EQ(kUnprotectedPacket, packetizer_->GetProtectionType()); + EXPECT_EQ(kDontRetransmit, packetizer_->GetStorageType(kRetransmitBaseLayer)); + EXPECT_EQ(kAllowRetransmission, + packetizer_->GetStorageType(kRetransmitHigherLayers)); +} + + +class RtpDepacketizerVp9Test : public ::testing::Test { + protected: + RtpDepacketizerVp9Test() + : depacketizer_(new RtpDepacketizerVp9()) {} + + virtual void SetUp() { + expected_.InitRTPVideoHeaderVP9(); + } + + RTPVideoHeaderVP9 expected_; + rtc::scoped_ptr<RtpDepacketizer> depacketizer_; +}; + +TEST_F(RtpDepacketizerVp9Test, ParseBasicHeader) { + const uint8_t kHeaderLength = 1; + uint8_t packet[4] = {0}; + packet[0] = 0x0C; // I:0 P:0 L:0 F:0 B:1 E:1 V:0 R:0 + expected_.beginning_of_frame = true; + expected_.end_of_frame = true; + ParseAndCheckPacket(packet, expected_, kHeaderLength, sizeof(packet)); +} + +TEST_F(RtpDepacketizerVp9Test, ParseOneBytePictureId) { + const uint8_t kHeaderLength = 2; + uint8_t packet[10] = {0}; + packet[0] = 0x80; // I:1 P:0 L:0 F:0 B:0 E:0 V:0 R:0 + packet[1] = kMaxOneBytePictureId; + + expected_.picture_id = kMaxOneBytePictureId; + expected_.max_picture_id = kMaxOneBytePictureId; + ParseAndCheckPacket(packet, expected_, kHeaderLength, sizeof(packet)); +} + +TEST_F(RtpDepacketizerVp9Test, ParseTwoBytePictureId) { + const uint8_t kHeaderLength = 3; + uint8_t packet[10] = {0}; + packet[0] = 0x80; // I:1 P:0 L:0 F:0 B:0 E:0 V:0 R:0 
+ packet[1] = 0x80 | ((kMaxTwoBytePictureId >> 8) & 0x7F); + packet[2] = kMaxTwoBytePictureId & 0xFF; + + expected_.picture_id = kMaxTwoBytePictureId; + expected_.max_picture_id = kMaxTwoBytePictureId; + ParseAndCheckPacket(packet, expected_, kHeaderLength, sizeof(packet)); +} + +TEST_F(RtpDepacketizerVp9Test, ParseLayerInfoWithNonFlexibleMode) { + const uint8_t kHeaderLength = 3; + const uint8_t kGofIdx = 7; + const uint8_t kSpatialIdx = 1; + const uint8_t kDbit = 1; + const uint8_t kTl0PicIdx = 17; + uint8_t packet[13] = {0}; + packet[0] = 0x20; // I:0 P:0 L:1 F:0 B:0 E:0 V:0 R:0 + packet[1] = (kGofIdx << 4) | (kSpatialIdx << 1) | kDbit; // GOF_IDX:7 S:1 D:1 + packet[2] = kTl0PicIdx; // TL0PICIDX:17 + + expected_.gof_idx = kGofIdx; + expected_.spatial_idx = kSpatialIdx; + expected_.inter_layer_predicted = kDbit ? true : false; + expected_.tl0_pic_idx = kTl0PicIdx; + ParseAndCheckPacket(packet, expected_, kHeaderLength, sizeof(packet)); +} + +TEST_F(RtpDepacketizerVp9Test, ParseLayerInfoWithFlexibleMode) { + const uint8_t kHeaderLength = 2; + const uint8_t kTemporalIdx = 2; + const uint8_t kUbit = 1; + const uint8_t kSpatialIdx = 0; + const uint8_t kDbit = 0; + uint8_t packet[13] = {0}; + packet[0] = 0x38; // I:0 P:0 L:1 F:1 B:1 E:0 V:0 R:0 + packet[1] = (kTemporalIdx << 5) | (kUbit << 4) | (kSpatialIdx << 1) | kDbit; + + // I:0 P:0 L:1 F:1 B:1 E:0 V:0 + // L: T:2 U:1 S:0 D:0 + expected_.beginning_of_frame = true; + expected_.flexible_mode = true; + expected_.temporal_idx = kTemporalIdx; + expected_.temporal_up_switch = kUbit ? true : false; + expected_.spatial_idx = kSpatialIdx; + expected_.inter_layer_predicted = kDbit ? 
true : false; + ParseAndCheckPacket(packet, expected_, kHeaderLength, sizeof(packet)); +} + +TEST_F(RtpDepacketizerVp9Test, ParseRefIdx) { + const uint8_t kHeaderLength = 7; + const int16_t kPictureId = 17; + const int16_t kPdiff1 = 17; + const int16_t kPdiff2 = 18; + const int16_t kExtPdiff3 = 2171; + uint8_t packet[13] = {0}; + packet[0] = 0xD8; // I:1 P:1 L:0 F:1 B:1 E:0 V:0 R:0 + packet[1] = 0x80 | ((kPictureId >> 8) & 0x7F); // Two byte pictureID. + packet[2] = kPictureId; + packet[3] = (kPdiff1 << 2) | (0 << 1) | 1; // P_DIFF X:0 N:1 + packet[4] = (kPdiff2 << 2) | (0 << 1) | 1; // P_DIFF X:0 N:1 + packet[5] = ((kExtPdiff3 >> 8) << 2) | (1 << 1) | 0; // P_DIFF X:1 N:0 + packet[6] = kExtPdiff3 & 0xff; // EXTENDED P_DIFF + + // I:1 P:1 L:0 F:1 B:1 E:0 V:0 + // I: PICTURE ID:17 + // I: + // P,F: P_DIFF:17 X:0 N:1 => refPictureId = 17 - 17 = 0 + // P,F: P_DIFF:18 X:0 N:1 => refPictureId = 0x7FFF + 1 + 17 - 18 = 0x7FFF + // P,F: P_DIFF:2171 X:1 N:0 => refPictureId = 0x7FFF + 1 + 17 - 2171 = 30614 + expected_.beginning_of_frame = true; + expected_.inter_pic_predicted = true; + expected_.flexible_mode = true; + expected_.picture_id = kPictureId; + expected_.num_ref_pics = 3; + expected_.pid_diff[0] = kPdiff1; + expected_.pid_diff[1] = kPdiff2; + expected_.pid_diff[2] = kExtPdiff3; + expected_.ref_picture_id[0] = 0; + expected_.ref_picture_id[1] = 0x7FFF; + expected_.ref_picture_id[2] = 30614; + ParseAndCheckPacket(packet, expected_, kHeaderLength, sizeof(packet)); +} + +TEST_F(RtpDepacketizerVp9Test, ParseRefIdxFailsWithNoPictureId) { + const int16_t kPdiff = 3; + uint8_t packet[13] = {0}; + packet[0] = 0x58; // I:0 P:1 L:0 F:1 B:1 E:0 V:0 R:0 + packet[1] = (kPdiff << 2) | (0 << 1) | 0; // P,F: P_DIFF:3 X:0 N:0 + + RtpDepacketizer::ParsedPayload parsed; + EXPECT_FALSE(depacketizer_->Parse(&parsed, packet, sizeof(packet))); +} + +TEST_F(RtpDepacketizerVp9Test, ParseRefIdxFailsWithTooManyRefPics) { + const int16_t kPdiff = 3; + uint8_t packet[13] = {0}; + packet[0] = 
0xD8; // I:1 P:1 L:0 F:1 B:1 E:0 V:0 R:0 + packet[1] = kMaxOneBytePictureId; // I: PICTURE ID:127 + packet[2] = (kPdiff << 2) | (0 << 1) | 1; // P,F: P_DIFF:3 X:0 N:1 + packet[3] = (kPdiff << 2) | (0 << 1) | 1; // P,F: P_DIFF:3 X:0 N:1 + packet[4] = (kPdiff << 2) | (0 << 1) | 1; // P,F: P_DIFF:3 X:0 N:1 + packet[5] = (kPdiff << 2) | (0 << 1) | 0; // P,F: P_DIFF:3 X:0 N:0 + + RtpDepacketizer::ParsedPayload parsed; + EXPECT_FALSE(depacketizer_->Parse(&parsed, packet, sizeof(packet))); +} + +TEST_F(RtpDepacketizerVp9Test, ParseSsData) { + const uint8_t kHeaderLength = 5; + const uint8_t kYbit = 0; + const size_t kNs = 2; + const size_t kNg = 2; + uint8_t packet[23] = {0}; + packet[0] = 0x0A; // I:0 P:0 L:0 F:0 B:1 E:0 V:1 R:0 + packet[1] = ((kNs - 1) << 5) | (kYbit << 4) | (kNg - 1); // N_S Y N_G + packet[2] = (0 << 5) | (1 << 4) | (0 << 2) | 0; // T:0 U:1 R:0 - + packet[3] = (2 << 5) | (0 << 4) | (1 << 2) | 0; // T:2 U:0 R:1 - + packet[4] = 33; + + expected_.beginning_of_frame = true; + expected_.ss_data_available = true; + expected_.num_spatial_layers = kNs; + expected_.spatial_layer_resolution_present = kYbit ? 
true : false; + expected_.gof.num_frames_in_gof = kNg; + expected_.gof.temporal_idx[0] = 0; + expected_.gof.temporal_idx[1] = 2; + expected_.gof.temporal_up_switch[0] = true; + expected_.gof.temporal_up_switch[1] = false; + expected_.gof.num_ref_pics[0] = 0; + expected_.gof.num_ref_pics[1] = 1; + expected_.gof.pid_diff[1][0] = 33; + ParseAndCheckPacket(packet, expected_, kHeaderLength, sizeof(packet)); +} + +TEST_F(RtpDepacketizerVp9Test, ParseFirstPacketInKeyFrame) { + uint8_t packet[2] = {0}; + packet[0] = 0x08; // I:0 P:0 L:0 F:0 B:1 E:0 V:0 R:0 + + RtpDepacketizer::ParsedPayload parsed; + ASSERT_TRUE(depacketizer_->Parse(&parsed, packet, sizeof(packet))); + EXPECT_EQ(kVideoFrameKey, parsed.frame_type); + EXPECT_TRUE(parsed.type.Video.isFirstPacket); +} + +TEST_F(RtpDepacketizerVp9Test, ParseLastPacketInDeltaFrame) { + uint8_t packet[2] = {0}; + packet[0] = 0x44; // I:0 P:1 L:0 F:0 B:0 E:1 V:0 R:0 + + RtpDepacketizer::ParsedPayload parsed; + ASSERT_TRUE(depacketizer_->Parse(&parsed, packet, sizeof(packet))); + EXPECT_EQ(kVideoFrameDelta, parsed.frame_type); + EXPECT_FALSE(parsed.type.Video.isFirstPacket); +} + +TEST_F(RtpDepacketizerVp9Test, ParseResolution) { + const uint16_t kWidth[2] = {640, 1280}; + const uint16_t kHeight[2] = {360, 720}; + uint8_t packet[20] = {0}; + packet[0] = 0x0A; // I:0 P:0 L:0 F:0 B:1 E:0 V:1 R:0 + packet[1] = (1 << 5) | (1 << 4) | 0; // N_S:1 Y:1 N_G:0 + packet[2] = kWidth[0] >> 8; + packet[3] = kWidth[0] & 0xFF; + packet[4] = kHeight[0] >> 8; + packet[5] = kHeight[0] & 0xFF; + packet[6] = kWidth[1] >> 8; + packet[7] = kWidth[1] & 0xFF; + packet[8] = kHeight[1] >> 8; + packet[9] = kHeight[1] & 0xFF; + packet[10] = 0; // T:0 U:0 R:0 - + + RtpDepacketizer::ParsedPayload parsed; + ASSERT_TRUE(depacketizer_->Parse(&parsed, packet, sizeof(packet))); + EXPECT_EQ(kWidth[0], parsed.type.Video.width); + EXPECT_EQ(kHeight[0], parsed.type.Video.height); +} + +TEST_F(RtpDepacketizerVp9Test, ParseFailsForNoPayloadLength) { + uint8_t packet[1] = 
{0}; + RtpDepacketizer::ParsedPayload parsed; + EXPECT_FALSE(depacketizer_->Parse(&parsed, packet, 0)); +} + +TEST_F(RtpDepacketizerVp9Test, ParseFailsForTooShortBufferToFitPayload) { + const uint8_t kHeaderLength = 1; + uint8_t packet[kHeaderLength] = {0}; + RtpDepacketizer::ParsedPayload parsed; + EXPECT_FALSE(depacketizer_->Parse(&parsed, packet, sizeof(packet))); +} + +} // namespace webrtc diff --git a/webrtc/modules/video_coding/main/source/packet.cc b/webrtc/modules/video_coding/main/source/packet.cc index c9eb482ed0..88838f35f8 100644 --- a/webrtc/modules/video_coding/main/source/packet.cc +++ b/webrtc/modules/video_coding/main/source/packet.cc @@ -118,6 +118,18 @@ void VCMPacket::CopyCodecSpecifics(const RTPVideoHeader& videoHeader) { codec = kVideoCodecVP8; return; + case kRtpVideoVp9: + if (isFirstPacket && markerBit) + completeNALU = kNaluComplete; + else if (isFirstPacket) + completeNALU = kNaluStart; + else if (markerBit) + completeNALU = kNaluEnd; + else + completeNALU = kNaluIncomplete; + + codec = kVideoCodecVP9; + return; case kRtpVideoH264: isFirstPacket = videoHeader.isFirstPacket; if (isFirstPacket) |