v4l2_codec2: Expand NALParser to support parsing color aspects.

This CL expands the simple NALParser to support parsing color aspects from the SPS NAL unit. The V4L2 decode component is adapted to use this new NALParser rather than the Chrome parser. This allows us to remove the Chrome H.264 parser and its associated dependencies in a subsequent CL, removing a lot of code.

Bug: 155138142
Test: arc.VideoDecodeAccel.* and arc.VideoDecodeAccelPerf.* on hatch
Change-Id: Icbfa63478980ab2b2d1bf2d46697359a0ac92937
author: David Staessens <dstaessens@google.com> 2021-03-18 11:19:17 +0900
committer: Chih-Yu Huang <akahuang@google.com> 2021-05-12 11:36:43 +0900
commit: fdbe4007aaf88a23f8b790efebbb345308f30738 (patch)
tree: 09efecdb86c131a4e04bdc1d06f2afd1136aeb08
parent: a95d4ed18782db611a9c3c8b02c9adfbbb33a7aa (diff)
download: v4l2_codec2-fdbe4007aaf88a23f8b790efebbb345308f30738.tar.gz
5 files changed, 212 insertions, 35 deletions
diff --git a/common/Android.bp b/common/Android.bp
index af26b49..9c795ab 100644
--- a/common/Android.bp
+++ b/common/Android.bp
@@ -31,6 +31,7 @@ cc_library {
         "libchrome",
         "libcutils",
         "liblog",
+        "libstagefright_foundation",
         "libui",
         "libutils",
         "libv4l2_codec2_accel"
diff --git a/common/EncodeHelpers.cpp b/common/EncodeHelpers.cpp
index 31646eb..2ab0e71 100644
--- a/common/EncodeHelpers.cpp
+++ b/common/EncodeHelpers.cpp
@@ -124,9 +124,6 @@ android_ycbcr getGraphicBlockInfo(const C2ConstGraphicBlock& block) {
 
 void extractCSDInfo(std::unique_ptr<C2StreamInitDataInfo::output>* const csd, const uint8_t* data,
                     size_t length) {
-    constexpr uint8_t kTypeSeqParamSet = 7;
-    constexpr uint8_t kTypePicParamSet = 8;
-
     // Android frameworks needs 4 bytes start code.
     constexpr uint8_t kStartCode[] = {0x00, 0x00, 0x00, 0x01};
     constexpr int kStartCodeLength = 4;
@@ -142,9 +139,9 @@ void extractCSDInfo(std::unique_ptr<C2StreamInitDataInfo::output>* const csd, co
     NalParser parser(data, length);
     while (parser.locateNextNal()) {
         if (parser.length() == 0) continue;
-        uint8_t nalType = *parser.data() & 0x1f;
+        uint8_t nalType = parser.type();
         ALOGV("find next NAL: type=%d, length=%zu", nalType, parser.length());
-        if (nalType != kTypeSeqParamSet && nalType != kTypePicParamSet) continue;
+        if (nalType != NalParser::kSPSType && nalType != NalParser::kPPSType) continue;
 
         if (tmpOutput + kStartCodeLength + parser.length() > tmpConfigDataEnd) {
             ALOGE("Buffer overflow on extracting codec config data (length=%zu)", length);
diff --git a/common/NalParser.cpp b/common/NalParser.cpp
index 4682775..3216574 100644
--- a/common/NalParser.cpp
+++ b/common/NalParser.cpp
@@ -2,12 +2,85 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
+//#define LOG_NDEBUG 0
+#define LOG_TAG "NalParser"
+
 #include <v4l2_codec2/common/NalParser.h>
 
 #include <algorithm>
 
+#include <media/stagefright/foundation/ABitReader.h>
+#include <utils/Log.h>
+
 namespace android {
 
+namespace {
+
+enum H264ProfileIDC {  // profile_idc values; findCodedColorAspects() parses the extra chroma/bit-depth/scaling fields for exactly these
+    kProfileIDCAVLC444 = 44,
+    kProfileIDScalableBaseline = 83,
+    kProfileIDScalableHigh = 86,
+    kProfileIDCHigh = 100,
+    kProfileIDHigh10 = 110,
+    kProfileIDSMultiviewHigh = 118,
+    kProfileIDHigh422 = 122,
+    kProfileIDStereoHigh = 128,
+    kProfileIDHigh444Predictive = 244,
+};  // NOTE(review): enumerator prefixes are inconsistent (kProfileIDC..., kProfileID..., kProfileIDS...)
+
+constexpr uint32_t kYUV444Idc = 3;
+
+// Read an unsigned exponential-golomb integer: count zero bits up to the first 1 bit, then read that many suffix bits.
+uint32_t parseUE(ABitReader* br) {
+    uint32_t numZeroes = 0;
+    while (br->getBits(1) == 0) {  // NOTE(review): prefix length is unbounded; nothing here stops at the end of the data
+        ++numZeroes;
+    }
+    uint32_t val = br->getBits(numZeroes);  // suffix bits following the terminating 1 bit
+    return val + (1u << numZeroes) - 1;  // NOTE(review): the shift is undefined behaviour if numZeroes >= 32
+}
+
+// Read a signed exponential-golomb integer: odd code numbers map to positive values, even ones to zero or negative.
+int32_t parseSE(ABitReader* br) {
+    uint32_t codeNum = parseUE(br);  // consumes exactly the bits of one unsigned code
+    return (codeNum & 1) ? (codeNum + 1) >> 1 : -static_cast<int32_t>(codeNum >> 1);  // 0->0, 1->1, 2->-1, 3->2, ...
+}
+
+// Skip an H.264 sequence scaling list of |scalingListSize| entries in the specified bitstream; no values are kept.
+void skipScalingList(ABitReader* br, size_t scalingListSize) {
+    size_t nextScale = 8;
+    size_t lastScale = 8;
+    for (size_t j = 0; j < scalingListSize; ++j) {
+        if (nextScale != 0) {  // once nextScale reaches 0 it stays 0, so no further deltas are read
+            int32_t deltaScale = parseSE(br);  // delta_sl
+            if (deltaScale < -128) {
+                ALOGW("delta scale (%d) is below range, capping to -128", deltaScale);
+                deltaScale = -128;
+            } else if (deltaScale > 127) {
+                ALOGW("delta scale (%d) is above range, capping to 127", deltaScale);
+                deltaScale = 127;
+            }
+            nextScale = (lastScale + (deltaScale + 256)) % 256;  // +256 keeps the clamped delta non-negative before the modulo
+        }
+        lastScale = (nextScale == 0) ? lastScale : nextScale;  // keep the previous scale when nextScale is 0
+    }
+}
+
+// Skip the H.264 sequence scaling matrix (|numScalingLists| optional lists) in the specified bitstream.
+void skipScalingMatrix(ABitReader* br, size_t numScalingLists) {
+    for (size_t i = 0; i < numScalingLists; ++i) {
+        if (br->getBits(1)) {  // seq_scaling_list_present_flag
+            if (i < 6) {  // the first six lists have 16 entries each
+                skipScalingList(br, 16);
+            } else {  // every later list has 64 entries
+                skipScalingList(br, 64);
+            }
+        }
+    }
+}
+
+}  // namespace
+
 NalParser::NalParser(const uint8_t* data, size_t length)
       : mCurrNalDataPos(data), mDataEnd(data + length) {
     mNextNalStartCodePos = findNextStartCodePos();
@@ -20,6 +93,16 @@ bool NalParser::locateNextNal() {
     return true;
 }
 
+bool NalParser::locateSPS() {  // Advances NAL by NAL; true once the current NAL is a non-empty SPS.
+    while (locateNextNal()) {
+        if (length() == 0) continue;  // empty NAL: type() would have no byte to read
+        if (type() != kSPSType) continue;
+        return true;
+    }
+
+    return false;  // ran out of NAL units without finding an SPS
+}
+
 const uint8_t* NalParser::data() const {
     return mCurrNalDataPos;
 }
@@ -31,9 +114,104 @@ size_t NalParser::length() const {
     return *(mNextNalStartCodePos - 1) == 0x00 ? length - 1 : length;
 }
 
+uint8_t NalParser::type() const {  // Returns the low five bits of the current NAL's first byte.
+    // First byte is forbidden_zero_bit (1) + nal_ref_idc (2) + nal_unit_type (5)
+    constexpr uint8_t kNALTypeMask = 0x1f;
+    return *mCurrNalDataPos & kNALTypeMask;  // dereferences unconditionally; callers check length() first
+}
+
 const uint8_t* NalParser::findNextStartCodePos() const {
     return std::search(mCurrNalDataPos, mDataEnd, kNalStartCode,
                        kNalStartCode + kNalStartCodeLength);
 }
 
+bool NalParser::findCodedColorAspects(ColorAspects* colorAspects) {  // true only if a colour description was read without over-reading
+    ALOG_ASSERT(colorAspects);
+    ALOG_ASSERT(type() == kSPSType);
+
+    // Unfortunately we can't directly jump to the Video Usability Information (VUI) parameters that
+    // contain the color aspects. We need to parse the entire SPS header up until the values we
+    // need.
+
+    // Skip first byte containing type. NOTE(review): length() - 1 wraps around for an empty NAL (locateSPS() skips those).
+    ABitReader br(mCurrNalDataPos + 1, length() - 1);  // NOTE(review): confirm whether emulation prevention bytes need handling here
+
+    uint32_t profileIDC = br.getBits(8);  // profile_idc
+    br.skipBits(16);                      // constraint flags + reserved bits + level_idc
+    parseUE(&br);                         // seq_parameter_set_id
+
+    if (profileIDC == kProfileIDCHigh || profileIDC == kProfileIDHigh10 ||
+        profileIDC == kProfileIDHigh422 || profileIDC == kProfileIDHigh444Predictive ||
+        profileIDC == kProfileIDCAVLC444 || profileIDC == kProfileIDScalableBaseline ||
+        profileIDC == kProfileIDScalableHigh || profileIDC == kProfileIDSMultiviewHigh ||
+        profileIDC == kProfileIDStereoHigh) {
+        uint32_t chromaFormatIDC = parseUE(&br);  // chroma_format_idc
+        if (chromaFormatIDC == kYUV444Idc) {  // flag below is only present for chroma_format_idc == kYUV444Idc
+            br.skipBits(1);                   // separate_colour_plane_flag
+        }
+        parseUE(&br);    // bit_depth_luma_minus8
+        parseUE(&br);    // bit_depth_chroma_minus8
+        br.skipBits(1);  // lossless_qpprime_y_zero_flag
+
+        if (br.getBits(1)) {  // seq_scaling_matrix_present_flag
+            const size_t numScalingLists = (chromaFormatIDC != kYUV444Idc) ? 8 : 12;
+            skipScalingMatrix(&br, numScalingLists);
+        }
+    }
+
+    parseUE(&br);                                   // log2_max_frame_num_minus4
+    uint32_t pictureOrderCountType = parseUE(&br);  // pic_order_cnt_type
+    if (pictureOrderCountType == 0) {
+        parseUE(&br);  // log2_max_pic_order_cnt_lsb_minus4
+    } else if (pictureOrderCountType == 1) {
+        br.skipBits(1);                              // delta_pic_order_always_zero_flag
+        parseSE(&br);                                // offset_for_non_ref_pic
+        parseSE(&br);                                // offset_for_top_to_bottom_field
+        uint32_t numReferenceFrames = parseUE(&br);  // num_ref_frames_in_pic_order_cnt_cycle
+        for (uint32_t i = 0; i < numReferenceFrames; ++i) {
+            parseUE(&br);  // offset_for_ref_frame; read with parseUE unlike the offsets above, same bits consumed, value discarded
+        }
+    }
+
+    parseUE(&br);          // num_ref_frames
+    br.skipBits(1);        // gaps_in_frame_num_value_allowed_flag
+    parseUE(&br);          // pic_width_in_mbs_minus1
+    parseUE(&br);          // pic_height_in_map_units_minus1
+    if (!br.getBits(1)) {  // frame_mbs_only_flag
+        br.skipBits(1);    // mb_adaptive_frame_field_flag
+    }
+    br.skipBits(1);  // direct_8x8_inference_flag
+
+    if (br.getBits(1)) {  // frame_cropping_flag
+        parseUE(&br);     // frame_cropping_rect_left_offset
+        parseUE(&br);     // frame_cropping_rect_right_offset
+        parseUE(&br);     // frame_cropping_rect_top_offset
+        parseUE(&br);     // frame_cropping_rect_bottom_offset
+    }
+
+    if (br.getBits(1)) {                 // vui_parameters_present_flag
+        if (br.getBits(1)) {             // VUI aspect_ratio_info_present_flag
+            if (br.getBits(8) == 255) {  // VUI aspect_ratio_idc == extended sample aspect ratio
+                br.skipBits(32);         // VUI sar_width + sar_height
+            }
+        }
+
+        if (br.getBits(1)) {  // VUI overscan_info_present_flag
+            br.skipBits(1);   // VUI overscan_appropriate_flag
+        }
+        if (br.getBits(1)) {                              // VUI video_signal_type_present_flag
+            br.skipBits(3);                               // VUI video_format
+            colorAspects->fullRange = br.getBits(1);      // VUI video_full_range_flag (written even if false is returned below)
+            if (br.getBits(1)) {                          // VUI color_description_present_flag
+                colorAspects->primaries = br.getBits(8);  // VUI colour_primaries
+                colorAspects->transfer = br.getBits(8);   // VUI transfer_characteristics
+                colorAspects->coeffs = br.getBits(8);     // VUI matrix_coefficients
+                return !br.overRead();  // the only success path: all three values read within the NAL data
+            }
+        }
+    }
+
+    return false;  // no VUI, no video signal type, or no colour description in this SPS
+}
+
 }  // namespace android
diff --git a/common/include/v4l2_codec2/common/NalParser.h b/common/include/v4l2_codec2/common/NalParser.h
index ba323ee..69f56c3 100644
--- a/common/include/v4l2_codec2/common/NalParser.h
+++ b/common/include/v4l2_codec2/common/NalParser.h
@@ -12,6 +12,19 @@ namespace android {
 // Helper class to parse H264 NAL units from data.
 class NalParser {
 public:
+    // Type of a SPS NAL unit.
+    static constexpr uint8_t kSPSType = 7;
+    // Type of a PPS NAL unit.
+    static constexpr uint8_t kPPSType = 8;
+
+    // Parameters related to a video's color aspects, as filled in by findCodedColorAspects().
+    struct ColorAspects {
+        int32_t primaries;  // colour_primaries value read from the SPS VUI
+        int32_t transfer;  // transfer_characteristics value read from the SPS VUI
+        int32_t coeffs;  // matrix_coefficients value read from the SPS VUI
+        bool fullRange;  // video_full_range_flag read from the SPS VUI
+    };
+
     NalParser(const uint8_t* data, size_t length);
 
     // Locates the next NAL after |mNextNalStartCodePos|. If there is one, updates |mCurrNalDataPos|
@@ -22,12 +35,21 @@ public:
     // Note: This method must be called prior to data() and length().
     bool locateNextNal();
 
+    // Locate the sequence parameter set (SPS).
+    bool locateSPS();
+
     // Gets current NAL data (start code is not included).
     const uint8_t* data() const;
 
     // Gets the byte length of current NAL data (start code is not included).
     size_t length() const;
 
+    // Get the type of the current NAL unit.
+    uint8_t type() const;
+
+    // Find the H.264 video's color aspects in the current SPS NAL.
+    bool findCodedColorAspects(ColorAspects* colorAspects);
+
 private:
     const uint8_t* findNextStartCodePos() const;
 
diff --git a/components/V4L2DecodeComponent.cpp b/components/V4L2DecodeComponent.cpp
index 97cfc36..c48878b 100644
--- a/components/V4L2DecodeComponent.cpp
+++ b/components/V4L2DecodeComponent.cpp
@@ -24,7 +24,7 @@
 #include <log/log.h>
 #include <media/stagefright/foundation/ColorUtils.h>
 
-#include <h264_parser.h>
+#include <v4l2_codec2/common/NalParser.h>
 #include <v4l2_codec2/common/VideoTypes.h>
 #include <v4l2_codec2/components/BitstreamBuffer.h>
 #include <v4l2_codec2/components/V4L2Decoder.h>
@@ -42,44 +42,23 @@ int32_t frameIndexToBitstreamId(c2_cntr64_t frameIndex) {
 bool parseCodedColorAspects(const C2ConstLinearBlock& input,
                             C2StreamColorAspectsInfo::input* codedAspects) {
     C2ReadView view = input.map().get();
-    const uint8_t* data = view.data();
-    const uint32_t size = view.capacity();
-
-    std::unique_ptr<media::H264Parser> h264Parser = std::make_unique<media::H264Parser>();
-    h264Parser->SetStream(data, static_cast<off_t>(size));
-    media::H264NALU nalu;
-    media::H264Parser::Result parRes = h264Parser->AdvanceToNextNALU(&nalu);
-    if (parRes != media::H264Parser::kEOStream && parRes != media::H264Parser::kOk) {
-        ALOGE("H264 AdvanceToNextNALU error: %d", static_cast<int>(parRes));
-        return false;
-    }
-    if (nalu.nal_unit_type != media::H264NALU::kSPS) {
-        ALOGV("NALU is not SPS");
-        return false;
-    }
+    NalParser parser(view.data(), view.capacity());
 
-    int spsId;
-    parRes = h264Parser->ParseSPS(&spsId);
-    if (parRes != media::H264Parser::kEOStream && parRes != media::H264Parser::kOk) {
-        ALOGE("H264 ParseSPS error: %d", static_cast<int>(parRes));
+    if (!parser.locateSPS()) {
+        ALOGV("Couldn't find SPS");
         return false;
     }
 
-    // Parse ISO color aspects from H264 SPS bitstream.
-    const media::H264SPS* sps = h264Parser->GetSPS(spsId);
-    if (!sps->colour_description_present_flag) {
-        ALOGV("No Color Description in SPS");
+    NalParser::ColorAspects aspects;
+    if (!parser.findCodedColorAspects(&aspects)) {
+        ALOGV("Couldn't find color description in SPS");
         return false;
     }
-    int32_t primaries = sps->colour_primaries;
-    int32_t transfer = sps->transfer_characteristics;
-    int32_t coeffs = sps->matrix_coefficients;
-    bool fullRange = sps->video_full_range_flag;
 
     // Convert ISO color aspects to ColorUtils::ColorAspects.
     ColorAspects colorAspects;
-    ColorUtils::convertIsoColorAspectsToCodecAspects(primaries, transfer, coeffs, fullRange,
-                                                     colorAspects);
+    ColorUtils::convertIsoColorAspectsToCodecAspects(
+            aspects.primaries, aspects.transfer, aspects.coeffs, aspects.fullRange, colorAspects);
     ALOGV("Parsed ColorAspects from bitstream: (R:%d, P:%d, M:%d, T:%d)", colorAspects.mRange,
           colorAspects.mPrimaries, colorAspects.mMatrixCoeffs, colorAspects.mTransfer);
author	David Staessens <dstaessens@google.com>	2021-03-18 11:19:17 +0900
committer	Chih-Yu Huang <akahuang@google.com>	2021-05-12 11:36:43 +0900
commit	fdbe4007aaf88a23f8b790efebbb345308f30738 (patch)
tree	09efecdb86c131a4e04bdc1d06f2afd1136aeb08
parent	a95d4ed18782db611a9c3c8b02c9adfbbb33a7aa (diff)
download	v4l2_codec2-fdbe4007aaf88a23f8b790efebbb345308f30738.tar.gz