diff options
author | minyue@webrtc.org <minyue@webrtc.org> | 2015-03-16 12:30:37 +0000 |
---|---|---|
committer | minyue@webrtc.org <minyue@webrtc.org> | 2015-03-16 12:31:19 +0000 |
commit | 7f7d7e3427cc70e1b8b050283ef031e28c83699a (patch) | |
tree | 5d88ef3852d2358317b27ba8c0aac4faaeb8b877 | |
parent | 4b89aa03bb9c817cf2274f2035d613a70c5298eb (diff) | |
download | webrtc-7f7d7e3427cc70e1b8b050283ef031e28c83699a.tar.gz |
Prevent crash in NetEQ when the decoder overflows.
NetEQ can crash when the decoder produces more output samples than NetEQ can handle. A practical case where this happens is when multiple Opus packets are combined.
The best solution is to pass the max size to the ACM decode function and let it return a failure if the max size is too small.
BUG=4361
R=henrik.lundin@webrtc.org
Review URL: https://webrtc-codereview.appspot.com/45619004
Cr-Commit-Position: refs/heads/master@{#8730}
git-svn-id: http://webrtc.googlecode.com/svn/trunk@8730 4adac7df-926f-26a2-2b94-8c16560cd09d
19 files changed, 452 insertions, 187 deletions
diff --git a/webrtc/modules/audio_coding/codecs/audio_decoder.cc b/webrtc/modules/audio_coding/codecs/audio_decoder.cc index 08178735c7..1ab2a7fec1 100644 --- a/webrtc/modules/audio_coding/codecs/audio_decoder.cc +++ b/webrtc/modules/audio_coding/codecs/audio_decoder.cc @@ -16,12 +16,40 @@ namespace webrtc { -int AudioDecoder::DecodeRedundant(const uint8_t* encoded, - size_t encoded_len, - int sample_rate_hz, - int16_t* decoded, - SpeechType* speech_type) { - return Decode(encoded, encoded_len, sample_rate_hz, decoded, speech_type); +int AudioDecoder::Decode(const uint8_t* encoded, size_t encoded_len, + int sample_rate_hz, size_t max_decoded_bytes, + int16_t* decoded, SpeechType* speech_type) { + int duration = PacketDuration(encoded, encoded_len); + if (duration >= 0 && duration * sizeof(int16_t) > max_decoded_bytes) { + return -1; + } + return DecodeInternal(encoded, encoded_len, sample_rate_hz, decoded, + speech_type); +} + +int AudioDecoder::DecodeRedundant(const uint8_t* encoded, size_t encoded_len, + int sample_rate_hz, size_t max_decoded_bytes, + int16_t* decoded, SpeechType* speech_type) { + int duration = PacketDurationRedundant(encoded, encoded_len); + if (duration >= 0 && duration * sizeof(int16_t) > max_decoded_bytes) { + return -1; + } + return DecodeRedundantInternal(encoded, encoded_len, sample_rate_hz, decoded, + speech_type); +} + +int AudioDecoder::DecodeInternal(const uint8_t* encoded, size_t encoded_len, + int sample_rate_hz, int16_t* decoded, + SpeechType* speech_type) { + return kNotImplemented; +} + +int AudioDecoder::DecodeRedundantInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, int16_t* decoded, + SpeechType* speech_type) { + return DecodeInternal(encoded, encoded_len, sample_rate_hz, decoded, + speech_type); } bool AudioDecoder::HasDecodePlc() const { return false; } diff --git a/webrtc/modules/audio_coding/codecs/audio_decoder.h b/webrtc/modules/audio_coding/codecs/audio_decoder.h index 8c83e61e5b..22e44a4483 
100644 --- a/webrtc/modules/audio_coding/codecs/audio_decoder.h +++ b/webrtc/modules/audio_coding/codecs/audio_decoder.h @@ -35,22 +35,25 @@ class AudioDecoder { virtual ~AudioDecoder() {} // Decodes |encode_len| bytes from |encoded| and writes the result in - // |decoded|. The number of samples from all channels produced is in - // the return value. If the decoder produced comfort noise, |speech_type| + // |decoded|. The maximum bytes allowed to be written into |decoded| is + // |max_decoded_bytes|. The number of samples from all channels produced is + // in the return value. If the decoder produced comfort noise, |speech_type| // is set to kComfortNoise, otherwise it is kSpeech. The desired output // sample rate is provided in |sample_rate_hz|, which must be valid for the // codec at hand. virtual int Decode(const uint8_t* encoded, size_t encoded_len, int sample_rate_hz, + size_t max_decoded_bytes, int16_t* decoded, - SpeechType* speech_type) = 0; + SpeechType* speech_type); // Same as Decode(), but interfaces to the decoders redundant decode function. // The default implementation simply calls the regular Decode() method. 
virtual int DecodeRedundant(const uint8_t* encoded, size_t encoded_len, int sample_rate_hz, + size_t max_decoded_bytes, int16_t* decoded, SpeechType* speech_type); @@ -99,6 +102,18 @@ class AudioDecoder { protected: static SpeechType ConvertSpeechType(int16_t type); + virtual int DecodeInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type); + + virtual int DecodeRedundantInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type); + size_t channels_; private: diff --git a/webrtc/modules/audio_coding/codecs/isac/audio_encoder_isac_t.h b/webrtc/modules/audio_coding/codecs/isac/audio_encoder_isac_t.h index acd8c2f8b2..6b197bc2d0 100644 --- a/webrtc/modules/audio_coding/codecs/isac/audio_encoder_isac_t.h +++ b/webrtc/modules/audio_coding/codecs/isac/audio_encoder_isac_t.h @@ -72,11 +72,6 @@ class AudioEncoderDecoderIsacT : public AudioEncoder, public AudioDecoder { int Max10MsFramesInAPacket() const override; // AudioDecoder methods. - int Decode(const uint8_t* encoded, - size_t encoded_len, - int sample_rate_hz, - int16_t* decoded, - SpeechType* speech_type) override; bool HasDecodePlc() const override; int DecodePlc(int num_frames, int16_t* decoded) override; int Init() override; @@ -95,6 +90,13 @@ class AudioEncoderDecoderIsacT : public AudioEncoder, public AudioDecoder { uint8_t* encoded, EncodedInfo* info) override; + // AudioDecoder protected method. + int DecodeInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) override; + private: // This value is taken from STREAM_SIZE_MAX_60 for iSAC float (60 ms) and // STREAM_MAXW16_60MS for iSAC fix (60 ms). 
diff --git a/webrtc/modules/audio_coding/codecs/isac/audio_encoder_isac_t_impl.h b/webrtc/modules/audio_coding/codecs/isac/audio_encoder_isac_t_impl.h index a1ffa809ba..87d71ab384 100644 --- a/webrtc/modules/audio_coding/codecs/isac/audio_encoder_isac_t_impl.h +++ b/webrtc/modules/audio_coding/codecs/isac/audio_encoder_isac_t_impl.h @@ -218,11 +218,11 @@ void AudioEncoderDecoderIsacT<T>::EncodeInternal(uint32_t rtp_timestamp, } template <typename T> -int AudioEncoderDecoderIsacT<T>::Decode(const uint8_t* encoded, - size_t encoded_len, - int sample_rate_hz, - int16_t* decoded, - SpeechType* speech_type) { +int AudioEncoderDecoderIsacT<T>::DecodeInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) { CriticalSectionScoped cs(state_lock_.get()); CHECK(sample_rate_hz == 16000 || sample_rate_hz == 32000) << "Unsupported sample rate " << sample_rate_hz; @@ -232,8 +232,8 @@ int AudioEncoderDecoderIsacT<T>::Decode(const uint8_t* encoded, } int16_t temp_type = 1; // Default is speech. 
int16_t ret = - T::Decode(isac_state_, encoded, static_cast<int16_t>(encoded_len), - decoded, &temp_type); + T::DecodeInternal(isac_state_, encoded, static_cast<int16_t>(encoded_len), + decoded, &temp_type); *speech_type = ConvertSpeechType(temp_type); return ret; } diff --git a/webrtc/modules/audio_coding/codecs/isac/fix/interface/audio_encoder_isacfix.h b/webrtc/modules/audio_coding/codecs/isac/fix/interface/audio_encoder_isacfix.h index b34f768f33..d12c1678b8 100644 --- a/webrtc/modules/audio_coding/codecs/isac/fix/interface/audio_encoder_isacfix.h +++ b/webrtc/modules/audio_coding/codecs/isac/fix/interface/audio_encoder_isacfix.h @@ -36,11 +36,11 @@ struct IsacFix { static inline int16_t Create(instance_type** inst) { return WebRtcIsacfix_Create(inst); } - static inline int16_t Decode(instance_type* inst, - const uint8_t* encoded, - int16_t len, - int16_t* decoded, - int16_t* speech_type) { + static inline int16_t DecodeInternal(instance_type* inst, + const uint8_t* encoded, + int16_t len, + int16_t* decoded, + int16_t* speech_type) { return WebRtcIsacfix_Decode(inst, encoded, len, decoded, speech_type); } static inline int16_t DecodePlc(instance_type* inst, diff --git a/webrtc/modules/audio_coding/codecs/isac/main/interface/audio_encoder_isac.h b/webrtc/modules/audio_coding/codecs/isac/main/interface/audio_encoder_isac.h index 7b9ec9c71c..7d8ac7951b 100644 --- a/webrtc/modules/audio_coding/codecs/isac/main/interface/audio_encoder_isac.h +++ b/webrtc/modules/audio_coding/codecs/isac/main/interface/audio_encoder_isac.h @@ -35,11 +35,11 @@ struct IsacFloat { static inline int16_t Create(instance_type** inst) { return WebRtcIsac_Create(inst); } - static inline int16_t Decode(instance_type* inst, - const uint8_t* encoded, - int16_t len, - int16_t* decoded, - int16_t* speech_type) { + static inline int16_t DecodeInternal(instance_type* inst, + const uint8_t* encoded, + int16_t len, + int16_t* decoded, + int16_t* speech_type) { return WebRtcIsac_Decode(inst, encoded, 
len, decoded, speech_type); } static inline int16_t DecodePlc(instance_type* inst, diff --git a/webrtc/modules/audio_coding/codecs/opus/opus_unittest.cc b/webrtc/modules/audio_coding/codecs/opus/opus_unittest.cc index e93cd7c511..00c88a5e7c 100644 --- a/webrtc/modules/audio_coding/codecs/opus/opus_unittest.cc +++ b/webrtc/modules/audio_coding/codecs/opus/opus_unittest.cc @@ -554,6 +554,54 @@ TEST_P(OpusTest, OpusDurationEstimation) { EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_)); } +TEST_P(OpusTest, OpusDecodeRepacketized) { + const int kPackets = 6; + + PrepareSpeechData(channels_, 20, 20 * kPackets); + + // Create encoder memory. + ASSERT_EQ(0, WebRtcOpus_EncoderCreate(&opus_encoder_, + channels_, + application_)); + ASSERT_EQ(0, WebRtcOpus_DecoderCreate(&opus_decoder_, + channels_)); + + // Set bitrate. + EXPECT_EQ(0, WebRtcOpus_SetBitRate(opus_encoder_, + channels_ == 1 ? 32000 : 64000)); + + // Check number of channels for decoder. + EXPECT_EQ(channels_, WebRtcOpus_DecoderChannels(opus_decoder_)); + + // Encode & decode. + int16_t audio_type; + rtc::scoped_ptr<int16_t[]> output_data_decode( + new int16_t[kPackets * kOpus20msFrameSamples * channels_]); + OpusRepacketizer* rp = opus_repacketizer_create(); + + for (int idx = 0; idx < kPackets; idx++) { + encoded_bytes_ = WebRtcOpus_Encode(opus_encoder_, + speech_data_.GetNextBlock(), + kOpus20msFrameSamples, kMaxBytes, + bitstream_); + EXPECT_EQ(OPUS_OK, opus_repacketizer_cat(rp, bitstream_, encoded_bytes_)); + } + + encoded_bytes_ = opus_repacketizer_out(rp, bitstream_, kMaxBytes); + + EXPECT_EQ(kOpus20msFrameSamples * kPackets, + WebRtcOpus_DurationEst(opus_decoder_, bitstream_, encoded_bytes_)); + + EXPECT_EQ(kOpus20msFrameSamples * kPackets, + WebRtcOpus_Decode(opus_decoder_, bitstream_, encoded_bytes_, + output_data_decode.get(), &audio_type)); + + // Free memory. 
+ opus_repacketizer_destroy(rp); + EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_)); + EXPECT_EQ(0, WebRtcOpus_DecoderFree(opus_decoder_)); +} + INSTANTIATE_TEST_CASE_P(VariousMode, OpusTest, Combine(Values(1, 2), Values(0, 1))); diff --git a/webrtc/modules/audio_coding/main/acm2/acm_generic_codec.cc b/webrtc/modules/audio_coding/main/acm2/acm_generic_codec.cc index 32569452b8..0f35fe10f8 100644 --- a/webrtc/modules/audio_coding/main/acm2/acm_generic_codec.cc +++ b/webrtc/modules/audio_coding/main/acm2/acm_generic_codec.cc @@ -140,21 +140,23 @@ bool AudioDecoderProxy::IsSet() const { int AudioDecoderProxy::Decode(const uint8_t* encoded, size_t encoded_len, int sample_rate_hz, + size_t max_decoded_bytes, int16_t* decoded, SpeechType* speech_type) { CriticalSectionScoped decoder_lock(decoder_lock_.get()); - return decoder_->Decode(encoded, encoded_len, sample_rate_hz, decoded, - speech_type); + return decoder_->Decode(encoded, encoded_len, sample_rate_hz, + max_decoded_bytes, decoded, speech_type); } int AudioDecoderProxy::DecodeRedundant(const uint8_t* encoded, size_t encoded_len, int sample_rate_hz, + size_t max_decoded_bytes, int16_t* decoded, SpeechType* speech_type) { CriticalSectionScoped decoder_lock(decoder_lock_.get()); return decoder_->DecodeRedundant(encoded, encoded_len, sample_rate_hz, - decoded, speech_type); + max_decoded_bytes, decoded, speech_type); } bool AudioDecoderProxy::HasDecodePlc() const { diff --git a/webrtc/modules/audio_coding/main/acm2/acm_generic_codec.h b/webrtc/modules/audio_coding/main/acm2/acm_generic_codec.h index 9ae99bf868..8849647c94 100644 --- a/webrtc/modules/audio_coding/main/acm2/acm_generic_codec.h +++ b/webrtc/modules/audio_coding/main/acm2/acm_generic_codec.h @@ -49,11 +49,13 @@ class AudioDecoderProxy final : public AudioDecoder { int Decode(const uint8_t* encoded, size_t encoded_len, int sample_rate_hz, + size_t max_decoded_bytes, int16_t* decoded, SpeechType* speech_type) override; int DecodeRedundant(const uint8_t* 
encoded, size_t encoded_len, int sample_rate_hz, + size_t max_decoded_bytes, int16_t* decoded, SpeechType* speech_type) override; bool HasDecodePlc() const override; diff --git a/webrtc/modules/audio_coding/neteq/audio_decoder_impl.cc b/webrtc/modules/audio_coding/neteq/audio_decoder_impl.cc index f77dead9df..8ffb761d09 100644 --- a/webrtc/modules/audio_coding/neteq/audio_decoder_impl.cc +++ b/webrtc/modules/audio_coding/neteq/audio_decoder_impl.cc @@ -38,11 +38,11 @@ namespace webrtc { // PCMu -int AudioDecoderPcmU::Decode(const uint8_t* encoded, - size_t encoded_len, - int sample_rate_hz, - int16_t* decoded, - SpeechType* speech_type) { +int AudioDecoderPcmU::DecodeInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) { DCHECK_EQ(sample_rate_hz, 8000); int16_t temp_type = 1; // Default is speech. int16_t ret = WebRtcG711_DecodeU(encoded, static_cast<int16_t>(encoded_len), @@ -58,11 +58,11 @@ int AudioDecoderPcmU::PacketDuration(const uint8_t* encoded, } // PCMa -int AudioDecoderPcmA::Decode(const uint8_t* encoded, - size_t encoded_len, - int sample_rate_hz, - int16_t* decoded, - SpeechType* speech_type) { +int AudioDecoderPcmA::DecodeInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) { DCHECK_EQ(sample_rate_hz, 8000); int16_t temp_type = 1; // Default is speech. 
int16_t ret = WebRtcG711_DecodeA(encoded, static_cast<int16_t>(encoded_len), @@ -81,11 +81,11 @@ int AudioDecoderPcmA::PacketDuration(const uint8_t* encoded, #ifdef WEBRTC_CODEC_PCM16 AudioDecoderPcm16B::AudioDecoderPcm16B() {} -int AudioDecoderPcm16B::Decode(const uint8_t* encoded, - size_t encoded_len, - int sample_rate_hz, - int16_t* decoded, - SpeechType* speech_type) { +int AudioDecoderPcm16B::DecodeInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) { DCHECK(sample_rate_hz == 8000 || sample_rate_hz == 16000 || sample_rate_hz == 32000 || sample_rate_hz == 48000) << "Unsupported sample rate " << sample_rate_hz; @@ -117,11 +117,11 @@ AudioDecoderIlbc::~AudioDecoderIlbc() { WebRtcIlbcfix_DecoderFree(dec_state_); } -int AudioDecoderIlbc::Decode(const uint8_t* encoded, - size_t encoded_len, - int sample_rate_hz, - int16_t* decoded, - SpeechType* speech_type) { +int AudioDecoderIlbc::DecodeInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) { DCHECK_EQ(sample_rate_hz, 8000); int16_t temp_type = 1; // Default is speech. int16_t ret = WebRtcIlbcfix_Decode(dec_state_, encoded, @@ -150,11 +150,11 @@ AudioDecoderG722::~AudioDecoderG722() { WebRtcG722_FreeDecoder(dec_state_); } -int AudioDecoderG722::Decode(const uint8_t* encoded, - size_t encoded_len, - int sample_rate_hz, - int16_t* decoded, - SpeechType* speech_type) { +int AudioDecoderG722::DecodeInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) { DCHECK_EQ(sample_rate_hz, 16000); int16_t temp_type = 1; // Default is speech. 
int16_t ret = @@ -185,11 +185,11 @@ AudioDecoderG722Stereo::~AudioDecoderG722Stereo() { WebRtcG722_FreeDecoder(dec_state_right_); } -int AudioDecoderG722Stereo::Decode(const uint8_t* encoded, - size_t encoded_len, - int sample_rate_hz, - int16_t* decoded, - SpeechType* speech_type) { +int AudioDecoderG722Stereo::DecodeInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) { DCHECK_EQ(sample_rate_hz, 16000); int16_t temp_type = 1; // Default is speech. // De-interleave the bit-stream into two separate payloads. @@ -270,11 +270,11 @@ AudioDecoderOpus::~AudioDecoderOpus() { WebRtcOpus_DecoderFree(dec_state_); } -int AudioDecoderOpus::Decode(const uint8_t* encoded, - size_t encoded_len, - int sample_rate_hz, - int16_t* decoded, - SpeechType* speech_type) { +int AudioDecoderOpus::DecodeInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) { DCHECK_EQ(sample_rate_hz, 48000); int16_t temp_type = 1; // Default is speech. int16_t ret = WebRtcOpus_Decode(dec_state_, encoded, @@ -286,16 +286,18 @@ int AudioDecoderOpus::Decode(const uint8_t* encoded, return ret; } -int AudioDecoderOpus::DecodeRedundant(const uint8_t* encoded, - size_t encoded_len, - int sample_rate_hz, - int16_t* decoded, - SpeechType* speech_type) { +int AudioDecoderOpus::DecodeRedundantInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) { if (!PacketHasFec(encoded, encoded_len)) { // This packet is a RED packet. - return Decode(encoded, encoded_len, sample_rate_hz, decoded, speech_type); + return DecodeInternal(encoded, encoded_len, sample_rate_hz, decoded, + speech_type); } + DCHECK_EQ(sample_rate_hz, 48000); int16_t temp_type = 1; // Default is speech. 
int16_t ret = WebRtcOpus_DecodeFec(dec_state_, encoded, static_cast<int16_t>(encoded_len), decoded, diff --git a/webrtc/modules/audio_coding/neteq/audio_decoder_impl.h b/webrtc/modules/audio_coding/neteq/audio_decoder_impl.h index 7e80a368de..2222b62775 100644 --- a/webrtc/modules/audio_coding/neteq/audio_decoder_impl.h +++ b/webrtc/modules/audio_coding/neteq/audio_decoder_impl.h @@ -37,13 +37,15 @@ namespace webrtc { class AudioDecoderPcmU : public AudioDecoder { public: AudioDecoderPcmU() {} - virtual int Decode(const uint8_t* encoded, + virtual int Init() { return 0; } + virtual int PacketDuration(const uint8_t* encoded, size_t encoded_len) const; + + protected: + int DecodeInternal(const uint8_t* encoded, size_t encoded_len, int sample_rate_hz, int16_t* decoded, - SpeechType* speech_type); - virtual int Init() { return 0; } - virtual int PacketDuration(const uint8_t* encoded, size_t encoded_len) const; + SpeechType* speech_type) override; private: DISALLOW_COPY_AND_ASSIGN(AudioDecoderPcmU); @@ -52,13 +54,15 @@ class AudioDecoderPcmU : public AudioDecoder { class AudioDecoderPcmA : public AudioDecoder { public: AudioDecoderPcmA() {} - virtual int Decode(const uint8_t* encoded, + virtual int Init() { return 0; } + virtual int PacketDuration(const uint8_t* encoded, size_t encoded_len) const; + + protected: + int DecodeInternal(const uint8_t* encoded, size_t encoded_len, int sample_rate_hz, int16_t* decoded, - SpeechType* speech_type); - virtual int Init() { return 0; } - virtual int PacketDuration(const uint8_t* encoded, size_t encoded_len) const; + SpeechType* speech_type) override; private: DISALLOW_COPY_AND_ASSIGN(AudioDecoderPcmA); @@ -92,13 +96,15 @@ class AudioDecoderPcmAMultiCh : public AudioDecoderPcmA { class AudioDecoderPcm16B : public AudioDecoder { public: AudioDecoderPcm16B(); - virtual int Decode(const uint8_t* encoded, + virtual int Init() { return 0; } + virtual int PacketDuration(const uint8_t* encoded, size_t encoded_len) const; + + protected: + 
int DecodeInternal(const uint8_t* encoded, size_t encoded_len, int sample_rate_hz, int16_t* decoded, - SpeechType* speech_type); - virtual int Init() { return 0; } - virtual int PacketDuration(const uint8_t* encoded, size_t encoded_len) const; + SpeechType* speech_type) override; private: DISALLOW_COPY_AND_ASSIGN(AudioDecoderPcm16B); @@ -121,15 +127,17 @@ class AudioDecoderIlbc : public AudioDecoder { public: AudioDecoderIlbc(); virtual ~AudioDecoderIlbc(); - virtual int Decode(const uint8_t* encoded, - size_t encoded_len, - int sample_rate_hz, - int16_t* decoded, - SpeechType* speech_type); virtual bool HasDecodePlc() const { return true; } virtual int DecodePlc(int num_frames, int16_t* decoded); virtual int Init(); + protected: + int DecodeInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) override; + private: IlbcDecoderInstance* dec_state_; DISALLOW_COPY_AND_ASSIGN(AudioDecoderIlbc); @@ -141,15 +149,17 @@ class AudioDecoderG722 : public AudioDecoder { public: AudioDecoderG722(); virtual ~AudioDecoderG722(); - virtual int Decode(const uint8_t* encoded, - size_t encoded_len, - int sample_rate_hz, - int16_t* decoded, - SpeechType* speech_type); virtual bool HasDecodePlc() const { return false; } virtual int Init(); virtual int PacketDuration(const uint8_t* encoded, size_t encoded_len) const; + protected: + int DecodeInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) override; + private: G722DecInst* dec_state_; DISALLOW_COPY_AND_ASSIGN(AudioDecoderG722); @@ -159,12 +169,14 @@ class AudioDecoderG722Stereo : public AudioDecoder { public: AudioDecoderG722Stereo(); virtual ~AudioDecoderG722Stereo(); - virtual int Decode(const uint8_t* encoded, + virtual int Init(); + + protected: + int DecodeInternal(const uint8_t* encoded, size_t encoded_len, int sample_rate_hz, int16_t* decoded, - SpeechType* speech_type); - virtual int 
Init(); + SpeechType* speech_type) override; private: // Splits the stereo-interleaved payload in |encoded| into separate payloads @@ -187,21 +199,24 @@ class AudioDecoderOpus : public AudioDecoder { public: explicit AudioDecoderOpus(int num_channels); virtual ~AudioDecoderOpus(); - virtual int Decode(const uint8_t* encoded, + + virtual int Init(); + virtual int PacketDuration(const uint8_t* encoded, size_t encoded_len) const; + virtual int PacketDurationRedundant(const uint8_t* encoded, + size_t encoded_len) const; + virtual bool PacketHasFec(const uint8_t* encoded, size_t encoded_len) const; + + protected: + int DecodeInternal(const uint8_t* encoded, size_t encoded_len, int sample_rate_hz, int16_t* decoded, - SpeechType* speech_type); - virtual int DecodeRedundant(const uint8_t* encoded, + SpeechType* speech_type) override; + int DecodeRedundantInternal(const uint8_t* encoded, size_t encoded_len, int sample_rate_hz, int16_t* decoded, - SpeechType* speech_type); - virtual int Init(); - virtual int PacketDuration(const uint8_t* encoded, size_t encoded_len) const; - virtual int PacketDurationRedundant(const uint8_t* encoded, - size_t encoded_len) const; - virtual bool PacketHasFec(const uint8_t* encoded, size_t encoded_len) const; + SpeechType* speech_type) override; private: OpusDecInst* dec_state_; @@ -219,13 +234,6 @@ class AudioDecoderCng : public AudioDecoder { public: explicit AudioDecoderCng(); virtual ~AudioDecoderCng(); - virtual int Decode(const uint8_t* encoded, - size_t encoded_len, - int /*sample_rate_hz*/, - int16_t* decoded, - SpeechType* speech_type) { - return -1; - } virtual int Init(); virtual int IncomingPacket(const uint8_t* payload, size_t payload_len, @@ -235,6 +243,15 @@ class AudioDecoderCng : public AudioDecoder { CNG_dec_inst* CngDecoderInstance() override { return dec_state_; } + protected: + int DecodeInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) override { + 
return -1; + } + private: CNG_dec_inst* dec_state_; DISALLOW_COPY_AND_ASSIGN(AudioDecoderCng); diff --git a/webrtc/modules/audio_coding/neteq/audio_decoder_unittest.cc b/webrtc/modules/audio_coding/neteq/audio_decoder_unittest.cc index e8823b3e57..e319b000fb 100644 --- a/webrtc/modules/audio_coding/neteq/audio_decoder_unittest.cc +++ b/webrtc/modules/audio_coding/neteq/audio_decoder_unittest.cc @@ -188,6 +188,7 @@ class AudioDecoderTest : public ::testing::Test { AudioDecoder::SpeechType speech_type; size_t dec_len = decoder_->Decode( &encoded_[encoded_bytes_], enc_len, codec_input_rate_hz_, + frame_size_ * channels_ * sizeof(int16_t), &decoded[processed_samples * channels_], &speech_type); EXPECT_EQ(frame_size_ * channels_, dec_len); encoded_bytes_ += enc_len; @@ -222,6 +223,7 @@ class AudioDecoderTest : public ::testing::Test { EXPECT_EQ(0, decoder_->Init()); rtc::scoped_ptr<int16_t[]> output1(new int16_t[frame_size_ * channels_]); dec_len = decoder_->Decode(encoded_, enc_len, codec_input_rate_hz_, + frame_size_ * channels_ * sizeof(int16_t), output1.get(), &speech_type1); ASSERT_LE(dec_len, frame_size_ * channels_); EXPECT_EQ(frame_size_ * channels_, dec_len); @@ -229,6 +231,7 @@ class AudioDecoderTest : public ::testing::Test { EXPECT_EQ(0, decoder_->Init()); rtc::scoped_ptr<int16_t[]> output2(new int16_t[frame_size_ * channels_]); dec_len = decoder_->Decode(encoded_, enc_len, codec_input_rate_hz_, + frame_size_ * channels_ * sizeof(int16_t), output2.get(), &speech_type2); ASSERT_LE(dec_len, frame_size_ * channels_); EXPECT_EQ(frame_size_ * channels_, dec_len); @@ -249,6 +252,7 @@ class AudioDecoderTest : public ::testing::Test { EXPECT_EQ(0, decoder_->Init()); rtc::scoped_ptr<int16_t[]> output(new int16_t[frame_size_ * channels_]); size_t dec_len = decoder_->Decode(encoded_, enc_len, codec_input_rate_hz_, + frame_size_ * channels_ * sizeof(int16_t), output.get(), &speech_type); EXPECT_EQ(frame_size_ * channels_, dec_len); // Call DecodePlc and verify that we 
get one frame of data. @@ -340,6 +344,7 @@ class AudioDecoderIlbcTest : public AudioDecoderTest { EXPECT_EQ(0, decoder_->Init()); rtc::scoped_ptr<int16_t[]> output(new int16_t[frame_size_ * channels_]); size_t dec_len = decoder_->Decode(encoded_, enc_len, codec_input_rate_hz_, + frame_size_ * channels_ * sizeof(int16_t), output.get(), &speech_type); EXPECT_EQ(frame_size_, dec_len); // Simply call DecodePlc and verify that we get 0 as return value. diff --git a/webrtc/modules/audio_coding/neteq/mock/mock_audio_decoder.h b/webrtc/modules/audio_coding/neteq/mock/mock_audio_decoder.h index 7288f116ce..b113e4af1c 100644 --- a/webrtc/modules/audio_coding/neteq/mock/mock_audio_decoder.h +++ b/webrtc/modules/audio_coding/neteq/mock/mock_audio_decoder.h @@ -22,9 +22,9 @@ class MockAudioDecoder : public AudioDecoder { MockAudioDecoder() {} virtual ~MockAudioDecoder() { Die(); } MOCK_METHOD0(Die, void()); - MOCK_METHOD5( + MOCK_METHOD6( Decode, - int(const uint8_t*, size_t, int, int16_t*, AudioDecoder::SpeechType*)); + int(const uint8_t*, size_t, int, size_t, int16_t*, SpeechType*)); MOCK_CONST_METHOD0(HasDecodePlc, bool()); MOCK_METHOD2(DecodePlc, int(int, int16_t*)); MOCK_METHOD0(Init, int()); diff --git a/webrtc/modules/audio_coding/neteq/mock/mock_external_decoder_pcm16b.h b/webrtc/modules/audio_coding/neteq/mock/mock_external_decoder_pcm16b.h index 22d28167b7..beff6ae61e 100644 --- a/webrtc/modules/audio_coding/neteq/mock/mock_external_decoder_pcm16b.h +++ b/webrtc/modules/audio_coding/neteq/mock/mock_external_decoder_pcm16b.h @@ -28,20 +28,20 @@ using ::testing::Invoke; class ExternalPcm16B : public AudioDecoder { public: ExternalPcm16B() {} + virtual int Init() { return 0; } - virtual int Decode(const uint8_t* encoded, + protected: + int DecodeInternal(const uint8_t* encoded, size_t encoded_len, int sample_rate_hz, int16_t* decoded, - SpeechType* speech_type) { + SpeechType* speech_type) override { int16_t ret = WebRtcPcm16b_Decode( encoded, 
static_cast<int16_t>(encoded_len), decoded); *speech_type = ConvertSpeechType(1); return ret; } - virtual int Init() { return 0; } - private: DISALLOW_COPY_AND_ASSIGN(ExternalPcm16B); }; @@ -52,7 +52,7 @@ class MockExternalPcm16B : public ExternalPcm16B { public: MockExternalPcm16B() { // By default, all calls are delegated to the real object. - ON_CALL(*this, Decode(_, _, _, _, _)) + ON_CALL(*this, Decode(_, _, _, _, _, _)) .WillByDefault(Invoke(&real_, &ExternalPcm16B::Decode)); ON_CALL(*this, HasDecodePlc()) .WillByDefault(Invoke(&real_, &ExternalPcm16B::HasDecodePlc)); @@ -68,10 +68,11 @@ class MockExternalPcm16B : public ExternalPcm16B { virtual ~MockExternalPcm16B() { Die(); } MOCK_METHOD0(Die, void()); - MOCK_METHOD5(Decode, + MOCK_METHOD6(Decode, int(const uint8_t* encoded, size_t encoded_len, int sample_rate_hz, + size_t max_decoded_bytes, int16_t* decoded, SpeechType* speech_type)); MOCK_CONST_METHOD0(HasDecodePlc, diff --git a/webrtc/modules/audio_coding/neteq/neteq_external_decoder_unittest.cc b/webrtc/modules/audio_coding/neteq/neteq_external_decoder_unittest.cc index 28e901ea22..f0158b972c 100644 --- a/webrtc/modules/audio_coding/neteq/neteq_external_decoder_unittest.cc +++ b/webrtc/modules/audio_coding/neteq/neteq_external_decoder_unittest.cc @@ -101,7 +101,7 @@ class NetEqExternalDecoderUnitTest : public test::NetEqExternalDecoderTest { } while (Lost()); // If lost, immediately read the next packet. 
EXPECT_CALL(*external_decoder_, - Decode(_, payload_size_bytes_, 1000 * samples_per_ms_, _, _)) + Decode(_, payload_size_bytes_, 1000 * samples_per_ms_, _, _, _)) .Times(NumExpectedDecodeCalls(num_loops)); uint32_t time_now = 0; diff --git a/webrtc/modules/audio_coding/neteq/neteq_impl.cc b/webrtc/modules/audio_coding/neteq/neteq_impl.cc index 73708256f1..fb9656bf5c 100644 --- a/webrtc/modules/audio_coding/neteq/neteq_impl.cc +++ b/webrtc/modules/audio_coding/neteq/neteq_impl.cc @@ -1267,6 +1267,7 @@ int NetEqImpl::DecodeLoop(PacketList* packet_list, Operations* operation, ", len=" << packet->payload_length; decode_length = decoder->DecodeRedundant( packet->payload, packet->payload_length, fs_hz_, + (decoded_buffer_length_ - *decoded_length) * sizeof(int16_t), &decoded_buffer_[*decoded_length], speech_type); } else { LOG(LS_VERBOSE) << "Decoding packet: ts=" << packet->header.timestamp << @@ -1275,8 +1276,10 @@ int NetEqImpl::DecodeLoop(PacketList* packet_list, Operations* operation, ", ssrc=" << packet->header.ssrc << ", len=" << packet->payload_length; decode_length = - decoder->Decode(packet->payload, packet->payload_length, fs_hz_, - &decoded_buffer_[*decoded_length], speech_type); + decoder->Decode( + packet->payload, packet->payload_length, fs_hz_, + (decoded_buffer_length_ - *decoded_length) * sizeof(int16_t), + &decoded_buffer_[*decoded_length], speech_type); } delete[] packet->payload; @@ -1606,8 +1609,9 @@ void NetEqImpl::DoCodecInternalCng() { if (decoder) { const uint8_t* dummy_payload = NULL; AudioDecoder::SpeechType speech_type; - length = - decoder->Decode(dummy_payload, 0, fs_hz_, decoded_buffer, &speech_type); + length = decoder->Decode( + dummy_payload, 0, fs_hz_, kMaxFrameSize * sizeof(int16_t), + decoded_buffer, &speech_type); } assert(mute_factor_array_.get()); normal_->Process(decoded_buffer, length, last_mode_, mute_factor_array_.get(), diff --git a/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc 
b/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc index 54b393b959..0a5c6a4166 100644 --- a/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc +++ b/webrtc/modules/audio_coding/neteq/neteq_impl_unittest.cc @@ -28,6 +28,7 @@ #include "webrtc/modules/audio_coding/neteq/sync_buffer.h" #include "webrtc/modules/audio_coding/neteq/timestamp_scaler.h" +using ::testing::AtLeast; using ::testing::Return; using ::testing::ReturnNull; using ::testing::_; @@ -428,11 +429,12 @@ TEST_F(NetEqImplTest, VerifyTimestampPropagation) { CountingSamplesDecoder() : next_value_(1) {} // Produce as many samples as input bytes (|encoded_len|). - virtual int Decode(const uint8_t* encoded, - size_t encoded_len, - int /*sample_rate_hz*/, - int16_t* decoded, - SpeechType* speech_type) { + int Decode(const uint8_t* encoded, + size_t encoded_len, + int /* sample_rate_hz */, + size_t /* max_decoded_bytes */, + int16_t* decoded, + SpeechType* speech_type) override { for (size_t i = 0; i < encoded_len; ++i) { decoded[i] = next_value_++; } @@ -523,10 +525,10 @@ TEST_F(NetEqImplTest, ReorderedPacket) { // The below expectation will make the mock decoder write // |kPayloadLengthSamples| zeros to the output array, and mark it as speech. EXPECT_CALL(mock_decoder, - Decode(Pointee(0), kPayloadLengthBytes, kSampleRateHz, _, _)) - .WillOnce(DoAll(SetArrayArgument<3>(dummy_output, + Decode(Pointee(0), kPayloadLengthBytes, kSampleRateHz, _, _, _)) + .WillOnce(DoAll(SetArrayArgument<4>(dummy_output, dummy_output + kPayloadLengthSamples), - SetArgPointee<4>(AudioDecoder::kSpeech), + SetArgPointee<5>(AudioDecoder::kSpeech), Return(kPayloadLengthSamples))); EXPECT_EQ(NetEq::kOK, neteq_->RegisterExternalDecoder( @@ -569,10 +571,10 @@ TEST_F(NetEqImplTest, ReorderedPacket) { // Expect only the second packet to be decoded (the one with "2" as the first // payload byte). 
EXPECT_CALL(mock_decoder, - Decode(Pointee(2), kPayloadLengthBytes, kSampleRateHz, _, _)) - .WillOnce(DoAll(SetArrayArgument<3>(dummy_output, + Decode(Pointee(2), kPayloadLengthBytes, kSampleRateHz, _, _, _)) + .WillOnce(DoAll(SetArrayArgument<4>(dummy_output, dummy_output + kPayloadLengthSamples), - SetArgPointee<4>(AudioDecoder::kSpeech), + SetArgPointee<5>(AudioDecoder::kSpeech), Return(kPayloadLengthSamples))); // Pull audio once. @@ -686,30 +688,30 @@ TEST_F(NetEqImplTest, CodecInternalCng) { // Pointee(x) verifies that first byte of the payload equals x, this makes it // possible to verify that the correct payload is fed to Decode(). EXPECT_CALL(mock_decoder, Decode(Pointee(0), kPayloadLengthBytes, - kSampleRateKhz * 1000, _, _)) - .WillOnce(DoAll(SetArrayArgument<3>(dummy_output, + kSampleRateKhz * 1000, _, _, _)) + .WillOnce(DoAll(SetArrayArgument<4>(dummy_output, dummy_output + kPayloadLengthSamples), - SetArgPointee<4>(AudioDecoder::kSpeech), + SetArgPointee<5>(AudioDecoder::kSpeech), Return(kPayloadLengthSamples))); EXPECT_CALL(mock_decoder, Decode(Pointee(1), kPayloadLengthBytes, - kSampleRateKhz * 1000, _, _)) - .WillOnce(DoAll(SetArrayArgument<3>(dummy_output, + kSampleRateKhz * 1000, _, _, _)) + .WillOnce(DoAll(SetArrayArgument<4>(dummy_output, dummy_output + kPayloadLengthSamples), - SetArgPointee<4>(AudioDecoder::kComfortNoise), + SetArgPointee<5>(AudioDecoder::kComfortNoise), Return(kPayloadLengthSamples))); - EXPECT_CALL(mock_decoder, Decode(IsNull(), 0, kSampleRateKhz * 1000, _, _)) - .WillOnce(DoAll(SetArrayArgument<3>(dummy_output, + EXPECT_CALL(mock_decoder, Decode(IsNull(), 0, kSampleRateKhz * 1000, _, _, _)) + .WillOnce(DoAll(SetArrayArgument<4>(dummy_output, dummy_output + kPayloadLengthSamples), - SetArgPointee<4>(AudioDecoder::kComfortNoise), + SetArgPointee<5>(AudioDecoder::kComfortNoise), Return(kPayloadLengthSamples))); EXPECT_CALL(mock_decoder, Decode(Pointee(2), kPayloadLengthBytes, - kSampleRateKhz * 1000, _, _)) - 
.WillOnce(DoAll(SetArrayArgument<3>(dummy_output, + kSampleRateKhz * 1000, _, _, _)) + .WillOnce(DoAll(SetArrayArgument<4>(dummy_output, dummy_output + kPayloadLengthSamples), - SetArgPointee<4>(AudioDecoder::kSpeech), + SetArgPointee<5>(AudioDecoder::kSpeech), Return(kPayloadLengthSamples))); EXPECT_EQ(NetEq::kOK, @@ -793,4 +795,101 @@ TEST_F(NetEqImplTest, CodecInternalCng) { EXPECT_CALL(mock_decoder, Die()); } + +TEST_F(NetEqImplTest, UnsupportedDecoder) { + UseNoMocks(); + CreateInstance(); + static const size_t kNetEqMaxFrameSize = 2880; // 60 ms @ 48 kHz. + + const uint8_t kPayloadType = 17; // Just an arbitrary number. + const uint32_t kReceiveTime = 17; // Value doesn't matter for this test. + const int kSampleRateHz = 8000; + const int kChannles = 1; + + const int kPayloadLengthSamples = 10 * kSampleRateHz / 1000; // 10 ms. + const size_t kPayloadLengthBytes = 1; + uint8_t payload[kPayloadLengthBytes]= {0}; + int16_t dummy_output[kPayloadLengthSamples] = {0}; + WebRtcRTPHeader rtp_header; + rtp_header.header.payloadType = kPayloadType; + rtp_header.header.sequenceNumber = 0x1234; + rtp_header.header.timestamp = 0x12345678; + rtp_header.header.ssrc = 0x87654321; + + class MockAudioDecoder : public AudioDecoder { + public: + int Init() override { + return 0; + } + MOCK_CONST_METHOD2(PacketDuration, int(const uint8_t*, size_t)); + MOCK_METHOD5(DecodeInternal, int(const uint8_t*, size_t, int, int16_t*, + SpeechType*)); + } decoder_; + + const uint8_t kFirstPayloadValue = 1; + const uint8_t kSecondPayloadValue = 2; + + EXPECT_CALL(decoder_, PacketDuration(Pointee(kFirstPayloadValue), + kPayloadLengthBytes)) + .Times(AtLeast(1)) + .WillRepeatedly(Return(kNetEqMaxFrameSize * kChannles + 1)); + + EXPECT_CALL(decoder_, + DecodeInternal(Pointee(kFirstPayloadValue), _, _, _, _)) + .Times(0); + + EXPECT_CALL(decoder_, DecodeInternal(Pointee(kSecondPayloadValue), + kPayloadLengthBytes, + kSampleRateHz, _, _)) + .Times(1) + 
.WillOnce(DoAll(SetArrayArgument<3>(dummy_output, + dummy_output + kPayloadLengthSamples), + SetArgPointee<4>(AudioDecoder::kSpeech), + Return(kPayloadLengthSamples))); + + EXPECT_CALL(decoder_, PacketDuration(Pointee(kSecondPayloadValue), + kPayloadLengthBytes)) + .Times(AtLeast(1)) + .WillRepeatedly(Return(kNetEqMaxFrameSize * kChannles)); + + EXPECT_EQ(NetEq::kOK, + neteq_->RegisterExternalDecoder( + &decoder_, kDecoderPCM16B, kPayloadType)); + + // Insert one packet. + payload[0] = kFirstPayloadValue; // This will make Decode() fail. + EXPECT_EQ(NetEq::kOK, + neteq_->InsertPacket( + rtp_header, payload, kPayloadLengthBytes, kReceiveTime)); + + // Insert another packet. + payload[0] = kSecondPayloadValue; // This will make Decode() successful. + rtp_header.header.sequenceNumber++; + // The second timestamp needs to be at least 30 ms after the first to make + // the second packet get decoded. + rtp_header.header.timestamp += 3 * kPayloadLengthSamples; + EXPECT_EQ(NetEq::kOK, + neteq_->InsertPacket( + rtp_header, payload, kPayloadLengthBytes, kReceiveTime)); + + const int kMaxOutputSize = 10 * kSampleRateHz / 1000; + int16_t output[kMaxOutputSize]; + int samples_per_channel; + int num_channels; + NetEqOutputType type; + + EXPECT_EQ(NetEq::kFail, neteq_->GetAudio(kMaxOutputSize, output, + &samples_per_channel, &num_channels, + &type)); + EXPECT_EQ(NetEq::kOtherDecoderError, neteq_->LastError()); + EXPECT_EQ(kMaxOutputSize, samples_per_channel); + EXPECT_EQ(kChannles, num_channels); + + EXPECT_EQ(NetEq::kOK, neteq_->GetAudio(kMaxOutputSize, output, + &samples_per_channel, &num_channels, + &type)); + EXPECT_EQ(kMaxOutputSize, samples_per_channel); + EXPECT_EQ(kChannles, num_channels); +} + } // namespace webrtc diff --git a/webrtc/modules/audio_coding/neteq/neteq_network_stats_unittest.cc b/webrtc/modules/audio_coding/neteq/neteq_network_stats_unittest.cc index c6195d03dc..a49f957818 100644 --- a/webrtc/modules/audio_coding/neteq/neteq_network_stats_unittest.cc +++ 
b/webrtc/modules/audio_coding/neteq/neteq_network_stats_unittest.cc @@ -35,25 +35,6 @@ class MockAudioDecoderOpus : public AudioDecoderOpus { MOCK_METHOD0(Init, int()); - // Override the following methods such that no actual payload is needed. - int Decode(const uint8_t* encoded, - size_t encoded_len, - int /*sample_rate_hz*/, - int16_t* decoded, - SpeechType* speech_type) override { - *speech_type = kSpeech; - memset(decoded, 0, sizeof(int16_t) * kPacketDuration * channels_); - return kPacketDuration * channels_; - } - - int DecodeRedundant(const uint8_t* encoded, - size_t encoded_len, - int sample_rate_hz, - int16_t* decoded, - SpeechType* speech_type) override { - return Decode(encoded, encoded_len, sample_rate_hz, decoded, speech_type); - } - int PacketDuration(const uint8_t* encoded, size_t encoded_len) const override { return kPacketDuration; @@ -72,6 +53,27 @@ class MockAudioDecoderOpus : public AudioDecoderOpus { bool fec_enabled() const { return fec_enabled_; } + protected: + // Override the following methods such that no actual payload is needed. 
+ int DecodeInternal(const uint8_t* encoded, + size_t encoded_len, + int /*sample_rate_hz*/, + int16_t* decoded, + SpeechType* speech_type) override { + *speech_type = kSpeech; + memset(decoded, 0, sizeof(int16_t) * kPacketDuration * channels_); + return kPacketDuration * channels_; + } + + int DecodeRedundantInternal(const uint8_t* encoded, + size_t encoded_len, + int sample_rate_hz, + int16_t* decoded, + SpeechType* speech_type) override { + return DecodeInternal(encoded, encoded_len, sample_rate_hz, decoded, + speech_type); + } + private: bool fec_enabled_; }; diff --git a/webrtc/modules/audio_coding/neteq/test/neteq_opus_fec_quality_test.cc b/webrtc/modules/audio_coding/neteq/test/neteq_opus_fec_quality_test.cc index 55ef0c7c0c..3edf89cf52 100644 --- a/webrtc/modules/audio_coding/neteq/test/neteq_opus_fec_quality_test.cc +++ b/webrtc/modules/audio_coding/neteq/test/neteq_opus_fec_quality_test.cc @@ -9,6 +9,7 @@ */ #include "webrtc/modules/audio_coding/codecs/opus/interface/opus_interface.h" +#include "webrtc/modules/audio_coding/codecs/opus/opus_inst.h" #include "webrtc/modules/audio_coding/neteq/tools/neteq_quality_test.h" #include "webrtc/test/testsupport/fileutils.h" @@ -52,7 +53,7 @@ static bool ValidateOutFilename(const char* flagname, const string& value) { return false; } -DEFINE_string(out_filename, OutputPath() + "neteq4_opus_fec_quality_test.pcm", +DEFINE_string(out_filename, OutputPath() + "neteq_opus_quality_test.pcm", "Name of output audio file."); static const bool out_filename_dummy = @@ -113,43 +114,66 @@ DEFINE_bool(fec, true, "Whether to enable FEC for encoding."); DEFINE_bool(dtx, true, "Whether to enable DTX for encoding."); -class NetEqOpusFecQualityTest : public NetEqQualityTest { +// Define switch for number of sub packets to repacketize. 
+static bool ValidateSubPackets(const char* flagname, int32_t value) { + if (value >= 1 && value <= 3) + return true; + printf("Invalid number of sub packets, should be between 1 and 3."); + return false; +} +DEFINE_int32(sub_packets, 1, "Number of sub packets to repacketize."); +static const bool sub_packets_dummy = + RegisterFlagValidator(&FLAGS_sub_packets, &ValidateSubPackets); + +class NetEqOpusQualityTest : public NetEqQualityTest { protected: - NetEqOpusFecQualityTest(); + NetEqOpusQualityTest(); void SetUp() override; void TearDown() override; virtual int EncodeBlock(int16_t* in_data, int block_size_samples, uint8_t* payload, int max_bytes); private: WebRtcOpusEncInst* opus_encoder_; + OpusRepacketizer* repacketizer_; + int sub_block_size_samples_; int channels_; int bit_rate_kbps_; bool fec_; bool dtx_; int target_loss_rate_; + int sub_packets_; }; -NetEqOpusFecQualityTest::NetEqOpusFecQualityTest() - : NetEqQualityTest(kOpusBlockDurationMs, kOpusSamplingKhz, +NetEqOpusQualityTest::NetEqOpusQualityTest() + : NetEqQualityTest(kOpusBlockDurationMs * FLAGS_sub_packets, + kOpusSamplingKhz, kOpusSamplingKhz, (FLAGS_channels == 1) ? kDecoderOpus : kDecoderOpus_2ch, FLAGS_channels, FLAGS_in_filename, FLAGS_out_filename), opus_encoder_(NULL), + repacketizer_(NULL), + sub_block_size_samples_(kOpusBlockDurationMs * kOpusSamplingKhz), channels_(FLAGS_channels), bit_rate_kbps_(FLAGS_bit_rate_kbps), fec_(FLAGS_fec), dtx_(FLAGS_dtx), - target_loss_rate_(FLAGS_reported_loss_rate) { + target_loss_rate_(FLAGS_reported_loss_rate), + sub_packets_(FLAGS_sub_packets) { } -void NetEqOpusFecQualityTest::SetUp() { +void NetEqOpusQualityTest::SetUp() { // If channels_ == 1, use Opus VOIP mode, otherwise, audio mode. int app = channels_ == 1 ? 0 : 1; // Create encoder memory. WebRtcOpus_EncoderCreate(&opus_encoder_, channels_, app); - ASSERT_TRUE(opus_encoder_ != NULL); + ASSERT_TRUE(opus_encoder_); + + // Create repacketizer. 
+ repacketizer_ = opus_repacketizer_create(); + ASSERT_TRUE(repacketizer_); + // Set bitrate. EXPECT_EQ(0, WebRtcOpus_SetBitRate(opus_encoder_, bit_rate_kbps_ * 1000)); if (fec_) { @@ -163,22 +187,36 @@ void NetEqOpusFecQualityTest::SetUp() { NetEqQualityTest::SetUp(); } -void NetEqOpusFecQualityTest::TearDown() { +void NetEqOpusQualityTest::TearDown() { // Free memory. EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_encoder_)); + opus_repacketizer_destroy(repacketizer_); NetEqQualityTest::TearDown(); } -int NetEqOpusFecQualityTest::EncodeBlock(int16_t* in_data, - int block_size_samples, - uint8_t* payload, int max_bytes) { - int value = WebRtcOpus_Encode(opus_encoder_, in_data, - block_size_samples, max_bytes, - payload); +int NetEqOpusQualityTest::EncodeBlock(int16_t* in_data, + int block_size_samples, + uint8_t* payload, int max_bytes) { + EXPECT_EQ(block_size_samples, sub_block_size_samples_ * sub_packets_); + int16_t* pointer = in_data; + int value; + opus_repacketizer_init(repacketizer_); + for (int idx = 0; idx < sub_packets_; idx++) { + value = WebRtcOpus_Encode(opus_encoder_, pointer, sub_block_size_samples_, + max_bytes, payload); + if (OPUS_OK != opus_repacketizer_cat(repacketizer_, payload, value)) { + opus_repacketizer_init(repacketizer_); + // If the repacketization fails, we discard this frame. + return 0; + } + pointer += sub_block_size_samples_ * channels_; + } + value = opus_repacketizer_out(repacketizer_, payload, max_bytes); + EXPECT_GE(value, 0); return value; } -TEST_F(NetEqOpusFecQualityTest, Test) { +TEST_F(NetEqOpusQualityTest, Test) { Simulate(FLAGS_runtime_ms); } |