diff options
author | minyue@webrtc.org <minyue@webrtc.org@4adac7df-926f-26a2-2b94-8c16560cd09d> | 2014-06-17 09:50:12 +0000 |
---|---|---|
committer | minyue@webrtc.org <minyue@webrtc.org@4adac7df-926f-26a2-2b94-8c16560cd09d> | 2014-06-17 09:50:12 +0000 |
commit | 28f69bb7c1dca28fa3274f61d8ecb64099d535db (patch) | |
tree | a06558d255dcd5aa041d9b6ad3a5efc393aabe14 | |
parent | d6d5bffab60c9fc3413ded251aa8513346d252c4 (diff) | |
download | webrtc-28f69bb7c1dca28fa3274f61d8ecb64099d535db.tar.gz |
Revert 6458 "Since NetEq4 is ready to handle 48 kHz codec, it is..."
> Since NetEq4 is ready to handle 48 kHz codecs, the 48-to-32 kHz downsampling of Opus output can be removed. This lets WebRTC make full use of Opus's bandwidth and eliminates the unneeded resampling computation.
>
> TEST=passed_all_trybots
> R=henrik.lundin@webrtc.org, tina.legrand@webrtc.org
>
> Review URL: https://webrtc-codereview.appspot.com/16619005
TBR=minyue@webrtc.org
Review URL: https://webrtc-codereview.appspot.com/17719004
git-svn-id: http://webrtc.googlecode.com/svn/trunk/webrtc@6462 4adac7df-926f-26a2-2b94-8c16560cd09d
12 files changed, 431 insertions, 247 deletions
diff --git a/modules/audio_coding/codecs/opus/opus_fec_test.cc b/modules/audio_coding/codecs/opus/opus_fec_test.cc index ee027e80..fb4cb04f 100644 --- a/modules/audio_coding/codecs/opus/opus_fec_test.cc +++ b/modules/audio_coding/codecs/opus/opus_fec_test.cc @@ -32,7 +32,8 @@ struct mode { }; const int kOpusBlockDurationMs = 20; -const int kOpusSamplingKhz = 48; +const int kOpusInputSamplingKhz = 48; +const int kOpusOutputSamplingKhz = 32; class OpusFecTest : public TestWithParam<coding_param> { protected: @@ -46,8 +47,14 @@ class OpusFecTest : public TestWithParam<coding_param> { virtual void DecodeABlock(bool lost_previous, bool lost_current); int block_duration_ms_; - int sampling_khz_; - int block_length_sample_; + int input_sampling_khz_; + int output_sampling_khz_; + + // Number of samples-per-channel in a frame. + int input_length_sample_; + + // Expected output number of samples-per-channel in a frame. + int output_length_sample_; int channels_; int bit_rate_; @@ -84,7 +91,7 @@ void OpusFecTest::SetUp() { // Allocate memory to contain the whole file. in_data_.reset(new int16_t[loop_length_samples_ + - block_length_sample_ * channels_]); + input_length_sample_ * channels_]); // Copy the file into the buffer. ASSERT_EQ(fread(&in_data_[0], sizeof(int16_t), loop_length_samples_, fp), @@ -97,12 +104,12 @@ void OpusFecTest::SetUp() { // beginning of the array. Audio frames cross the end of the excerpt always // appear as a continuum of memory. memcpy(&in_data_[loop_length_samples_], &in_data_[0], - block_length_sample_ * channels_ * sizeof(int16_t)); + input_length_sample_ * channels_ * sizeof(int16_t)); // Maximum number of bytes in output bitstream. 
- max_bytes_ = block_length_sample_ * channels_ * sizeof(int16_t); + max_bytes_ = input_length_sample_ * channels_ * sizeof(int16_t); - out_data_.reset(new int16_t[2 * block_length_sample_ * channels_]); + out_data_.reset(new int16_t[2 * output_length_sample_ * channels_]); bit_stream_.reset(new uint8_t[max_bytes_]); // Create encoder memory. @@ -120,8 +127,10 @@ void OpusFecTest::TearDown() { OpusFecTest::OpusFecTest() : block_duration_ms_(kOpusBlockDurationMs), - sampling_khz_(kOpusSamplingKhz), - block_length_sample_(block_duration_ms_ * sampling_khz_), + input_sampling_khz_(kOpusInputSamplingKhz), + output_sampling_khz_(kOpusOutputSamplingKhz), + input_length_sample_(block_duration_ms_ * input_sampling_khz_), + output_length_sample_(block_duration_ms_ * output_sampling_khz_), data_pointer_(0), max_bytes_(0), encoded_bytes_(0), @@ -132,7 +141,7 @@ OpusFecTest::OpusFecTest() void OpusFecTest::EncodeABlock() { int16_t value = WebRtcOpus_Encode(opus_encoder_, &in_data_[data_pointer_], - block_length_sample_, + input_length_sample_, max_bytes_, &bit_stream_[0]); EXPECT_GT(value, 0); @@ -153,7 +162,7 @@ void OpusFecTest::DecodeABlock(bool lost_previous, bool lost_current) { } else { value_1 = WebRtcOpus_DecodePlc(opus_decoder_, &out_data_[0], 1); } - EXPECT_EQ(block_length_sample_, value_1); + EXPECT_EQ(output_length_sample_, value_1); } if (!lost_current) { @@ -162,7 +171,7 @@ void OpusFecTest::DecodeABlock(bool lost_previous, bool lost_current) { encoded_bytes_, &out_data_[value_1 * channels_], &audio_type); - EXPECT_EQ(block_length_sample_, value_2); + EXPECT_EQ(output_length_sample_, value_2); } } @@ -215,7 +224,7 @@ TEST_P(OpusFecTest, RandomPacketLossTest) { // |data_pointer_| is incremented and wrapped across // |loop_length_samples_|. 
- data_pointer_ = (data_pointer_ + block_length_sample_ * channels_) % + data_pointer_ = (data_pointer_ + input_length_sample_ * channels_) % loop_length_samples_; } if (mode_set[i].fec) { diff --git a/modules/audio_coding/codecs/opus/opus_interface.c b/modules/audio_coding/codecs/opus/opus_interface.c index ea535ea9..24fc4fc4 100644 --- a/modules/audio_coding/codecs/opus/opus_interface.c +++ b/modules/audio_coding/codecs/opus/opus_interface.c @@ -15,6 +15,9 @@ #include "opus.h" +#include "webrtc/common_audio/signal_processing/resample_by_2_internal.h" +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" + enum { /* Maximum supported frame size in WebRTC is 60 ms. */ kWebRtcOpusMaxEncodeFrameSizeMs = 60, @@ -28,6 +31,17 @@ enum { * milliseconds. */ kWebRtcOpusMaxFrameSizePerChannel = 48 * kWebRtcOpusMaxDecodeFrameSizeMs, + /* Maximum sample count per frame is 48 kHz * maximum frame size in + * milliseconds * maximum number of channels. */ + kWebRtcOpusMaxFrameSize = kWebRtcOpusMaxFrameSizePerChannel * 2, + + /* Maximum sample count per channel for output resampled to 32 kHz, + * 32 kHz * maximum frame size in milliseconds. */ + kWebRtcOpusMaxFrameSizePerChannel32kHz = 32 * kWebRtcOpusMaxDecodeFrameSizeMs, + + /* Number of samples in resampler state. */ + kWebRtcOpusStateSize = 7, + /* Default frame size, 20 ms @ 48 kHz, in samples (for one channel). 
*/ kWebRtcOpusDefaultFrameSize = 960, }; @@ -129,6 +143,8 @@ int16_t WebRtcOpus_SetComplexity(OpusEncInst* inst, int32_t complexity) { } struct WebRtcOpusDecInst { + int16_t state_48_32_left[8]; + int16_t state_48_32_right[8]; OpusDecoder* decoder_left; OpusDecoder* decoder_right; int prev_decoded_samples; @@ -189,6 +205,8 @@ int WebRtcOpus_DecoderChannels(OpusDecInst* inst) { int16_t WebRtcOpus_DecoderInitNew(OpusDecInst* inst) { int error = opus_decoder_ctl(inst->decoder_left, OPUS_RESET_STATE); if (error == OPUS_OK) { + memset(inst->state_48_32_left, 0, sizeof(inst->state_48_32_left)); + memset(inst->state_48_32_right, 0, sizeof(inst->state_48_32_right)); return 0; } return -1; @@ -197,6 +215,7 @@ int16_t WebRtcOpus_DecoderInitNew(OpusDecInst* inst) { int16_t WebRtcOpus_DecoderInit(OpusDecInst* inst) { int error = opus_decoder_ctl(inst->decoder_left, OPUS_RESET_STATE); if (error == OPUS_OK) { + memset(inst->state_48_32_left, 0, sizeof(inst->state_48_32_left)); return 0; } return -1; @@ -205,6 +224,7 @@ int16_t WebRtcOpus_DecoderInit(OpusDecInst* inst) { int16_t WebRtcOpus_DecoderInitSlave(OpusDecInst* inst) { int error = opus_decoder_ctl(inst->decoder_right, OPUS_RESET_STATE); if (error == OPUS_OK) { + memset(inst->state_48_32_right, 0, sizeof(inst->state_48_32_right)); return 0; } return -1; @@ -247,29 +267,124 @@ static int DecodeFec(OpusDecoder* inst, const int16_t* encoded, return -1; } +/* Resample from 48 to 32 kHz. Length of state is assumed to be + * kWebRtcOpusStateSize (7). + */ +static int WebRtcOpus_Resample48to32(const int16_t* samples_in, int length, + int16_t* state, int16_t* samples_out) { + int i; + int blocks; + int16_t output_samples; + int32_t buffer32[kWebRtcOpusMaxFrameSizePerChannel + kWebRtcOpusStateSize]; + + /* Resample from 48 kHz to 32 kHz. 
*/ + for (i = 0; i < kWebRtcOpusStateSize; i++) { + buffer32[i] = state[i]; + state[i] = samples_in[length - kWebRtcOpusStateSize + i]; + } + for (i = 0; i < length; i++) { + buffer32[kWebRtcOpusStateSize + i] = samples_in[i]; + } + /* Resampling 3 samples to 2. Function divides the input in |blocks| number + * of 3-sample groups, and output is |blocks| number of 2-sample groups. + * When this is removed, the compensation in WebRtcOpus_DurationEst should be + * removed too. */ + blocks = length / 3; + WebRtcSpl_Resample48khzTo32khz(buffer32, buffer32, blocks); + output_samples = (int16_t) (blocks * 2); + WebRtcSpl_VectorBitShiftW32ToW16(samples_out, output_samples, buffer32, 15); + + return output_samples; +} + +static int WebRtcOpus_DeInterleaveResample(OpusDecInst* inst, int16_t* input, + int sample_pairs, int16_t* output) { + int i; + int16_t buffer_left[kWebRtcOpusMaxFrameSizePerChannel]; + int16_t buffer_right[kWebRtcOpusMaxFrameSizePerChannel]; + int16_t buffer_out[kWebRtcOpusMaxFrameSizePerChannel32kHz]; + int resampled_samples; + + /* De-interleave the signal in left and right channel. */ + for (i = 0; i < sample_pairs; i++) { + /* Take every second sample, starting at the first sample. */ + buffer_left[i] = input[i * 2]; + buffer_right[i] = input[i * 2 + 1]; + } + + /* Resample from 48 kHz to 32 kHz for left channel. */ + resampled_samples = WebRtcOpus_Resample48to32( + buffer_left, sample_pairs, inst->state_48_32_left, buffer_out); + + /* Add samples interleaved to output vector. */ + for (i = 0; i < resampled_samples; i++) { + output[i * 2] = buffer_out[i]; + } + + /* Resample from 48 kHz to 32 kHz for right channel. */ + resampled_samples = WebRtcOpus_Resample48to32( + buffer_right, sample_pairs, inst->state_48_32_right, buffer_out); + + /* Add samples interleaved to output vector. 
*/ + for (i = 0; i < resampled_samples; i++) { + output[i * 2 + 1] = buffer_out[i]; + } + + return resampled_samples; +} + int16_t WebRtcOpus_DecodeNew(OpusDecInst* inst, const uint8_t* encoded, int16_t encoded_bytes, int16_t* decoded, int16_t* audio_type) { + /* |buffer| is big enough for 120 ms (the largest Opus packet size) of stereo + * audio at 48 kHz. */ + int16_t buffer[kWebRtcOpusMaxFrameSize]; int16_t* coded = (int16_t*)encoded; int decoded_samples; + int resampled_samples; + + /* If mono case, just do a regular call to the decoder. + * If stereo, we need to de-interleave the stereo output into blocks with + * left and right channel. Each block is resampled to 32 kHz, and then + * interleaved again. */ + /* Decode to a temporary buffer. */ decoded_samples = DecodeNative(inst->decoder_left, coded, encoded_bytes, kWebRtcOpusMaxFrameSizePerChannel, - decoded, audio_type); + buffer, audio_type); if (decoded_samples < 0) { return -1; } + if (inst->channels == 2) { + /* De-interleave and resample. */ + resampled_samples = WebRtcOpus_DeInterleaveResample(inst, + buffer, + decoded_samples, + decoded); + } else { + /* Resample from 48 kHz to 32 kHz. Filter state memory for left channel is + * used for mono signals. */ + resampled_samples = WebRtcOpus_Resample48to32(buffer, + decoded_samples, + inst->state_48_32_left, + decoded); + } + /* Update decoded sample memory, to be used by the PLC in case of losses. */ inst->prev_decoded_samples = decoded_samples; - return decoded_samples; + return resampled_samples; } int16_t WebRtcOpus_Decode(OpusDecInst* inst, const int16_t* encoded, int16_t encoded_bytes, int16_t* decoded, int16_t* audio_type) { + /* |buffer16| is big enough for 120 ms (the largestOpus packet size) of + * stereo audio at 48 kHz. */ + int16_t buffer16[kWebRtcOpusMaxFrameSize]; int decoded_samples; + int16_t output_samples; int i; /* If mono case, just do a regular call to the decoder. 
@@ -278,82 +393,120 @@ int16_t WebRtcOpus_Decode(OpusDecInst* inst, const int16_t* encoded, * This is to make stereo work with the current setup of NetEQ, which * requires two calls to the decoder to produce stereo. */ + /* Decode to a temporary buffer. */ decoded_samples = DecodeNative(inst->decoder_left, encoded, encoded_bytes, - kWebRtcOpusMaxFrameSizePerChannel, decoded, + kWebRtcOpusMaxFrameSizePerChannel, buffer16, audio_type); if (decoded_samples < 0) { return -1; } if (inst->channels == 2) { /* The parameter |decoded_samples| holds the number of samples pairs, in - * case of stereo. Number of samples in |decoded| equals |decoded_samples| + * case of stereo. Number of samples in |buffer16| equals |decoded_samples| * times 2. */ for (i = 0; i < decoded_samples; i++) { /* Take every second sample, starting at the first sample. This gives * the left channel. */ - decoded[i] = decoded[i * 2]; + buffer16[i] = buffer16[i * 2]; } } + /* Resample from 48 kHz to 32 kHz. */ + output_samples = WebRtcOpus_Resample48to32(buffer16, decoded_samples, + inst->state_48_32_left, decoded); + /* Update decoded sample memory, to be used by the PLC in case of losses. */ inst->prev_decoded_samples = decoded_samples; - return decoded_samples; + return output_samples; } int16_t WebRtcOpus_DecodeSlave(OpusDecInst* inst, const int16_t* encoded, int16_t encoded_bytes, int16_t* decoded, int16_t* audio_type) { + /* |buffer16| is big enough for 120 ms (the largestOpus packet size) of + * stereo audio at 48 kHz. */ + int16_t buffer16[kWebRtcOpusMaxFrameSize]; int decoded_samples; + int16_t output_samples; int i; + /* Decode to a temporary buffer. */ decoded_samples = DecodeNative(inst->decoder_right, encoded, encoded_bytes, - kWebRtcOpusMaxFrameSizePerChannel, decoded, + kWebRtcOpusMaxFrameSizePerChannel, buffer16, audio_type); if (decoded_samples < 0) { return -1; } if (inst->channels == 2) { /* The parameter |decoded_samples| holds the number of samples pairs, in - * case of stereo. 
Number of samples in |decoded| equals |decoded_samples| + * case of stereo. Number of samples in |buffer16| equals |decoded_samples| * times 2. */ for (i = 0; i < decoded_samples; i++) { /* Take every second sample, starting at the second sample. This gives * the right channel. */ - decoded[i] = decoded[i * 2 + 1]; + buffer16[i] = buffer16[i * 2 + 1]; } } else { /* Decode slave should never be called for mono packets. */ return -1; } + /* Resample from 48 kHz to 32 kHz. */ + output_samples = WebRtcOpus_Resample48to32(buffer16, decoded_samples, + inst->state_48_32_right, decoded); - return decoded_samples; + return output_samples; } int16_t WebRtcOpus_DecodePlc(OpusDecInst* inst, int16_t* decoded, int16_t number_of_lost_frames) { + int16_t buffer[kWebRtcOpusMaxFrameSize]; int16_t audio_type = 0; int decoded_samples; + int resampled_samples; int plc_samples; - /* The number of samples we ask for is |number_of_lost_frames| times - * |prev_decoded_samples_|. Limit the number of samples to maximum - * |kWebRtcOpusMaxFrameSizePerChannel|. */ + /* If mono case, just do a regular call to the plc function, before + * resampling. + * If stereo, we need to de-interleave the stereo output into blocks with + * left and right channel. Each block is resampled to 32 kHz, and then + * interleaved again. */ + + /* Decode to a temporary buffer. The number of samples we ask for is + * |number_of_lost_frames| times |prev_decoded_samples_|. Limit the number + * of samples to maximum |kWebRtcOpusMaxFrameSizePerChannel|. */ plc_samples = number_of_lost_frames * inst->prev_decoded_samples; plc_samples = (plc_samples <= kWebRtcOpusMaxFrameSizePerChannel) ? plc_samples : kWebRtcOpusMaxFrameSizePerChannel; decoded_samples = DecodeNative(inst->decoder_left, NULL, 0, plc_samples, - decoded, &audio_type); + buffer, &audio_type); if (decoded_samples < 0) { return -1; } - return decoded_samples; + if (inst->channels == 2) { + /* De-interleave and resample. 
*/ + resampled_samples = WebRtcOpus_DeInterleaveResample(inst, + buffer, + decoded_samples, + decoded); + } else { + /* Resample from 48 kHz to 32 kHz. Filter state memory for left channel is + * used for mono signals. */ + resampled_samples = WebRtcOpus_Resample48to32(buffer, + decoded_samples, + inst->state_48_32_left, + decoded); + } + + return resampled_samples; } int16_t WebRtcOpus_DecodePlcMaster(OpusDecInst* inst, int16_t* decoded, int16_t number_of_lost_frames) { + int16_t buffer[kWebRtcOpusMaxFrameSize]; int decoded_samples; + int resampled_samples; int16_t audio_type = 0; int plc_samples; int i; @@ -364,35 +517,42 @@ int16_t WebRtcOpus_DecodePlcMaster(OpusDecInst* inst, int16_t* decoded, * output. This is to make stereo work with the current setup of NetEQ, which * requires two calls to the decoder to produce stereo. */ - /* The number of samples we ask for is |number_of_lost_frames| times - * |prev_decoded_samples_|. Limit the number of samples to maximum - * |kWebRtcOpusMaxFrameSizePerChannel|. */ + /* Decode to a temporary buffer. The number of samples we ask for is + * |number_of_lost_frames| times |prev_decoded_samples_|. Limit the number + * of samples to maximum |kWebRtcOpusMaxFrameSizePerChannel|. */ plc_samples = number_of_lost_frames * inst->prev_decoded_samples; plc_samples = (plc_samples <= kWebRtcOpusMaxFrameSizePerChannel) ? plc_samples : kWebRtcOpusMaxFrameSizePerChannel; decoded_samples = DecodeNative(inst->decoder_left, NULL, 0, plc_samples, - decoded, &audio_type); + buffer, &audio_type); if (decoded_samples < 0) { return -1; } if (inst->channels == 2) { /* The parameter |decoded_samples| holds the number of sample pairs, in - * case of stereo. The original number of samples in |decoded| equals + * case of stereo. The original number of samples in |buffer| equals * |decoded_samples| times 2. */ for (i = 0; i < decoded_samples; i++) { /* Take every second sample, starting at the first sample. This gives * the left channel. 
*/ - decoded[i] = decoded[i * 2]; + buffer[i] = buffer[i * 2]; } } - return decoded_samples; + /* Resample from 48 kHz to 32 kHz for left channel. */ + resampled_samples = WebRtcOpus_Resample48to32(buffer, + decoded_samples, + inst->state_48_32_left, + decoded); + return resampled_samples; } int16_t WebRtcOpus_DecodePlcSlave(OpusDecInst* inst, int16_t* decoded, int16_t number_of_lost_frames) { + int16_t buffer[kWebRtcOpusMaxFrameSize]; int decoded_samples; + int resampled_samples; int16_t audio_type = 0; int plc_samples; int i; @@ -403,35 +563,44 @@ int16_t WebRtcOpus_DecodePlcSlave(OpusDecInst* inst, int16_t* decoded, return -1; } - /* The number of samples we ask for is |number_of_lost_frames| times - * |prev_decoded_samples_|. Limit the number of samples to maximum - * |kWebRtcOpusMaxFrameSizePerChannel|. */ + /* Decode to a temporary buffer. The number of samples we ask for is + * |number_of_lost_frames| times |prev_decoded_samples_|. Limit the number + * of samples to maximum |kWebRtcOpusMaxFrameSizePerChannel|. */ plc_samples = number_of_lost_frames * inst->prev_decoded_samples; plc_samples = (plc_samples <= kWebRtcOpusMaxFrameSizePerChannel) ? plc_samples : kWebRtcOpusMaxFrameSizePerChannel; decoded_samples = DecodeNative(inst->decoder_right, NULL, 0, plc_samples, - decoded, &audio_type); + buffer, &audio_type); if (decoded_samples < 0) { return -1; } /* The parameter |decoded_samples| holds the number of sample pairs, - * The original number of samples in |decoded| equals |decoded_samples| + * The original number of samples in |buffer| equals |decoded_samples| * times 2. */ for (i = 0; i < decoded_samples; i++) { /* Take every second sample, starting at the second sample. This gives * the right channel. */ - decoded[i] = decoded[i * 2 + 1]; + buffer[i] = buffer[i * 2 + 1]; } - return decoded_samples; + /* Resample from 48 kHz to 32 kHz for left channel. 
*/ + resampled_samples = WebRtcOpus_Resample48to32(buffer, + decoded_samples, + inst->state_48_32_right, + decoded); + return resampled_samples; } int16_t WebRtcOpus_DecodeFec(OpusDecInst* inst, const uint8_t* encoded, int16_t encoded_bytes, int16_t* decoded, int16_t* audio_type) { + /* |buffer| is big enough for 120 ms (the largest Opus packet size) of stereo + * audio at 48 kHz. */ + int16_t buffer[kWebRtcOpusMaxFrameSize]; int16_t* coded = (int16_t*)encoded; int decoded_samples; + int resampled_samples; int fec_samples; if (WebRtcOpus_PacketHasFec(encoded, encoded_bytes) != 1) { @@ -440,13 +609,33 @@ int16_t WebRtcOpus_DecodeFec(OpusDecInst* inst, const uint8_t* encoded, fec_samples = opus_packet_get_samples_per_frame(encoded, 48000); + /* Decode to a temporary buffer. */ decoded_samples = DecodeFec(inst->decoder_left, coded, encoded_bytes, - fec_samples, decoded, audio_type); + fec_samples, buffer, audio_type); if (decoded_samples < 0) { return -1; } - return decoded_samples; + /* If mono case, just do a regular call to the decoder. + * If stereo, we need to de-interleave the stereo output into blocks with + * left and right channel. Each block is resampled to 32 kHz, and then + * interleaved again. */ + if (inst->channels == 2) { + /* De-interleave and resample. */ + resampled_samples = WebRtcOpus_DeInterleaveResample(inst, + buffer, + decoded_samples, + decoded); + } else { + /* Resample from 48 kHz to 32 kHz. Filter state memory for left channel is + * used for mono signals. */ + resampled_samples = WebRtcOpus_Resample48to32(buffer, + decoded_samples, + inst->state_48_32_left, + decoded); + } + + return resampled_samples; } int WebRtcOpus_DurationEst(OpusDecInst* inst, @@ -463,6 +652,10 @@ int WebRtcOpus_DurationEst(OpusDecInst* inst, /* Invalid payload duration. */ return 0; } + /* Compensate for the down-sampling from 48 kHz to 32 kHz. + * This should be removed when the resampling in WebRtcOpus_Decode is + * removed. 
*/ + samples = samples * 2 / 3; return samples; } @@ -478,6 +671,10 @@ int WebRtcOpus_FecDurationEst(const uint8_t* payload, /* Invalid payload duration. */ return 0; } + /* Compensate for the down-sampling from 48 kHz to 32 kHz. + * This should be removed when the resampling in WebRtcOpus_Decode is + * removed. */ + samples = samples * 2 / 3; return samples; } diff --git a/modules/audio_coding/codecs/opus/opus_speed_test.cc b/modules/audio_coding/codecs/opus/opus_speed_test.cc index e2439cf5..16099c6d 100644 --- a/modules/audio_coding/codecs/opus/opus_speed_test.cc +++ b/modules/audio_coding/codecs/opus/opus_speed_test.cc @@ -18,7 +18,8 @@ using ::testing::ValuesIn; namespace webrtc { static const int kOpusBlockDurationMs = 20; -static const int kOpusSamplingKhz = 48; +static const int kOpusInputSamplingKhz = 48; +static const int kOpustOutputSamplingKhz = 32; class OpusSpeedTest : public AudioCodecSpeedTest { protected: @@ -35,8 +36,8 @@ class OpusSpeedTest : public AudioCodecSpeedTest { OpusSpeedTest::OpusSpeedTest() : AudioCodecSpeedTest(kOpusBlockDurationMs, - kOpusSamplingKhz, - kOpusSamplingKhz), + kOpusInputSamplingKhz, + kOpustOutputSamplingKhz), opus_encoder_(NULL), opus_decoder_(NULL) { } diff --git a/modules/audio_coding/codecs/opus/opus_unittest.cc b/modules/audio_coding/codecs/opus/opus_unittest.cc index 2ec77a53..ed876cd1 100644 --- a/modules/audio_coding/codecs/opus/opus_unittest.cc +++ b/modules/audio_coding/codecs/opus/opus_unittest.cc @@ -19,13 +19,9 @@ struct WebRtcOpusDecInst; namespace webrtc { // Number of samples in a 60 ms stereo frame, sampled at 48 kHz. -const int kOpusMaxFrameSamples = 48 * 60 * 2; +const int kOpusNumberOfSamples = 480 * 6 * 2; // Maximum number of bytes in output bitstream. const size_t kMaxBytes = 1000; -// Number of samples-per-channel in a 20 ms frame, sampled at 48 kHz. -const int kOpus20msFrameSamples = 48 * 20; -// Number of samples-per-channel in a 10 ms frame, sampled at 48 kHz. 
-const int kOpus10msFrameSamples = 48 * 10; class OpusTest : public ::testing::Test { protected: @@ -39,8 +35,8 @@ class OpusTest : public ::testing::Test { WebRtcOpusDecInst* opus_stereo_decoder_; WebRtcOpusDecInst* opus_stereo_decoder_new_; - int16_t speech_data_[kOpusMaxFrameSamples]; - int16_t output_data_[kOpusMaxFrameSamples]; + int16_t speech_data_[kOpusNumberOfSamples]; + int16_t output_data_[kOpusNumberOfSamples]; uint8_t bitstream_[kMaxBytes]; }; @@ -54,14 +50,17 @@ OpusTest::OpusTest() } void OpusTest::SetUp() { + // Read some samples from a speech file, to be used in the encode test. + // In this test we do not care that the sampling frequency of the file is + // really 32000 Hz. We pretend that it is 48000 Hz. FILE* input_file; const std::string file_name = - webrtc::test::ResourcePath("audio_coding/speech_mono_32_48kHz", "pcm"); + webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm"); input_file = fopen(file_name.c_str(), "rb"); ASSERT_TRUE(input_file != NULL); - ASSERT_EQ(kOpusMaxFrameSamples, + ASSERT_EQ(kOpusNumberOfSamples, static_cast<int32_t>(fread(speech_data_, sizeof(int16_t), - kOpusMaxFrameSamples, input_file))); + kOpusNumberOfSamples, input_file))); fclose(input_file); input_file = NULL; } @@ -115,24 +114,21 @@ TEST_F(OpusTest, OpusEncodeDecodeMono) { // Encode & decode. 
int16_t encoded_bytes; int16_t audio_type; - int16_t output_data_decode_new[kOpusMaxFrameSamples]; - int16_t output_data_decode[kOpusMaxFrameSamples]; + int16_t output_data_decode_new[kOpusNumberOfSamples]; + int16_t output_data_decode[kOpusNumberOfSamples]; int16_t* coded = reinterpret_cast<int16_t*>(bitstream_); - encoded_bytes = WebRtcOpus_Encode(opus_mono_encoder_, speech_data_, - kOpus20msFrameSamples, kMaxBytes, - bitstream_); - EXPECT_EQ(kOpus20msFrameSamples, - WebRtcOpus_DecodeNew(opus_mono_decoder_new_, bitstream_, - encoded_bytes, output_data_decode_new, - &audio_type)); - EXPECT_EQ(kOpus20msFrameSamples, - WebRtcOpus_Decode(opus_mono_decoder_, coded, - encoded_bytes, output_data_decode, - &audio_type)); + encoded_bytes = WebRtcOpus_Encode(opus_mono_encoder_, speech_data_, 960, + kMaxBytes, bitstream_); + EXPECT_EQ(640, WebRtcOpus_DecodeNew(opus_mono_decoder_new_, bitstream_, + encoded_bytes, output_data_decode_new, + &audio_type)); + EXPECT_EQ(640, WebRtcOpus_Decode(opus_mono_decoder_, coded, + encoded_bytes, output_data_decode, + &audio_type)); // Data in |output_data_decode_new| should be the same as in // |output_data_decode|. - for (int i = 0; i < kOpus20msFrameSamples; i++) { + for (int i = 0; i < 640; i++) { EXPECT_EQ(output_data_decode_new[i], output_data_decode[i]); } @@ -158,30 +154,26 @@ TEST_F(OpusTest, OpusEncodeDecodeStereo) { // Encode & decode. 
int16_t encoded_bytes; int16_t audio_type; - int16_t output_data_decode_new[kOpusMaxFrameSamples]; - int16_t output_data_decode[kOpusMaxFrameSamples]; - int16_t output_data_decode_slave[kOpusMaxFrameSamples]; + int16_t output_data_decode_new[kOpusNumberOfSamples]; + int16_t output_data_decode[kOpusNumberOfSamples]; + int16_t output_data_decode_slave[kOpusNumberOfSamples]; int16_t* coded = reinterpret_cast<int16_t*>(bitstream_); - encoded_bytes = WebRtcOpus_Encode(opus_stereo_encoder_, speech_data_, - kOpus20msFrameSamples, kMaxBytes, - bitstream_); - EXPECT_EQ(kOpus20msFrameSamples, - WebRtcOpus_DecodeNew(opus_stereo_decoder_new_, bitstream_, - encoded_bytes, output_data_decode_new, - &audio_type)); - EXPECT_EQ(kOpus20msFrameSamples, - WebRtcOpus_Decode(opus_stereo_decoder_, coded, - encoded_bytes, output_data_decode, - &audio_type)); - EXPECT_EQ(kOpus20msFrameSamples, - WebRtcOpus_DecodeSlave(opus_stereo_decoder_, coded, - encoded_bytes, output_data_decode_slave, + encoded_bytes = WebRtcOpus_Encode(opus_stereo_encoder_, speech_data_, 960, + kMaxBytes, bitstream_); + EXPECT_EQ(640, WebRtcOpus_DecodeNew(opus_stereo_decoder_new_, bitstream_, + encoded_bytes, output_data_decode_new, + &audio_type)); + EXPECT_EQ(640, WebRtcOpus_Decode(opus_stereo_decoder_, coded, + encoded_bytes, output_data_decode, &audio_type)); + EXPECT_EQ(640, WebRtcOpus_DecodeSlave(opus_stereo_decoder_, coded, + encoded_bytes, output_data_decode_slave, + &audio_type)); // Data in |output_data_decode_new| should be the same as in // |output_data_decode| and |output_data_decode_slave| interleaved to a // stereo signal. - for (int i = 0; i < kOpus20msFrameSamples; i++) { + for (int i = 0; i < 640; i++) { EXPECT_EQ(output_data_decode_new[i * 2], output_data_decode[i]); EXPECT_EQ(output_data_decode_new[i * 2 + 1], output_data_decode_slave[i]); } @@ -242,30 +234,26 @@ TEST_F(OpusTest, OpusDecodeInit) { // Encode & decode. 
int16_t encoded_bytes; int16_t audio_type; - int16_t output_data_decode_new[kOpusMaxFrameSamples]; - int16_t output_data_decode[kOpusMaxFrameSamples]; - int16_t output_data_decode_slave[kOpusMaxFrameSamples]; + int16_t output_data_decode_new[kOpusNumberOfSamples]; + int16_t output_data_decode[kOpusNumberOfSamples]; + int16_t output_data_decode_slave[kOpusNumberOfSamples]; int16_t* coded = reinterpret_cast<int16_t*>(bitstream_); - encoded_bytes = WebRtcOpus_Encode(opus_stereo_encoder_, speech_data_, - kOpus20msFrameSamples, kMaxBytes, - bitstream_); - EXPECT_EQ(kOpus20msFrameSamples, - WebRtcOpus_DecodeNew(opus_stereo_decoder_new_, bitstream_, - encoded_bytes, output_data_decode_new, - &audio_type)); - EXPECT_EQ(kOpus20msFrameSamples, - WebRtcOpus_Decode(opus_stereo_decoder_, coded, - encoded_bytes, output_data_decode, - &audio_type)); - EXPECT_EQ(kOpus20msFrameSamples, - WebRtcOpus_DecodeSlave(opus_stereo_decoder_, coded, - encoded_bytes, output_data_decode_slave, + encoded_bytes = WebRtcOpus_Encode(opus_stereo_encoder_, speech_data_, 960, + kMaxBytes, bitstream_); + EXPECT_EQ(640, WebRtcOpus_DecodeNew(opus_stereo_decoder_new_, bitstream_, + encoded_bytes, output_data_decode_new, + &audio_type)); + EXPECT_EQ(640, WebRtcOpus_Decode(opus_stereo_decoder_, coded, + encoded_bytes, output_data_decode, &audio_type)); + EXPECT_EQ(640, WebRtcOpus_DecodeSlave(opus_stereo_decoder_, coded, + encoded_bytes, output_data_decode_slave, + &audio_type)); // Data in |output_data_decode_new| should be the same as in // |output_data_decode| and |output_data_decode_slave| interleaved to a // stereo signal. 
- for (int i = 0; i < kOpus20msFrameSamples; i++) { + for (int i = 0; i < 640; i++) { EXPECT_EQ(output_data_decode_new[i * 2], output_data_decode[i]); EXPECT_EQ(output_data_decode_new[i * 2 + 1], output_data_decode_slave[i]); } @@ -274,23 +262,20 @@ TEST_F(OpusTest, OpusDecodeInit) { EXPECT_EQ(0, WebRtcOpus_DecoderInit(opus_stereo_decoder_)); EXPECT_EQ(0, WebRtcOpus_DecoderInitSlave(opus_stereo_decoder_)); - EXPECT_EQ(kOpus20msFrameSamples, - WebRtcOpus_DecodeNew(opus_stereo_decoder_new_, bitstream_, - encoded_bytes, output_data_decode_new, - &audio_type)); - EXPECT_EQ(kOpus20msFrameSamples, - WebRtcOpus_Decode(opus_stereo_decoder_, coded, - encoded_bytes, output_data_decode, - &audio_type)); - EXPECT_EQ(kOpus20msFrameSamples, - WebRtcOpus_DecodeSlave(opus_stereo_decoder_, coded, - encoded_bytes, output_data_decode_slave, + EXPECT_EQ(640, WebRtcOpus_DecodeNew(opus_stereo_decoder_new_, bitstream_, + encoded_bytes, output_data_decode_new, + &audio_type)); + EXPECT_EQ(640, WebRtcOpus_Decode(opus_stereo_decoder_, coded, + encoded_bytes, output_data_decode, &audio_type)); + EXPECT_EQ(640, WebRtcOpus_DecodeSlave(opus_stereo_decoder_, coded, + encoded_bytes, output_data_decode_slave, + &audio_type)); // Data in |output_data_decode_new| should be the same as in // |output_data_decode| and |output_data_decode_slave| interleaved to a // stereo signal. - for (int i = 0; i < kOpus20msFrameSamples; i++) { + for (int i = 0; i < 640; i++) { EXPECT_EQ(output_data_decode_new[i * 2], output_data_decode[i]); EXPECT_EQ(output_data_decode_new[i * 2 + 1], output_data_decode_slave[i]); } @@ -359,31 +344,27 @@ TEST_F(OpusTest, OpusDecodePlcMono) { // Encode & decode. 
int16_t encoded_bytes; int16_t audio_type; - int16_t output_data_decode_new[kOpusMaxFrameSamples]; - int16_t output_data_decode[kOpusMaxFrameSamples]; + int16_t output_data_decode_new[kOpusNumberOfSamples]; + int16_t output_data_decode[kOpusNumberOfSamples]; int16_t* coded = reinterpret_cast<int16_t*>(bitstream_); - encoded_bytes = WebRtcOpus_Encode(opus_mono_encoder_, speech_data_, - kOpus20msFrameSamples, kMaxBytes, - bitstream_); - EXPECT_EQ(kOpus20msFrameSamples, - WebRtcOpus_DecodeNew(opus_mono_decoder_new_, bitstream_, - encoded_bytes, output_data_decode_new, - &audio_type)); - EXPECT_EQ(kOpus20msFrameSamples, - WebRtcOpus_Decode(opus_mono_decoder_, coded, - encoded_bytes, output_data_decode, - &audio_type)); + encoded_bytes = WebRtcOpus_Encode(opus_mono_encoder_, speech_data_, 960, + kMaxBytes, bitstream_); + EXPECT_EQ(640, WebRtcOpus_DecodeNew(opus_mono_decoder_new_, bitstream_, + encoded_bytes, output_data_decode_new, + &audio_type)); + EXPECT_EQ(640, WebRtcOpus_Decode(opus_mono_decoder_, coded, + encoded_bytes, output_data_decode, + &audio_type)); // Call decoder PLC for both versions of the decoder. - int16_t plc_buffer[kOpusMaxFrameSamples]; - int16_t plc_buffer_new[kOpusMaxFrameSamples]; - EXPECT_EQ(kOpus20msFrameSamples, - WebRtcOpus_DecodePlcMaster(opus_mono_decoder_, plc_buffer, 1)); - EXPECT_EQ(kOpus20msFrameSamples, - WebRtcOpus_DecodePlc(opus_mono_decoder_new_, plc_buffer_new, 1)); + int16_t plc_buffer[kOpusNumberOfSamples]; + int16_t plc_buffer_new[kOpusNumberOfSamples]; + EXPECT_EQ(640, WebRtcOpus_DecodePlcMaster(opus_mono_decoder_, plc_buffer, 1)); + EXPECT_EQ(640, WebRtcOpus_DecodePlc(opus_mono_decoder_new_, + plc_buffer_new, 1)); // Data in |plc_buffer| should be the same as in |plc_buffer_new|. - for (int i = 0; i < kOpus20msFrameSamples; i++) { + for (int i = 0; i < 640; i++) { EXPECT_EQ(plc_buffer[i], plc_buffer_new[i]); } @@ -410,42 +391,36 @@ TEST_F(OpusTest, OpusDecodePlcStereo) { // Encode & decode. 
int16_t encoded_bytes; int16_t audio_type; - int16_t output_data_decode_new[kOpusMaxFrameSamples]; - int16_t output_data_decode[kOpusMaxFrameSamples]; - int16_t output_data_decode_slave[kOpusMaxFrameSamples]; + int16_t output_data_decode_new[kOpusNumberOfSamples]; + int16_t output_data_decode[kOpusNumberOfSamples]; + int16_t output_data_decode_slave[kOpusNumberOfSamples]; int16_t* coded = reinterpret_cast<int16_t*>(bitstream_); - encoded_bytes = WebRtcOpus_Encode(opus_stereo_encoder_, speech_data_, - kOpus20msFrameSamples, kMaxBytes, - bitstream_); - EXPECT_EQ(kOpus20msFrameSamples, - WebRtcOpus_DecodeNew(opus_stereo_decoder_new_, bitstream_, - encoded_bytes, output_data_decode_new, - &audio_type)); - EXPECT_EQ(kOpus20msFrameSamples, - WebRtcOpus_Decode(opus_stereo_decoder_, coded, - encoded_bytes, output_data_decode, - &audio_type)); - EXPECT_EQ(kOpus20msFrameSamples, - WebRtcOpus_DecodeSlave(opus_stereo_decoder_, coded, - encoded_bytes, - output_data_decode_slave, + encoded_bytes = WebRtcOpus_Encode(opus_stereo_encoder_, speech_data_, 960, + kMaxBytes, bitstream_); + EXPECT_EQ(640, WebRtcOpus_DecodeNew(opus_stereo_decoder_new_, bitstream_, + encoded_bytes, output_data_decode_new, + &audio_type)); + EXPECT_EQ(640, WebRtcOpus_Decode(opus_stereo_decoder_, coded, + encoded_bytes, output_data_decode, &audio_type)); + EXPECT_EQ(640, WebRtcOpus_DecodeSlave(opus_stereo_decoder_, coded, + encoded_bytes, + output_data_decode_slave, + &audio_type)); // Call decoder PLC for both versions of the decoder. 
- int16_t plc_buffer_left[kOpusMaxFrameSamples]; - int16_t plc_buffer_right[kOpusMaxFrameSamples]; - int16_t plc_buffer_new[kOpusMaxFrameSamples]; - EXPECT_EQ(kOpus20msFrameSamples, - WebRtcOpus_DecodePlcMaster(opus_stereo_decoder_, - plc_buffer_left, 1)); - EXPECT_EQ(kOpus20msFrameSamples, - WebRtcOpus_DecodePlcSlave(opus_stereo_decoder_, - plc_buffer_right, 1)); - EXPECT_EQ(kOpus20msFrameSamples, - WebRtcOpus_DecodePlc(opus_stereo_decoder_new_, plc_buffer_new, 1)); + int16_t plc_buffer_left[kOpusNumberOfSamples]; + int16_t plc_buffer_right[kOpusNumberOfSamples]; + int16_t plc_buffer_new[kOpusNumberOfSamples]; + EXPECT_EQ(640, WebRtcOpus_DecodePlcMaster(opus_stereo_decoder_, + plc_buffer_left, 1)); + EXPECT_EQ(640, WebRtcOpus_DecodePlcSlave(opus_stereo_decoder_, + plc_buffer_right, 1)); + EXPECT_EQ(640, WebRtcOpus_DecodePlc(opus_stereo_decoder_new_, plc_buffer_new, + 1)); // Data in |plc_buffer_left| and |plc_buffer_right|should be the same as the // interleaved samples in |plc_buffer_new|. - for (int i = 0, j = 0; i < kOpus20msFrameSamples; i++) { + for (int i = 0, j = 0; i < 640; i++) { EXPECT_EQ(plc_buffer_left[i], plc_buffer_new[j++]); EXPECT_EQ(plc_buffer_right[i], plc_buffer_new[j++]); } @@ -462,23 +437,21 @@ TEST_F(OpusTest, OpusDurationEstimation) { EXPECT_EQ(0, WebRtcOpus_EncoderCreate(&opus_stereo_encoder_, 2)); EXPECT_EQ(0, WebRtcOpus_DecoderCreate(&opus_stereo_decoder_, 2)); + // Encode with different packet sizes (input 48 kHz, output in 32 kHz). int16_t encoded_bytes; // 10 ms. 
- encoded_bytes = WebRtcOpus_Encode(opus_stereo_encoder_, speech_data_, - kOpus10msFrameSamples, kMaxBytes, - bitstream_); - EXPECT_EQ(kOpus10msFrameSamples, - WebRtcOpus_DurationEst(opus_stereo_decoder_, bitstream_, - encoded_bytes)); + encoded_bytes = WebRtcOpus_Encode(opus_stereo_encoder_, speech_data_, 480, + kMaxBytes, bitstream_); + EXPECT_EQ(320, WebRtcOpus_DurationEst(opus_stereo_decoder_, bitstream_, + encoded_bytes)); // 20 ms - encoded_bytes = WebRtcOpus_Encode(opus_stereo_encoder_, speech_data_, - kOpus20msFrameSamples, kMaxBytes, - bitstream_); - EXPECT_EQ(kOpus20msFrameSamples, - WebRtcOpus_DurationEst(opus_stereo_decoder_, bitstream_, - encoded_bytes)); + encoded_bytes = WebRtcOpus_Encode(opus_stereo_encoder_, speech_data_, 960, + kMaxBytes, bitstream_); + EXPECT_EQ(640, WebRtcOpus_DurationEst(opus_stereo_decoder_, bitstream_, + encoded_bytes)); + // Free memory. EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_stereo_encoder_)); diff --git a/modules/audio_coding/main/acm2/audio_coding_module_impl.cc b/modules/audio_coding/main/acm2/audio_coding_module_impl.cc index 5ee211e8..a07e8543 100644 --- a/modules/audio_coding/main/acm2/audio_coding_module_impl.cc +++ b/modules/audio_coding/main/acm2/audio_coding_module_impl.cc @@ -1616,8 +1616,14 @@ int AudioCodingModuleImpl::ReceiveFrequency() const { int codec_id = receiver_.last_audio_codec_id(); - return codec_id < 0 ? receiver_.current_sample_rate_hz() : - ACMCodecDB::database_[codec_id].plfreq; + int sample_rate_hz; + if (codec_id < 0) + sample_rate_hz = receiver_.current_sample_rate_hz(); + else + sample_rate_hz = ACMCodecDB::database_[codec_id].plfreq; + + // TODO(tlegrand): Remove this option when we have full 48 kHz support. + return (sample_rate_hz > 32000) ? 32000 : sample_rate_hz; } // Get current playout frequency. 
diff --git a/modules/audio_coding/main/test/opus_test.cc b/modules/audio_coding/main/test/opus_test.cc index 398d59da..261eb613 100644 --- a/modules/audio_coding/main/test/opus_test.cc +++ b/modules/audio_coding/main/test/opus_test.cc @@ -218,8 +218,6 @@ void OpusTest::Run(TestPackStereo* channel, int channels, int bitrate, int written_samples = 0; int read_samples = 0; int decoded_samples = 0; - bool first_packet = true; - uint32_t start_time_stamp = 0; channel->reset_payload_size(); counter_ = 0; @@ -326,10 +324,6 @@ void OpusTest::Run(TestPackStereo* channel, int channels, int bitrate, // Send data to the channel. "channel" will handle the loss simulation. channel->SendData(kAudioFrameSpeech, payload_type_, rtp_timestamp_, bitstream, bitstream_len_byte, NULL); - if (first_packet) { - first_packet = false; - start_time_stamp = rtp_timestamp_; - } rtp_timestamp_ += frame_length; read_samples += frame_length * channels; } @@ -350,11 +344,9 @@ void OpusTest::Run(TestPackStereo* channel, int channels, int bitrate, // Write stand-alone speech to file. out_file_standalone_.Write10MsData(out_audio, decoded_samples * channels); - if (audio_frame.timestamp_ > start_time_stamp) { - // Number of channels should be the same for both stand-alone and - // ACM-decoding. - EXPECT_EQ(audio_frame.num_channels_, channels); - } + // Number of channels should be the same for both stand-alone and + // ACM-decoding. 
+ EXPECT_EQ(audio_frame.num_channels_, channels); decoded_samples = 0; } @@ -375,13 +367,13 @@ void OpusTest::OpenOutFile(int test_number) { file_stream << webrtc::test::OutputPath() << "opustest_out_" << test_number << ".pcm"; file_name = file_stream.str(); - out_file_.Open(file_name, 48000, "wb"); + out_file_.Open(file_name, 32000, "wb"); file_stream.str(""); file_name = file_stream.str(); file_stream << webrtc::test::OutputPath() << "opusstandalone_out_" << test_number << ".pcm"; file_name = file_stream.str(); - out_file_standalone_.Open(file_name, 48000, "wb"); + out_file_standalone_.Open(file_name, 32000, "wb"); } } // namespace webrtc diff --git a/modules/audio_coding/neteq/audio_decoder.cc b/modules/audio_coding/neteq/audio_decoder.cc index 0fdaa44b..f539bb2e 100644 --- a/modules/audio_coding/neteq/audio_decoder.cc +++ b/modules/audio_coding/neteq/audio_decoder.cc @@ -162,7 +162,7 @@ int AudioDecoder::CodecSampleRateHz(NetEqDecoder codec_type) { #ifdef WEBRTC_CODEC_OPUS case kDecoderOpus: case kDecoderOpus_2ch: { - return 48000; + return 32000; } #endif case kDecoderCNGswb48kHz: { diff --git a/modules/audio_coding/neteq/audio_decoder_unittest.cc b/modules/audio_coding/neteq/audio_decoder_unittest.cc index 7eb31423..f82644cb 100644 --- a/modules/audio_coding/neteq/audio_decoder_unittest.cc +++ b/modules/audio_coding/neteq/audio_decoder_unittest.cc @@ -607,7 +607,7 @@ class AudioDecoderCeltStereoTest : public AudioDecoderTest { class AudioDecoderOpusTest : public AudioDecoderTest { protected: AudioDecoderOpusTest() : AudioDecoderTest() { - frame_size_ = 480; + frame_size_ = 320; data_length_ = 10 * frame_size_; decoder_ = new AudioDecoderOpus(kDecoderOpus); assert(decoder_); @@ -618,69 +618,75 @@ class AudioDecoderOpusTest : public AudioDecoderTest { WebRtcOpus_EncoderFree(encoder_); } - virtual void SetUp() OVERRIDE { - AudioDecoderTest::SetUp(); + virtual void InitEncoder() {} + + virtual int EncodeFrame(const int16_t* input, size_t input_len_samples, + 
uint8_t* output) { // Upsample from 32 to 48 kHz. - // Because Opus is 48 kHz codec but the input file is 32 kHz, so the data - // read in |AudioDecoderTest::SetUp| has to be upsampled. - // |AudioDecoderTest::SetUp| has read |data_length_| samples, which is more - // than necessary after upsampling, so the end of audio that has been read - // is unused and the end of the buffer is overwritten by the resampled data. Resampler rs; rs.Reset(32000, 48000, kResamplerSynchronous); - const int before_resamp_len_samples = static_cast<int>(data_length_) * 2 - / 3; - int16_t* before_resamp_input = new int16_t[before_resamp_len_samples]; - memcpy(before_resamp_input, input_, - sizeof(int16_t) * before_resamp_len_samples); + const int max_resamp_len_samples = static_cast<int>(input_len_samples) * + 3 / 2; + int16_t* resamp_input = new int16_t[max_resamp_len_samples]; int resamp_len_samples; - EXPECT_EQ(0, rs.Push(before_resamp_input, before_resamp_len_samples, - input_, static_cast<int>(data_length_), + EXPECT_EQ(0, rs.Push(input, static_cast<int>(input_len_samples), + resamp_input, max_resamp_len_samples, resamp_len_samples)); - EXPECT_EQ(static_cast<int>(data_length_), resamp_len_samples); - delete[] before_resamp_input; - } - - virtual void InitEncoder() {} - - virtual int EncodeFrame(const int16_t* input, size_t input_len_samples, - uint8_t* output) OVERRIDE { - int enc_len_bytes = WebRtcOpus_Encode(encoder_, const_cast<int16_t*>(input), - static_cast<int16_t>(input_len_samples), - static_cast<int16_t>(data_length_), output); + EXPECT_EQ(max_resamp_len_samples, resamp_len_samples); + int enc_len_bytes = + WebRtcOpus_Encode(encoder_, resamp_input, resamp_len_samples, + static_cast<int>(data_length_), output); EXPECT_GT(enc_len_bytes, 0); + delete [] resamp_input; return enc_len_bytes; } OpusEncInst* encoder_; }; -class AudioDecoderOpusStereoTest : public AudioDecoderOpusTest { +class AudioDecoderOpusStereoTest : public AudioDecoderTest { protected: - 
AudioDecoderOpusStereoTest() : AudioDecoderOpusTest() { + AudioDecoderOpusStereoTest() : AudioDecoderTest() { channels_ = 2; - WebRtcOpus_EncoderFree(encoder_); - delete decoder_; + frame_size_ = 320; + data_length_ = 10 * frame_size_; decoder_ = new AudioDecoderOpus(kDecoderOpus_2ch); assert(decoder_); WebRtcOpus_EncoderCreate(&encoder_, 2); } + ~AudioDecoderOpusStereoTest() { + WebRtcOpus_EncoderFree(encoder_); + } + + virtual void InitEncoder() {} + virtual int EncodeFrame(const int16_t* input, size_t input_len_samples, - uint8_t* output) OVERRIDE { + uint8_t* output) { // Create stereo by duplicating each sample in |input|. const int input_stereo_samples = static_cast<int>(input_len_samples) * 2; int16_t* input_stereo = new int16_t[input_stereo_samples]; for (size_t i = 0; i < input_len_samples; i++) input_stereo[i * 2] = input_stereo[i * 2 + 1] = input[i]; - - int enc_len_bytes = WebRtcOpus_Encode( - encoder_, input_stereo, static_cast<int16_t>(input_len_samples), - static_cast<int16_t>(data_length_), output); + // Upsample from 32 to 48 kHz. 
+ Resampler rs; + rs.Reset(32000, 48000, kResamplerSynchronousStereo); + const int max_resamp_len_samples = input_stereo_samples * 3 / 2; + int16_t* resamp_input = new int16_t[max_resamp_len_samples]; + int resamp_len_samples; + EXPECT_EQ(0, rs.Push(input_stereo, input_stereo_samples, resamp_input, + max_resamp_len_samples, resamp_len_samples)); + EXPECT_EQ(max_resamp_len_samples, resamp_len_samples); + int enc_len_bytes = + WebRtcOpus_Encode(encoder_, resamp_input, resamp_len_samples / 2, + static_cast<int16_t>(data_length_), output); EXPECT_GT(enc_len_bytes, 0); - delete[] input_stereo; + delete [] resamp_input; + delete [] input_stereo; return enc_len_bytes; } + + OpusEncInst* encoder_; }; TEST_F(AudioDecoderPcmUTest, EncodeDecode) { @@ -870,11 +876,11 @@ TEST(AudioDecoder, CodecSampleRateHz) { EXPECT_EQ(8000, AudioDecoder::CodecSampleRateHz(kDecoderCNGnb)); EXPECT_EQ(16000, AudioDecoder::CodecSampleRateHz(kDecoderCNGwb)); EXPECT_EQ(32000, AudioDecoder::CodecSampleRateHz(kDecoderCNGswb32kHz)); - EXPECT_EQ(48000, AudioDecoder::CodecSampleRateHz(kDecoderOpus)); - EXPECT_EQ(48000, AudioDecoder::CodecSampleRateHz(kDecoderOpus_2ch)); // TODO(tlegrand): Change 32000 to 48000 below once ACM has 48 kHz support. 
EXPECT_EQ(32000, AudioDecoder::CodecSampleRateHz(kDecoderCNGswb48kHz)); EXPECT_EQ(-1, AudioDecoder::CodecSampleRateHz(kDecoderArbitrary)); + EXPECT_EQ(32000, AudioDecoder::CodecSampleRateHz(kDecoderOpus)); + EXPECT_EQ(32000, AudioDecoder::CodecSampleRateHz(kDecoderOpus_2ch)); #ifdef WEBRTC_CODEC_CELT EXPECT_EQ(32000, AudioDecoder::CodecSampleRateHz(kDecoderCELT_32)); EXPECT_EQ(32000, AudioDecoder::CodecSampleRateHz(kDecoderCELT_32_2ch)); diff --git a/modules/audio_coding/neteq/payload_splitter_unittest.cc b/modules/audio_coding/neteq/payload_splitter_unittest.cc index 9d0aaa1d..5cde1bda 100644 --- a/modules/audio_coding/neteq/payload_splitter_unittest.cc +++ b/modules/audio_coding/neteq/payload_splitter_unittest.cc @@ -743,7 +743,7 @@ TEST(FecPayloadSplitter, MixedPayload) { // Check first packet. packet = packet_list.front(); EXPECT_EQ(0, packet->header.payloadType); - EXPECT_EQ(kBaseTimestamp - 20 * 48, packet->header.timestamp); + EXPECT_EQ(kBaseTimestamp - 20 * 32, packet->header.timestamp); EXPECT_EQ(10, packet->payload_length); EXPECT_FALSE(packet->primary); delete [] packet->payload; diff --git a/modules/audio_coding/neteq/test/neteq_opus_fec_quality_test.cc b/modules/audio_coding/neteq/test/neteq_opus_fec_quality_test.cc index 66a448a1..ad6d8ece 100644 --- a/modules/audio_coding/neteq/test/neteq_opus_fec_quality_test.cc +++ b/modules/audio_coding/neteq/test/neteq_opus_fec_quality_test.cc @@ -22,7 +22,8 @@ namespace webrtc { namespace test { static const int kOpusBlockDurationMs = 20; -static const int kOpusSamplingKhz = 48; +static const int kOpusInputSamplingKhz = 48; +static const int kOpusOutputSamplingKhz = 32; static bool ValidateInFilename(const char* flagname, const string& value) { FILE* fid = fopen(value.c_str(), "rb"); @@ -116,8 +117,8 @@ class NetEqOpusFecQualityTest : public NetEqQualityTest { }; NetEqOpusFecQualityTest::NetEqOpusFecQualityTest() - : NetEqQualityTest(kOpusBlockDurationMs, kOpusSamplingKhz, - kOpusSamplingKhz, + : 
NetEqQualityTest(kOpusBlockDurationMs, kOpusInputSamplingKhz, + kOpusOutputSamplingKhz, (FLAGS_channels == 1) ? kDecoderOpus : kDecoderOpus_2ch, FLAGS_channels, 0.0f, FLAGS_in_filename, FLAGS_out_filename), diff --git a/modules/audio_coding/neteq/timestamp_scaler.cc b/modules/audio_coding/neteq/timestamp_scaler.cc index 1809324b..01890136 100644 --- a/modules/audio_coding/neteq/timestamp_scaler.cc +++ b/modules/audio_coding/neteq/timestamp_scaler.cc @@ -48,6 +48,8 @@ uint32_t TimestampScaler::ToInternal(uint32_t external_timestamp, denominator_ = 1; break; } + case kDecoderOpus: + case kDecoderOpus_2ch: case kDecoderISACfb: case kDecoderCNGswb48kHz: { // Use timestamp scaling with factor 2/3 (32 kHz sample rate, but RTP diff --git a/modules/audio_coding/neteq/timestamp_scaler_unittest.cc b/modules/audio_coding/neteq/timestamp_scaler_unittest.cc index 1cbbf7f3..8cbbfa39 100644 --- a/modules/audio_coding/neteq/timestamp_scaler_unittest.cc +++ b/modules/audio_coding/neteq/timestamp_scaler_unittest.cc @@ -252,14 +252,10 @@ TEST(TimestampScaler, TestG722Reset) { EXPECT_CALL(db, Die()); // Called when database object is deleted. } -// TODO(minyue): This test becomes trivial since Opus does not need a timestamp -// scaler. Therefore, this test may be removed in future. There is no harm to -// keep it, since it can be taken as a test case for the situation of a trivial -// timestamp scaler. TEST(TimestampScaler, TestOpusLargeStep) { MockDecoderDatabase db; DecoderDatabase::DecoderInfo info; - info.codec_type = kDecoderOpus; + info.codec_type = kDecoderOpus; // Uses a factor 2/3 scaling. static const uint8_t kRtpPayloadType = 17; EXPECT_CALL(db, GetDecoderInfo(kRtpPayloadType)) .WillRepeatedly(Return(&info)); @@ -277,7 +273,8 @@ TEST(TimestampScaler, TestOpusLargeStep) { scaler.ToInternal(external_timestamp, kRtpPayloadType)); // Scale back. 
EXPECT_EQ(external_timestamp, scaler.ToExternal(internal_timestamp)); - internal_timestamp += kStep; + // Internal timestamp should be incremented with twice the step. + internal_timestamp += 2 * kStep / 3; } EXPECT_CALL(db, Die()); // Called when database object is deleted. @@ -286,7 +283,7 @@ TEST(TimestampScaler, TestOpusLargeStep) { TEST(TimestampScaler, TestIsacFbLargeStep) { MockDecoderDatabase db; DecoderDatabase::DecoderInfo info; - info.codec_type = kDecoderISACfb; + info.codec_type = kDecoderISACfb; // Uses a factor 2/3 scaling. static const uint8_t kRtpPayloadType = 17; EXPECT_CALL(db, GetDecoderInfo(kRtpPayloadType)) .WillRepeatedly(Return(&info)); @@ -304,7 +301,7 @@ TEST(TimestampScaler, TestIsacFbLargeStep) { scaler.ToInternal(external_timestamp, kRtpPayloadType)); // Scale back. EXPECT_EQ(external_timestamp, scaler.ToExternal(internal_timestamp)); - // Internal timestamp should be incremented with two-thirds the step. + // Internal timestamp should be incremented with twice the step. internal_timestamp += 2 * kStep / 3; } |