diff options
Diffstat (limited to 'modules/audio_coding/codecs/opus')
-rw-r--r-- | modules/audio_coding/codecs/opus/opus_fec_test.cc | 35 | ||||
-rw-r--r-- | modules/audio_coding/codecs/opus/opus_interface.c | 259 | ||||
-rw-r--r-- | modules/audio_coding/codecs/opus/opus_speed_test.cc | 7 | ||||
-rw-r--r-- | modules/audio_coding/codecs/opus/opus_unittest.cc | 241 |
4 files changed, 181 insertions, 361 deletions
diff --git a/modules/audio_coding/codecs/opus/opus_fec_test.cc b/modules/audio_coding/codecs/opus/opus_fec_test.cc index fb4cb04f..ee027e80 100644 --- a/modules/audio_coding/codecs/opus/opus_fec_test.cc +++ b/modules/audio_coding/codecs/opus/opus_fec_test.cc @@ -32,8 +32,7 @@ struct mode { }; const int kOpusBlockDurationMs = 20; -const int kOpusInputSamplingKhz = 48; -const int kOpusOutputSamplingKhz = 32; +const int kOpusSamplingKhz = 48; class OpusFecTest : public TestWithParam<coding_param> { protected: @@ -47,14 +46,8 @@ class OpusFecTest : public TestWithParam<coding_param> { virtual void DecodeABlock(bool lost_previous, bool lost_current); int block_duration_ms_; - int input_sampling_khz_; - int output_sampling_khz_; - - // Number of samples-per-channel in a frame. - int input_length_sample_; - - // Expected output number of samples-per-channel in a frame. - int output_length_sample_; + int sampling_khz_; + int block_length_sample_; int channels_; int bit_rate_; @@ -91,7 +84,7 @@ void OpusFecTest::SetUp() { // Allocate memory to contain the whole file. in_data_.reset(new int16_t[loop_length_samples_ + - input_length_sample_ * channels_]); + block_length_sample_ * channels_]); // Copy the file into the buffer. ASSERT_EQ(fread(&in_data_[0], sizeof(int16_t), loop_length_samples_, fp), @@ -104,12 +97,12 @@ void OpusFecTest::SetUp() { // beginning of the array. Audio frames cross the end of the excerpt always // appear as a continuum of memory. memcpy(&in_data_[loop_length_samples_], &in_data_[0], - input_length_sample_ * channels_ * sizeof(int16_t)); + block_length_sample_ * channels_ * sizeof(int16_t)); // Maximum number of bytes in output bitstream. - max_bytes_ = input_length_sample_ * channels_ * sizeof(int16_t); + max_bytes_ = block_length_sample_ * channels_ * sizeof(int16_t); - out_data_.reset(new int16_t[2 * output_length_sample_ * channels_]); + out_data_.reset(new int16_t[2 * block_length_sample_ * channels_]); bit_stream_.reset(new uint8_t[max_bytes_]); // Create encoder memory. @@ -127,10 +120,8 @@ void OpusFecTest::TearDown() { OpusFecTest::OpusFecTest() : block_duration_ms_(kOpusBlockDurationMs), - input_sampling_khz_(kOpusInputSamplingKhz), - output_sampling_khz_(kOpusOutputSamplingKhz), - input_length_sample_(block_duration_ms_ * input_sampling_khz_), - output_length_sample_(block_duration_ms_ * output_sampling_khz_), + sampling_khz_(kOpusSamplingKhz), + block_length_sample_(block_duration_ms_ * sampling_khz_), data_pointer_(0), max_bytes_(0), encoded_bytes_(0), @@ -141,7 +132,7 @@ OpusFecTest::OpusFecTest() void OpusFecTest::EncodeABlock() { int16_t value = WebRtcOpus_Encode(opus_encoder_, &in_data_[data_pointer_], - input_length_sample_, + block_length_sample_, max_bytes_, &bit_stream_[0]); EXPECT_GT(value, 0); @@ -162,7 +153,7 @@ void OpusFecTest::DecodeABlock(bool lost_previous, bool lost_current) { } else { value_1 = WebRtcOpus_DecodePlc(opus_decoder_, &out_data_[0], 1); } - EXPECT_EQ(output_length_sample_, value_1); + EXPECT_EQ(block_length_sample_, value_1); } if (!lost_current) { @@ -171,7 +162,7 @@ void OpusFecTest::DecodeABlock(bool lost_previous, bool lost_current) { encoded_bytes_, &out_data_[value_1 * channels_], &audio_type); - EXPECT_EQ(output_length_sample_, value_2); + EXPECT_EQ(block_length_sample_, value_2); } } @@ -224,7 +215,7 @@ TEST_P(OpusFecTest, RandomPacketLossTest) { // |data_pointer_| is incremented and wrapped across // |loop_length_samples_|. - data_pointer_ = (data_pointer_ + input_length_sample_ * channels_) % + data_pointer_ = (data_pointer_ + block_length_sample_ * channels_) % loop_length_samples_; } if (mode_set[i].fec) { diff --git a/modules/audio_coding/codecs/opus/opus_interface.c b/modules/audio_coding/codecs/opus/opus_interface.c index 24fc4fc4..ea535ea9 100644 --- a/modules/audio_coding/codecs/opus/opus_interface.c +++ b/modules/audio_coding/codecs/opus/opus_interface.c @@ -15,9 +15,6 @@ #include "opus.h" -#include "webrtc/common_audio/signal_processing/resample_by_2_internal.h" -#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" - enum { /* Maximum supported frame size in WebRTC is 60 ms. */ kWebRtcOpusMaxEncodeFrameSizeMs = 60, @@ -31,17 +28,6 @@ enum { * milliseconds. */ kWebRtcOpusMaxFrameSizePerChannel = 48 * kWebRtcOpusMaxDecodeFrameSizeMs, - /* Maximum sample count per frame is 48 kHz * maximum frame size in - * milliseconds * maximum number of channels. */ - kWebRtcOpusMaxFrameSize = kWebRtcOpusMaxFrameSizePerChannel * 2, - - /* Maximum sample count per channel for output resampled to 32 kHz, - * 32 kHz * maximum frame size in milliseconds. */ - kWebRtcOpusMaxFrameSizePerChannel32kHz = 32 * kWebRtcOpusMaxDecodeFrameSizeMs, - - /* Number of samples in resampler state. */ - kWebRtcOpusStateSize = 7, - /* Default frame size, 20 ms @ 48 kHz, in samples (for one channel). */ kWebRtcOpusDefaultFrameSize = 960, }; @@ -143,8 +129,6 @@ int16_t WebRtcOpus_SetComplexity(OpusEncInst* inst, int32_t complexity) { } struct WebRtcOpusDecInst { - int16_t state_48_32_left[8]; - int16_t state_48_32_right[8]; OpusDecoder* decoder_left; OpusDecoder* decoder_right; int prev_decoded_samples; @@ -205,8 +189,6 @@ int WebRtcOpus_DecoderChannels(OpusDecInst* inst) { int16_t WebRtcOpus_DecoderInitNew(OpusDecInst* inst) { int error = opus_decoder_ctl(inst->decoder_left, OPUS_RESET_STATE); if (error == OPUS_OK) { - memset(inst->state_48_32_left, 0, sizeof(inst->state_48_32_left)); - memset(inst->state_48_32_right, 0, sizeof(inst->state_48_32_right)); return 0; } return -1; @@ -215,7 +197,6 @@ int16_t WebRtcOpus_DecoderInitNew(OpusDecInst* inst) { int16_t WebRtcOpus_DecoderInit(OpusDecInst* inst) { int error = opus_decoder_ctl(inst->decoder_left, OPUS_RESET_STATE); if (error == OPUS_OK) { - memset(inst->state_48_32_left, 0, sizeof(inst->state_48_32_left)); return 0; } return -1; @@ -224,7 +205,6 @@ int16_t WebRtcOpus_DecoderInit(OpusDecInst* inst) { int16_t WebRtcOpus_DecoderInitSlave(OpusDecInst* inst) { int error = opus_decoder_ctl(inst->decoder_right, OPUS_RESET_STATE); if (error == OPUS_OK) { - memset(inst->state_48_32_right, 0, sizeof(inst->state_48_32_right)); return 0; } return -1; @@ -267,124 +247,29 @@ static int DecodeFec(OpusDecoder* inst, const int16_t* encoded, return -1; } -/* Resample from 48 to 32 kHz. Length of state is assumed to be - * kWebRtcOpusStateSize (7). - */ -static int WebRtcOpus_Resample48to32(const int16_t* samples_in, int length, - int16_t* state, int16_t* samples_out) { - int i; - int blocks; - int16_t output_samples; - int32_t buffer32[kWebRtcOpusMaxFrameSizePerChannel + kWebRtcOpusStateSize]; - - /* Resample from 48 kHz to 32 kHz. */ - for (i = 0; i < kWebRtcOpusStateSize; i++) { - buffer32[i] = state[i]; - state[i] = samples_in[length - kWebRtcOpusStateSize + i]; - } - for (i = 0; i < length; i++) { - buffer32[kWebRtcOpusStateSize + i] = samples_in[i]; - } - /* Resampling 3 samples to 2. Function divides the input in |blocks| number - * of 3-sample groups, and output is |blocks| number of 2-sample groups. - * When this is removed, the compensation in WebRtcOpus_DurationEst should be - * removed too. */ - blocks = length / 3; - WebRtcSpl_Resample48khzTo32khz(buffer32, buffer32, blocks); - output_samples = (int16_t) (blocks * 2); - WebRtcSpl_VectorBitShiftW32ToW16(samples_out, output_samples, buffer32, 15); - - return output_samples; -} - -static int WebRtcOpus_DeInterleaveResample(OpusDecInst* inst, int16_t* input, - int sample_pairs, int16_t* output) { - int i; - int16_t buffer_left[kWebRtcOpusMaxFrameSizePerChannel]; - int16_t buffer_right[kWebRtcOpusMaxFrameSizePerChannel]; - int16_t buffer_out[kWebRtcOpusMaxFrameSizePerChannel32kHz]; - int resampled_samples; - - /* De-interleave the signal in left and right channel. */ - for (i = 0; i < sample_pairs; i++) { - /* Take every second sample, starting at the first sample. */ - buffer_left[i] = input[i * 2]; - buffer_right[i] = input[i * 2 + 1]; - } - - /* Resample from 48 kHz to 32 kHz for left channel. */ - resampled_samples = WebRtcOpus_Resample48to32( - buffer_left, sample_pairs, inst->state_48_32_left, buffer_out); - - /* Add samples interleaved to output vector. */ - for (i = 0; i < resampled_samples; i++) { - output[i * 2] = buffer_out[i]; - } - - /* Resample from 48 kHz to 32 kHz for right channel. */ - resampled_samples = WebRtcOpus_Resample48to32( - buffer_right, sample_pairs, inst->state_48_32_right, buffer_out); - - /* Add samples interleaved to output vector. */ - for (i = 0; i < resampled_samples; i++) { - output[i * 2 + 1] = buffer_out[i]; - } - - return resampled_samples; -} - int16_t WebRtcOpus_DecodeNew(OpusDecInst* inst, const uint8_t* encoded, int16_t encoded_bytes, int16_t* decoded, int16_t* audio_type) { - /* |buffer| is big enough for 120 ms (the largest Opus packet size) of stereo - * audio at 48 kHz. */ - int16_t buffer[kWebRtcOpusMaxFrameSize]; int16_t* coded = (int16_t*)encoded; int decoded_samples; - int resampled_samples; - - /* If mono case, just do a regular call to the decoder. - * If stereo, we need to de-interleave the stereo output into blocks with - * left and right channel. Each block is resampled to 32 kHz, and then - * interleaved again. */ - /* Decode to a temporary buffer. */ decoded_samples = DecodeNative(inst->decoder_left, coded, encoded_bytes, kWebRtcOpusMaxFrameSizePerChannel, - buffer, audio_type); + decoded, audio_type); if (decoded_samples < 0) { return -1; } - if (inst->channels == 2) { - /* De-interleave and resample. */ - resampled_samples = WebRtcOpus_DeInterleaveResample(inst, - buffer, - decoded_samples, - decoded); - } else { - /* Resample from 48 kHz to 32 kHz. Filter state memory for left channel is - * used for mono signals. */ - resampled_samples = WebRtcOpus_Resample48to32(buffer, - decoded_samples, - inst->state_48_32_left, - decoded); - } - /* Update decoded sample memory, to be used by the PLC in case of losses. */ inst->prev_decoded_samples = decoded_samples; - return resampled_samples; + return decoded_samples; } int16_t WebRtcOpus_Decode(OpusDecInst* inst, const int16_t* encoded, int16_t encoded_bytes, int16_t* decoded, int16_t* audio_type) { - /* |buffer16| is big enough for 120 ms (the largestOpus packet size) of - * stereo audio at 48 kHz. */ - int16_t buffer16[kWebRtcOpusMaxFrameSize]; int decoded_samples; - int16_t output_samples; int i; /* If mono case, just do a regular call to the decoder. @@ -393,120 +278,82 @@ int16_t WebRtcOpus_Decode(OpusDecInst* inst, const int16_t* encoded, * This is to make stereo work with the current setup of NetEQ, which * requires two calls to the decoder to produce stereo. */ - /* Decode to a temporary buffer. */ decoded_samples = DecodeNative(inst->decoder_left, encoded, encoded_bytes, - kWebRtcOpusMaxFrameSizePerChannel, buffer16, + kWebRtcOpusMaxFrameSizePerChannel, decoded, audio_type); if (decoded_samples < 0) { return -1; } if (inst->channels == 2) { /* The parameter |decoded_samples| holds the number of samples pairs, in - * case of stereo. Number of samples in |buffer16| equals |decoded_samples| + * case of stereo. Number of samples in |decoded| equals |decoded_samples| * times 2. */ for (i = 0; i < decoded_samples; i++) { /* Take every second sample, starting at the first sample. This gives * the left channel. */ - buffer16[i] = buffer16[i * 2]; + decoded[i] = decoded[i * 2]; } } - /* Resample from 48 kHz to 32 kHz. */ - output_samples = WebRtcOpus_Resample48to32(buffer16, decoded_samples, - inst->state_48_32_left, decoded); - /* Update decoded sample memory, to be used by the PLC in case of losses. */ inst->prev_decoded_samples = decoded_samples; - return output_samples; + return decoded_samples; } int16_t WebRtcOpus_DecodeSlave(OpusDecInst* inst, const int16_t* encoded, int16_t encoded_bytes, int16_t* decoded, int16_t* audio_type) { - /* |buffer16| is big enough for 120 ms (the largestOpus packet size) of - * stereo audio at 48 kHz. */ - int16_t buffer16[kWebRtcOpusMaxFrameSize]; int decoded_samples; - int16_t output_samples; int i; - /* Decode to a temporary buffer. */ decoded_samples = DecodeNative(inst->decoder_right, encoded, encoded_bytes, - kWebRtcOpusMaxFrameSizePerChannel, buffer16, + kWebRtcOpusMaxFrameSizePerChannel, decoded, audio_type); if (decoded_samples < 0) { return -1; } if (inst->channels == 2) { /* The parameter |decoded_samples| holds the number of samples pairs, in - * case of stereo. Number of samples in |buffer16| equals |decoded_samples| + * case of stereo. Number of samples in |decoded| equals |decoded_samples| * times 2. */ for (i = 0; i < decoded_samples; i++) { /* Take every second sample, starting at the second sample. This gives * the right channel. */ - buffer16[i] = buffer16[i * 2 + 1]; + decoded[i] = decoded[i * 2 + 1]; } } else { /* Decode slave should never be called for mono packets. */ return -1; } - /* Resample from 48 kHz to 32 kHz. */ - output_samples = WebRtcOpus_Resample48to32(buffer16, decoded_samples, - inst->state_48_32_right, decoded); - return output_samples; + return decoded_samples; } int16_t WebRtcOpus_DecodePlc(OpusDecInst* inst, int16_t* decoded, int16_t number_of_lost_frames) { - int16_t buffer[kWebRtcOpusMaxFrameSize]; int16_t audio_type = 0; int decoded_samples; - int resampled_samples; int plc_samples; - /* If mono case, just do a regular call to the plc function, before - * resampling. - * If stereo, we need to de-interleave the stereo output into blocks with - * left and right channel. Each block is resampled to 32 kHz, and then - * interleaved again. */ - - /* Decode to a temporary buffer. The number of samples we ask for is - * |number_of_lost_frames| times |prev_decoded_samples_|. Limit the number - * of samples to maximum |kWebRtcOpusMaxFrameSizePerChannel|. */ + /* The number of samples we ask for is |number_of_lost_frames| times + * |prev_decoded_samples_|. Limit the number of samples to maximum + * |kWebRtcOpusMaxFrameSizePerChannel|. */ plc_samples = number_of_lost_frames * inst->prev_decoded_samples; plc_samples = (plc_samples <= kWebRtcOpusMaxFrameSizePerChannel) ? plc_samples : kWebRtcOpusMaxFrameSizePerChannel; decoded_samples = DecodeNative(inst->decoder_left, NULL, 0, plc_samples, - buffer, &audio_type); + decoded, &audio_type); if (decoded_samples < 0) { return -1; } - if (inst->channels == 2) { - /* De-interleave and resample. */ - resampled_samples = WebRtcOpus_DeInterleaveResample(inst, - buffer, - decoded_samples, - decoded); - } else { - /* Resample from 48 kHz to 32 kHz. Filter state memory for left channel is - * used for mono signals. */ - resampled_samples = WebRtcOpus_Resample48to32(buffer, - decoded_samples, - inst->state_48_32_left, - decoded); - } - - return resampled_samples; + return decoded_samples; } int16_t WebRtcOpus_DecodePlcMaster(OpusDecInst* inst, int16_t* decoded, int16_t number_of_lost_frames) { - int16_t buffer[kWebRtcOpusMaxFrameSize]; int decoded_samples; - int resampled_samples; int16_t audio_type = 0; int plc_samples; int i; @@ -517,42 +364,35 @@ int16_t WebRtcOpus_DecodePlcMaster(OpusDecInst* inst, int16_t* decoded, * output. This is to make stereo work with the current setup of NetEQ, which * requires two calls to the decoder to produce stereo. */ - /* Decode to a temporary buffer. The number of samples we ask for is - * |number_of_lost_frames| times |prev_decoded_samples_|. Limit the number - * of samples to maximum |kWebRtcOpusMaxFrameSizePerChannel|. */ + /* The number of samples we ask for is |number_of_lost_frames| times + * |prev_decoded_samples_|. Limit the number of samples to maximum + * |kWebRtcOpusMaxFrameSizePerChannel|. */ plc_samples = number_of_lost_frames * inst->prev_decoded_samples; plc_samples = (plc_samples <= kWebRtcOpusMaxFrameSizePerChannel) ? plc_samples : kWebRtcOpusMaxFrameSizePerChannel; decoded_samples = DecodeNative(inst->decoder_left, NULL, 0, plc_samples, - buffer, &audio_type); + decoded, &audio_type); if (decoded_samples < 0) { return -1; } if (inst->channels == 2) { /* The parameter |decoded_samples| holds the number of sample pairs, in - * case of stereo. The original number of samples in |buffer| equals + * case of stereo. The original number of samples in |decoded| equals * |decoded_samples| times 2. */ for (i = 0; i < decoded_samples; i++) { /* Take every second sample, starting at the first sample. This gives * the left channel. */ - buffer[i] = buffer[i * 2]; + decoded[i] = decoded[i * 2]; } } - /* Resample from 48 kHz to 32 kHz for left channel. */ - resampled_samples = WebRtcOpus_Resample48to32(buffer, - decoded_samples, - inst->state_48_32_left, - decoded); - return resampled_samples; + return decoded_samples; } int16_t WebRtcOpus_DecodePlcSlave(OpusDecInst* inst, int16_t* decoded, int16_t number_of_lost_frames) { - int16_t buffer[kWebRtcOpusMaxFrameSize]; int decoded_samples; - int resampled_samples; int16_t audio_type = 0; int plc_samples; int i; @@ -563,44 +403,35 @@ int16_t WebRtcOpus_DecodePlcSlave(OpusDecInst* inst, int16_t* decoded, return -1; } - /* Decode to a temporary buffer. The number of samples we ask for is - * |number_of_lost_frames| times |prev_decoded_samples_|. Limit the number - * of samples to maximum |kWebRtcOpusMaxFrameSizePerChannel|. */ + /* The number of samples we ask for is |number_of_lost_frames| times + * |prev_decoded_samples_|. Limit the number of samples to maximum + * |kWebRtcOpusMaxFrameSizePerChannel|. */ plc_samples = number_of_lost_frames * inst->prev_decoded_samples; plc_samples = (plc_samples <= kWebRtcOpusMaxFrameSizePerChannel) ? plc_samples : kWebRtcOpusMaxFrameSizePerChannel; decoded_samples = DecodeNative(inst->decoder_right, NULL, 0, plc_samples, - buffer, &audio_type); + decoded, &audio_type); if (decoded_samples < 0) { return -1; } /* The parameter |decoded_samples| holds the number of sample pairs, - * The original number of samples in |buffer| equals |decoded_samples| + * The original number of samples in |decoded| equals |decoded_samples| * times 2. */ for (i = 0; i < decoded_samples; i++) { /* Take every second sample, starting at the second sample. This gives * the right channel. */ - buffer[i] = buffer[i * 2 + 1]; + decoded[i] = decoded[i * 2 + 1]; } - /* Resample from 48 kHz to 32 kHz for left channel. */ - resampled_samples = WebRtcOpus_Resample48to32(buffer, - decoded_samples, - inst->state_48_32_right, - decoded); - return resampled_samples; + return decoded_samples; } int16_t WebRtcOpus_DecodeFec(OpusDecInst* inst, const uint8_t* encoded, int16_t encoded_bytes, int16_t* decoded, int16_t* audio_type) { - /* |buffer| is big enough for 120 ms (the largest Opus packet size) of stereo - * audio at 48 kHz. */ - int16_t buffer[kWebRtcOpusMaxFrameSize]; int16_t* coded = (int16_t*)encoded; int decoded_samples; - int resampled_samples; int fec_samples; if (WebRtcOpus_PacketHasFec(encoded, encoded_bytes) != 1) { @@ -609,33 +440,13 @@ int16_t WebRtcOpus_DecodeFec(OpusDecInst* inst, const uint8_t* encoded, fec_samples = opus_packet_get_samples_per_frame(encoded, 48000); - /* Decode to a temporary buffer. */ decoded_samples = DecodeFec(inst->decoder_left, coded, encoded_bytes, - fec_samples, buffer, audio_type); + fec_samples, decoded, audio_type); if (decoded_samples < 0) { return -1; } - /* If mono case, just do a regular call to the decoder. - * If stereo, we need to de-interleave the stereo output into blocks with - * left and right channel. Each block is resampled to 32 kHz, and then - * interleaved again. */ - if (inst->channels == 2) { - /* De-interleave and resample. */ - resampled_samples = WebRtcOpus_DeInterleaveResample(inst, - buffer, - decoded_samples, - decoded); - } else { - /* Resample from 48 kHz to 32 kHz. Filter state memory for left channel is - * used for mono signals. */ - resampled_samples = WebRtcOpus_Resample48to32(buffer, - decoded_samples, - inst->state_48_32_left, - decoded); - } - - return resampled_samples; + return decoded_samples; } int WebRtcOpus_DurationEst(OpusDecInst* inst, @@ -652,10 +463,6 @@ int WebRtcOpus_DurationEst(OpusDecInst* inst, /* Invalid payload duration. */ return 0; } - /* Compensate for the down-sampling from 48 kHz to 32 kHz. - * This should be removed when the resampling in WebRtcOpus_Decode is - * removed. */ - samples = samples * 2 / 3; return samples; } @@ -671,10 +478,6 @@ int WebRtcOpus_FecDurationEst(const uint8_t* payload, /* Invalid payload duration. */ return 0; } - /* Compensate for the down-sampling from 48 kHz to 32 kHz. - * This should be removed when the resampling in WebRtcOpus_Decode is - * removed. */ - samples = samples * 2 / 3; return samples; } diff --git a/modules/audio_coding/codecs/opus/opus_speed_test.cc b/modules/audio_coding/codecs/opus/opus_speed_test.cc index 16099c6d..e2439cf5 100644 --- a/modules/audio_coding/codecs/opus/opus_speed_test.cc +++ b/modules/audio_coding/codecs/opus/opus_speed_test.cc @@ -18,8 +18,7 @@ using ::testing::ValuesIn; namespace webrtc { static const int kOpusBlockDurationMs = 20; -static const int kOpusInputSamplingKhz = 48; -static const int kOpustOutputSamplingKhz = 32; +static const int kOpusSamplingKhz = 48; class OpusSpeedTest : public AudioCodecSpeedTest { protected: @@ -36,8 +35,8 @@ class OpusSpeedTest : public AudioCodecSpeedTest { OpusSpeedTest::OpusSpeedTest() : AudioCodecSpeedTest(kOpusBlockDurationMs, - kOpusInputSamplingKhz, - kOpustOutputSamplingKhz), + kOpusSamplingKhz, + kOpusSamplingKhz), opus_encoder_(NULL), opus_decoder_(NULL) { } diff --git a/modules/audio_coding/codecs/opus/opus_unittest.cc b/modules/audio_coding/codecs/opus/opus_unittest.cc index ed876cd1..2ec77a53 100644 --- a/modules/audio_coding/codecs/opus/opus_unittest.cc +++ b/modules/audio_coding/codecs/opus/opus_unittest.cc @@ -19,9 +19,13 @@ struct WebRtcOpusDecInst; namespace webrtc { // Number of samples in a 60 ms stereo frame, sampled at 48 kHz. -const int kOpusNumberOfSamples = 480 * 6 * 2; +const int kOpusMaxFrameSamples = 48 * 60 * 2; // Maximum number of bytes in output bitstream. const size_t kMaxBytes = 1000; +// Number of samples-per-channel in a 20 ms frame, sampled at 48 kHz. +const int kOpus20msFrameSamples = 48 * 20; +// Number of samples-per-channel in a 10 ms frame, sampled at 48 kHz. +const int kOpus10msFrameSamples = 48 * 10; class OpusTest : public ::testing::Test { protected: @@ -35,8 +39,8 @@ class OpusTest : public ::testing::Test { WebRtcOpusDecInst* opus_stereo_decoder_; WebRtcOpusDecInst* opus_stereo_decoder_new_; - int16_t speech_data_[kOpusNumberOfSamples]; - int16_t output_data_[kOpusNumberOfSamples]; + int16_t speech_data_[kOpusMaxFrameSamples]; + int16_t output_data_[kOpusMaxFrameSamples]; uint8_t bitstream_[kMaxBytes]; }; @@ -50,17 +54,14 @@ OpusTest::OpusTest() } void OpusTest::SetUp() { - // Read some samples from a speech file, to be used in the encode test. - // In this test we do not care that the sampling frequency of the file is - // really 32000 Hz. We pretend that it is 48000 Hz. FILE* input_file; const std::string file_name = - webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm"); + webrtc::test::ResourcePath("audio_coding/speech_mono_32_48kHz", "pcm"); input_file = fopen(file_name.c_str(), "rb"); ASSERT_TRUE(input_file != NULL); - ASSERT_EQ(kOpusNumberOfSamples, + ASSERT_EQ(kOpusMaxFrameSamples, static_cast<int32_t>(fread(speech_data_, sizeof(int16_t), - kOpusNumberOfSamples, input_file))); + kOpusMaxFrameSamples, input_file))); fclose(input_file); input_file = NULL; } @@ -114,21 +115,24 @@ TEST_F(OpusTest, OpusEncodeDecodeMono) { // Encode & decode. int16_t encoded_bytes; int16_t audio_type; - int16_t output_data_decode_new[kOpusNumberOfSamples]; - int16_t output_data_decode[kOpusNumberOfSamples]; + int16_t output_data_decode_new[kOpusMaxFrameSamples]; + int16_t output_data_decode[kOpusMaxFrameSamples]; int16_t* coded = reinterpret_cast<int16_t*>(bitstream_); - encoded_bytes = WebRtcOpus_Encode(opus_mono_encoder_, speech_data_, 960, - kMaxBytes, bitstream_); - EXPECT_EQ(640, WebRtcOpus_DecodeNew(opus_mono_decoder_new_, bitstream_, - encoded_bytes, output_data_decode_new, - &audio_type)); - EXPECT_EQ(640, WebRtcOpus_Decode(opus_mono_decoder_, coded, - encoded_bytes, output_data_decode, - &audio_type)); + encoded_bytes = WebRtcOpus_Encode(opus_mono_encoder_, speech_data_, + kOpus20msFrameSamples, kMaxBytes, + bitstream_); + EXPECT_EQ(kOpus20msFrameSamples, + WebRtcOpus_DecodeNew(opus_mono_decoder_new_, bitstream_, + encoded_bytes, output_data_decode_new, + &audio_type)); + EXPECT_EQ(kOpus20msFrameSamples, + WebRtcOpus_Decode(opus_mono_decoder_, coded, + encoded_bytes, output_data_decode, + &audio_type)); // Data in |output_data_decode_new| should be the same as in // |output_data_decode|. - for (int i = 0; i < 640; i++) { + for (int i = 0; i < kOpus20msFrameSamples; i++) { EXPECT_EQ(output_data_decode_new[i], output_data_decode[i]); } @@ -154,26 +158,30 @@ TEST_F(OpusTest, OpusEncodeDecodeStereo) { // Encode & decode. int16_t encoded_bytes; int16_t audio_type; - int16_t output_data_decode_new[kOpusNumberOfSamples]; - int16_t output_data_decode[kOpusNumberOfSamples]; - int16_t output_data_decode_slave[kOpusNumberOfSamples]; + int16_t output_data_decode_new[kOpusMaxFrameSamples]; + int16_t output_data_decode[kOpusMaxFrameSamples]; + int16_t output_data_decode_slave[kOpusMaxFrameSamples]; int16_t* coded = reinterpret_cast<int16_t*>(bitstream_); - encoded_bytes = WebRtcOpus_Encode(opus_stereo_encoder_, speech_data_, 960, - kMaxBytes, bitstream_); - EXPECT_EQ(640, WebRtcOpus_DecodeNew(opus_stereo_decoder_new_, bitstream_, - encoded_bytes, output_data_decode_new, - &audio_type)); - EXPECT_EQ(640, WebRtcOpus_Decode(opus_stereo_decoder_, coded, - encoded_bytes, output_data_decode, + encoded_bytes = WebRtcOpus_Encode(opus_stereo_encoder_, speech_data_, + kOpus20msFrameSamples, kMaxBytes, + bitstream_); + EXPECT_EQ(kOpus20msFrameSamples, + WebRtcOpus_DecodeNew(opus_stereo_decoder_new_, bitstream_, + encoded_bytes, output_data_decode_new, + &audio_type)); + EXPECT_EQ(kOpus20msFrameSamples, + WebRtcOpus_Decode(opus_stereo_decoder_, coded, + encoded_bytes, output_data_decode, + &audio_type)); + EXPECT_EQ(kOpus20msFrameSamples, + WebRtcOpus_DecodeSlave(opus_stereo_decoder_, coded, + encoded_bytes, output_data_decode_slave, &audio_type)); - EXPECT_EQ(640, WebRtcOpus_DecodeSlave(opus_stereo_decoder_, coded, - encoded_bytes, output_data_decode_slave, - &audio_type)); // Data in |output_data_decode_new| should be the same as in // |output_data_decode| and |output_data_decode_slave| interleaved to a // stereo signal. - for (int i = 0; i < 640; i++) { + for (int i = 0; i < kOpus20msFrameSamples; i++) { EXPECT_EQ(output_data_decode_new[i * 2], output_data_decode[i]); EXPECT_EQ(output_data_decode_new[i * 2 + 1], output_data_decode_slave[i]); } @@ -234,26 +242,30 @@ TEST_F(OpusTest, OpusDecodeInit) { // Encode & decode. int16_t encoded_bytes; int16_t audio_type; - int16_t output_data_decode_new[kOpusNumberOfSamples]; - int16_t output_data_decode[kOpusNumberOfSamples]; - int16_t output_data_decode_slave[kOpusNumberOfSamples]; + int16_t output_data_decode_new[kOpusMaxFrameSamples]; + int16_t output_data_decode[kOpusMaxFrameSamples]; + int16_t output_data_decode_slave[kOpusMaxFrameSamples]; int16_t* coded = reinterpret_cast<int16_t*>(bitstream_); - encoded_bytes = WebRtcOpus_Encode(opus_stereo_encoder_, speech_data_, 960, - kMaxBytes, bitstream_); - EXPECT_EQ(640, WebRtcOpus_DecodeNew(opus_stereo_decoder_new_, bitstream_, - encoded_bytes, output_data_decode_new, - &audio_type)); - EXPECT_EQ(640, WebRtcOpus_Decode(opus_stereo_decoder_, coded, - encoded_bytes, output_data_decode, + encoded_bytes = WebRtcOpus_Encode(opus_stereo_encoder_, speech_data_, + kOpus20msFrameSamples, kMaxBytes, + bitstream_); + EXPECT_EQ(kOpus20msFrameSamples, + WebRtcOpus_DecodeNew(opus_stereo_decoder_new_, bitstream_, + encoded_bytes, output_data_decode_new, + &audio_type)); + EXPECT_EQ(kOpus20msFrameSamples, + WebRtcOpus_Decode(opus_stereo_decoder_, coded, + encoded_bytes, output_data_decode, + &audio_type)); + EXPECT_EQ(kOpus20msFrameSamples, + WebRtcOpus_DecodeSlave(opus_stereo_decoder_, coded, + encoded_bytes, output_data_decode_slave, &audio_type)); - EXPECT_EQ(640, WebRtcOpus_DecodeSlave(opus_stereo_decoder_, coded, - encoded_bytes, output_data_decode_slave, - &audio_type)); // Data in |output_data_decode_new| should be the same as in // |output_data_decode| and |output_data_decode_slave| interleaved to a // stereo signal. - for (int i = 0; i < 640; i++) { + for (int i = 0; i < kOpus20msFrameSamples; i++) { EXPECT_EQ(output_data_decode_new[i * 2], output_data_decode[i]); EXPECT_EQ(output_data_decode_new[i * 2 + 1], output_data_decode_slave[i]); } @@ -262,20 +274,23 @@ TEST_F(OpusTest, OpusDecodeInit) { EXPECT_EQ(0, WebRtcOpus_DecoderInit(opus_stereo_decoder_)); EXPECT_EQ(0, WebRtcOpus_DecoderInitSlave(opus_stereo_decoder_)); - EXPECT_EQ(640, WebRtcOpus_DecodeNew(opus_stereo_decoder_new_, bitstream_, - encoded_bytes, output_data_decode_new, - &audio_type)); - EXPECT_EQ(640, WebRtcOpus_Decode(opus_stereo_decoder_, coded, - encoded_bytes, output_data_decode, + EXPECT_EQ(kOpus20msFrameSamples, + WebRtcOpus_DecodeNew(opus_stereo_decoder_new_, bitstream_, + encoded_bytes, output_data_decode_new, + &audio_type)); + EXPECT_EQ(kOpus20msFrameSamples, + WebRtcOpus_Decode(opus_stereo_decoder_, coded, + encoded_bytes, output_data_decode, + &audio_type)); + EXPECT_EQ(kOpus20msFrameSamples, + WebRtcOpus_DecodeSlave(opus_stereo_decoder_, coded, + encoded_bytes, output_data_decode_slave, &audio_type)); - EXPECT_EQ(640, WebRtcOpus_DecodeSlave(opus_stereo_decoder_, coded, - encoded_bytes, output_data_decode_slave, - &audio_type)); // Data in |output_data_decode_new| should be the same as in // |output_data_decode| and |output_data_decode_slave| interleaved to a // stereo signal. - for (int i = 0; i < 640; i++) { + for (int i = 0; i < kOpus20msFrameSamples; i++) { EXPECT_EQ(output_data_decode_new[i * 2], output_data_decode[i]); EXPECT_EQ(output_data_decode_new[i * 2 + 1], output_data_decode_slave[i]); } @@ -344,27 +359,31 @@ TEST_F(OpusTest, OpusDecodePlcMono) { // Encode & decode. int16_t encoded_bytes; int16_t audio_type; - int16_t output_data_decode_new[kOpusNumberOfSamples]; - int16_t output_data_decode[kOpusNumberOfSamples]; + int16_t output_data_decode_new[kOpusMaxFrameSamples]; + int16_t output_data_decode[kOpusMaxFrameSamples]; int16_t* coded = reinterpret_cast<int16_t*>(bitstream_); - encoded_bytes = WebRtcOpus_Encode(opus_mono_encoder_, speech_data_, 960, - kMaxBytes, bitstream_); - EXPECT_EQ(640, WebRtcOpus_DecodeNew(opus_mono_decoder_new_, bitstream_, - encoded_bytes, output_data_decode_new, - &audio_type)); - EXPECT_EQ(640, WebRtcOpus_Decode(opus_mono_decoder_, coded, - encoded_bytes, output_data_decode, - &audio_type)); + encoded_bytes = WebRtcOpus_Encode(opus_mono_encoder_, speech_data_, + kOpus20msFrameSamples, kMaxBytes, + bitstream_); + EXPECT_EQ(kOpus20msFrameSamples, + WebRtcOpus_DecodeNew(opus_mono_decoder_new_, bitstream_, + encoded_bytes, output_data_decode_new, + &audio_type)); + EXPECT_EQ(kOpus20msFrameSamples, + WebRtcOpus_Decode(opus_mono_decoder_, coded, + encoded_bytes, output_data_decode, + &audio_type)); // Call decoder PLC for both versions of the decoder. - int16_t plc_buffer[kOpusNumberOfSamples]; - int16_t plc_buffer_new[kOpusNumberOfSamples]; - EXPECT_EQ(640, WebRtcOpus_DecodePlcMaster(opus_mono_decoder_, plc_buffer, 1)); - EXPECT_EQ(640, WebRtcOpus_DecodePlc(opus_mono_decoder_new_, - plc_buffer_new, 1)); + int16_t plc_buffer[kOpusMaxFrameSamples]; + int16_t plc_buffer_new[kOpusMaxFrameSamples]; + EXPECT_EQ(kOpus20msFrameSamples, + WebRtcOpus_DecodePlcMaster(opus_mono_decoder_, plc_buffer, 1)); + EXPECT_EQ(kOpus20msFrameSamples, + WebRtcOpus_DecodePlc(opus_mono_decoder_new_, plc_buffer_new, 1)); // Data in |plc_buffer| should be the same as in |plc_buffer_new|. - for (int i = 0; i < 640; i++) { + for (int i = 0; i < kOpus20msFrameSamples; i++) { EXPECT_EQ(plc_buffer[i], plc_buffer_new[i]); } @@ -391,36 +410,42 @@ TEST_F(OpusTest, OpusDecodePlcStereo) { // Encode & decode. int16_t encoded_bytes; int16_t audio_type; - int16_t output_data_decode_new[kOpusNumberOfSamples]; - int16_t output_data_decode[kOpusNumberOfSamples]; - int16_t output_data_decode_slave[kOpusNumberOfSamples]; + int16_t output_data_decode_new[kOpusMaxFrameSamples]; + int16_t output_data_decode[kOpusMaxFrameSamples]; + int16_t output_data_decode_slave[kOpusMaxFrameSamples]; int16_t* coded = reinterpret_cast<int16_t*>(bitstream_); - encoded_bytes = WebRtcOpus_Encode(opus_stereo_encoder_, speech_data_, 960, - kMaxBytes, bitstream_); - EXPECT_EQ(640, WebRtcOpus_DecodeNew(opus_stereo_decoder_new_, bitstream_, - encoded_bytes, output_data_decode_new, - &audio_type)); - EXPECT_EQ(640, WebRtcOpus_Decode(opus_stereo_decoder_, coded, - encoded_bytes, output_data_decode, + encoded_bytes = WebRtcOpus_Encode(opus_stereo_encoder_, speech_data_, + kOpus20msFrameSamples, kMaxBytes, + bitstream_); + EXPECT_EQ(kOpus20msFrameSamples, + WebRtcOpus_DecodeNew(opus_stereo_decoder_new_, bitstream_, + encoded_bytes, output_data_decode_new, + &audio_type)); + EXPECT_EQ(kOpus20msFrameSamples, + WebRtcOpus_Decode(opus_stereo_decoder_, coded, + encoded_bytes, output_data_decode, + &audio_type)); + EXPECT_EQ(kOpus20msFrameSamples, + WebRtcOpus_DecodeSlave(opus_stereo_decoder_, coded, + encoded_bytes, + output_data_decode_slave, &audio_type)); - EXPECT_EQ(640, WebRtcOpus_DecodeSlave(opus_stereo_decoder_, coded, - encoded_bytes, - output_data_decode_slave, - &audio_type)); // Call decoder PLC for both versions of the decoder. - int16_t plc_buffer_left[kOpusNumberOfSamples]; - int16_t plc_buffer_right[kOpusNumberOfSamples]; - int16_t plc_buffer_new[kOpusNumberOfSamples]; - EXPECT_EQ(640, WebRtcOpus_DecodePlcMaster(opus_stereo_decoder_, - plc_buffer_left, 1)); - EXPECT_EQ(640, WebRtcOpus_DecodePlcSlave(opus_stereo_decoder_, - plc_buffer_right, 1)); - EXPECT_EQ(640, WebRtcOpus_DecodePlc(opus_stereo_decoder_new_, plc_buffer_new, - 1)); + int16_t plc_buffer_left[kOpusMaxFrameSamples]; + int16_t plc_buffer_right[kOpusMaxFrameSamples]; + int16_t plc_buffer_new[kOpusMaxFrameSamples]; + EXPECT_EQ(kOpus20msFrameSamples, + WebRtcOpus_DecodePlcMaster(opus_stereo_decoder_, + plc_buffer_left, 1)); + EXPECT_EQ(kOpus20msFrameSamples, + WebRtcOpus_DecodePlcSlave(opus_stereo_decoder_, + plc_buffer_right, 1)); + EXPECT_EQ(kOpus20msFrameSamples, + WebRtcOpus_DecodePlc(opus_stereo_decoder_new_, plc_buffer_new, 1)); // Data in |plc_buffer_left| and |plc_buffer_right|should be the same as the // interleaved samples in |plc_buffer_new|. - for (int i = 0, j = 0; i < 640; i++) { + for (int i = 0, j = 0; i < kOpus20msFrameSamples; i++) { EXPECT_EQ(plc_buffer_left[i], plc_buffer_new[j++]); EXPECT_EQ(plc_buffer_right[i], plc_buffer_new[j++]); } @@ -437,21 +462,23 @@ TEST_F(OpusTest, OpusDurationEstimation) { EXPECT_EQ(0, WebRtcOpus_EncoderCreate(&opus_stereo_encoder_, 2)); EXPECT_EQ(0, WebRtcOpus_DecoderCreate(&opus_stereo_decoder_, 2)); - // Encode with different packet sizes (input 48 kHz, output in 32 kHz). int16_t encoded_bytes; // 10 ms. - encoded_bytes = WebRtcOpus_Encode(opus_stereo_encoder_, speech_data_, 480, - kMaxBytes, bitstream_); - EXPECT_EQ(320, WebRtcOpus_DurationEst(opus_stereo_decoder_, bitstream_, - encoded_bytes)); + encoded_bytes = WebRtcOpus_Encode(opus_stereo_encoder_, speech_data_, + kOpus10msFrameSamples, kMaxBytes, + bitstream_); + EXPECT_EQ(kOpus10msFrameSamples, + WebRtcOpus_DurationEst(opus_stereo_decoder_, bitstream_, + encoded_bytes)); // 20 ms - encoded_bytes = WebRtcOpus_Encode(opus_stereo_encoder_, speech_data_, 960, - kMaxBytes, bitstream_); - EXPECT_EQ(640, WebRtcOpus_DurationEst(opus_stereo_decoder_, bitstream_, - encoded_bytes)); - + encoded_bytes = WebRtcOpus_Encode(opus_stereo_encoder_, speech_data_, + kOpus20msFrameSamples, kMaxBytes, + bitstream_); + EXPECT_EQ(kOpus20msFrameSamples, + WebRtcOpus_DurationEst(opus_stereo_decoder_, bitstream_, + encoded_bytes)); // Free memory. EXPECT_EQ(0, WebRtcOpus_EncoderFree(opus_stereo_encoder_)); |