diff options
Diffstat (limited to 'webrtc/modules/audio_coding/codecs/cng')
10 files changed, 2051 insertions, 0 deletions
diff --git a/webrtc/modules/audio_coding/codecs/cng/OWNERS b/webrtc/modules/audio_coding/codecs/cng/OWNERS new file mode 100644 index 0000000000..3ee6b4bf5f --- /dev/null +++ b/webrtc/modules/audio_coding/codecs/cng/OWNERS @@ -0,0 +1,5 @@ + +# These are for the common case of adding or renaming files. If you're doing +# structural changes, please get a review from a reviewer in this file. +per-file *.gyp=* +per-file *.gypi=* diff --git a/webrtc/modules/audio_coding/codecs/cng/audio_encoder_cng.cc b/webrtc/modules/audio_coding/codecs/cng/audio_encoder_cng.cc new file mode 100644 index 0000000000..121524633c --- /dev/null +++ b/webrtc/modules/audio_coding/codecs/cng/audio_encoder_cng.cc @@ -0,0 +1,262 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/modules/audio_coding/codecs/cng/include/audio_encoder_cng.h" + +#include <algorithm> +#include <limits> + +namespace webrtc { + +namespace { + +const int kMaxFrameSizeMs = 60; + +rtc::scoped_ptr<CNG_enc_inst, CngInstDeleter> CreateCngInst( + int sample_rate_hz, + int sid_frame_interval_ms, + int num_cng_coefficients) { + rtc::scoped_ptr<CNG_enc_inst, CngInstDeleter> cng_inst; + RTC_CHECK_EQ(0, WebRtcCng_CreateEnc(cng_inst.accept())); + RTC_CHECK_EQ(0, + WebRtcCng_InitEnc(cng_inst.get(), sample_rate_hz, + sid_frame_interval_ms, num_cng_coefficients)); + return cng_inst; +} + +} // namespace + +bool AudioEncoderCng::Config::IsOk() const { + if (num_channels != 1) + return false; + if (!speech_encoder) + return false; + if (num_channels != speech_encoder->NumChannels()) + return false; + if (sid_frame_interval_ms < + static_cast<int>(speech_encoder->Max10MsFramesInAPacket() * 10)) + return false; + if (num_cng_coefficients > WEBRTC_CNG_MAX_LPC_ORDER || + num_cng_coefficients <= 0) + return false; + return true; +} + +AudioEncoderCng::AudioEncoderCng(const Config& config) + : speech_encoder_(config.speech_encoder), + cng_payload_type_(config.payload_type), + num_cng_coefficients_(config.num_cng_coefficients), + sid_frame_interval_ms_(config.sid_frame_interval_ms), + last_frame_active_(true), + vad_(config.vad ? 
rtc_make_scoped_ptr(config.vad) + : CreateVad(config.vad_mode)) { + RTC_CHECK(config.IsOk()) << "Invalid configuration."; + cng_inst_ = CreateCngInst(SampleRateHz(), sid_frame_interval_ms_, + num_cng_coefficients_); +} + +AudioEncoderCng::~AudioEncoderCng() = default; + +size_t AudioEncoderCng::MaxEncodedBytes() const { + const size_t max_encoded_bytes_active = speech_encoder_->MaxEncodedBytes(); + const size_t max_encoded_bytes_passive = + rtc::CheckedDivExact(kMaxFrameSizeMs, 10) * SamplesPer10msFrame(); + return std::max(max_encoded_bytes_active, max_encoded_bytes_passive); +} + +int AudioEncoderCng::SampleRateHz() const { + return speech_encoder_->SampleRateHz(); +} + +int AudioEncoderCng::NumChannels() const { + return 1; +} + +int AudioEncoderCng::RtpTimestampRateHz() const { + return speech_encoder_->RtpTimestampRateHz(); +} + +size_t AudioEncoderCng::Num10MsFramesInNextPacket() const { + return speech_encoder_->Num10MsFramesInNextPacket(); +} + +size_t AudioEncoderCng::Max10MsFramesInAPacket() const { + return speech_encoder_->Max10MsFramesInAPacket(); +} + +int AudioEncoderCng::GetTargetBitrate() const { + return speech_encoder_->GetTargetBitrate(); +} + +AudioEncoder::EncodedInfo AudioEncoderCng::EncodeInternal( + uint32_t rtp_timestamp, + const int16_t* audio, + size_t max_encoded_bytes, + uint8_t* encoded) { + RTC_CHECK_GE(max_encoded_bytes, + static_cast<size_t>(num_cng_coefficients_ + 1)); + const size_t samples_per_10ms_frame = SamplesPer10msFrame(); + RTC_CHECK_EQ(speech_buffer_.size(), + rtp_timestamps_.size() * samples_per_10ms_frame); + rtp_timestamps_.push_back(rtp_timestamp); + for (size_t i = 0; i < samples_per_10ms_frame; ++i) { + speech_buffer_.push_back(audio[i]); + } + const size_t frames_to_encode = speech_encoder_->Num10MsFramesInNextPacket(); + if (rtp_timestamps_.size() < frames_to_encode) { + return EncodedInfo(); + } + RTC_CHECK_LE(static_cast<int>(frames_to_encode * 10), kMaxFrameSizeMs) + << "Frame size cannot be larger than " << 
kMaxFrameSizeMs + << " ms when using VAD/CNG."; + + // Group several 10 ms blocks per VAD call. Call VAD once or twice using the + // following split sizes: + // 10 ms = 10 + 0 ms; 20 ms = 20 + 0 ms; 30 ms = 30 + 0 ms; + // 40 ms = 20 + 20 ms; 50 ms = 30 + 20 ms; 60 ms = 30 + 30 ms. + size_t blocks_in_first_vad_call = + (frames_to_encode > 3 ? 3 : frames_to_encode); + if (frames_to_encode == 4) + blocks_in_first_vad_call = 2; + RTC_CHECK_GE(frames_to_encode, blocks_in_first_vad_call); + const size_t blocks_in_second_vad_call = + frames_to_encode - blocks_in_first_vad_call; + + // Check if all of the buffer is passive speech. Start with checking the first + // block. + Vad::Activity activity = vad_->VoiceActivity( + &speech_buffer_[0], samples_per_10ms_frame * blocks_in_first_vad_call, + SampleRateHz()); + if (activity == Vad::kPassive && blocks_in_second_vad_call > 0) { + // Only check the second block if the first was passive. + activity = vad_->VoiceActivity( + &speech_buffer_[samples_per_10ms_frame * blocks_in_first_vad_call], + samples_per_10ms_frame * blocks_in_second_vad_call, SampleRateHz()); + } + + EncodedInfo info; + switch (activity) { + case Vad::kPassive: { + info = EncodePassive(frames_to_encode, max_encoded_bytes, encoded); + last_frame_active_ = false; + break; + } + case Vad::kActive: { + info = EncodeActive(frames_to_encode, max_encoded_bytes, encoded); + last_frame_active_ = true; + break; + } + case Vad::kError: { + FATAL(); // Fails only if fed invalid data. 
+ break; + } + } + + speech_buffer_.erase( + speech_buffer_.begin(), + speech_buffer_.begin() + frames_to_encode * samples_per_10ms_frame); + rtp_timestamps_.erase(rtp_timestamps_.begin(), + rtp_timestamps_.begin() + frames_to_encode); + return info; +} + +void AudioEncoderCng::Reset() { + speech_encoder_->Reset(); + speech_buffer_.clear(); + rtp_timestamps_.clear(); + last_frame_active_ = true; + vad_->Reset(); + cng_inst_ = CreateCngInst(SampleRateHz(), sid_frame_interval_ms_, + num_cng_coefficients_); +} + +bool AudioEncoderCng::SetFec(bool enable) { + return speech_encoder_->SetFec(enable); +} + +bool AudioEncoderCng::SetDtx(bool enable) { + return speech_encoder_->SetDtx(enable); +} + +bool AudioEncoderCng::SetApplication(Application application) { + return speech_encoder_->SetApplication(application); +} + +void AudioEncoderCng::SetMaxPlaybackRate(int frequency_hz) { + speech_encoder_->SetMaxPlaybackRate(frequency_hz); +} + +void AudioEncoderCng::SetProjectedPacketLossRate(double fraction) { + speech_encoder_->SetProjectedPacketLossRate(fraction); +} + +void AudioEncoderCng::SetTargetBitrate(int bits_per_second) { + speech_encoder_->SetTargetBitrate(bits_per_second); +} + +AudioEncoder::EncodedInfo AudioEncoderCng::EncodePassive( + size_t frames_to_encode, + size_t max_encoded_bytes, + uint8_t* encoded) { + bool force_sid = last_frame_active_; + bool output_produced = false; + const size_t samples_per_10ms_frame = SamplesPer10msFrame(); + RTC_CHECK_GE(max_encoded_bytes, frames_to_encode * samples_per_10ms_frame); + AudioEncoder::EncodedInfo info; + for (size_t i = 0; i < frames_to_encode; ++i) { + // It's important not to pass &info.encoded_bytes directly to + // WebRtcCng_Encode(), since later loop iterations may return zero in that + // value, in which case we don't want to overwrite any value from an earlier + // iteration. 
+ size_t encoded_bytes_tmp = 0; + RTC_CHECK_GE(WebRtcCng_Encode(cng_inst_.get(), + &speech_buffer_[i * samples_per_10ms_frame], + samples_per_10ms_frame, encoded, + &encoded_bytes_tmp, force_sid), + 0); + if (encoded_bytes_tmp > 0) { + RTC_CHECK(!output_produced); + info.encoded_bytes = encoded_bytes_tmp; + output_produced = true; + force_sid = false; + } + } + info.encoded_timestamp = rtp_timestamps_.front(); + info.payload_type = cng_payload_type_; + info.send_even_if_empty = true; + info.speech = false; + return info; +} + +AudioEncoder::EncodedInfo AudioEncoderCng::EncodeActive( + size_t frames_to_encode, + size_t max_encoded_bytes, + uint8_t* encoded) { + const size_t samples_per_10ms_frame = SamplesPer10msFrame(); + AudioEncoder::EncodedInfo info; + for (size_t i = 0; i < frames_to_encode; ++i) { + info = speech_encoder_->Encode( + rtp_timestamps_.front(), &speech_buffer_[i * samples_per_10ms_frame], + samples_per_10ms_frame, max_encoded_bytes, encoded); + if (i + 1 == frames_to_encode) { + RTC_CHECK_GT(info.encoded_bytes, 0u) << "Encoder didn't deliver data."; + } else { + RTC_CHECK_EQ(info.encoded_bytes, 0u) + << "Encoder delivered data too early."; + } + } + return info; +} + +size_t AudioEncoderCng::SamplesPer10msFrame() const { + return rtc::CheckedDivExact(10 * SampleRateHz(), 1000); +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/codecs/cng/audio_encoder_cng_unittest.cc b/webrtc/modules/audio_coding/codecs/cng/audio_encoder_cng_unittest.cc new file mode 100644 index 0000000000..0b837a0f12 --- /dev/null +++ b/webrtc/modules/audio_coding/codecs/cng/audio_encoder_cng_unittest.cc @@ -0,0 +1,464 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <vector> + +#include "testing/gtest/include/gtest/gtest.h" +#include "webrtc/base/scoped_ptr.h" +#include "webrtc/common_audio/vad/mock/mock_vad.h" +#include "webrtc/modules/audio_coding/codecs/cng/include/audio_encoder_cng.h" +#include "webrtc/modules/audio_coding/codecs/mock/mock_audio_encoder.h" + +using ::testing::Return; +using ::testing::_; +using ::testing::SetArgPointee; +using ::testing::InSequence; +using ::testing::Invoke; + +namespace webrtc { + +namespace { +static const size_t kMockMaxEncodedBytes = 1000; +static const size_t kMaxNumSamples = 48 * 10 * 2; // 10 ms @ 48 kHz stereo. +static const size_t kMockReturnEncodedBytes = 17; +static const int kCngPayloadType = 18; +} + +class AudioEncoderCngTest : public ::testing::Test { + protected: + AudioEncoderCngTest() + : mock_vad_(new MockVad), + timestamp_(4711), + num_audio_samples_10ms_(0), + sample_rate_hz_(8000) { + memset(audio_, 0, kMaxNumSamples * 2); + config_.speech_encoder = &mock_encoder_; + EXPECT_CALL(mock_encoder_, NumChannels()).WillRepeatedly(Return(1)); + // Let the AudioEncoderCng object use a MockVad instead of its internally + // created Vad object. + config_.vad = mock_vad_; + config_.payload_type = kCngPayloadType; + } + + void TearDown() override { + EXPECT_CALL(*mock_vad_, Die()).Times(1); + cng_.reset(); + // Don't expect the cng_ object to delete the AudioEncoder object. But it + // will be deleted with the test fixture. This is why we explicitly delete + // the cng_ object above, and set expectations on mock_encoder_ afterwards. + EXPECT_CALL(mock_encoder_, Die()).Times(1); + } + + void CreateCng() { + // The config_ parameters may be changed by the TEST_Fs up until CreateCng() + // is called, thus we cannot use the values until now. 
+ num_audio_samples_10ms_ = static_cast<size_t>(10 * sample_rate_hz_ / 1000); + ASSERT_LE(num_audio_samples_10ms_, kMaxNumSamples); + EXPECT_CALL(mock_encoder_, SampleRateHz()) + .WillRepeatedly(Return(sample_rate_hz_)); + // Max10MsFramesInAPacket() is just used to verify that the SID frame period + // is not too small. The return value does not matter that much, as long as + // it is smaller than 10. + EXPECT_CALL(mock_encoder_, Max10MsFramesInAPacket()).WillOnce(Return(1u)); + EXPECT_CALL(mock_encoder_, MaxEncodedBytes()) + .WillRepeatedly(Return(kMockMaxEncodedBytes)); + cng_.reset(new AudioEncoderCng(config_)); + encoded_.resize(cng_->MaxEncodedBytes(), 0); + } + + void Encode() { + ASSERT_TRUE(cng_) << "Must call CreateCng() first."; + encoded_info_ = cng_->Encode(timestamp_, audio_, num_audio_samples_10ms_, + encoded_.size(), &encoded_[0]); + timestamp_ += static_cast<uint32_t>(num_audio_samples_10ms_); + } + + // Expect |num_calls| calls to the encoder, all successful. The last call + // claims to have encoded |kMockReturnEncodedBytes| bytes, and all the preceding + // ones 0 bytes. + void ExpectEncodeCalls(size_t num_calls) { + InSequence s; + AudioEncoder::EncodedInfo info; + for (size_t j = 0; j < num_calls - 1; ++j) { + EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _)) + .WillOnce(Return(info)); + } + info.encoded_bytes = kMockReturnEncodedBytes; + EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _)) + .WillOnce(Return(info)); + } + + // Verifies that the cng_ object waits until it has collected + // |blocks_per_frame| blocks of audio, and then dispatches all of them to + // the underlying codec (speech or cng). + void CheckBlockGrouping(size_t blocks_per_frame, bool active_speech) { + EXPECT_CALL(mock_encoder_, Num10MsFramesInNextPacket()) + .WillRepeatedly(Return(blocks_per_frame)); + CreateCng(); + EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _)) + .WillRepeatedly(Return(active_speech ? 
Vad::kActive : Vad::kPassive)); + + // Don't expect any calls to the encoder yet. + EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _)).Times(0); + for (size_t i = 0; i < blocks_per_frame - 1; ++i) { + Encode(); + EXPECT_EQ(0u, encoded_info_.encoded_bytes); + } + if (active_speech) + ExpectEncodeCalls(blocks_per_frame); + Encode(); + if (active_speech) { + EXPECT_EQ(kMockReturnEncodedBytes, encoded_info_.encoded_bytes); + } else { + EXPECT_EQ(static_cast<size_t>(config_.num_cng_coefficients + 1), + encoded_info_.encoded_bytes); + } + } + + // Verifies that the audio is partitioned into larger blocks before calling + // the VAD. + void CheckVadInputSize(int input_frame_size_ms, + int expected_first_block_size_ms, + int expected_second_block_size_ms) { + const size_t blocks_per_frame = + static_cast<size_t>(input_frame_size_ms / 10); + + EXPECT_CALL(mock_encoder_, Num10MsFramesInNextPacket()) + .WillRepeatedly(Return(blocks_per_frame)); + + // Expect nothing to happen before the last block is sent to cng_. + EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _)).Times(0); + for (size_t i = 0; i < blocks_per_frame - 1; ++i) { + Encode(); + } + + // Let the VAD decision be passive, since an active decision may lead to + // early termination of the decision loop. + InSequence s; + EXPECT_CALL( + *mock_vad_, + VoiceActivity(_, expected_first_block_size_ms * sample_rate_hz_ / 1000, + sample_rate_hz_)).WillOnce(Return(Vad::kPassive)); + if (expected_second_block_size_ms > 0) { + EXPECT_CALL(*mock_vad_, + VoiceActivity( + _, expected_second_block_size_ms * sample_rate_hz_ / 1000, + sample_rate_hz_)).WillOnce(Return(Vad::kPassive)); + } + + // With this call to Encode(), |mock_vad_| should be called according to the + // above expectations. + Encode(); + } + + // Tests a frame with both active and passive speech. Returns true if the + // decision was active speech, false if it was passive. 
+ bool CheckMixedActivePassive(Vad::Activity first_type, + Vad::Activity second_type) { + // Set the speech encoder frame size to 60 ms, to ensure that the VAD will + // be called twice. + const size_t blocks_per_frame = 6; + EXPECT_CALL(mock_encoder_, Num10MsFramesInNextPacket()) + .WillRepeatedly(Return(blocks_per_frame)); + InSequence s; + EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _)) + .WillOnce(Return(first_type)); + if (first_type == Vad::kPassive) { + // Expect a second call to the VAD only if the first frame was passive. + EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _)) + .WillOnce(Return(second_type)); + } + encoded_info_.payload_type = 0; + for (size_t i = 0; i < blocks_per_frame; ++i) { + Encode(); + } + return encoded_info_.payload_type != kCngPayloadType; + } + + AudioEncoderCng::Config config_; + rtc::scoped_ptr<AudioEncoderCng> cng_; + MockAudioEncoder mock_encoder_; + MockVad* mock_vad_; // Ownership is transferred to |cng_|. + uint32_t timestamp_; + int16_t audio_[kMaxNumSamples]; + size_t num_audio_samples_10ms_; + std::vector<uint8_t> encoded_; + AudioEncoder::EncodedInfo encoded_info_; + int sample_rate_hz_; +}; + +TEST_F(AudioEncoderCngTest, CreateAndDestroy) { + CreateCng(); +} + +TEST_F(AudioEncoderCngTest, CheckFrameSizePropagation) { + CreateCng(); + EXPECT_CALL(mock_encoder_, Num10MsFramesInNextPacket()).WillOnce(Return(17U)); + EXPECT_EQ(17U, cng_->Num10MsFramesInNextPacket()); +} + +TEST_F(AudioEncoderCngTest, CheckChangeBitratePropagation) { + CreateCng(); + EXPECT_CALL(mock_encoder_, SetTargetBitrate(4711)); + cng_->SetTargetBitrate(4711); +} + +TEST_F(AudioEncoderCngTest, CheckProjectedPacketLossRatePropagation) { + CreateCng(); + EXPECT_CALL(mock_encoder_, SetProjectedPacketLossRate(0.5)); + cng_->SetProjectedPacketLossRate(0.5); +} + +TEST_F(AudioEncoderCngTest, EncodeCallsVad) { + EXPECT_CALL(mock_encoder_, Num10MsFramesInNextPacket()) + .WillRepeatedly(Return(1U)); + CreateCng(); + EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _)) 
+ .WillOnce(Return(Vad::kPassive)); + Encode(); +} + +TEST_F(AudioEncoderCngTest, EncodeCollects1BlockPassiveSpeech) { + CheckBlockGrouping(1, false); +} + +TEST_F(AudioEncoderCngTest, EncodeCollects2BlocksPassiveSpeech) { + CheckBlockGrouping(2, false); +} + +TEST_F(AudioEncoderCngTest, EncodeCollects3BlocksPassiveSpeech) { + CheckBlockGrouping(3, false); +} + +TEST_F(AudioEncoderCngTest, EncodeCollects1BlockActiveSpeech) { + CheckBlockGrouping(1, true); +} + +TEST_F(AudioEncoderCngTest, EncodeCollects2BlocksActiveSpeech) { + CheckBlockGrouping(2, true); +} + +TEST_F(AudioEncoderCngTest, EncodeCollects3BlocksActiveSpeech) { + CheckBlockGrouping(3, true); +} + +TEST_F(AudioEncoderCngTest, EncodePassive) { + const size_t kBlocksPerFrame = 3; + EXPECT_CALL(mock_encoder_, Num10MsFramesInNextPacket()) + .WillRepeatedly(Return(kBlocksPerFrame)); + CreateCng(); + EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _)) + .WillRepeatedly(Return(Vad::kPassive)); + // Expect no calls at all to the speech encoder mock. + EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _)).Times(0); + uint32_t expected_timestamp = timestamp_; + for (size_t i = 0; i < 100; ++i) { + Encode(); + // Check if it was time to call the cng encoder. This is done once every + // |kBlocksPerFrame| calls. + if ((i + 1) % kBlocksPerFrame == 0) { + // Now check if a SID interval has elapsed. + if ((i % (config_.sid_frame_interval_ms / 10)) < kBlocksPerFrame) { + // If so, verify that we got a CNG encoding. + EXPECT_EQ(kCngPayloadType, encoded_info_.payload_type); + EXPECT_FALSE(encoded_info_.speech); + EXPECT_EQ(static_cast<size_t>(config_.num_cng_coefficients) + 1, + encoded_info_.encoded_bytes); + EXPECT_EQ(expected_timestamp, encoded_info_.encoded_timestamp); + } + expected_timestamp += kBlocksPerFrame * num_audio_samples_10ms_; + } else { + // Otherwise, expect no output. 
+ EXPECT_EQ(0u, encoded_info_.encoded_bytes); + } + } +} + +// Verifies that the correct action is taken for frames with both active and +// passive speech. +TEST_F(AudioEncoderCngTest, MixedActivePassive) { + CreateCng(); + + // All of the frame is active speech. + ExpectEncodeCalls(6); + EXPECT_TRUE(CheckMixedActivePassive(Vad::kActive, Vad::kActive)); + EXPECT_TRUE(encoded_info_.speech); + + // First half of the frame is active speech. + ExpectEncodeCalls(6); + EXPECT_TRUE(CheckMixedActivePassive(Vad::kActive, Vad::kPassive)); + EXPECT_TRUE(encoded_info_.speech); + + // Second half of the frame is active speech. + ExpectEncodeCalls(6); + EXPECT_TRUE(CheckMixedActivePassive(Vad::kPassive, Vad::kActive)); + EXPECT_TRUE(encoded_info_.speech); + + // All of the frame is passive speech. Expect no calls to |mock_encoder_|. + EXPECT_FALSE(CheckMixedActivePassive(Vad::kPassive, Vad::kPassive)); + EXPECT_FALSE(encoded_info_.speech); +} + +// These tests verify that the audio is partitioned into larger blocks before +// calling the VAD. +// The parameters for CheckVadInputSize are: +// CheckVadInputSize(frame_size, expected_first_block_size, +// expected_second_block_size); +TEST_F(AudioEncoderCngTest, VadInputSize10Ms) { + CreateCng(); + CheckVadInputSize(10, 10, 0); +} +TEST_F(AudioEncoderCngTest, VadInputSize20Ms) { + CreateCng(); + CheckVadInputSize(20, 20, 0); +} +TEST_F(AudioEncoderCngTest, VadInputSize30Ms) { + CreateCng(); + CheckVadInputSize(30, 30, 0); +} +TEST_F(AudioEncoderCngTest, VadInputSize40Ms) { + CreateCng(); + CheckVadInputSize(40, 20, 20); +} +TEST_F(AudioEncoderCngTest, VadInputSize50Ms) { + CreateCng(); + CheckVadInputSize(50, 30, 20); +} +TEST_F(AudioEncoderCngTest, VadInputSize60Ms) { + CreateCng(); + CheckVadInputSize(60, 30, 30); +} + +// Verifies that the correct payload type is set when CNG is encoded. 
+TEST_F(AudioEncoderCngTest, VerifyCngPayloadType) { + CreateCng(); + EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _)).Times(0); + EXPECT_CALL(mock_encoder_, Num10MsFramesInNextPacket()).WillOnce(Return(1U)); + EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _)) + .WillOnce(Return(Vad::kPassive)); + encoded_info_.payload_type = 0; + Encode(); + EXPECT_EQ(kCngPayloadType, encoded_info_.payload_type); +} + +// Verifies that a SID frame is encoded immediately as the signal changes from +// active speech to passive. +TEST_F(AudioEncoderCngTest, VerifySidFrameAfterSpeech) { + CreateCng(); + EXPECT_CALL(mock_encoder_, Num10MsFramesInNextPacket()) + .WillRepeatedly(Return(1U)); + // Start with encoding noise. + EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _)) + .Times(2) + .WillRepeatedly(Return(Vad::kPassive)); + Encode(); + EXPECT_EQ(kCngPayloadType, encoded_info_.payload_type); + EXPECT_EQ(static_cast<size_t>(config_.num_cng_coefficients) + 1, + encoded_info_.encoded_bytes); + // Encode again, and make sure we got no frame at all (since the SID frame + // period is 100 ms by default). + Encode(); + EXPECT_EQ(0u, encoded_info_.encoded_bytes); + + // Now encode active speech. + encoded_info_.payload_type = 0; + EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _)) + .WillOnce(Return(Vad::kActive)); + AudioEncoder::EncodedInfo info; + info.encoded_bytes = kMockReturnEncodedBytes; + EXPECT_CALL(mock_encoder_, EncodeInternal(_, _, _, _)).WillOnce(Return(info)); + Encode(); + EXPECT_EQ(kMockReturnEncodedBytes, encoded_info_.encoded_bytes); + + // Go back to noise again, and verify that a SID frame is emitted. + EXPECT_CALL(*mock_vad_, VoiceActivity(_, _, _)) + .WillOnce(Return(Vad::kPassive)); + Encode(); + EXPECT_EQ(kCngPayloadType, encoded_info_.payload_type); + EXPECT_EQ(static_cast<size_t>(config_.num_cng_coefficients) + 1, + encoded_info_.encoded_bytes); +} + +// Resetting the CNG should reset both the VAD and the encoder. 
+TEST_F(AudioEncoderCngTest, Reset) { + CreateCng(); + EXPECT_CALL(mock_encoder_, Reset()).Times(1); + EXPECT_CALL(*mock_vad_, Reset()).Times(1); + cng_->Reset(); +} + +#if GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID) + +// This test fixture tests various error conditions that make the +// AudioEncoderCng die via CHECKs. +class AudioEncoderCngDeathTest : public AudioEncoderCngTest { + protected: + AudioEncoderCngDeathTest() : AudioEncoderCngTest() { + // Don't provide a Vad mock object, since it will leak when the test dies. + config_.vad = NULL; + EXPECT_CALL(*mock_vad_, Die()).Times(1); + delete mock_vad_; + mock_vad_ = NULL; + } + + // Override AudioEncoderCngTest::TearDown, since that one expects a call to + // the destructor of |mock_vad_|. In this case, that object is already + // deleted. + void TearDown() override { + cng_.reset(); + // Don't expect the cng_ object to delete the AudioEncoder object. But it + // will be deleted with the test fixture. This is why we explicitly delete + // the cng_ object above, and set expectations on mock_encoder_ afterwards. + EXPECT_CALL(mock_encoder_, Die()).Times(1); + } +}; + +TEST_F(AudioEncoderCngDeathTest, WrongFrameSize) { + CreateCng(); + num_audio_samples_10ms_ *= 2; // 20 ms frame. + EXPECT_DEATH(Encode(), ""); + num_audio_samples_10ms_ = 0; // Zero samples. 
+ EXPECT_DEATH(Encode(), ""); +} + +TEST_F(AudioEncoderCngDeathTest, WrongNumCoefficients) { + config_.num_cng_coefficients = -1; + EXPECT_DEATH(CreateCng(), "Invalid configuration"); + config_.num_cng_coefficients = 0; + EXPECT_DEATH(CreateCng(), "Invalid configuration"); + config_.num_cng_coefficients = 13; + EXPECT_DEATH(CreateCng(), "Invalid configuration"); +} + +TEST_F(AudioEncoderCngDeathTest, NullSpeechEncoder) { + config_.speech_encoder = NULL; + EXPECT_DEATH(CreateCng(), "Invalid configuration"); +} + +TEST_F(AudioEncoderCngDeathTest, Stereo) { + EXPECT_CALL(mock_encoder_, NumChannels()).WillRepeatedly(Return(2)); + EXPECT_DEATH(CreateCng(), "Invalid configuration"); + config_.num_channels = 2; + EXPECT_DEATH(CreateCng(), "Invalid configuration"); +} + +TEST_F(AudioEncoderCngDeathTest, EncoderFrameSizeTooLarge) { + CreateCng(); + EXPECT_CALL(mock_encoder_, Num10MsFramesInNextPacket()) + .WillRepeatedly(Return(7U)); + for (int i = 0; i < 6; ++i) + Encode(); + EXPECT_DEATH(Encode(), + "Frame size cannot be larger than 60 ms when using VAD/CNG."); +} + +#endif // GTEST_HAS_DEATH_TEST + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/codecs/cng/cng.gypi b/webrtc/modules/audio_coding/codecs/cng/cng.gypi new file mode 100644 index 0000000000..78dc41a94f --- /dev/null +++ b/webrtc/modules/audio_coding/codecs/cng/cng.gypi @@ -0,0 +1,38 @@ +# Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. 
+ +{ + 'targets': [ + { + 'target_name': 'cng', + 'type': 'static_library', + 'dependencies': [ + '<(webrtc_root)/common_audio/common_audio.gyp:common_audio', + 'audio_encoder_interface', + ], + 'include_dirs': [ + 'include', + '<(webrtc_root)', + ], + 'direct_dependent_settings': { + 'include_dirs': [ + 'include', + '<(webrtc_root)', + ], + }, + 'sources': [ + 'include/audio_encoder_cng.h', + 'include/webrtc_cng.h', + 'audio_encoder_cng.cc', + 'webrtc_cng.c', + 'cng_helpfuns.c', + 'cng_helpfuns.h', + ], + }, + ], # targets +} diff --git a/webrtc/modules/audio_coding/codecs/cng/cng_helpfuns.c b/webrtc/modules/audio_coding/codecs/cng/cng_helpfuns.c new file mode 100644 index 0000000000..bc08d431a6 --- /dev/null +++ b/webrtc/modules/audio_coding/codecs/cng/cng_helpfuns.c @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "cng_helpfuns.h" + +#include "signal_processing_library.h" +#include "webrtc/typedefs.h" +#include "webrtc_cng.h" + +/* Values in |k| are Q15, and |a| Q12. 
*/ +void WebRtcCng_K2a16(int16_t* k, int useOrder, int16_t* a) { + int16_t any[WEBRTC_SPL_MAX_LPC_ORDER + 1]; + int16_t *aptr, *aptr2, *anyptr; + const int16_t *kptr; + int m, i; + + kptr = k; + *a = 4096; /* i.e., (Word16_MAX >> 3) + 1 */ + *any = *a; + a[1] = (*k + 4) >> 3; + for (m = 1; m < useOrder; m++) { + kptr++; + aptr = a; + aptr++; + aptr2 = &a[m]; + anyptr = any; + anyptr++; + + any[m + 1] = (*kptr + 4) >> 3; + for (i = 0; i < m; i++) { + *anyptr++ = (*aptr++) + + (int16_t)((((int32_t)(*aptr2--) * (int32_t) * kptr) + 16384) >> 15); + } + + aptr = a; + anyptr = any; + for (i = 0; i < (m + 2); i++) { + *aptr++ = *anyptr++; + } + } +} diff --git a/webrtc/modules/audio_coding/codecs/cng/cng_helpfuns.h b/webrtc/modules/audio_coding/codecs/cng/cng_helpfuns.h new file mode 100644 index 0000000000..a553a7615e --- /dev/null +++ b/webrtc/modules/audio_coding/codecs/cng/cng_helpfuns.h @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ +#ifndef WEBRTC_MODULES_AUDIO_CODING_CODECS_CNG_CNG_HELPFUNS_H_ +#define WEBRTC_MODULES_AUDIO_CODING_CODECS_CNG_CNG_HELPFUNS_H_ + +#include "webrtc/typedefs.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void WebRtcCng_K2a16(int16_t* k, int useOrder, int16_t* a); + +#ifdef __cplusplus +} +#endif + +#endif // WEBRTC_MODULES_AUDIO_CODING_CODECS_CNG_CNG_HELPFUNS_H_ diff --git a/webrtc/modules/audio_coding/codecs/cng/cng_unittest.cc b/webrtc/modules/audio_coding/codecs/cng/cng_unittest.cc new file mode 100644 index 0000000000..1061dca69a --- /dev/null +++ b/webrtc/modules/audio_coding/codecs/cng/cng_unittest.cc @@ -0,0 +1,348 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include <string> + +#include "testing/gtest/include/gtest/gtest.h" +#include "webrtc/test/testsupport/fileutils.h" +#include "webrtc_cng.h" + +namespace webrtc { + +enum { + kSidShortIntervalUpdate = 1, + kSidNormalIntervalUpdate = 100, + kSidLongIntervalUpdate = 10000 +}; + +enum { + kCNGNumParamsLow = 0, + kCNGNumParamsNormal = 8, + kCNGNumParamsHigh = WEBRTC_CNG_MAX_LPC_ORDER, + kCNGNumParamsTooHigh = WEBRTC_CNG_MAX_LPC_ORDER + 1 +}; + +enum { + kNoSid, + kForceSid +}; + +class CngTest : public ::testing::Test { + protected: + CngTest(); + virtual void SetUp(); + + CNG_enc_inst* cng_enc_inst_; + CNG_dec_inst* cng_dec_inst_; + int16_t speech_data_[640]; // Max size of CNG internal buffers. 
+}; + +CngTest::CngTest() + : cng_enc_inst_(NULL), + cng_dec_inst_(NULL) { +} + +void CngTest::SetUp() { + FILE* input_file; + const std::string file_name = + webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm"); + input_file = fopen(file_name.c_str(), "rb"); + ASSERT_TRUE(input_file != NULL); + ASSERT_EQ(640, static_cast<int32_t>(fread(speech_data_, sizeof(int16_t), + 640, input_file))); + fclose(input_file); + input_file = NULL; +} + +// Test failing Create. +TEST_F(CngTest, CngCreateFail) { + // Test to see that an invalid pointer is caught. + EXPECT_EQ(-1, WebRtcCng_CreateEnc(NULL)); + EXPECT_EQ(-1, WebRtcCng_CreateDec(NULL)); +} + +// Test normal Create. +TEST_F(CngTest, CngCreate) { + EXPECT_EQ(0, WebRtcCng_CreateEnc(&cng_enc_inst_)); + EXPECT_EQ(0, WebRtcCng_CreateDec(&cng_dec_inst_)); + EXPECT_TRUE(cng_enc_inst_ != NULL); + EXPECT_TRUE(cng_dec_inst_ != NULL); + // Free encoder and decoder memory. + EXPECT_EQ(0, WebRtcCng_FreeEnc(cng_enc_inst_)); + EXPECT_EQ(0, WebRtcCng_FreeDec(cng_dec_inst_)); +} + +// Create CNG encoder, init with faulty values, free CNG encoder. +TEST_F(CngTest, CngInitFail) { + // Create encoder memory. + EXPECT_EQ(0, WebRtcCng_CreateEnc(&cng_enc_inst_)); + + // Call with too few parameters. + EXPECT_EQ(-1, WebRtcCng_InitEnc(cng_enc_inst_, 8000, kSidNormalIntervalUpdate, + kCNGNumParamsLow)); + EXPECT_EQ(6130, WebRtcCng_GetErrorCodeEnc(cng_enc_inst_)); + + // Call with too many parameters. + EXPECT_EQ(-1, WebRtcCng_InitEnc(cng_enc_inst_, 8000, kSidNormalIntervalUpdate, + kCNGNumParamsTooHigh)); + EXPECT_EQ(6130, WebRtcCng_GetErrorCodeEnc(cng_enc_inst_)); + + // Free encoder memory. + EXPECT_EQ(0, WebRtcCng_FreeEnc(cng_enc_inst_)); +} + +TEST_F(CngTest, CngEncode) { + uint8_t sid_data[WEBRTC_CNG_MAX_LPC_ORDER + 1]; + size_t number_bytes; + + // Create encoder memory. 
+ EXPECT_EQ(0, WebRtcCng_CreateEnc(&cng_enc_inst_)); + + // 8 kHz, Normal number of parameters + EXPECT_EQ(0, WebRtcCng_InitEnc(cng_enc_inst_, 8000, kSidNormalIntervalUpdate, + kCNGNumParamsNormal)); + EXPECT_EQ(0, WebRtcCng_Encode(cng_enc_inst_, speech_data_, 80, sid_data, + &number_bytes, kNoSid)); + EXPECT_EQ(kCNGNumParamsNormal + 1, WebRtcCng_Encode( + cng_enc_inst_, speech_data_, 80, sid_data, &number_bytes, kForceSid)); + + // 16 kHz, Normal number of parameters + EXPECT_EQ(0, WebRtcCng_InitEnc(cng_enc_inst_, 16000, kSidNormalIntervalUpdate, + kCNGNumParamsNormal)); + EXPECT_EQ(0, WebRtcCng_Encode(cng_enc_inst_, speech_data_, 160, sid_data, + &number_bytes, kNoSid)); + EXPECT_EQ(kCNGNumParamsNormal + 1, WebRtcCng_Encode( + cng_enc_inst_, speech_data_, 160, sid_data, &number_bytes, kForceSid)); + + // 32 kHz, Max number of parameters + EXPECT_EQ(0, WebRtcCng_InitEnc(cng_enc_inst_, 32000, kSidNormalIntervalUpdate, + kCNGNumParamsHigh)); + EXPECT_EQ(0, WebRtcCng_Encode(cng_enc_inst_, speech_data_, 320, sid_data, + &number_bytes, kNoSid)); + EXPECT_EQ(kCNGNumParamsHigh + 1, WebRtcCng_Encode( + cng_enc_inst_, speech_data_, 320, sid_data, &number_bytes, kForceSid)); + + // 48 kHz, Normal number of parameters + EXPECT_EQ(0, WebRtcCng_InitEnc(cng_enc_inst_, 48000, kSidNormalIntervalUpdate, + kCNGNumParamsNormal)); + EXPECT_EQ(0, WebRtcCng_Encode(cng_enc_inst_, speech_data_, 480, sid_data, + &number_bytes, kNoSid)); + EXPECT_EQ(kCNGNumParamsNormal + 1, WebRtcCng_Encode( + cng_enc_inst_, speech_data_, 480, sid_data, &number_bytes, kForceSid)); + + // 64 kHz, Normal number of parameters + EXPECT_EQ(0, WebRtcCng_InitEnc(cng_enc_inst_, 64000, kSidNormalIntervalUpdate, + kCNGNumParamsNormal)); + EXPECT_EQ(0, WebRtcCng_Encode(cng_enc_inst_, speech_data_, 640, sid_data, + &number_bytes, kNoSid)); + EXPECT_EQ(kCNGNumParamsNormal + 1, WebRtcCng_Encode( + cng_enc_inst_, speech_data_, 640, sid_data, &number_bytes, kForceSid)); + + // Free encoder memory. 
+ EXPECT_EQ(0, WebRtcCng_FreeEnc(cng_enc_inst_)); +} + +// Encode Cng with too long input vector. +TEST_F(CngTest, CngEncodeTooLong) { + uint8_t sid_data[WEBRTC_CNG_MAX_LPC_ORDER + 1]; + size_t number_bytes; + + // Create and init encoder memory. + EXPECT_EQ(0, WebRtcCng_CreateEnc(&cng_enc_inst_)); + EXPECT_EQ(0, WebRtcCng_InitEnc(cng_enc_inst_, 8000, kSidNormalIntervalUpdate, + kCNGNumParamsNormal)); + + // Run encoder with too much data. + EXPECT_EQ(-1, WebRtcCng_Encode(cng_enc_inst_, speech_data_, 641, sid_data, + &number_bytes, kNoSid)); + EXPECT_EQ(6140, WebRtcCng_GetErrorCodeEnc(cng_enc_inst_)); + + // Free encoder memory. + EXPECT_EQ(0, WebRtcCng_FreeEnc(cng_enc_inst_)); +} + +// Call encode without calling init. +TEST_F(CngTest, CngEncodeNoInit) { + uint8_t sid_data[WEBRTC_CNG_MAX_LPC_ORDER + 1]; + size_t number_bytes; + + // Create encoder memory. + EXPECT_EQ(0, WebRtcCng_CreateEnc(&cng_enc_inst_)); + + // Run encoder without calling init. + EXPECT_EQ(-1, WebRtcCng_Encode(cng_enc_inst_, speech_data_, 640, sid_data, + &number_bytes, kNoSid)); + EXPECT_EQ(6120, WebRtcCng_GetErrorCodeEnc(cng_enc_inst_)); + + // Free encoder memory. + EXPECT_EQ(0, WebRtcCng_FreeEnc(cng_enc_inst_)); +} + +// Update SID parameters, for both 9 and 16 parameters. +TEST_F(CngTest, CngUpdateSid) { + uint8_t sid_data[WEBRTC_CNG_MAX_LPC_ORDER + 1]; + size_t number_bytes; + + // Create and initialize encoder and decoder memory. + EXPECT_EQ(0, WebRtcCng_CreateEnc(&cng_enc_inst_)); + EXPECT_EQ(0, WebRtcCng_CreateDec(&cng_dec_inst_)); + EXPECT_EQ(0, WebRtcCng_InitEnc(cng_enc_inst_, 16000, kSidNormalIntervalUpdate, + kCNGNumParamsNormal)); + WebRtcCng_InitDec(cng_dec_inst_); + + // Run normal Encode and UpdateSid. + EXPECT_EQ(kCNGNumParamsNormal + 1, WebRtcCng_Encode( + cng_enc_inst_, speech_data_, 160, sid_data, &number_bytes, kForceSid)); + EXPECT_EQ(0, WebRtcCng_UpdateSid(cng_dec_inst_, sid_data, + kCNGNumParamsNormal + 1)); + + // Reinit with new length. 
+ EXPECT_EQ(0, WebRtcCng_InitEnc(cng_enc_inst_, 16000, kSidNormalIntervalUpdate, + kCNGNumParamsHigh)); + WebRtcCng_InitDec(cng_dec_inst_); + + // Expect 0 because of unstable parameters after switching length. + EXPECT_EQ(0, WebRtcCng_Encode(cng_enc_inst_, speech_data_, 160, sid_data, + &number_bytes, kForceSid)); + EXPECT_EQ(kCNGNumParamsHigh + 1, WebRtcCng_Encode( + cng_enc_inst_, speech_data_ + 160, 160, sid_data, &number_bytes, + kForceSid)); + EXPECT_EQ(0, WebRtcCng_UpdateSid(cng_dec_inst_, sid_data, + kCNGNumParamsNormal + 1)); + + // Free encoder and decoder memory. + EXPECT_EQ(0, WebRtcCng_FreeEnc(cng_enc_inst_)); + EXPECT_EQ(0, WebRtcCng_FreeDec(cng_dec_inst_)); +} + +// Update SID parameters, with wrong parameters or without calling decode. +TEST_F(CngTest, CngUpdateSidErroneous) { + uint8_t sid_data[WEBRTC_CNG_MAX_LPC_ORDER + 1]; + size_t number_bytes; + + // Create encoder and decoder memory. + EXPECT_EQ(0, WebRtcCng_CreateEnc(&cng_enc_inst_)); + EXPECT_EQ(0, WebRtcCng_CreateDec(&cng_dec_inst_)); + + // Encode. + EXPECT_EQ(0, WebRtcCng_InitEnc(cng_enc_inst_, 16000, kSidNormalIntervalUpdate, + kCNGNumParamsNormal)); + EXPECT_EQ(kCNGNumParamsNormal + 1, WebRtcCng_Encode( + cng_enc_inst_, speech_data_, 160, sid_data, &number_bytes, kForceSid)); + + // Update Sid before initializing decoder. + EXPECT_EQ(-1, WebRtcCng_UpdateSid(cng_dec_inst_, sid_data, + kCNGNumParamsNormal + 1)); + EXPECT_EQ(6220, WebRtcCng_GetErrorCodeDec(cng_dec_inst_)); + + // Initialize decoder. + WebRtcCng_InitDec(cng_dec_inst_); + + // First run with valid parameters, then with too many CNG parameters. + // The function will operate correctly by only reading the maximum number of + // parameters, skipping the extra. + EXPECT_EQ(0, WebRtcCng_UpdateSid(cng_dec_inst_, sid_data, + kCNGNumParamsNormal + 1)); + EXPECT_EQ(0, WebRtcCng_UpdateSid(cng_dec_inst_, sid_data, + kCNGNumParamsTooHigh + 1)); + + // Free encoder and decoder memory. 
+ EXPECT_EQ(0, WebRtcCng_FreeEnc(cng_enc_inst_)); + EXPECT_EQ(0, WebRtcCng_FreeDec(cng_dec_inst_)); +} + +// Test to generate cng data, by forcing SID. Both normal and faulty condition. +TEST_F(CngTest, CngGenerate) { + uint8_t sid_data[WEBRTC_CNG_MAX_LPC_ORDER + 1]; + int16_t out_data[640]; + size_t number_bytes; + + // Create and initialize encoder and decoder memory. + EXPECT_EQ(0, WebRtcCng_CreateEnc(&cng_enc_inst_)); + EXPECT_EQ(0, WebRtcCng_CreateDec(&cng_dec_inst_)); + EXPECT_EQ(0, WebRtcCng_InitEnc(cng_enc_inst_, 16000, kSidNormalIntervalUpdate, + kCNGNumParamsNormal)); + WebRtcCng_InitDec(cng_dec_inst_); + + // Normal Encode. + EXPECT_EQ(kCNGNumParamsNormal + 1, WebRtcCng_Encode( + cng_enc_inst_, speech_data_, 160, sid_data, &number_bytes, kForceSid)); + + // Normal UpdateSid. + EXPECT_EQ(0, WebRtcCng_UpdateSid(cng_dec_inst_, sid_data, + kCNGNumParamsNormal + 1)); + + // Two normal Generate, one with new_period. + EXPECT_EQ(0, WebRtcCng_Generate(cng_dec_inst_, out_data, 640, 1)); + EXPECT_EQ(0, WebRtcCng_Generate(cng_dec_inst_, out_data, 640, 0)); + + // Call Generate with too much data (one sample above the 640 maximum). + EXPECT_EQ(-1, WebRtcCng_Generate(cng_dec_inst_, out_data, 641, 0)); + EXPECT_EQ(6140, WebRtcCng_GetErrorCodeDec(cng_dec_inst_)); + + // Free encoder and decoder memory. + EXPECT_EQ(0, WebRtcCng_FreeEnc(cng_enc_inst_)); + EXPECT_EQ(0, WebRtcCng_FreeDec(cng_dec_inst_)); +} + +// Test automatic SID. +TEST_F(CngTest, CngAutoSid) { + uint8_t sid_data[WEBRTC_CNG_MAX_LPC_ORDER + 1]; + size_t number_bytes; + + // Create and initialize encoder and decoder memory. + EXPECT_EQ(0, WebRtcCng_CreateEnc(&cng_enc_inst_)); + EXPECT_EQ(0, WebRtcCng_CreateDec(&cng_dec_inst_)); + EXPECT_EQ(0, WebRtcCng_InitEnc(cng_enc_inst_, 16000, kSidNormalIntervalUpdate, + kCNGNumParamsNormal)); + WebRtcCng_InitDec(cng_dec_inst_); + + // Normal Encode, 100 msec, where no SID data should be generated. 
+ for (int i = 0; i < 10; i++) { + EXPECT_EQ(0, WebRtcCng_Encode(cng_enc_inst_, speech_data_, 160, sid_data, + &number_bytes, kNoSid)); + } + + // We have reached 100 msec, and SID data should be generated. + EXPECT_EQ(kCNGNumParamsNormal + 1, WebRtcCng_Encode( + cng_enc_inst_, speech_data_, 160, sid_data, &number_bytes, kNoSid)); + + // Free encoder and decoder memory. + EXPECT_EQ(0, WebRtcCng_FreeEnc(cng_enc_inst_)); + EXPECT_EQ(0, WebRtcCng_FreeDec(cng_dec_inst_)); +} + +// Test automatic SID, with very short interval. +TEST_F(CngTest, CngAutoSidShort) { + uint8_t sid_data[WEBRTC_CNG_MAX_LPC_ORDER + 1]; + size_t number_bytes; + + // Create and initialize encoder and decoder memory. + EXPECT_EQ(0, WebRtcCng_CreateEnc(&cng_enc_inst_)); + EXPECT_EQ(0, WebRtcCng_CreateDec(&cng_dec_inst_)); + EXPECT_EQ(0, WebRtcCng_InitEnc(cng_enc_inst_, 16000, kSidShortIntervalUpdate, + kCNGNumParamsNormal)); + WebRtcCng_InitDec(cng_dec_inst_); + + // First call will never generate SID, unless forced to. + EXPECT_EQ(0, WebRtcCng_Encode(cng_enc_inst_, speech_data_, 160, sid_data, + &number_bytes, kNoSid)); + + // Normal Encode, 100 msec, SID data should be generated all the time. + for (int i = 0; i < 10; i++) { + EXPECT_EQ(kCNGNumParamsNormal + 1, WebRtcCng_Encode( + cng_enc_inst_, speech_data_, 160, sid_data, &number_bytes, kNoSid)); + } + + // Free encoder and decoder memory. + EXPECT_EQ(0, WebRtcCng_FreeEnc(cng_enc_inst_)); + EXPECT_EQ(0, WebRtcCng_FreeDec(cng_dec_inst_)); +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_coding/codecs/cng/include/audio_encoder_cng.h b/webrtc/modules/audio_coding/codecs/cng/include/audio_encoder_cng.h new file mode 100644 index 0000000000..3ca9eb60f3 --- /dev/null +++ b/webrtc/modules/audio_coding/codecs/cng/include/audio_encoder_cng.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_CODECS_CNG_INCLUDE_AUDIO_ENCODER_CNG_H_ +#define WEBRTC_MODULES_AUDIO_CODING_CODECS_CNG_INCLUDE_AUDIO_ENCODER_CNG_H_ + +#include <vector> + +#include "webrtc/base/scoped_ptr.h" +#include "webrtc/common_audio/vad/include/vad.h" +#include "webrtc/modules/audio_coding/codecs/audio_encoder.h" +#include "webrtc/modules/audio_coding/codecs/cng/include/webrtc_cng.h" + +namespace webrtc { + +// Deleter for use with scoped_ptr. +struct CngInstDeleter { + void operator()(CNG_enc_inst* ptr) const { WebRtcCng_FreeEnc(ptr); } +}; + +class Vad; + +class AudioEncoderCng final : public AudioEncoder { + public: + struct Config { + bool IsOk() const; + + int num_channels = 1; + int payload_type = 13; + // Caller keeps ownership of the AudioEncoder object. + AudioEncoder* speech_encoder = nullptr; + Vad::Aggressiveness vad_mode = Vad::kVadNormal; + int sid_frame_interval_ms = 100; + int num_cng_coefficients = 8; + // The Vad pointer is mainly for testing. If a NULL pointer is passed, the + // AudioEncoderCng creates (and destroys) a Vad object internally. If an + // object is passed, the AudioEncoderCng assumes ownership of the Vad + // object. 
+ Vad* vad = nullptr; + }; + + explicit AudioEncoderCng(const Config& config); + ~AudioEncoderCng() override; + + size_t MaxEncodedBytes() const override; + int SampleRateHz() const override; + int NumChannels() const override; + int RtpTimestampRateHz() const override; + size_t Num10MsFramesInNextPacket() const override; + size_t Max10MsFramesInAPacket() const override; + int GetTargetBitrate() const override; + EncodedInfo EncodeInternal(uint32_t rtp_timestamp, + const int16_t* audio, + size_t max_encoded_bytes, + uint8_t* encoded) override; + void Reset() override; + bool SetFec(bool enable) override; + bool SetDtx(bool enable) override; + bool SetApplication(Application application) override; + void SetMaxPlaybackRate(int frequency_hz) override; + void SetProjectedPacketLossRate(double fraction) override; + void SetTargetBitrate(int target_bps) override; + + private: + EncodedInfo EncodePassive(size_t frames_to_encode, + size_t max_encoded_bytes, + uint8_t* encoded); + EncodedInfo EncodeActive(size_t frames_to_encode, + size_t max_encoded_bytes, + uint8_t* encoded); + size_t SamplesPer10msFrame() const; + + AudioEncoder* speech_encoder_; + const int cng_payload_type_; + const int num_cng_coefficients_; + const int sid_frame_interval_ms_; + std::vector<int16_t> speech_buffer_; + std::vector<uint32_t> rtp_timestamps_; + bool last_frame_active_; + rtc::scoped_ptr<Vad> vad_; + rtc::scoped_ptr<CNG_enc_inst, CngInstDeleter> cng_inst_; + + RTC_DISALLOW_COPY_AND_ASSIGN(AudioEncoderCng); +}; + +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_CODING_CODECS_CNG_INCLUDE_AUDIO_ENCODER_CNG_H_ diff --git a/webrtc/modules/audio_coding/codecs/cng/include/webrtc_cng.h b/webrtc/modules/audio_coding/codecs/cng/include/webrtc_cng.h new file mode 100644 index 0000000000..35660c4c3c --- /dev/null +++ b/webrtc/modules/audio_coding/codecs/cng/include/webrtc_cng.h @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef WEBRTC_MODULES_AUDIO_CODING_CODECS_CNG_MAIN_INCLUDE_WEBRTC_CNG_H_ +#define WEBRTC_MODULES_AUDIO_CODING_CODECS_CNG_MAIN_INCLUDE_WEBRTC_CNG_H_ + +#include <stddef.h> +#include "webrtc/typedefs.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define WEBRTC_CNG_MAX_LPC_ORDER 12 +#define WEBRTC_CNG_MAX_OUTSIZE_ORDER 640 + +/* Error codes, as reported by WebRtcCng_GetErrorCodeEnc/Dec(). */ + +/* 6100 Encoder */ +#define CNG_ENCODER_NOT_INITIATED 6120 +#define CNG_DISALLOWED_LPC_ORDER 6130 +#define CNG_DISALLOWED_FRAME_SIZE 6140 +#define CNG_DISALLOWED_SAMPLING_FREQUENCY 6150 +/* 6200 Decoder */ +#define CNG_DECODER_NOT_INITIATED 6220 + /* Opaque instance handles; only incomplete struct types are declared here. */ +typedef struct WebRtcCngEncInst CNG_enc_inst; +typedef struct WebRtcCngDecInst CNG_dec_inst; + +/**************************************************************************** + * WebRtcCng_CreateEnc/Dec(...) + * + * These functions allocate a new encoder/decoder instance. The instance + * must be initialized with WebRtcCng_InitEnc/Dec() before it is used. + * + * Output: + * - cng_inst : Receives a pointer to the newly allocated instance + * + * Return value : 0 - Ok + * -1 - Error (cng_inst is NULL or allocation failed) + */ +int16_t WebRtcCng_CreateEnc(CNG_enc_inst** cng_inst); +int16_t WebRtcCng_CreateDec(CNG_dec_inst** cng_inst); + +/**************************************************************************** + * WebRtcCng_InitEnc/Dec(...) + * + * This function initializes a instance + * + * Input: + * - cng_inst : Instance that should be initialized + * + * - fs : 8000 for narrowband and 16000 for wideband + * - interval : generate SID data every interval ms + * - quality : Number of refl. 
coefs, maximum allowed is 12 + * + * Output: + * - cng_inst : Initialized instance + * + * Return value : 0 - Ok + * -1 - Error + */ + /* Note: only WebRtcCng_InitEnc takes fs, interval and quality and returns a status (-1 when quality is outside 1..WEBRTC_CNG_MAX_LPC_ORDER); WebRtcCng_InitDec takes the instance only and returns nothing. */ +int WebRtcCng_InitEnc(CNG_enc_inst* cng_inst, int fs, int16_t interval, + int16_t quality); +void WebRtcCng_InitDec(CNG_dec_inst* cng_inst); + +/**************************************************************************** + * WebRtcCng_FreeEnc/Dec(...) + * + * These functions free the dynamic memory of a specified instance + * + * Input: + * - cng_inst : Pointer to created instance that should be freed + * + * Return value : 0 - Ok + * -1 - Error + */ +int16_t WebRtcCng_FreeEnc(CNG_enc_inst* cng_inst); +int16_t WebRtcCng_FreeDec(CNG_dec_inst* cng_inst); + +/**************************************************************************** + * WebRtcCng_Encode(...) + * + * This function analyzes background noise and, when a SID frame is due or + * forced, writes that frame to SIDdata + * + * Input: + * - cng_inst : Pointer to created and initialized instance + * - speech : Signal to be analyzed + * - nrOfSamples : Size of speech vector, at most + * WEBRTC_CNG_MAX_OUTSIZE_ORDER samples + * - forceSID : not zero to force SID frame and reset + * + * Output: + * - SIDdata : SID frame; needs room for quality + 1 bytes, where + * quality is the value given to WebRtcCng_InitEnc + * - bytesOut : Nr of bytes to transmit, might be 0 + * + * Return value : >= 0 - Nr of bytes written to SIDdata (equals bytesOut, + * 0 when no SID frame was produced) + * -1 - Error + */ +int WebRtcCng_Encode(CNG_enc_inst* cng_inst, int16_t* speech, + size_t nrOfSamples, uint8_t* SIDdata, + size_t* bytesOut, int16_t forceSID); + +/**************************************************************************** + * WebRtcCng_UpdateSid(...) + * + * This function updates the CN state when a new SID packet arrives + * + * Input: + * - cng_inst : Pointer to created and initialized decoder instance + * - SID : SID packet, all headers removed + * - length : Length in bytes of SID packet + * + * Return value : 0 - Ok + * -1 - Error + */ +int16_t WebRtcCng_UpdateSid(CNG_dec_inst* cng_inst, uint8_t* SID, + size_t length); + +/**************************************************************************** + * WebRtcCng_Generate(...) 
+ * + * These functions generates CN data when needed + * + * Input: + * - cng_inst : Pointer to created instance that should be freed + * - outData : pointer to area to write CN data + * - nrOfSamples : How much data to generate + * - new_period : >0 if a new period of CNG, will reset history + * + * Return value : 0 - Ok + * -1 - Error + */ /* Correction to the description above: cng_inst is the decoder instance to generate noise from; this call does not free it. nrOfSamples may be at most WEBRTC_CNG_MAX_OUTSIZE_ORDER, and any non-zero new_period is treated as a new period. */ +int16_t WebRtcCng_Generate(CNG_dec_inst* cng_inst, int16_t* outData, + size_t nrOfSamples, int16_t new_period); + +/***************************************************************************** + * WebRtcCng_GetErrorCodeEnc/Dec(...) + * + * These functions can be used to check the error code of a CNG instance. + * When a function returns -1 an error code will be set for that instance. + * The functions below extract the code of the last error that occurred in + * the specified instance. + * + * Input: + * - CNG_inst : CNG enc/dec instance + * + * Return value : Error code + */ +int16_t WebRtcCng_GetErrorCodeEnc(CNG_enc_inst* cng_inst); +int16_t WebRtcCng_GetErrorCodeDec(CNG_dec_inst* cng_inst); + +#ifdef __cplusplus +} +#endif + +#endif // WEBRTC_MODULES_AUDIO_CODING_CODECS_CNG_MAIN_INCLUDE_WEBRTC_CNG_H_ diff --git a/webrtc/modules/audio_coding/codecs/cng/webrtc_cng.c b/webrtc/modules/audio_coding/codecs/cng/webrtc_cng.c new file mode 100644 index 0000000000..8dddc5c717 --- /dev/null +++ b/webrtc/modules/audio_coding/codecs/cng/webrtc_cng.c @@ -0,0 +1,603 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc_cng.h" + +#include <string.h> +#include <stdlib.h> + +#include "cng_helpfuns.h" +#include "signal_processing_library.h" + /* Decoder state. Callers hold it as an opaque CNG_dec_inst pointer, which the functions in this file cast to this type. */ +typedef struct WebRtcCngDecoder_ { + uint32_t dec_seed; /* State passed to WebRtcSpl_RandN when generating the excitation. */ + int32_t dec_target_energy; /* Set by WebRtcCng_UpdateSid from WebRtcCng_kDbov. */ + int32_t dec_used_energy; /* Moved halfway towards the target on every WebRtcCng_Generate call. */ + int16_t dec_target_reflCoefs[WEBRTC_CNG_MAX_LPC_ORDER + 1]; /* Q15, from the latest SID packet. */ + int16_t dec_used_reflCoefs[WEBRTC_CNG_MAX_LPC_ORDER + 1]; /* Q15, smoothed towards the target. */ + int16_t dec_filtstate[WEBRTC_CNG_MAX_LPC_ORDER + 1]; /* Passed to WebRtcSpl_FilterAR as filter state. */ + int16_t dec_filtstateLow[WEBRTC_CNG_MAX_LPC_ORDER + 1]; /* Passed to WebRtcSpl_FilterAR as low-part filter state. */ + int16_t dec_Efiltstate[WEBRTC_CNG_MAX_LPC_ORDER + 1]; /* Not referenced by the functions in this file. */ + int16_t dec_EfiltstateLow[WEBRTC_CNG_MAX_LPC_ORDER + 1]; /* Not referenced by the functions in this file. */ + int16_t dec_order; /* SID length minus one; 5 after WebRtcCng_InitDec. */ + int16_t dec_target_scale_factor; /* Q29 per the original note, but WebRtcCng_Generate treats it as Q13 - TODO confirm. */ + int16_t dec_used_scale_factor; /* Q29 per the original note, but WebRtcCng_Generate treats it as Q13 - TODO confirm. */ + int16_t target_scale_factor; /* Q13. Not referenced by the functions in this file. */ + int16_t errorcode; /* Last error, read by WebRtcCng_GetErrorCodeDec. */ + int16_t initflag; /* 1 once WebRtcCng_InitDec has run. */ +} WebRtcCngDecoder; + /* Encoder state. Callers hold it as an opaque CNG_enc_inst pointer, which the functions in this file cast to this type. */ +typedef struct WebRtcCngEncoder_ { + size_t enc_nrOfCoefs; /* The quality argument of WebRtcCng_InitEnc. */ + int enc_sampfreq; /* The fs argument of WebRtcCng_InitEnc. */ + int16_t enc_interval; /* The interval argument of WebRtcCng_InitEnc, in ms. */ + int16_t enc_msSinceSID; /* Milliseconds of audio accumulated since the last SID frame. */ + int32_t enc_Energy; /* Smoothed frame energy; kept at 1 or above by WebRtcCng_Encode. */ + int16_t enc_reflCoefs[WEBRTC_CNG_MAX_LPC_ORDER + 1]; /* Q15, smoothed reflection coefficients. */ + int32_t enc_corrVector[WEBRTC_CNG_MAX_LPC_ORDER + 1]; /* Only zeroed by WebRtcCng_InitEnc; WebRtcCng_Encode uses a local vector. */ + uint32_t enc_seed; /* Only set by WebRtcCng_InitEnc in this file. */ + int16_t errorcode; /* Last error, read by WebRtcCng_GetErrorCodeEnc. */ + int16_t initflag; /* 1 once WebRtcCng_InitEnc has succeeded. */ +} WebRtcCngEncoder; + /* Energy value for each noise-level byte (SID byte 0, index 0..93); entries never increase with the index. Presumably one entry per -dBov step as in RFC 3389 - TODO confirm. */ +const int32_t WebRtcCng_kDbov[94] = { + 1081109975, 858756178, 682134279, 541838517, 430397633, 341876992, + 271562548, 215709799, 171344384, 136103682, 108110997, 85875618, + 68213428, 54183852, 43039763, 34187699, 27156255, 21570980, + 17134438, 13610368, 10811100, 8587562, 6821343, 5418385, + 4303976, 3418770, 2715625, 2157098, 1713444, 1361037, + 1081110, 858756, 682134, 541839, 430398, 341877, + 271563, 215710, 171344, 136104, 108111, 85876, + 68213, 54184, 43040, 34188, 27156, 21571, + 17134, 13610, 10811, 8588, 6821, 5418, + 4304, 3419, 2716, 2157, 1713, 1361, + 1081, 859, 682, 542, 430, 342, + 272, 216, 171, 136, 108, 86, + 68, 54, 43, 34, 27, 22, + 17, 14, 11, 9, 7, 5, + 4, 3, 3, 2, 2, 1, + 1, 1, 1, 1 +}; + /* Q15 window that WebRtcCng_Encode multiplies into the autocorrelation values (its bandwidth expansion step). */ +const int16_t WebRtcCng_kCorrWindow[WEBRTC_CNG_MAX_LPC_ORDER] = { + 32702, 32636, 32570, 32505, 
32439, 32374, + 32309, 32244, 32179, 32114, 32049, 31985 +}; + +/**************************************************************************** + * WebRtcCng_CreateEnc/Dec(...) + * + * These functions allocate a new encoder/decoder instance with malloc, + * clear its errorcode and initflag, and call WebRtcSpl_Init(). The instance + * is not usable until WebRtcCng_InitEnc/Dec() has been called on it. + * + * Output: + * - cng_inst : Receives a pointer to the newly allocated instance + * (set to NULL if the allocation fails) + * + * Return value : 0 - Ok + * -1 - Error (cng_inst is NULL or allocation failed) + */ +int16_t WebRtcCng_CreateEnc(CNG_enc_inst** cng_inst) { + if (cng_inst != NULL) { + *cng_inst = (CNG_enc_inst*) malloc(sizeof(WebRtcCngEncoder)); + if (*cng_inst != NULL) { + (*(WebRtcCngEncoder**) cng_inst)->errorcode = 0; + (*(WebRtcCngEncoder**) cng_inst)->initflag = 0; + + /* Needed to get the right function pointers in SPLIB. */ + WebRtcSpl_Init(); + + return 0; + } else { + /* The memory could not be allocated. */ + return -1; + } + } else { + /* The input pointer is invalid (NULL). */ + return -1; + } +} + /* Decoder counterpart of WebRtcCng_CreateEnc; same steps and return values. */ +int16_t WebRtcCng_CreateDec(CNG_dec_inst** cng_inst) { + if (cng_inst != NULL ) { + *cng_inst = (CNG_dec_inst*) malloc(sizeof(WebRtcCngDecoder)); + if (*cng_inst != NULL ) { + (*(WebRtcCngDecoder**) cng_inst)->errorcode = 0; + (*(WebRtcCngDecoder**) cng_inst)->initflag = 0; + + /* Needed to get the right function pointers in SPLIB. */ + WebRtcSpl_Init(); + + return 0; + } else { + /* The memory could not be allocated. */ + return -1; + } + } else { + /* The input pointer is invalid (NULL). */ + return -1; + } +} + +/**************************************************************************** + * WebRtcCng_InitEnc/Dec(...) 
+ * + * This function initializes a instance + * + * Input: + * - cng_inst : Instance that should be initialized + * + * - fs : 8000 for narrowband and 16000 for wideband + * - interval : generate SID data every interval ms + * - quality : TBD + * + * Output: + * - cng_inst : Initialized instance + * + * Return value : 0 - Ok + * -1 - Error + */ /* Notes: quality is the number of reflection coefficients and must be in 1..WEBRTC_CNG_MAX_LPC_ORDER, otherwise -1 is returned with CNG_DISALLOWED_LPC_ORDER set. fs is stored without validation and is later used by WebRtcCng_Encode to convert sample counts to milliseconds. cng_inst must not be NULL. */ +int WebRtcCng_InitEnc(CNG_enc_inst* cng_inst, int fs, int16_t interval, + int16_t quality) { + int i; + WebRtcCngEncoder* inst = (WebRtcCngEncoder*) cng_inst; + memset(inst, 0, sizeof(WebRtcCngEncoder)); /* Also clears initflag, so a failed init leaves the encoder uninitialized. */ + + /* Check LPC order. */ + if (quality > WEBRTC_CNG_MAX_LPC_ORDER || quality <= 0) { + inst->errorcode = CNG_DISALLOWED_LPC_ORDER; + return -1; + } + + inst->enc_sampfreq = fs; + inst->enc_interval = interval; + inst->enc_nrOfCoefs = quality; + inst->enc_msSinceSID = 0; + inst->enc_seed = 7777; /* For debugging only. */ + inst->enc_Energy = 0; + for (i = 0; i < (WEBRTC_CNG_MAX_LPC_ORDER + 1); i++) { + inst->enc_reflCoefs[i] = 0; + inst->enc_corrVector[i] = 0; + } + inst->initflag = 1; + + return 0; +} + /* Resets the decoder to its initial state and marks it initialized. The struct is zeroed first, so the explicit zero stores below repeat what memset already did. cng_inst must not be NULL. */ +void WebRtcCng_InitDec(CNG_dec_inst* cng_inst) { + int i; + + WebRtcCngDecoder* inst = (WebRtcCngDecoder*) cng_inst; + + memset(inst, 0, sizeof(WebRtcCngDecoder)); + inst->dec_seed = 7777; /* For debugging only. */ + inst->dec_order = 5; + inst->dec_target_scale_factor = 0; + inst->dec_used_scale_factor = 0; + for (i = 0; i < (WEBRTC_CNG_MAX_LPC_ORDER + 1); i++) { + inst->dec_filtstate[i] = 0; + inst->dec_target_reflCoefs[i] = 0; + inst->dec_used_reflCoefs[i] = 0; + } + inst->dec_target_reflCoefs[0] = 0; + inst->dec_used_reflCoefs[0] = 0; + inst->dec_used_energy = 0; + inst->initflag = 1; +} + +/**************************************************************************** + * WebRtcCng_FreeEnc/Dec(...) 
+ * + * These functions frees the dynamic memory of a specified instance + * + * Input: + * - cng_inst : Pointer to created instance that should be freed + * + * Return value : 0 - Ok + * -1 - Error + */ /* Note: both functions simply hand the pointer to free() and always return 0. */ +int16_t WebRtcCng_FreeEnc(CNG_enc_inst* cng_inst) { + free(cng_inst); + return 0; +} + +int16_t WebRtcCng_FreeDec(CNG_dec_inst* cng_inst) { + free(cng_inst); + return 0; +} + +/**************************************************************************** + * WebRtcCng_Encode(...) + * + * This function analyzes background noise and writes a SID frame to SIDdata + * when one is due (more than enc_interval - 1 ms since the last one) or + * forced + * + * Input: + * - cng_inst : Pointer to created and initialized instance + * - speech : Signal (noise) to be analyzed + * - nrOfSamples : Size of speech vector, at most + * WEBRTC_CNG_MAX_OUTSIZE_ORDER samples + * - forceSID : not zero to force a SID frame built from this + * frame's values instead of the averaged history + * + * Output: + * - SIDdata : SID frame: noise level byte followed by + * enc_nrOfCoefs quantized reflection coefficients + * - bytesOut : Nr of bytes to transmit, might be 0 + * + * Return value : >= 0 - Nr of bytes written to SIDdata (equals bytesOut) + * -1 - Error (bytesOut is not written) + */ +int WebRtcCng_Encode(CNG_enc_inst* cng_inst, int16_t* speech, + size_t nrOfSamples, uint8_t* SIDdata, + size_t* bytesOut, int16_t forceSID) { + WebRtcCngEncoder* inst = (WebRtcCngEncoder*) cng_inst; + + int16_t arCoefs[WEBRTC_CNG_MAX_LPC_ORDER + 1]; + int32_t corrVector[WEBRTC_CNG_MAX_LPC_ORDER + 1]; + int16_t refCs[WEBRTC_CNG_MAX_LPC_ORDER + 1]; + int16_t hanningW[WEBRTC_CNG_MAX_OUTSIZE_ORDER]; + int16_t ReflBeta = 19661; /* 0.6 in q15. */ + int16_t ReflBetaComp = 13107; /* 0.4 in q15. */ + int32_t outEnergy; + int outShifts; + size_t i; + int stab; + int acorrScale; + size_t index; + size_t ind, factor; + int32_t* bptr; + int32_t blo, bhi; + int16_t negate; + const int16_t* aptr; + int16_t speechBuf[WEBRTC_CNG_MAX_OUTSIZE_ORDER]; + + /* Check if encoder initiated. */ + if (inst->initflag != 1) { + inst->errorcode = CNG_ENCODER_NOT_INITIATED; + return -1; + } + + /* Check framesize. */ + if (nrOfSamples > WEBRTC_CNG_MAX_OUTSIZE_ORDER) { + inst->errorcode = CNG_DISALLOWED_FRAME_SIZE; + return -1; + } + /* Work on a local copy; the windowing below modifies the samples in place. */ + for (i = 0; i < nrOfSamples; i++) { + speechBuf[i] = speech[i]; + } + + factor = nrOfSamples; + + /* Calculate energy and a coefficients. 
*/ + outEnergy = WebRtcSpl_Energy(speechBuf, nrOfSamples, &outShifts); + while (outShifts > 0) { + /* We can only do 5 shifts without destroying accuracy in + * division factor. */ + if (outShifts > 5) { + outEnergy <<= (outShifts - 5); + outShifts = 5; + } else { + factor /= 2; + outShifts--; + } + } + outEnergy = WebRtcSpl_DivW32W16(outEnergy, (int16_t)factor); + /* Run the LPC analysis only when the frame has energy above 1; otherwise all reflection coefficients are set to zero. */ + if (outEnergy > 1) { + /* Create Hanning Window: the first half is generated and then mirrored + * into the second half. */ + WebRtcSpl_GetHanningWindow(hanningW, nrOfSamples / 2); + for (i = 0; i < (nrOfSamples / 2); i++) + hanningW[nrOfSamples - i - 1] = hanningW[i]; + + WebRtcSpl_ElementwiseVectorMult(speechBuf, hanningW, speechBuf, nrOfSamples, + 14); + + WebRtcSpl_AutoCorrelation(speechBuf, nrOfSamples, inst->enc_nrOfCoefs, + corrVector, &acorrScale); + + if (*corrVector == 0) + *corrVector = WEBRTC_SPL_WORD16_MAX; + + /* Adds the bandwidth expansion. */ + aptr = WebRtcCng_kCorrWindow; + bptr = corrVector; + + /* (zzz) lpc16_1 = 17+1+820+2+2 = 842 (ordo2=700). */ + for (ind = 0; ind < inst->enc_nrOfCoefs; ind++) { + /* The below code multiplies the 16 b corrWindow values (Q15) with + * the 32 b corrvector (Q0) and shifts the result down 15 steps. + * The magnitude is multiplied and the sign is restored afterwards. */ + negate = *bptr < 0; + if (negate) + *bptr = -*bptr; + + blo = (int32_t) * aptr * (*bptr & 0xffff); + bhi = ((blo >> 16) & 0xffff) + + ((int32_t)(*aptr++) * ((*bptr >> 16) & 0xffff)); + blo = (blo & 0xffff) | ((bhi & 0xffff) << 16); + + *bptr = (((bhi >> 16) & 0x7fff) << 17) | ((uint32_t) blo >> 15); + if (negate) + *bptr = -*bptr; + bptr++; + } + /* End of bandwidth expansion. */ + + stab = WebRtcSpl_LevinsonDurbin(corrVector, arCoefs, refCs, + inst->enc_nrOfCoefs); + + if (!stab) { + /* Disregard this frame: no SID output, and neither the averaged + * history nor enc_msSinceSID is updated. */ + *bytesOut = 0; + return 0; + } + + } else { + for (i = 0; i < inst->enc_nrOfCoefs; i++) + refCs[i] = 0; + } + + if (forceSID) { + /* Read instantaneous values instead of averaged. 
*/ + for (i = 0; i < inst->enc_nrOfCoefs; i++) + inst->enc_reflCoefs[i] = refCs[i]; + inst->enc_Energy = outEnergy; + } else { + /* Average history with new values: 0.6 * old + 0.4 * new for the + * coefficients, 0.75 * old + 0.25 * new for the energy. */ + for (i = 0; i < (inst->enc_nrOfCoefs); i++) { + inst->enc_reflCoefs[i] = (int16_t) WEBRTC_SPL_MUL_16_16_RSFT( + inst->enc_reflCoefs[i], ReflBeta, 15); + inst->enc_reflCoefs[i] += (int16_t) WEBRTC_SPL_MUL_16_16_RSFT( + refCs[i], ReflBetaComp, 15); + } + inst->enc_Energy = (outEnergy >> 2) + (inst->enc_Energy >> 1) + + (inst->enc_Energy >> 2); + } + /* Keep the stored energy at 1 or above. */ + if (inst->enc_Energy < 1) { + inst->enc_Energy = 1; + } + + if ((inst->enc_msSinceSID > (inst->enc_interval - 1)) || forceSID) { + + /* Search for best dbov value: the first table index in 1..92 whose entry + * is below the energy. If there is none, 94 is sent (WebRtcCng_UpdateSid + * limits the received value to 93). */ + index = 0; + for (i = 1; i < 93; i++) { + /* Always round downwards. */ + if ((inst->enc_Energy - WebRtcCng_kDbov[i]) > 0) { + index = i; + break; + } + } + if ((i == 93) && (index == 0)) + index = 94; + SIDdata[0] = (uint8_t)index; + + /* Quantize coefficients with tweak for WebRtc implementation of RFC3389: + * the 127 offset is left out when the maximum LPC order is used. */ + if (inst->enc_nrOfCoefs == WEBRTC_CNG_MAX_LPC_ORDER) { + for (i = 0; i < inst->enc_nrOfCoefs; i++) { + /* Q15 to Q7 with rounding. */ + SIDdata[i + 1] = ((inst->enc_reflCoefs[i] + 128) >> 8); + } + } else { + for (i = 0; i < inst->enc_nrOfCoefs; i++) { + /* Q15 to Q7 with rounding. */ + SIDdata[i + 1] = (127 + ((inst->enc_reflCoefs[i] + 128) >> 8)); + } + } + + inst->enc_msSinceSID = 0; + *bytesOut = inst->enc_nrOfCoefs + 1; + /* The SID timer restarts at this frame's duration in ms. */ + inst->enc_msSinceSID += + (int16_t)((1000 * nrOfSamples) / inst->enc_sampfreq); + return (int)(inst->enc_nrOfCoefs + 1); + } else { + inst->enc_msSinceSID += + (int16_t)((1000 * nrOfSamples) / inst->enc_sampfreq); + *bytesOut = 0; + return 0; + } +} + +/**************************************************************************** + * WebRtcCng_UpdateSid(...) 
+ * + * These functions updates the CN state, when a new SID packet arrives + * + * Input: + * - cng_inst : Pointer to created instance that should be freed + * - SID : SID packet, all headers removed + * - length : Length in bytes of SID packet + * + * Return value : 0 - Ok + * -1 - Error + */ +int16_t WebRtcCng_UpdateSid(CNG_dec_inst* cng_inst, uint8_t* SID, + size_t length) { + + WebRtcCngDecoder* inst = (WebRtcCngDecoder*) cng_inst; + int16_t refCs[WEBRTC_CNG_MAX_LPC_ORDER]; + int32_t targetEnergy; + int i; + + if (inst->initflag != 1) { + inst->errorcode = CNG_DECODER_NOT_INITIATED; + return -1; + } + + /* Throw away reflection coefficients of higher order than we can handle. */ + if (length > (WEBRTC_CNG_MAX_LPC_ORDER + 1)) + length = WEBRTC_CNG_MAX_LPC_ORDER + 1; + + inst->dec_order = (int16_t)length - 1; + + if (SID[0] > 93) + SID[0] = 93; + targetEnergy = WebRtcCng_kDbov[SID[0]]; + /* Take down target energy to 75%. */ + targetEnergy = targetEnergy >> 1; + targetEnergy += targetEnergy >> 2; + + inst->dec_target_energy = targetEnergy; + + /* Reconstruct coeffs with tweak for WebRtc implementation of RFC3389. */ + if (inst->dec_order == WEBRTC_CNG_MAX_LPC_ORDER) { + for (i = 0; i < (inst->dec_order); i++) { + refCs[i] = SID[i + 1] << 8; /* Q7 to Q15*/ + inst->dec_target_reflCoefs[i] = refCs[i]; + } + } else { + for (i = 0; i < (inst->dec_order); i++) { + refCs[i] = (SID[i + 1] - 127) << 8; /* Q7 to Q15. */ + inst->dec_target_reflCoefs[i] = refCs[i]; + } + } + + for (i = (inst->dec_order); i < WEBRTC_CNG_MAX_LPC_ORDER; i++) { + refCs[i] = 0; + inst->dec_target_reflCoefs[i] = refCs[i]; + } + + return 0; +} + +/**************************************************************************** + * WebRtcCng_Generate(...) 
+ * + * These functions generates CN data when needed + * + * Input: + * - cng_inst : Pointer to created instance that should be freed + * - outData : pointer to area to write CN data + * - nrOfSamples : How much data to generate + * + * Return value : 0 - Ok + * -1 - Error + */ +int16_t WebRtcCng_Generate(CNG_dec_inst* cng_inst, int16_t* outData, + size_t nrOfSamples, int16_t new_period) { + WebRtcCngDecoder* inst = (WebRtcCngDecoder*) cng_inst; + + size_t i; + int16_t excitation[WEBRTC_CNG_MAX_OUTSIZE_ORDER]; + int16_t low[WEBRTC_CNG_MAX_OUTSIZE_ORDER]; + int16_t lpPoly[WEBRTC_CNG_MAX_LPC_ORDER + 1]; + int16_t ReflBetaStd = 26214; /* 0.8 in q15. */ + int16_t ReflBetaCompStd = 6553; /* 0.2 in q15. */ + int16_t ReflBetaNewP = 19661; /* 0.6 in q15. */ + int16_t ReflBetaCompNewP = 13107; /* 0.4 in q15. */ + int16_t Beta, BetaC, tmp1, tmp2, tmp3; + int32_t targetEnergy; + int16_t En; + int16_t temp16; + + if (nrOfSamples > WEBRTC_CNG_MAX_OUTSIZE_ORDER) { + inst->errorcode = CNG_DISALLOWED_FRAME_SIZE; + return -1; + } + + if (new_period) { + inst->dec_used_scale_factor = inst->dec_target_scale_factor; + Beta = ReflBetaNewP; + BetaC = ReflBetaCompNewP; + } else { + Beta = ReflBetaStd; + BetaC = ReflBetaCompStd; + } + + /* Here we use a 0.5 weighting, should possibly be modified to 0.6. */ + tmp1 = inst->dec_used_scale_factor << 2; /* Q13->Q15 */ + tmp2 = inst->dec_target_scale_factor << 2; /* Q13->Q15 */ + tmp3 = (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(tmp1, Beta, 15); + tmp3 += (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(tmp2, BetaC, 15); + inst->dec_used_scale_factor = tmp3 >> 2; /* Q15->Q13 */ + + inst->dec_used_energy = inst->dec_used_energy >> 1; + inst->dec_used_energy += inst->dec_target_energy >> 1; + + /* Do the same for the reflection coeffs. 
*/ + for (i = 0; i < WEBRTC_CNG_MAX_LPC_ORDER; i++) { + inst->dec_used_reflCoefs[i] = (int16_t) WEBRTC_SPL_MUL_16_16_RSFT( + inst->dec_used_reflCoefs[i], Beta, 15); + inst->dec_used_reflCoefs[i] += (int16_t) WEBRTC_SPL_MUL_16_16_RSFT( + inst->dec_target_reflCoefs[i], BetaC, 15); + } + + /* Compute the polynomial coefficients. */ + WebRtcCng_K2a16(inst->dec_used_reflCoefs, WEBRTC_CNG_MAX_LPC_ORDER, lpPoly); + + + targetEnergy = inst->dec_used_energy; + + /* Calculate scaling factor based on filter energy. */ + En = 8192; /* 1.0 in Q13. */ + for (i = 0; i < (WEBRTC_CNG_MAX_LPC_ORDER); i++) { + + /* Floating point value for reference. + E *= 1.0 - (inst->dec_used_reflCoefs[i] / 32768.0) * + (inst->dec_used_reflCoefs[i] / 32768.0); + */ + + /* Same in fixed point. */ + /* K(i).^2 in Q15. */ + temp16 = (int16_t) WEBRTC_SPL_MUL_16_16_RSFT( + inst->dec_used_reflCoefs[i], inst->dec_used_reflCoefs[i], 15); + /* 1 - K(i).^2 in Q15. */ + temp16 = 0x7fff - temp16; + En = (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(En, temp16, 15); + } + + /* float scaling= sqrt(E * inst->dec_target_energy / (1 << 24)); */ + + /* Calculate sqrt(En * target_energy / excitation energy) */ + targetEnergy = WebRtcSpl_Sqrt(inst->dec_used_energy); + + En = (int16_t) WebRtcSpl_Sqrt(En) << 6; + En = (En * 3) >> 1; /* 1.5 estimates sqrt(2). */ + inst->dec_used_scale_factor = (int16_t)((En * targetEnergy) >> 12); + + /* Generate excitation. */ + /* Excitation energy per sample is 2.^24 - Q13 N(0,1). */ + for (i = 0; i < nrOfSamples; i++) { + excitation[i] = WebRtcSpl_RandN(&inst->dec_seed) >> 1; + } + + /* Scale to correct energy. */ + WebRtcSpl_ScaleVector(excitation, excitation, inst->dec_used_scale_factor, + nrOfSamples, 13); + + /* |lpPoly| - Coefficients in Q12. + * |excitation| - Speech samples. + * |nst->dec_filtstate| - State preservation. + * |outData| - Filtered speech samples. 
*/ + WebRtcSpl_FilterAR(lpPoly, WEBRTC_CNG_MAX_LPC_ORDER + 1, excitation, + nrOfSamples, inst->dec_filtstate, WEBRTC_CNG_MAX_LPC_ORDER, + inst->dec_filtstateLow, WEBRTC_CNG_MAX_LPC_ORDER, outData, + low, nrOfSamples); + + return 0; +} + +/**************************************************************************** + * WebRtcCng_GetErrorCodeEnc/Dec(...) + * + * This functions can be used to check the error code of a CNG instance. When + * a function returns -1 a error code will be set for that instance. The + * function below extract the code of the last error that occured in the + * specified instance. + * + * Input: + * - CNG_inst : CNG enc/dec instance + * + * Return value : Error code + */ +int16_t WebRtcCng_GetErrorCodeEnc(CNG_enc_inst* cng_inst) { + /* Typecast pointer to real structure. */ + WebRtcCngEncoder* inst = (WebRtcCngEncoder*) cng_inst; + return inst->errorcode; +} + +int16_t WebRtcCng_GetErrorCodeDec(CNG_dec_inst* cng_inst) { + /* Typecast pointer to real structure. */ + WebRtcCngDecoder* inst = (WebRtcCngDecoder*) cng_inst; + return inst->errorcode; +} |