aboutsummaryrefslogtreecommitdiff
path: root/webrtc/modules/audio_coding/acm2/acm_receiver.cc
diff options
context:
space:
mode:
Diffstat (limited to 'webrtc/modules/audio_coding/acm2/acm_receiver.cc')
-rw-r--r--webrtc/modules/audio_coding/acm2/acm_receiver.cc541
1 files changed, 541 insertions, 0 deletions
diff --git a/webrtc/modules/audio_coding/acm2/acm_receiver.cc b/webrtc/modules/audio_coding/acm2/acm_receiver.cc
new file mode 100644
index 0000000000..f45d5d3414
--- /dev/null
+++ b/webrtc/modules/audio_coding/acm2/acm_receiver.cc
@@ -0,0 +1,541 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_coding/acm2/acm_receiver.h"
+
+#include <stdlib.h> // malloc
+
+#include <algorithm> // sort
+#include <vector>
+
+#include "webrtc/base/checks.h"
+#include "webrtc/base/format_macros.h"
+#include "webrtc/base/logging.h"
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/common_types.h"
+#include "webrtc/modules/audio_coding/codecs/audio_decoder.h"
+#include "webrtc/modules/audio_coding/acm2/acm_resampler.h"
+#include "webrtc/modules/audio_coding/acm2/call_statistics.h"
+#include "webrtc/modules/audio_coding/neteq/include/neteq.h"
+#include "webrtc/system_wrappers/include/clock.h"
+#include "webrtc/system_wrappers/include/critical_section_wrapper.h"
+#include "webrtc/system_wrappers/include/tick_util.h"
+#include "webrtc/system_wrappers/include/trace.h"
+
+namespace webrtc {
+
+namespace acm2 {
+
+namespace {
+
+// Translates the NetEq output |type| into the |vad_activity_| and
+// |speech_type_| fields of |audio_frame|.
+//
+// The caller must store the previous audio activity in
+// |audio_frame->vad_activity_| before calling: when VAD is enabled and the
+// output is PLC, that field is deliberately left as it is.
+void SetAudioFrameActivityAndType(bool vad_enabled,
+                                  NetEqOutputType type,
+                                  AudioFrame* audio_frame) {
+  if (!vad_enabled) {
+    // Receive VAD is inactive, so the activity is always reported as unknown.
+    audio_frame->vad_activity_ = AudioFrame::kVadUnknown;
+    switch (type) {
+      case kOutputNormal:
+        audio_frame->speech_type_ = AudioFrame::kNormalSpeech;
+        return;
+      case kOutputCNG:
+        audio_frame->speech_type_ = AudioFrame::kCNG;
+        return;
+      case kOutputPLC:
+        audio_frame->speech_type_ = AudioFrame::kPLC;
+        return;
+      case kOutputPLCtoCNG:
+        audio_frame->speech_type_ = AudioFrame::kPLCCNG;
+        return;
+      case kOutputVADPassive:
+        // No VAD decision is expected while post-decoding VAD is off, but if
+        // it was active and then disabled we may land here for a few frames.
+        audio_frame->speech_type_ = AudioFrame::kNormalSpeech;
+        LOG(WARNING) << "Post-decoding VAD is disabled but output is "
+                     << "labeled VAD-passive";
+        return;
+      default:
+        assert(false);
+        return;
+    }
+  }
+
+  // Receive VAD is active: both fields follow the NetEq output type.
+  switch (type) {
+    case kOutputNormal:
+      audio_frame->vad_activity_ = AudioFrame::kVadActive;
+      audio_frame->speech_type_ = AudioFrame::kNormalSpeech;
+      break;
+    case kOutputVADPassive:
+      audio_frame->vad_activity_ = AudioFrame::kVadPassive;
+      audio_frame->speech_type_ = AudioFrame::kNormalSpeech;
+      break;
+    case kOutputCNG:
+      audio_frame->vad_activity_ = AudioFrame::kVadPassive;
+      audio_frame->speech_type_ = AudioFrame::kCNG;
+      break;
+    case kOutputPLC:
+      // |vad_activity_| is intentionally untouched; it keeps the value the
+      // caller stored (the previous audio activity).
+      audio_frame->speech_type_ = AudioFrame::kPLC;
+      break;
+    case kOutputPLCtoCNG:
+      audio_frame->vad_activity_ = AudioFrame::kVadPassive;
+      audio_frame->speech_type_ = AudioFrame::kPLCCNG;
+      break;
+    default:
+      assert(false);
+  }
+}
+
+// Returns true when |codec_id| (an ACM codec index) maps to one of the
+// comfort-noise codec IDs (narrow-, wide-, super-wide- or full-band).
+// Returns false when the index does not map to any codec ID.
+// TODO(kwiberg): Move to RentACodec.
+bool IsCng(int codec_id) {
+  const auto maybe_id = RentACodec::CodecIdFromIndex(codec_id);
+  if (!maybe_id)
+    return false;
+  const RentACodec::CodecId id = *maybe_id;
+  return id == RentACodec::CodecId::kCNNB ||
+         id == RentACodec::CodecId::kCNWB ||
+         id == RentACodec::CodecId::kCNSWB ||
+         id == RentACodec::CodecId::kCNFB;
+}
+
+} // namespace
+
+// Builds a receiver from |config|: creates the NetEq instance from
+// |config.neteq_config|, takes the initial post-decode VAD state from that
+// same config, and allocates two sample buffers of
+// AudioFrame::kMaxDataSizeSamples int16_t elements each.
+AcmReceiver::AcmReceiver(const AudioCodingModule::Config& config)
+    : crit_sect_(CriticalSectionWrapper::CreateCriticalSection()),
+      id_(config.id),
+      last_audio_decoder_(nullptr),
+      previous_audio_activity_(AudioFrame::kVadPassive),
+      audio_buffer_(new int16_t[AudioFrame::kMaxDataSizeSamples]),
+      last_audio_buffer_(new int16_t[AudioFrame::kMaxDataSizeSamples]),
+      neteq_(NetEq::Create(config.neteq_config)),
+      vad_enabled_(config.neteq_config.enable_post_decode_vad),
+      clock_(config.clock),
+      resampled_last_output_frame_(true) {
+  assert(clock_);
+  // memset() takes a byte count, not an element count. Without the sizeof
+  // factor only the first half of each int16_t buffer would be cleared.
+  const size_t buffer_size_bytes =
+      AudioFrame::kMaxDataSizeSamples * sizeof(int16_t);
+  memset(audio_buffer_.get(), 0, buffer_size_bytes);
+  memset(last_audio_buffer_.get(), 0, buffer_size_bytes);
+}
+
+// Destroys the NetEq instance that the constructor obtained from
+// NetEq::Create().
+AcmReceiver::~AcmReceiver() {
+  delete neteq_;
+}
+
+// Forwards |delay_ms| to NetEq as the minimum delay.
+// Returns 0 when NetEq accepts the value, -1 (after logging) otherwise.
+int AcmReceiver::SetMinimumDelay(int delay_ms) {
+  if (neteq_->SetMinimumDelay(delay_ms))
+    return 0;
+  // The log text names this function; it previously said "SetExtraDelay".
+  LOG(LERROR) << "AcmReceiver::SetMinimumDelay " << delay_ms;
+  return -1;
+}
+
+// Forwards |delay_ms| to NetEq as the maximum delay.
+// Returns 0 when NetEq accepts the value, -1 (after logging) otherwise.
+int AcmReceiver::SetMaximumDelay(int delay_ms) {
+  if (neteq_->SetMaximumDelay(delay_ms))
+    return 0;
+  // The log text names this function; it previously said "SetExtraDelay".
+  LOG(LERROR) << "AcmReceiver::SetMaximumDelay " << delay_ms;
+  return -1;
+}
+
+// Returns whatever NetEq reports from its own LeastRequiredDelayMs().
+int AcmReceiver::LeastRequiredDelayMs() const {
+  const int least_required_delay_ms = neteq_->LeastRequiredDelayMs();
+  return least_required_delay_ms;
+}
+
+// Returns a copy of |last_packet_sample_rate_hz_|, read under |crit_sect_|.
+// The value is empty until InsertPacket() has stored one, and is cleared
+// again by RemoveAllCodecs() and by RemoveCodec() of the last audio decoder.
+rtc::Optional<int> AcmReceiver::last_packet_sample_rate_hz() const {
+  CriticalSectionScoped lock(crit_sect_.get());
+  const rtc::Optional<int> rate_hz = last_packet_sample_rate_hz_;
+  return rate_hz;
+}
+
+// Returns the value of NetEq's last_output_sample_rate_hz().
+int AcmReceiver::last_output_sample_rate_hz() const {
+  const int output_rate_hz = neteq_->last_output_sample_rate_hz();
+  return output_rate_hz;
+}
+
+// Looks up the decoder for an incoming RTP packet, updates the "last audio
+// decoder" bookkeeping and hands the packet to NetEq.
+//
+// |rtp_header| supplies the payload type; the first byte of
+// |incoming_payload| is passed to RtpHeaderToDecoder() so that RED packets
+// can be mapped to the codec they carry.
+//
+// Returns 0 on success, and also when a CNG packet is dropped because the
+// last audio decoder has more than one channel. Returns -1 if the payload is
+// empty, the payload type is not registered, or NetEq rejects the packet.
+int AcmReceiver::InsertPacket(const WebRtcRTPHeader& rtp_header,
+                              rtc::ArrayView<const uint8_t> incoming_payload) {
+  const RTPHeader* header = &rtp_header.header;  // Just a shorthand.
+
+  // The decoder lookup below reads incoming_payload[0]; on an empty payload
+  // that would be an out-of-bounds read, so reject such packets up front.
+  if (incoming_payload.size() == 0) {
+    LOG_F(LS_ERROR) << "Empty payload for payload-type "
+                    << static_cast<int>(header->payloadType);
+    return -1;
+  }
+
+  uint32_t receive_timestamp = 0;
+  {
+    CriticalSectionScoped lock(crit_sect_.get());
+
+    const Decoder* decoder = RtpHeaderToDecoder(*header, incoming_payload[0]);
+    if (!decoder) {
+      LOG_F(LS_ERROR) << "Payload-type "
+                      << static_cast<int>(header->payloadType)
+                      << " is not registered.";
+      return -1;
+    }
+    // Sampling rate from the codec database entry, or -1 when the ACM codec
+    // index does not map to a codec ID.
+    const int sample_rate_hz = [&decoder] {
+      const auto ci = RentACodec::CodecIdFromIndex(decoder->acm_codec_id);
+      return ci ? RentACodec::CodecInstById(*ci)->plfreq : -1;
+    }();
+    receive_timestamp = NowInTimestamp(sample_rate_hz);
+
+    // If this is a CNG while the audio codec is not mono, skip pushing in
+    // packets into NetEq.
+    if (IsCng(decoder->acm_codec_id) && last_audio_decoder_ &&
+        last_audio_decoder_->channels > 1)
+      return 0;
+    // Only real audio codecs (neither CNG nor AVT) update the "last audio
+    // decoder" state.
+    if (!IsCng(decoder->acm_codec_id) &&
+        decoder->acm_codec_id !=
+            *RentACodec::CodecIndexFromId(RentACodec::CodecId::kAVT)) {
+      last_audio_decoder_ = decoder;
+      last_packet_sample_rate_hz_ = rtc::Optional<int>(decoder->sample_rate_hz);
+    }
+
+  }  // |crit_sect_| is released.
+
+  if (neteq_->InsertPacket(rtp_header, incoming_payload, receive_timestamp) <
+      0) {
+    LOG(LERROR) << "AcmReceiver::InsertPacket "
+                << static_cast<int>(header->payloadType)
+                << " Failed to insert packet";
+    return -1;
+  }
+  return 0;
+}
+
+// Pulls one frame of decoded audio from NetEq into |audio_frame|, resampling
+// to |desired_freq_hz| when that differs from NetEq's output rate. Passing -1
+// as |desired_freq_hz| keeps NetEq's own rate.
+//
+// Besides the samples, this fills in the channel count, samples per channel,
+// sample rate, VAD activity, speech type and timestamp of |audio_frame|.
+// Returns 0 on success and -1 if NetEq or the resampler fails.
+int AcmReceiver::GetAudio(int desired_freq_hz, AudioFrame* audio_frame) {
+  // Members are accessed throughout, so hold the lock for the whole call.
+  CriticalSectionScoped lock(crit_sect_.get());
+
+  enum NetEqOutputType output_type;
+  size_t samples_per_channel;
+  size_t num_channels;
+
+  // NetEq always decodes into |audio_buffer_| first.
+  const int neteq_status = neteq_->GetAudio(AudioFrame::kMaxDataSizeSamples,
+                                            audio_buffer_.get(),
+                                            &samples_per_channel,
+                                            &num_channels,
+                                            &output_type);
+  if (neteq_status != NetEq::kOK) {
+    LOG(LERROR) << "AcmReceiver::GetAudio - NetEq Failed.";
+    return -1;
+  }
+
+  const int neteq_rate_hz = neteq_->last_output_sample_rate_hz();
+  const bool need_resampling =
+      desired_freq_hz != -1 && neteq_rate_hz != desired_freq_hz;
+
+  // The audio in |audio_buffer_| is transferred to |audio_frame| below, either
+  // through resampling, or through straight memcpy.
+  // TODO(henrik.lundin) Glitches in the output may appear if the output rate
+  // from NetEq changes. See WebRTC issue 3923.
+  if (need_resampling) {
+    if (!resampled_last_output_frame_) {
+      // The previous frame bypassed the resampler, so prime the resampler
+      // with that frame first; the primed output itself is discarded.
+      int16_t primed_output[AudioFrame::kMaxDataSizeSamples];
+      const int primed_samples = resampler_.Resample10Msec(
+          last_audio_buffer_.get(), neteq_rate_hz, desired_freq_hz,
+          num_channels, AudioFrame::kMaxDataSizeSamples, primed_output);
+      if (primed_samples < 0) {
+        LOG(LERROR) << "AcmReceiver::GetAudio - "
+                       "Resampling last_audio_buffer_ failed.";
+        return -1;
+      }
+      samples_per_channel = static_cast<size_t>(primed_samples);
+    }
+
+    const int resampled_samples = resampler_.Resample10Msec(
+        audio_buffer_.get(), neteq_rate_hz, desired_freq_hz, num_channels,
+        AudioFrame::kMaxDataSizeSamples, audio_frame->data_);
+    if (resampled_samples < 0) {
+      LOG(LERROR) << "AcmReceiver::GetAudio - Resampling audio_buffer_ failed.";
+      return -1;
+    }
+    samples_per_channel = static_cast<size_t>(resampled_samples);
+    resampled_last_output_frame_ = true;
+  } else {
+    // We might end up here ONLY if codec is changed.
+    const size_t frame_bytes =
+        samples_per_channel * num_channels * sizeof(int16_t);
+    memcpy(audio_frame->data_, audio_buffer_.get(), frame_bytes);
+    resampled_last_output_frame_ = false;
+  }
+
+  // After the swap, |last_audio_buffer_| holds the audio just produced, ready
+  // for priming the resampler on a later call.
+  audio_buffer_.swap(last_audio_buffer_);
+
+  audio_frame->num_channels_ = num_channels;
+  audio_frame->samples_per_channel_ = samples_per_channel;
+  audio_frame->sample_rate_hz_ = static_cast<int>(samples_per_channel * 100);
+
+  // |vad_activity_| must carry the previous activity into
+  // SetAudioFrameActivityAndType(), which may leave it unchanged.
+  audio_frame->vad_activity_ = previous_audio_activity_;
+  SetAudioFrameActivityAndType(vad_enabled_, output_type, audio_frame);
+  previous_audio_activity_ = audio_frame->vad_activity_;
+  call_stats_.DecodedByNetEq(audio_frame->speech_type_);
+
+  // Computes the RTP timestamp of the first sample in |audio_frame| from
+  // |GetPlayoutTimestamp|, which is the timestamp of the last sample of
+  // |audio_frame|. The timestamp remains 0 until a valid playout timestamp
+  // is available.
+  uint32_t playout_timestamp = 0;
+  audio_frame->timestamp_ =
+      GetPlayoutTimestamp(&playout_timestamp)
+          ? playout_timestamp -
+                static_cast<uint32_t>(audio_frame->samples_per_channel_)
+          : 0;
+
+  return 0;
+}
+
+// Registers a decoder for |payload_type| with NetEq and records it in
+// |decoders_|.
+//
+// |acm_codec_id| is the ACM codec index, or -1 for an external decoder.
+// When |audio_decoder| is non-null it is registered as an external decoder;
+// otherwise NetEq is asked to register the payload type by decoder kind.
+// Re-registering an identical, non-external codec is a no-op. Registering a
+// different codec on an already-used payload type first removes the old one.
+//
+// Returns 0 on success, -1 if NetEq fails to remove the old payload type or
+// to register the new one.
+int32_t AcmReceiver::AddCodec(int acm_codec_id,
+                              uint8_t payload_type,
+                              size_t channels,
+                              int sample_rate_hz,
+                              AudioDecoder* audio_decoder,
+                              const std::string& name) {
+  // The corresponding NetEq decoder ID.
+  const auto neteq_decoder = [acm_codec_id, channels]() -> NetEqDecoder {
+    if (acm_codec_id == -1)
+      return NetEqDecoder::kDecoderArbitrary;  // External decoder.
+    const rtc::Optional<RentACodec::CodecId> cid =
+        RentACodec::CodecIdFromIndex(acm_codec_id);
+    RTC_DCHECK(cid) << "Invalid codec index: " << acm_codec_id;
+    const rtc::Optional<NetEqDecoder> ned =
+        RentACodec::NetEqDecoderFromCodecId(*cid, channels);
+    RTC_DCHECK(ned) << "Invalid codec ID: " << static_cast<int>(*cid);
+    return *ned;
+  }();
+
+  CriticalSectionScoped lock(crit_sect_.get());
+
+  // If this payload type has been registered before.
+  auto it = decoders_.find(payload_type);
+  if (it != decoders_.end()) {
+    const Decoder& decoder = it->second;
+    if (acm_codec_id != -1 && decoder.acm_codec_id == acm_codec_id &&
+        decoder.channels == channels &&
+        decoder.sample_rate_hz == sample_rate_hz) {
+      // Re-registering the same codec. Do nothing and return.
+      return 0;
+    }
+
+    // Changing codec. First unregister the old codec, then register the new
+    // one.
+    if (neteq_->RemovePayloadType(payload_type) != NetEq::kOK) {
+      LOG(LERROR) << "Cannot remove payload " << static_cast<int>(payload_type);
+      return -1;
+    }
+
+    // |last_audio_decoder_| may point at the entry about to be erased. Reset
+    // it, as RemoveCodec() does, so it never dangles.
+    if (last_audio_decoder_ == &it->second) {
+      last_audio_decoder_ = nullptr;
+      last_packet_sample_rate_hz_ = rtc::Optional<int>();
+    }
+    decoders_.erase(it);
+  }
+
+  int ret_val;
+  if (!audio_decoder) {
+    ret_val = neteq_->RegisterPayloadType(neteq_decoder, name, payload_type);
+  } else {
+    ret_val = neteq_->RegisterExternalDecoder(
+        audio_decoder, neteq_decoder, name, payload_type, sample_rate_hz);
+  }
+  if (ret_val != NetEq::kOK) {
+    // A separator keeps the codec id and payload type from running together
+    // in the log output.
+    LOG(LERROR) << "AcmReceiver::AddCodec " << acm_codec_id << " "
+                << static_cast<int>(payload_type)
+                << " channels: " << channels;
+    return -1;
+  }
+
+  Decoder decoder;
+  decoder.acm_codec_id = acm_codec_id;
+  decoder.payload_type = payload_type;
+  decoder.channels = channels;
+  decoder.sample_rate_hz = sample_rate_hz;
+  decoders_[payload_type] = decoder;
+  return 0;
+}
+
+// Turns on VAD in NetEq, then records the new state in |vad_enabled_| under
+// |crit_sect_|. The NetEq call is made before the lock is taken.
+void AcmReceiver::EnableVad() {
+  neteq_->EnableVad();
+  {
+    CriticalSectionScoped lock(crit_sect_.get());
+    vad_enabled_ = true;
+  }
+}
+
+// Turns off VAD in NetEq, then records the new state in |vad_enabled_| under
+// |crit_sect_|. The NetEq call is made before the lock is taken.
+void AcmReceiver::DisableVad() {
+  neteq_->DisableVad();
+  {
+    CriticalSectionScoped lock(crit_sect_.get());
+    vad_enabled_ = false;
+  }
+}
+
+// Asks NetEq to flush its buffers; no receiver state is touched here.
+void AcmReceiver::FlushBuffers() {
+  neteq_->FlushBuffers();
+}
+
+// Unregisters every decoder in |decoders_| from NetEq. A failure for one
+// payload type does not stop the loop: the failing entry stays in
+// |decoders_| and the remaining entries are still attempted.
+// Returns 0 if every removal succeeded, -1 if at least one failed. The last
+// audio decoder state is cleared in both cases.
+int AcmReceiver::RemoveAllCodecs() {
+  CriticalSectionScoped lock(crit_sect_.get());
+  int result = 0;
+  auto it = decoders_.begin();
+  while (it != decoders_.end()) {
+    const auto payload_type = it->second.payload_type;
+    if (neteq_->RemovePayloadType(payload_type) == 0) {
+      // Post-increment hands the current entry to erase() while |it| has
+      // already moved on to the next one.
+      decoders_.erase(it++);
+      continue;
+    }
+    LOG_F(LS_ERROR) << "Cannot remove payload "
+                    << static_cast<int>(payload_type);
+    result = -1;
+    ++it;
+  }
+
+  // Invalidate the last audio decoder and its sample rate.
+  last_audio_decoder_ = nullptr;
+  last_packet_sample_rate_hz_ = rtc::Optional<int>();
+  return result;
+}
+
+// Unregisters the decoder for |payload_type| from NetEq and from |decoders_|.
+// Returns 0 on success or when the payload type was never registered, and -1
+// if NetEq fails to remove it (in which case |decoders_| is left unchanged).
+int AcmReceiver::RemoveCodec(uint8_t payload_type) {
+  CriticalSectionScoped lock(crit_sect_.get());
+  auto it = decoders_.find(payload_type);
+  if (it == decoders_.end()) {  // Such a payload-type is not registered.
+    return 0;
+  }
+  if (neteq_->RemovePayloadType(payload_type) != NetEq::kOK) {
+    // A separator keeps the payload type from being glued to the function
+    // name in the log output.
+    LOG(LERROR) << "AcmReceiver::RemoveCodec "
+                << static_cast<int>(payload_type);
+    return -1;
+  }
+  // Drop the pointer into |decoders_| before the entry it refers to goes away.
+  if (last_audio_decoder_ == &it->second) {
+    last_audio_decoder_ = nullptr;
+    last_packet_sample_rate_hz_ = rtc::Optional<int>();
+  }
+  decoders_.erase(it);
+  return 0;
+}
+
+// Stores |id| in |id_| while holding |crit_sect_|.
+void AcmReceiver::set_id(int id) {
+  {
+    CriticalSectionScoped lock(crit_sect_.get());
+    id_ = id;
+  }
+}
+
+// Asks NetEq for the playout timestamp, passing |timestamp| through as the
+// output argument, and returns NetEq's boolean result unchanged.
+bool AcmReceiver::GetPlayoutTimestamp(uint32_t* timestamp) {
+  const bool has_timestamp = neteq_->GetPlayoutTimestamp(timestamp);
+  return has_timestamp;
+}
+
+// Writes a description of the last audio decoder into |codec|: the codec
+// database entry for its ACM codec index, with payload type, channel count
+// and sampling frequency replaced by the registered values.
+// Returns 0 on success, or -1 (leaving |codec| untouched) when there is no
+// last audio decoder.
+int AcmReceiver::LastAudioCodec(CodecInst* codec) const {
+  CriticalSectionScoped lock(crit_sect_.get());
+  const Decoder* const last = last_audio_decoder_;
+  if (last == nullptr) {
+    return -1;
+  }
+  // NOTE(review): both optionals are dereferenced unchecked. AddCodec()
+  // accepts acm_codec_id == -1 for external decoders, which presumably has no
+  // codec ID - confirm how that case should be reported here.
+  const auto codec_id = RentACodec::CodecIdFromIndex(last->acm_codec_id);
+  *codec = *RentACodec::CodecInstById(*codec_id);
+  codec->pltype = last->payload_type;
+  codec->channels = last->channels;
+  codec->plfreq = last->sample_rate_hz;
+  return 0;
+}
+
+// Fetches NetEq's network statistics and copies them field by field into
+// |acm_stat|.
+void AcmReceiver::GetNetworkStatistics(NetworkStatistics* acm_stat) {
+  NetEqNetworkStatistics neteq_stat;
+  // NetEq function always returns zero, so we don't check the return value.
+  neteq_->NetworkStatistics(&neteq_stat);
+
+  // Buffer state.
+  acm_stat->currentBufferSize = neteq_stat.current_buffer_size_ms;
+  acm_stat->preferredBufferSize = neteq_stat.preferred_buffer_size_ms;
+  acm_stat->jitterPeaksFound = neteq_stat.jitter_peaks_found != 0;
+
+  // Rates.
+  acm_stat->currentPacketLossRate = neteq_stat.packet_loss_rate;
+  acm_stat->currentDiscardRate = neteq_stat.packet_discard_rate;
+  acm_stat->currentExpandRate = neteq_stat.expand_rate;
+  acm_stat->currentSpeechExpandRate = neteq_stat.speech_expand_rate;
+  acm_stat->currentPreemptiveRate = neteq_stat.preemptive_rate;
+  acm_stat->currentAccelerateRate = neteq_stat.accelerate_rate;
+  acm_stat->currentSecondaryDecodedRate = neteq_stat.secondary_decoded_rate;
+
+  // Clock drift and inserted samples.
+  acm_stat->clockDriftPPM = neteq_stat.clockdrift_ppm;
+  acm_stat->addedSamples = neteq_stat.added_zero_samples;
+
+  // Waiting-time figures.
+  acm_stat->meanWaitingTimeMs = neteq_stat.mean_waiting_time_ms;
+  acm_stat->medianWaitingTimeMs = neteq_stat.median_waiting_time_ms;
+  acm_stat->minWaitingTimeMs = neteq_stat.min_waiting_time_ms;
+  acm_stat->maxWaitingTimeMs = neteq_stat.max_waiting_time_ms;
+}
+
+// Writes a description of the decoder registered for |payload_type| into
+// |codec|: the codec database entry for its ACM codec index, with payload
+// type, channel count and sampling frequency replaced by the registered
+// values.
+// Returns 0 on success, or -1 (after logging) when the payload type is not in
+// |decoders_|.
+int AcmReceiver::DecoderByPayloadType(uint8_t payload_type,
+                                      CodecInst* codec) const {
+  CriticalSectionScoped lock(crit_sect_.get());
+  const auto entry = decoders_.find(payload_type);
+  if (entry == decoders_.end()) {
+    LOG(LERROR) << "AcmReceiver::DecoderByPayloadType "
+                << static_cast<int>(payload_type);
+    return -1;
+  }
+  const Decoder& registered = entry->second;
+  // NOTE(review): both optionals are dereferenced unchecked. AddCodec()
+  // accepts acm_codec_id == -1 for external decoders, which presumably has no
+  // codec ID - confirm how that case should be reported here.
+  const auto codec_id = RentACodec::CodecIdFromIndex(registered.acm_codec_id);
+  *codec = *RentACodec::CodecInstById(*codec_id);
+  codec->pltype = registered.payload_type;
+  codec->channels = registered.channels;
+  codec->plfreq = registered.sample_rate_hz;
+  return 0;
+}
+
+// Enables NACK in NetEq with the given |max_nack_list_size|.
+// Always returns 0.
+int AcmReceiver::EnableNack(size_t max_nack_list_size) {
+  const int kSuccess = 0;
+  neteq_->EnableNack(max_nack_list_size);
+  return kSuccess;
+}
+
+// Disables NACK in NetEq; no receiver state is touched here.
+void AcmReceiver::DisableNack() {
+  neteq_->DisableNack();
+}
+
+// Returns the list NetEq produces from GetNackList() for the given
+// |round_trip_time_ms|.
+std::vector<uint16_t> AcmReceiver::GetNackList(
+    int64_t round_trip_time_ms) const {
+  std::vector<uint16_t> nack_list = neteq_->GetNackList(round_trip_time_ms);
+  return nack_list;
+}
+
+// Resets NetEq's minimum delay to 0. The result of the NetEq call is ignored.
+void AcmReceiver::ResetInitialDelay() {
+  const int kNoMinimumDelayMs = 0;
+  neteq_->SetMinimumDelay(kNoMinimumDelayMs);
+  // TODO(turajs): Should NetEq Buffer be flushed?
+}
+
+// Finds the registered decoder for a packet.
+//
+// The lookup starts from |rtp_header.payloadType|. If that entry is the RED
+// codec, the lookup is repeated with the low 7 bits of |payload_type|. The
+// caller, InsertPacket(), passes the first payload byte in that argument.
+// Returns a pointer into |decoders_|, or nullptr if nothing is registered.
+const AcmReceiver::Decoder* AcmReceiver::RtpHeaderToDecoder(
+    const RTPHeader& rtp_header,
+    uint8_t payload_type) const {
+  auto entry = decoders_.find(rtp_header.payloadType);
+  const auto red_index =
+      RentACodec::CodecIndexFromId(RentACodec::CodecId::kRED);
+  // |red_index| is only set when RED is defined in WebRTC.
+  const bool is_red_packet = red_index && entry != decoders_.end() &&
+                             entry->second.acm_codec_id == *red_index;
+  if (is_red_packet) {
+    // This is a RED packet, get the payload of the audio codec.
+    entry = decoders_.find(payload_type & 0x7F);
+  }
+
+  // Check if the payload is registered.
+  if (entry == decoders_.end())
+    return nullptr;
+  return &entry->second;
+}
+
+// Converts the current clock time into a timestamp counted in units of
+// |decoder_sampling_rate| / 1000 per millisecond (integer division).
+uint32_t AcmReceiver::NowInTimestamp(int decoder_sampling_rate) const {
+  // Down-cast the time to (32-6)-bit since we only care about
+  // the least significant bits. (32-6) bits cover 2^(32-6) = 67108864 ms.
+  // We masked 6 most significant bits of 32-bit so there is no overflow in
+  // the conversion from milliseconds to timestamp.
+  const auto clock_ms = clock_->TimeInMilliseconds();
+  const uint32_t now_in_ms = static_cast<uint32_t>(clock_ms & 0x03ffffff);
+  const int ticks_per_ms = decoder_sampling_rate / 1000;
+  return static_cast<uint32_t>(ticks_per_ms * now_in_ms);
+}
+
+// Copies the decoding statistics held by |call_stats_| into |stats| while
+// holding |crit_sect_|.
+void AcmReceiver::GetDecodingCallStatistics(
+    AudioDecodingCallStats* stats) const {
+  CriticalSectionScoped lock(crit_sect_.get());
+  const AudioDecodingCallStats& snapshot = call_stats_.GetDecodingStatistics();
+  *stats = snapshot;
+}
+
+} // namespace acm2
+
+} // namespace webrtc