/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "webrtc/modules/rtp_rtcp/source/rtp_receiver_audio.h"

#include <assert.h>  // assert
#include <math.h>    // pow()
#include <string.h>  // memcpy()

#include "webrtc/base/logging.h"
#include "webrtc/base/trace_event.h"
#include "webrtc/system_wrappers/include/critical_section_wrapper.h"

namespace webrtc {

// Factory for the audio flavour of RTPReceiverStrategy.
//
// Returns a RTPReceiverAudio allocated with `new`; both callbacks are passed
// straight through to its constructor.
RTPReceiverStrategy* RTPReceiverStrategy::CreateAudioStrategy(
    RtpData* data_callback,
    RtpAudioFeedback* incoming_messages_callback) {
  return new RTPReceiverAudio(data_callback, incoming_messages_callback);
}

// Builds a receiver with no payload types registered yet: every payload type
// member starts at -1, the last received frequency defaults to 8000 Hz,
// forwarding of telephone events to the decoder is off and no energy levels
// are stored.
//
// |data_callback| is handed to the RTPReceiverStrategy base class.
// |incoming_messages_callback| is stored in cb_audio_feedback_.
RTPReceiverAudio::RTPReceiverAudio(RtpData* data_callback,
                                   RtpAudioFeedback* incoming_messages_callback)
    : RTPReceiverStrategy(data_callback),
      TelephoneEventHandler(),
      last_received_frequency_(8000),
      telephone_event_forward_to_decoder_(false),
      telephone_event_payload_type_(-1),
      cng_nb_payload_type_(-1),
      cng_wb_payload_type_(-1),
      cng_swb_payload_type_(-1),
      cng_fb_payload_type_(-1),
      cng_payload_type_(-1),
      g722_payload_type_(-1),
      last_received_g722_(false),
      num_energy_(0),
      current_remote_energy_(),
      cb_audio_feedback_(incoming_messages_callback) {
  // Until a payload says otherwise, treat the stream as mono.
  last_payload_.Audio.channels = 1;
  // Explicitly zero the stored energy levels in addition to the
  // value-initialization in the initializer list above.
  memset(current_remote_energy_, 0, sizeof(current_remote_energy_));
}

// Outband TelephoneEvent(DTMF) detection
//
// Turns forwarding of out-of-band telephone events to the decoder on or off.
// The flag is written under crit_sect_.
void RTPReceiverAudio::SetTelephoneEventForwardToDecoder(
    bool forward_to_decoder) {
  CriticalSectionScoped lock(crit_sect_.get());
  telephone_event_forward_to_decoder_ = forward_to_decoder;
}

// Is forwarding of outband telephone events turned on/off?
bool RTPReceiverAudio::TelephoneEventForwardToDecoder() const { CriticalSectionScoped lock(crit_sect_.get()); return telephone_event_forward_to_decoder_; } bool RTPReceiverAudio::TelephoneEventPayloadType( int8_t payload_type) const { CriticalSectionScoped lock(crit_sect_.get()); return telephone_event_payload_type_ == payload_type; } bool RTPReceiverAudio::CNGPayloadType(int8_t payload_type, uint32_t* frequency, bool* cng_payload_type_has_changed) { CriticalSectionScoped lock(crit_sect_.get()); *cng_payload_type_has_changed = false; // We can have four CNG on 8000Hz, 16000Hz, 32000Hz and 48000Hz. if (cng_nb_payload_type_ == payload_type) { *frequency = 8000; if (cng_payload_type_ != -1 && cng_payload_type_ != cng_nb_payload_type_) *cng_payload_type_has_changed = true; cng_payload_type_ = cng_nb_payload_type_; return true; } else if (cng_wb_payload_type_ == payload_type) { // if last received codec is G.722 we must use frequency 8000 if (last_received_g722_) { *frequency = 8000; } else { *frequency = 16000; } if (cng_payload_type_ != -1 && cng_payload_type_ != cng_wb_payload_type_) *cng_payload_type_has_changed = true; cng_payload_type_ = cng_wb_payload_type_; return true; } else if (cng_swb_payload_type_ == payload_type) { *frequency = 32000; if ((cng_payload_type_ != -1) && (cng_payload_type_ != cng_swb_payload_type_)) *cng_payload_type_has_changed = true; cng_payload_type_ = cng_swb_payload_type_; return true; } else if (cng_fb_payload_type_ == payload_type) { *frequency = 48000; if (cng_payload_type_ != -1 && cng_payload_type_ != cng_fb_payload_type_) *cng_payload_type_has_changed = true; cng_payload_type_ = cng_fb_payload_type_; return true; } else { // not CNG if (g722_payload_type_ == payload_type) { last_received_g722_ = true; } else { last_received_g722_ = false; } } return false; } bool RTPReceiverAudio::ShouldReportCsrcChanges(uint8_t payload_type) const { // Don't do this for DTMF packets, otherwise it's fine. 
return !TelephoneEventPayloadType(payload_type); } // - Sample based or frame based codecs based on RFC 3551 // - // - NOTE! There is one error in the RFC, stating G.722 uses 8 bits/samples. // - The correct rate is 4 bits/sample. // - // - name of sampling default // - encoding sample/frame bits/sample rate ms/frame ms/packet // - // - Sample based audio codecs // - DVI4 sample 4 var. 20 // - G722 sample 4 16,000 20 // - G726-40 sample 5 8,000 20 // - G726-32 sample 4 8,000 20 // - G726-24 sample 3 8,000 20 // - G726-16 sample 2 8,000 20 // - L8 sample 8 var. 20 // - L16 sample 16 var. 20 // - PCMA sample 8 var. 20 // - PCMU sample 8 var. 20 // - // - Frame based audio codecs // - G723 frame N/A 8,000 30 30 // - G728 frame N/A 8,000 2.5 20 // - G729 frame N/A 8,000 10 20 // - G729D frame N/A 8,000 10 20 // - G729E frame N/A 8,000 10 20 // - GSM frame N/A 8,000 20 20 // - GSM-EFR frame N/A 8,000 20 20 // - LPC frame N/A 8,000 20 20 // - MPA frame N/A var. var. // - // - G7221 frame N/A int32_t RTPReceiverAudio::OnNewPayloadTypeCreated( const char payload_name[RTP_PAYLOAD_NAME_SIZE], int8_t payload_type, uint32_t frequency) { CriticalSectionScoped lock(crit_sect_.get()); if (RtpUtility::StringCompare(payload_name, "telephone-event", 15)) { telephone_event_payload_type_ = payload_type; } if (RtpUtility::StringCompare(payload_name, "cn", 2)) { // we can have three CNG on 8000Hz, 16000Hz and 32000Hz if (frequency == 8000) { cng_nb_payload_type_ = payload_type; } else if (frequency == 16000) { cng_wb_payload_type_ = payload_type; } else if (frequency == 32000) { cng_swb_payload_type_ = payload_type; } else if (frequency == 48000) { cng_fb_payload_type_ = payload_type; } else { assert(false); return -1; } } return 0; } int32_t RTPReceiverAudio::ParseRtpPacket(WebRtcRTPHeader* rtp_header, const PayloadUnion& specific_payload, bool is_red, const uint8_t* payload, size_t payload_length, int64_t timestamp_ms, bool is_first_packet) { 
TRACE_EVENT2(TRACE_DISABLED_BY_DEFAULT("webrtc_rtp"), "Audio::ParseRtp", "seqnum", rtp_header->header.sequenceNumber, "timestamp", rtp_header->header.timestamp); rtp_header->type.Audio.numEnergy = rtp_header->header.numCSRCs; num_energy_ = rtp_header->type.Audio.numEnergy; if (rtp_header->type.Audio.numEnergy > 0 && rtp_header->type.Audio.numEnergy <= kRtpCsrcSize) { memcpy(current_remote_energy_, rtp_header->type.Audio.arrOfEnergy, rtp_header->type.Audio.numEnergy); } return ParseAudioCodecSpecific(rtp_header, payload, payload_length, specific_payload.Audio, is_red); } int RTPReceiverAudio::GetPayloadTypeFrequency() const { CriticalSectionScoped lock(crit_sect_.get()); if (last_received_g722_) { return 8000; } return last_received_frequency_; } RTPAliveType RTPReceiverAudio::ProcessDeadOrAlive( uint16_t last_payload_length) const { // Our CNG is 9 bytes; if it's a likely CNG the receiver needs to check // kRtpNoRtp against NetEq speech_type kOutputPLCtoCNG. if (last_payload_length < 10) { // our CNG is 9 bytes return kRtpNoRtp; } else { return kRtpDead; } } void RTPReceiverAudio::CheckPayloadChanged(int8_t payload_type, PayloadUnion* specific_payload, bool* should_discard_changes) { *should_discard_changes = false; if (TelephoneEventPayloadType(payload_type)) { // Don't do callbacks for DTMF packets. *should_discard_changes = true; return; } // frequency is updated for CNG bool cng_payload_type_has_changed = false; bool is_cng_payload_type = CNGPayloadType(payload_type, &specific_payload->Audio.frequency, &cng_payload_type_has_changed); if (is_cng_payload_type) { // Don't do callbacks for DTMF packets. 
*should_discard_changes = true; return; } } int RTPReceiverAudio::Energy(uint8_t array_of_energy[kRtpCsrcSize]) const { CriticalSectionScoped cs(crit_sect_.get()); assert(num_energy_ <= kRtpCsrcSize); if (num_energy_ > 0) { memcpy(array_of_energy, current_remote_energy_, sizeof(uint8_t) * num_energy_); } return num_energy_; } int32_t RTPReceiverAudio::InvokeOnInitializeDecoder( RtpFeedback* callback, int8_t payload_type, const char payload_name[RTP_PAYLOAD_NAME_SIZE], const PayloadUnion& specific_payload) const { if (-1 == callback->OnInitializeDecoder( payload_type, payload_name, specific_payload.Audio.frequency, specific_payload.Audio.channels, specific_payload.Audio.rate)) { LOG(LS_ERROR) << "Failed to create decoder for payload type: " << payload_name << "/" << static_cast(payload_type); return -1; } return 0; } // We are not allowed to have any critsects when calling data_callback. int32_t RTPReceiverAudio::ParseAudioCodecSpecific( WebRtcRTPHeader* rtp_header, const uint8_t* payload_data, size_t payload_length, const AudioPayload& audio_specific, bool is_red) { if (payload_length == 0) { return 0; } bool telephone_event_packet = TelephoneEventPayloadType(rtp_header->header.payloadType); if (telephone_event_packet) { CriticalSectionScoped lock(crit_sect_.get()); // RFC 4733 2.3 // 0 1 2 3 // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // | event |E|R| volume | duration | // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // if (payload_length % 4 != 0) { return -1; } size_t number_of_events = payload_length / 4; // sanity if (number_of_events >= MAX_NUMBER_OF_PARALLEL_TELEPHONE_EVENTS) { number_of_events = MAX_NUMBER_OF_PARALLEL_TELEPHONE_EVENTS; } for (size_t n = 0; n < number_of_events; ++n) { bool end = (payload_data[(4 * n) + 1] & 0x80) ? 
true : false; std::set::iterator event = telephone_event_reported_.find(payload_data[4 * n]); if (event != telephone_event_reported_.end()) { // we have already seen this event if (end) { telephone_event_reported_.erase(payload_data[4 * n]); } } else { if (end) { // don't add if it's a end of a tone } else { telephone_event_reported_.insert(payload_data[4 * n]); } } } // RFC 4733 2.5.1.3 & 2.5.2.3 Long-Duration Events // should not be a problem since we don't care about the duration // RFC 4733 See 2.5.1.5. & 2.5.2.4. Multiple Events in a Packet } { CriticalSectionScoped lock(crit_sect_.get()); if (!telephone_event_packet) { last_received_frequency_ = audio_specific.frequency; } // Check if this is a CNG packet, receiver might want to know uint32_t ignored; bool also_ignored; if (CNGPayloadType(rtp_header->header.payloadType, &ignored, &also_ignored)) { rtp_header->type.Audio.isCNG = true; rtp_header->frameType = kAudioFrameCN; } else { rtp_header->frameType = kAudioFrameSpeech; rtp_header->type.Audio.isCNG = false; } // check if it's a DTMF event, hence something we can playout if (telephone_event_packet) { if (!telephone_event_forward_to_decoder_) { // don't forward event to decoder return 0; } std::set::iterator first = telephone_event_reported_.begin(); if (first != telephone_event_reported_.end() && *first > 15) { // don't forward non DTMF events return 0; } } } // TODO(holmer): Break this out to have RED parsing handled generically. if (is_red && !(payload_data[0] & 0x80)) { // we recive only one frame packed in a RED packet remove the RED wrapper rtp_header->header.payloadType = payload_data[0]; // only one frame in the RED strip the one byte to help NetEq return data_callback_->OnReceivedPayloadData( payload_data + 1, payload_length - 1, rtp_header); } rtp_header->type.Audio.channel = audio_specific.channels; return data_callback_->OnReceivedPayloadData( payload_data, payload_length, rtp_header); } } // namespace webrtc