diff options
Diffstat (limited to 'media/cast/audio_sender/audio_encoder.cc')
-rw-r--r-- | media/cast/audio_sender/audio_encoder.cc | 367 |
1 files changed, 242 insertions, 125 deletions
diff --git a/media/cast/audio_sender/audio_encoder.cc b/media/cast/audio_sender/audio_encoder.cc index 3cfca0dfc8..a82d1de39a 100644 --- a/media/cast/audio_sender/audio_encoder.cc +++ b/media/cast/audio_sender/audio_encoder.cc @@ -4,171 +4,288 @@ #include "media/cast/audio_sender/audio_encoder.h" +#include <algorithm> + #include "base/bind.h" +#include "base/bind_helpers.h" #include "base/logging.h" #include "base/message_loop/message_loop.h" +#include "base/sys_byteorder.h" +#include "base/time/time.h" +#include "media/base/audio_bus.h" #include "media/cast/cast_defines.h" #include "media/cast/cast_environment.h" -#include "third_party/webrtc/modules/audio_coding/main/interface/audio_coding_module.h" -#include "third_party/webrtc/modules/interface/module_common_types.h" +#include "third_party/opus/src/include/opus.h" namespace media { namespace cast { -// 48KHz, 2 channels and 100 ms. -static const int kMaxNumberOfSamples = 48 * 2 * 100; +void LogAudioEncodedEvent(CastEnvironment* const cast_environment, + const base::TimeTicks& recorded_time) { + // TODO(mikhal): Resolve timestamp calculation for audio. + cast_environment->Logging()->InsertFrameEvent(kAudioFrameEncoded, + GetVideoRtpTimestamp(recorded_time), kFrameIdUnknown); +} -// This class is only called from the cast audio encoder thread. -class WebrtEncodedDataCallback : public webrtc::AudioPacketizationCallback { +// Base class that handles the common problem of feeding one or more AudioBus' +// data into a 10 ms buffer and then, once the buffer is full, encoding the +// signal and emitting an EncodedAudioFrame via the FrameEncodedCallback. +// +// Subclasses complete the implementation by handling the actual encoding +// details. +class AudioEncoder::ImplBase { public: - WebrtEncodedDataCallback(scoped_refptr<CastEnvironment> cast_environment, - AudioCodec codec, - int frequency) - : codec_(codec), - frequency_(frequency), - cast_environment_(cast_environment), - last_timestamp_(0) {} - - virtual int32 SendData( - webrtc::FrameType /*frame_type*/, - uint8 /*payload_type*/, - uint32 timestamp, - const uint8* payload_data, - uint16 payload_size, - const webrtc::RTPFragmentationHeader* /*fragmentation*/) OVERRIDE { - scoped_ptr<EncodedAudioFrame> audio_frame(new EncodedAudioFrame()); - audio_frame->codec = codec_; - audio_frame->samples = timestamp - last_timestamp_; - DCHECK(audio_frame->samples <= kMaxNumberOfSamples); - last_timestamp_ = timestamp; - audio_frame->data.insert(audio_frame->data.begin(), - payload_data, - payload_data + payload_size); + ImplBase(CastEnvironment* cast_environment, + AudioCodec codec, int num_channels, int sampling_rate, + const FrameEncodedCallback& callback) + : cast_environment_(cast_environment), + codec_(codec), num_channels_(num_channels), + samples_per_10ms_(sampling_rate / 100), + callback_(callback), + buffer_fill_end_(0), + frame_id_(0) { + CHECK_GT(num_channels_, 0); + CHECK_GT(samples_per_10ms_, 0); + CHECK_EQ(sampling_rate % 100, 0); + CHECK_LE(samples_per_10ms_ * num_channels_, + EncodedAudioFrame::kMaxNumberOfSamples); + } - cast_environment_->PostTask(CastEnvironment::MAIN, FROM_HERE, - base::Bind(*frame_encoded_callback_, base::Passed(&audio_frame), - recorded_time_)); - return 0; + virtual ~ImplBase() {} + + void EncodeAudio(const AudioBus* audio_bus, + const base::TimeTicks& recorded_time, + const base::Closure& done_callback) { + int src_pos = 0; + while (src_pos < audio_bus->frames()) { + const int num_samples_to_xfer = + std::min(samples_per_10ms_ - buffer_fill_end_, + audio_bus->frames() - src_pos); + DCHECK_EQ(audio_bus->channels(), num_channels_); + TransferSamplesIntoBuffer( + audio_bus, src_pos, buffer_fill_end_, num_samples_to_xfer); + src_pos += num_samples_to_xfer; + buffer_fill_end_ += num_samples_to_xfer; + + if (src_pos == audio_bus->frames()) { + cast_environment_->PostTask(CastEnvironment::MAIN, FROM_HERE, + done_callback); + // Note: |audio_bus| is now invalid.. + } + + if (buffer_fill_end_ == samples_per_10ms_) { + scoped_ptr<EncodedAudioFrame> audio_frame(new EncodedAudioFrame()); + audio_frame->codec = codec_; + audio_frame->frame_id = frame_id_++; + audio_frame->samples = samples_per_10ms_; + if (EncodeFromFilledBuffer(&audio_frame->data)) { + // Compute an offset to determine the recorded time for the first + // audio sample in the buffer. + const base::TimeDelta buffer_time_offset = + (buffer_fill_end_ - src_pos) * + base::TimeDelta::FromMilliseconds(10) / samples_per_10ms_; + // TODO(miu): Consider batching EncodedAudioFrames so we only post a + // at most one task for each call to this method. + cast_environment_->PostTask( + CastEnvironment::MAIN, FROM_HERE, + base::Bind(callback_, base::Passed(&audio_frame), + recorded_time - buffer_time_offset)); + } + buffer_fill_end_ = 0; + } + } } - void SetEncodedCallbackInfo( - const base::TimeTicks& recorded_time, - const AudioEncoder::FrameEncodedCallback* frame_encoded_callback) { - recorded_time_ = recorded_time; - frame_encoded_callback_ = frame_encoded_callback; + protected: + virtual void TransferSamplesIntoBuffer(const AudioBus* audio_bus, + int source_offset, + int buffer_fill_offset, + int num_samples) = 0; + virtual bool EncodeFromFilledBuffer(std::string* out) = 0; + + CastEnvironment* const cast_environment_; + const AudioCodec codec_; + const int num_channels_; + const int samples_per_10ms_; + const FrameEncodedCallback callback_; + + private: + // In the case where a call to EncodeAudio() cannot completely fill the + // buffer, this points to the position at which to populate data in a later + // call. + int buffer_fill_end_; + + // A counter used to label EncodedAudioFrames. + uint32 frame_id_; + + private: + DISALLOW_COPY_AND_ASSIGN(ImplBase); +}; + +class AudioEncoder::OpusImpl : public AudioEncoder::ImplBase { + public: + OpusImpl(CastEnvironment* cast_environment, + int num_channels, int sampling_rate, int bitrate, + const FrameEncodedCallback& callback) + : ImplBase(cast_environment, kOpus, num_channels, sampling_rate, + callback), + encoder_memory_(new uint8[opus_encoder_get_size(num_channels)]), + opus_encoder_(reinterpret_cast<OpusEncoder*>(encoder_memory_.get())), + buffer_(new float[num_channels * samples_per_10ms_]) { + CHECK_EQ(opus_encoder_init(opus_encoder_, sampling_rate, num_channels, + OPUS_APPLICATION_AUDIO), + OPUS_OK); + if (bitrate <= 0) { + // Note: As of 2013-10-31, the encoder in "auto bitrate" mode would use a + // variable bitrate up to 102kbps for 2-channel, 48 kHz audio and a 10 ms + // frame size. The opus library authors may, of course, adjust this in + // later versions. + bitrate = OPUS_AUTO; + } + CHECK_EQ(opus_encoder_ctl(opus_encoder_, OPUS_SET_BITRATE(bitrate)), + OPUS_OK); } + virtual ~OpusImpl() {} + private: - const AudioCodec codec_; - const int frequency_; - scoped_refptr<CastEnvironment> cast_environment_; - uint32 last_timestamp_; - base::TimeTicks recorded_time_; - const AudioEncoder::FrameEncodedCallback* frame_encoded_callback_; + virtual void TransferSamplesIntoBuffer(const AudioBus* audio_bus, + int source_offset, + int buffer_fill_offset, + int num_samples) OVERRIDE { + // Opus requires channel-interleaved samples in a single array. + for (int ch = 0; ch < audio_bus->channels(); ++ch) { + const float* src = audio_bus->channel(ch) + source_offset; + const float* const src_end = src + num_samples; + float* dest = buffer_.get() + buffer_fill_offset * num_channels_ + ch; + for (; src < src_end; ++src, dest += num_channels_) + *dest = *src; + } + } + + virtual bool EncodeFromFilledBuffer(std::string* out) OVERRIDE { + out->resize(kOpusMaxPayloadSize); + const opus_int32 result = opus_encode_float( + opus_encoder_, buffer_.get(), samples_per_10ms_, + reinterpret_cast<uint8*>(&out->at(0)), kOpusMaxPayloadSize); + if (result > 1) { + out->resize(result); + return true; + } else if (result < 0) { + LOG(ERROR) << "Error code from opus_encode_float(): " << result; + return false; + } else { + // Do nothing: The documentation says that a return value of zero or + // one byte means the packet does not need to be transmitted. + return false; + } + } + + const scoped_ptr<uint8[]> encoder_memory_; + OpusEncoder* const opus_encoder_; + const scoped_ptr<float[]> buffer_; + + // This is the recommended value, according to documentation in + // third_party/opus/src/include/opus.h, so that the Opus encoder does not + // degrade the audio due to memory constraints. + // + // Note: Whereas other RTP implementations do not, the cast library is + // perfectly capable of transporting larger than MTU-sized audio frames. + static const int kOpusMaxPayloadSize = 4000; + + DISALLOW_COPY_AND_ASSIGN(OpusImpl); }; -AudioEncoder::AudioEncoder(scoped_refptr<CastEnvironment> cast_environment, - const AudioSenderConfig& audio_config) - : cast_environment_(cast_environment), - audio_encoder_(webrtc::AudioCodingModule::Create(0)), - webrtc_encoder_callback_( - new WebrtEncodedDataCallback(cast_environment, audio_config.codec, - audio_config.frequency)), - timestamp_(0) { // Must start at 0; used above. - if (audio_encoder_->InitializeSender() != 0) { - DCHECK(false) << "Invalid webrtc return value"; +class AudioEncoder::Pcm16Impl : public AudioEncoder::ImplBase { + public: + Pcm16Impl(CastEnvironment* cast_environment, + int num_channels, int sampling_rate, + const FrameEncodedCallback& callback) + : ImplBase(cast_environment, kPcm16, num_channels, sampling_rate, + callback), + buffer_(new int16[num_channels * samples_per_10ms_]) {} + + virtual ~Pcm16Impl() {} + + private: + virtual void TransferSamplesIntoBuffer(const AudioBus* audio_bus, + int source_offset, + int buffer_fill_offset, + int num_samples) OVERRIDE { + audio_bus->ToInterleavedPartial( + source_offset, num_samples, sizeof(int16), + buffer_.get() + buffer_fill_offset * num_channels_); } - if (audio_encoder_->RegisterTransportCallback( - webrtc_encoder_callback_.get()) != 0) { - DCHECK(false) << "Invalid webrtc return value"; + + virtual bool EncodeFromFilledBuffer(std::string* out) OVERRIDE { + // Output 16-bit PCM integers in big-endian byte order. + out->resize(num_channels_ * samples_per_10ms_ * sizeof(int16)); + const int16* src = buffer_.get(); + const int16* const src_end = src + num_channels_ * samples_per_10ms_; + uint16* dest = reinterpret_cast<uint16*>(&out->at(0)); + for (; src < src_end; ++src, ++dest) + *dest = base::HostToNet16(*src); + return true; } - webrtc::CodecInst send_codec; - send_codec.pltype = audio_config.rtp_payload_type; - send_codec.plfreq = audio_config.frequency; - send_codec.channels = audio_config.channels; + + private: + const scoped_ptr<int16[]> buffer_; + + DISALLOW_COPY_AND_ASSIGN(Pcm16Impl); +}; + +AudioEncoder::AudioEncoder( + const scoped_refptr<CastEnvironment>& cast_environment, + const AudioSenderConfig& audio_config, + const FrameEncodedCallback& frame_encoded_callback) + : cast_environment_(cast_environment) { + // Note: It doesn't matter which thread constructs AudioEncoder, just so long + // as all calls to InsertAudio() are by the same thread. + insert_thread_checker_.DetachFromThread(); switch (audio_config.codec) { case kOpus: - strncpy(send_codec.plname, "opus", sizeof(send_codec.plname)); - send_codec.pacsize = audio_config.frequency / 50; // 20 ms - send_codec.rate = audio_config.bitrate; // 64000 + impl_.reset(new OpusImpl( + cast_environment, audio_config.channels, audio_config.frequency, + audio_config.bitrate, frame_encoded_callback)); break; case kPcm16: - strncpy(send_codec.plname, "L16", sizeof(send_codec.plname)); - send_codec.pacsize = audio_config.frequency / 100; // 10 ms - // TODO(pwestin) bug in webrtc; it should take audio_config.channels into - // account. - send_codec.rate = 8 * 2 * audio_config.frequency; + impl_.reset(new Pcm16Impl( + cast_environment, audio_config.channels, audio_config.frequency, + frame_encoded_callback)); break; default: - DCHECK(false) << "Codec must be specified for audio encoder"; - return; - } - if (audio_encoder_->RegisterSendCodec(send_codec) != 0) { - DCHECK(false) << "Invalid webrtc return value; failed to register codec"; + NOTREACHED() << "Unsupported or unspecified codec for audio encoder"; + break; } } AudioEncoder::~AudioEncoder() {} -// Called from main cast thread. -void AudioEncoder::InsertRawAudioFrame( - const PcmAudioFrame* audio_frame, +void AudioEncoder::InsertAudio( + const AudioBus* audio_bus, const base::TimeTicks& recorded_time, - const FrameEncodedCallback& frame_encoded_callback, - const base::Closure release_callback) { + const base::Closure& done_callback) { + DCHECK(insert_thread_checker_.CalledOnValidThread()); + if (!impl_) { + NOTREACHED(); + cast_environment_->PostTask(CastEnvironment::MAIN, FROM_HERE, + done_callback); + return; + } cast_environment_->PostTask(CastEnvironment::AUDIO_ENCODER, FROM_HERE, - base::Bind(&AudioEncoder::EncodeAudioFrameThread, this, audio_frame, - recorded_time, frame_encoded_callback, release_callback)); + base::Bind(&AudioEncoder::EncodeAudio, this, audio_bus, recorded_time, + done_callback)); } -// Called from cast audio encoder thread. -void AudioEncoder::EncodeAudioFrameThread( - const PcmAudioFrame* audio_frame, +void AudioEncoder::EncodeAudio( + const AudioBus* audio_bus, const base::TimeTicks& recorded_time, - const FrameEncodedCallback& frame_encoded_callback, - const base::Closure release_callback) { + const base::Closure& done_callback) { DCHECK(cast_environment_->CurrentlyOn(CastEnvironment::AUDIO_ENCODER)); - size_t samples_per_10ms = audio_frame->frequency / 100; - size_t number_of_10ms_blocks = audio_frame->samples.size() / - (samples_per_10ms * audio_frame->channels); - DCHECK(webrtc::AudioFrame::kMaxDataSizeSamples > samples_per_10ms) - << "webrtc sanity check failed"; - - for (size_t i = 0; i < number_of_10ms_blocks; ++i) { - webrtc::AudioFrame webrtc_audio_frame; - webrtc_audio_frame.timestamp_ = timestamp_; - - // Due to the webrtc::AudioFrame declaration we need to copy our data into - // the webrtc structure. - memcpy(&webrtc_audio_frame.data_[0], - &audio_frame->samples[i * samples_per_10ms * audio_frame->channels], - samples_per_10ms * audio_frame->channels * sizeof(int16)); - - // The webrtc API is int and we have a size_t; the cast should never be an - // issue since the normal values are in the 480 range. - DCHECK_GE(1000u, samples_per_10ms); - webrtc_audio_frame.samples_per_channel_ = - static_cast<int>(samples_per_10ms); - webrtc_audio_frame.sample_rate_hz_ = audio_frame->frequency; - webrtc_audio_frame.num_channels_ = audio_frame->channels; - - // webrtc::AudioCodingModule is thread safe. - if (audio_encoder_->Add10MsData(webrtc_audio_frame) != 0) { - DCHECK(false) << "Invalid webrtc return value"; - } - timestamp_ += static_cast<uint32>(samples_per_10ms); - } - // We are done with the audio frame release it. + impl_->EncodeAudio(audio_bus, recorded_time, done_callback); cast_environment_->PostTask(CastEnvironment::MAIN, FROM_HERE, - release_callback); - - // Note: Not all insert of 10 ms will generate a callback with encoded data. - webrtc_encoder_callback_->SetEncodedCallbackInfo(recorded_time, - &frame_encoded_callback); - for (size_t i = 0; i < number_of_10ms_blocks; ++i) { - audio_encoder_->Process(); - } + base::Bind(LogAudioEncodedEvent, cast_environment_, recorded_time)); } } // namespace cast |