summaryrefslogtreecommitdiff
path: root/media/cast/audio_sender/audio_encoder.cc
diff options
context:
space:
mode:
Diffstat (limited to 'media/cast/audio_sender/audio_encoder.cc')
-rw-r--r--media/cast/audio_sender/audio_encoder.cc367
1 files changed, 242 insertions, 125 deletions
diff --git a/media/cast/audio_sender/audio_encoder.cc b/media/cast/audio_sender/audio_encoder.cc
index 3cfca0dfc8..a82d1de39a 100644
--- a/media/cast/audio_sender/audio_encoder.cc
+++ b/media/cast/audio_sender/audio_encoder.cc
@@ -4,171 +4,288 @@
#include "media/cast/audio_sender/audio_encoder.h"
+#include <algorithm>
+
#include "base/bind.h"
+#include "base/bind_helpers.h"
#include "base/logging.h"
#include "base/message_loop/message_loop.h"
+#include "base/sys_byteorder.h"
+#include "base/time/time.h"
+#include "media/base/audio_bus.h"
#include "media/cast/cast_defines.h"
#include "media/cast/cast_environment.h"
-#include "third_party/webrtc/modules/audio_coding/main/interface/audio_coding_module.h"
-#include "third_party/webrtc/modules/interface/module_common_types.h"
+#include "third_party/opus/src/include/opus.h"
namespace media {
namespace cast {
-// 48KHz, 2 channels and 100 ms.
-static const int kMaxNumberOfSamples = 48 * 2 * 100;
+void LogAudioEncodedEvent(CastEnvironment* const cast_environment,
+ const base::TimeTicks& recorded_time) {
+ // TODO(mikhal): Resolve timestamp calculation for audio.
+ cast_environment->Logging()->InsertFrameEvent(kAudioFrameEncoded,
+ GetVideoRtpTimestamp(recorded_time), kFrameIdUnknown);
+}
-// This class is only called from the cast audio encoder thread.
-class WebrtEncodedDataCallback : public webrtc::AudioPacketizationCallback {
+// Base class that handles the common problem of feeding one or more AudioBus'
+// data into a 10 ms buffer and then, once the buffer is full, encoding the
+// signal and emitting an EncodedAudioFrame via the FrameEncodedCallback.
+//
+// Subclasses complete the implementation by handling the actual encoding
+// details.
+class AudioEncoder::ImplBase {
public:
- WebrtEncodedDataCallback(scoped_refptr<CastEnvironment> cast_environment,
- AudioCodec codec,
- int frequency)
- : codec_(codec),
- frequency_(frequency),
- cast_environment_(cast_environment),
- last_timestamp_(0) {}
-
- virtual int32 SendData(
- webrtc::FrameType /*frame_type*/,
- uint8 /*payload_type*/,
- uint32 timestamp,
- const uint8* payload_data,
- uint16 payload_size,
- const webrtc::RTPFragmentationHeader* /*fragmentation*/) OVERRIDE {
- scoped_ptr<EncodedAudioFrame> audio_frame(new EncodedAudioFrame());
- audio_frame->codec = codec_;
- audio_frame->samples = timestamp - last_timestamp_;
- DCHECK(audio_frame->samples <= kMaxNumberOfSamples);
- last_timestamp_ = timestamp;
- audio_frame->data.insert(audio_frame->data.begin(),
- payload_data,
- payload_data + payload_size);
+ ImplBase(CastEnvironment* cast_environment,
+ AudioCodec codec, int num_channels, int sampling_rate,
+ const FrameEncodedCallback& callback)
+ : cast_environment_(cast_environment),
+ codec_(codec), num_channels_(num_channels),
+ samples_per_10ms_(sampling_rate / 100),
+ callback_(callback),
+ buffer_fill_end_(0),
+ frame_id_(0) {
+ CHECK_GT(num_channels_, 0);
+ CHECK_GT(samples_per_10ms_, 0);
+ CHECK_EQ(sampling_rate % 100, 0);
+ CHECK_LE(samples_per_10ms_ * num_channels_,
+ EncodedAudioFrame::kMaxNumberOfSamples);
+ }
- cast_environment_->PostTask(CastEnvironment::MAIN, FROM_HERE,
- base::Bind(*frame_encoded_callback_, base::Passed(&audio_frame),
- recorded_time_));
- return 0;
+ virtual ~ImplBase() {}
+
+ void EncodeAudio(const AudioBus* audio_bus,
+ const base::TimeTicks& recorded_time,
+ const base::Closure& done_callback) {
+ int src_pos = 0;
+ while (src_pos < audio_bus->frames()) {
+ const int num_samples_to_xfer =
+ std::min(samples_per_10ms_ - buffer_fill_end_,
+ audio_bus->frames() - src_pos);
+ DCHECK_EQ(audio_bus->channels(), num_channels_);
+ TransferSamplesIntoBuffer(
+ audio_bus, src_pos, buffer_fill_end_, num_samples_to_xfer);
+ src_pos += num_samples_to_xfer;
+ buffer_fill_end_ += num_samples_to_xfer;
+
+ if (src_pos == audio_bus->frames()) {
+ cast_environment_->PostTask(CastEnvironment::MAIN, FROM_HERE,
+ done_callback);
+ // Note: |audio_bus| is now invalid..
+ }
+
+ if (buffer_fill_end_ == samples_per_10ms_) {
+ scoped_ptr<EncodedAudioFrame> audio_frame(new EncodedAudioFrame());
+ audio_frame->codec = codec_;
+ audio_frame->frame_id = frame_id_++;
+ audio_frame->samples = samples_per_10ms_;
+ if (EncodeFromFilledBuffer(&audio_frame->data)) {
+ // Compute an offset to determine the recorded time for the first
+ // audio sample in the buffer.
+ const base::TimeDelta buffer_time_offset =
+ (buffer_fill_end_ - src_pos) *
+ base::TimeDelta::FromMilliseconds(10) / samples_per_10ms_;
+ // TODO(miu): Consider batching EncodedAudioFrames so we only post a
+ // at most one task for each call to this method.
+ cast_environment_->PostTask(
+ CastEnvironment::MAIN, FROM_HERE,
+ base::Bind(callback_, base::Passed(&audio_frame),
+ recorded_time - buffer_time_offset));
+ }
+ buffer_fill_end_ = 0;
+ }
+ }
}
- void SetEncodedCallbackInfo(
- const base::TimeTicks& recorded_time,
- const AudioEncoder::FrameEncodedCallback* frame_encoded_callback) {
- recorded_time_ = recorded_time;
- frame_encoded_callback_ = frame_encoded_callback;
+ protected:
+ virtual void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
+ int source_offset,
+ int buffer_fill_offset,
+ int num_samples) = 0;
+ virtual bool EncodeFromFilledBuffer(std::string* out) = 0;
+
+ CastEnvironment* const cast_environment_;
+ const AudioCodec codec_;
+ const int num_channels_;
+ const int samples_per_10ms_;
+ const FrameEncodedCallback callback_;
+
+ private:
+ // In the case where a call to EncodeAudio() cannot completely fill the
+ // buffer, this points to the position at which to populate data in a later
+ // call.
+ int buffer_fill_end_;
+
+ // A counter used to label EncodedAudioFrames.
+ uint32 frame_id_;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ImplBase);
+};
+
+class AudioEncoder::OpusImpl : public AudioEncoder::ImplBase {
+ public:
+ OpusImpl(CastEnvironment* cast_environment,
+ int num_channels, int sampling_rate, int bitrate,
+ const FrameEncodedCallback& callback)
+ : ImplBase(cast_environment, kOpus, num_channels, sampling_rate,
+ callback),
+ encoder_memory_(new uint8[opus_encoder_get_size(num_channels)]),
+ opus_encoder_(reinterpret_cast<OpusEncoder*>(encoder_memory_.get())),
+ buffer_(new float[num_channels * samples_per_10ms_]) {
+ CHECK_EQ(opus_encoder_init(opus_encoder_, sampling_rate, num_channels,
+ OPUS_APPLICATION_AUDIO),
+ OPUS_OK);
+ if (bitrate <= 0) {
+ // Note: As of 2013-10-31, the encoder in "auto bitrate" mode would use a
+ // variable bitrate up to 102kbps for 2-channel, 48 kHz audio and a 10 ms
+ // frame size. The opus library authors may, of course, adjust this in
+ // later versions.
+ bitrate = OPUS_AUTO;
+ }
+ CHECK_EQ(opus_encoder_ctl(opus_encoder_, OPUS_SET_BITRATE(bitrate)),
+ OPUS_OK);
}
+ virtual ~OpusImpl() {}
+
private:
- const AudioCodec codec_;
- const int frequency_;
- scoped_refptr<CastEnvironment> cast_environment_;
- uint32 last_timestamp_;
- base::TimeTicks recorded_time_;
- const AudioEncoder::FrameEncodedCallback* frame_encoded_callback_;
+ virtual void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
+ int source_offset,
+ int buffer_fill_offset,
+ int num_samples) OVERRIDE {
+ // Opus requires channel-interleaved samples in a single array.
+ for (int ch = 0; ch < audio_bus->channels(); ++ch) {
+ const float* src = audio_bus->channel(ch) + source_offset;
+ const float* const src_end = src + num_samples;
+ float* dest = buffer_.get() + buffer_fill_offset * num_channels_ + ch;
+ for (; src < src_end; ++src, dest += num_channels_)
+ *dest = *src;
+ }
+ }
+
+ virtual bool EncodeFromFilledBuffer(std::string* out) OVERRIDE {
+ out->resize(kOpusMaxPayloadSize);
+ const opus_int32 result = opus_encode_float(
+ opus_encoder_, buffer_.get(), samples_per_10ms_,
+ reinterpret_cast<uint8*>(&out->at(0)), kOpusMaxPayloadSize);
+ if (result > 1) {
+ out->resize(result);
+ return true;
+ } else if (result < 0) {
+ LOG(ERROR) << "Error code from opus_encode_float(): " << result;
+ return false;
+ } else {
+ // Do nothing: The documentation says that a return value of zero or
+ // one byte means the packet does not need to be transmitted.
+ return false;
+ }
+ }
+
+ const scoped_ptr<uint8[]> encoder_memory_;
+ OpusEncoder* const opus_encoder_;
+ const scoped_ptr<float[]> buffer_;
+
+ // This is the recommended value, according to documentation in
+ // third_party/opus/src/include/opus.h, so that the Opus encoder does not
+ // degrade the audio due to memory constraints.
+ //
+ // Note: Whereas other RTP implementations do not, the cast library is
+ // perfectly capable of transporting larger than MTU-sized audio frames.
+ static const int kOpusMaxPayloadSize = 4000;
+
+ DISALLOW_COPY_AND_ASSIGN(OpusImpl);
};
-AudioEncoder::AudioEncoder(scoped_refptr<CastEnvironment> cast_environment,
- const AudioSenderConfig& audio_config)
- : cast_environment_(cast_environment),
- audio_encoder_(webrtc::AudioCodingModule::Create(0)),
- webrtc_encoder_callback_(
- new WebrtEncodedDataCallback(cast_environment, audio_config.codec,
- audio_config.frequency)),
- timestamp_(0) { // Must start at 0; used above.
- if (audio_encoder_->InitializeSender() != 0) {
- DCHECK(false) << "Invalid webrtc return value";
+class AudioEncoder::Pcm16Impl : public AudioEncoder::ImplBase {
+ public:
+ Pcm16Impl(CastEnvironment* cast_environment,
+ int num_channels, int sampling_rate,
+ const FrameEncodedCallback& callback)
+ : ImplBase(cast_environment, kPcm16, num_channels, sampling_rate,
+ callback),
+ buffer_(new int16[num_channels * samples_per_10ms_]) {}
+
+ virtual ~Pcm16Impl() {}
+
+ private:
+ virtual void TransferSamplesIntoBuffer(const AudioBus* audio_bus,
+ int source_offset,
+ int buffer_fill_offset,
+ int num_samples) OVERRIDE {
+ audio_bus->ToInterleavedPartial(
+ source_offset, num_samples, sizeof(int16),
+ buffer_.get() + buffer_fill_offset * num_channels_);
}
- if (audio_encoder_->RegisterTransportCallback(
- webrtc_encoder_callback_.get()) != 0) {
- DCHECK(false) << "Invalid webrtc return value";
+
+ virtual bool EncodeFromFilledBuffer(std::string* out) OVERRIDE {
+ // Output 16-bit PCM integers in big-endian byte order.
+ out->resize(num_channels_ * samples_per_10ms_ * sizeof(int16));
+ const int16* src = buffer_.get();
+ const int16* const src_end = src + num_channels_ * samples_per_10ms_;
+ uint16* dest = reinterpret_cast<uint16*>(&out->at(0));
+ for (; src < src_end; ++src, ++dest)
+ *dest = base::HostToNet16(*src);
+ return true;
}
- webrtc::CodecInst send_codec;
- send_codec.pltype = audio_config.rtp_payload_type;
- send_codec.plfreq = audio_config.frequency;
- send_codec.channels = audio_config.channels;
+
+ private:
+ const scoped_ptr<int16[]> buffer_;
+
+ DISALLOW_COPY_AND_ASSIGN(Pcm16Impl);
+};
+
+AudioEncoder::AudioEncoder(
+ const scoped_refptr<CastEnvironment>& cast_environment,
+ const AudioSenderConfig& audio_config,
+ const FrameEncodedCallback& frame_encoded_callback)
+ : cast_environment_(cast_environment) {
+ // Note: It doesn't matter which thread constructs AudioEncoder, just so long
+ // as all calls to InsertAudio() are by the same thread.
+ insert_thread_checker_.DetachFromThread();
switch (audio_config.codec) {
case kOpus:
- strncpy(send_codec.plname, "opus", sizeof(send_codec.plname));
- send_codec.pacsize = audio_config.frequency / 50; // 20 ms
- send_codec.rate = audio_config.bitrate; // 64000
+ impl_.reset(new OpusImpl(
+ cast_environment, audio_config.channels, audio_config.frequency,
+ audio_config.bitrate, frame_encoded_callback));
break;
case kPcm16:
- strncpy(send_codec.plname, "L16", sizeof(send_codec.plname));
- send_codec.pacsize = audio_config.frequency / 100; // 10 ms
- // TODO(pwestin) bug in webrtc; it should take audio_config.channels into
- // account.
- send_codec.rate = 8 * 2 * audio_config.frequency;
+ impl_.reset(new Pcm16Impl(
+ cast_environment, audio_config.channels, audio_config.frequency,
+ frame_encoded_callback));
break;
default:
- DCHECK(false) << "Codec must be specified for audio encoder";
- return;
- }
- if (audio_encoder_->RegisterSendCodec(send_codec) != 0) {
- DCHECK(false) << "Invalid webrtc return value; failed to register codec";
+ NOTREACHED() << "Unsupported or unspecified codec for audio encoder";
+ break;
}
}
AudioEncoder::~AudioEncoder() {}
-// Called from main cast thread.
-void AudioEncoder::InsertRawAudioFrame(
- const PcmAudioFrame* audio_frame,
+void AudioEncoder::InsertAudio(
+ const AudioBus* audio_bus,
const base::TimeTicks& recorded_time,
- const FrameEncodedCallback& frame_encoded_callback,
- const base::Closure release_callback) {
+ const base::Closure& done_callback) {
+ DCHECK(insert_thread_checker_.CalledOnValidThread());
+ if (!impl_) {
+ NOTREACHED();
+ cast_environment_->PostTask(CastEnvironment::MAIN, FROM_HERE,
+ done_callback);
+ return;
+ }
cast_environment_->PostTask(CastEnvironment::AUDIO_ENCODER, FROM_HERE,
- base::Bind(&AudioEncoder::EncodeAudioFrameThread, this, audio_frame,
- recorded_time, frame_encoded_callback, release_callback));
+ base::Bind(&AudioEncoder::EncodeAudio, this, audio_bus, recorded_time,
+ done_callback));
}
-// Called from cast audio encoder thread.
-void AudioEncoder::EncodeAudioFrameThread(
- const PcmAudioFrame* audio_frame,
+void AudioEncoder::EncodeAudio(
+ const AudioBus* audio_bus,
const base::TimeTicks& recorded_time,
- const FrameEncodedCallback& frame_encoded_callback,
- const base::Closure release_callback) {
+ const base::Closure& done_callback) {
DCHECK(cast_environment_->CurrentlyOn(CastEnvironment::AUDIO_ENCODER));
- size_t samples_per_10ms = audio_frame->frequency / 100;
- size_t number_of_10ms_blocks = audio_frame->samples.size() /
- (samples_per_10ms * audio_frame->channels);
- DCHECK(webrtc::AudioFrame::kMaxDataSizeSamples > samples_per_10ms)
- << "webrtc sanity check failed";
-
- for (size_t i = 0; i < number_of_10ms_blocks; ++i) {
- webrtc::AudioFrame webrtc_audio_frame;
- webrtc_audio_frame.timestamp_ = timestamp_;
-
- // Due to the webrtc::AudioFrame declaration we need to copy our data into
- // the webrtc structure.
- memcpy(&webrtc_audio_frame.data_[0],
- &audio_frame->samples[i * samples_per_10ms * audio_frame->channels],
- samples_per_10ms * audio_frame->channels * sizeof(int16));
-
- // The webrtc API is int and we have a size_t; the cast should never be an
- // issue since the normal values are in the 480 range.
- DCHECK_GE(1000u, samples_per_10ms);
- webrtc_audio_frame.samples_per_channel_ =
- static_cast<int>(samples_per_10ms);
- webrtc_audio_frame.sample_rate_hz_ = audio_frame->frequency;
- webrtc_audio_frame.num_channels_ = audio_frame->channels;
-
- // webrtc::AudioCodingModule is thread safe.
- if (audio_encoder_->Add10MsData(webrtc_audio_frame) != 0) {
- DCHECK(false) << "Invalid webrtc return value";
- }
- timestamp_ += static_cast<uint32>(samples_per_10ms);
- }
- // We are done with the audio frame release it.
+ impl_->EncodeAudio(audio_bus, recorded_time, done_callback);
cast_environment_->PostTask(CastEnvironment::MAIN, FROM_HERE,
- release_callback);
-
- // Note: Not all insert of 10 ms will generate a callback with encoded data.
- webrtc_encoder_callback_->SetEncodedCallbackInfo(recorded_time,
- &frame_encoded_callback);
- for (size_t i = 0; i < number_of_10ms_blocks; ++i) {
- audio_encoder_->Process();
- }
+ base::Bind(LogAudioEncodedEvent, cast_environment_, recorded_time));
}
} // namespace cast