12 files changed, 1031 insertions, 336 deletions
diff --git a/webrtc/audio/BUILD.gn b/webrtc/audio/BUILD.gn
index d5061db9dc..5a9902eac1 100644
--- a/webrtc/audio/BUILD.gn
+++ b/webrtc/audio/BUILD.gn
@@ -14,6 +14,9 @@ source_set("audio") {
     "audio_receive_stream.h",
     "audio_send_stream.cc",
     "audio_send_stream.h",
+    "audio_sink.h",
+    "audio_state.cc",
+    "audio_state.h",
     "conversion.h",
     "scoped_voe_interface.h",
   ]
@@ -29,7 +32,7 @@ source_set("audio") {
 
   deps = [
     "..:webrtc_common",
-    "../voice_engine",
     "../system_wrappers",
+    "../voice_engine",
   ]
 }
diff --git a/webrtc/audio/audio_receive_stream.cc b/webrtc/audio/audio_receive_stream.cc
index 34197c3ff7..64d008326d 100644
--- a/webrtc/audio/audio_receive_stream.cc
+++ b/webrtc/audio/audio_receive_stream.cc
@@ -11,20 +11,41 @@
 #include "webrtc/audio/audio_receive_stream.h"
 
 #include <string>
+#include <utility>
 
+#include "webrtc/audio/audio_sink.h"
+#include "webrtc/audio/audio_state.h"
 #include "webrtc/audio/conversion.h"
 #include "webrtc/base/checks.h"
 #include "webrtc/base/logging.h"
+#include "webrtc/call/congestion_controller.h"
 #include "webrtc/modules/remote_bitrate_estimator/include/remote_bitrate_estimator.h"
 #include "webrtc/system_wrappers/include/tick_util.h"
+#include "webrtc/voice_engine/channel_proxy.h"
 #include "webrtc/voice_engine/include/voe_base.h"
 #include "webrtc/voice_engine/include/voe_codec.h"
 #include "webrtc/voice_engine/include/voe_neteq_stats.h"
 #include "webrtc/voice_engine/include/voe_rtp_rtcp.h"
 #include "webrtc/voice_engine/include/voe_video_sync.h"
 #include "webrtc/voice_engine/include/voe_volume_control.h"
+#include "webrtc/voice_engine/voice_engine_impl.h"
 
 namespace webrtc {
+namespace {
+
+bool UseSendSideBwe(const webrtc::AudioReceiveStream::Config& config) {
+  if (!config.rtp.transport_cc) {
+    return false;
+  }
+  for (const auto& extension : config.rtp.extensions) {
+    if (extension.name == RtpExtension::kTransportSequenceNumber) {
+      return true;
+    }
+  }
+  return false;
+}
+}  // namespace
+
 std::string AudioReceiveStream::Config::Rtp::ToString() const {
   std::stringstream ss;
   ss << "{remote_ssrc: " << remote_ssrc;
@@ -60,120 +81,62 @@ std::string AudioReceiveStream::Config::ToString() const {
 
 namespace internal {
 AudioReceiveStream::AudioReceiveStream(
-      RemoteBitrateEstimator* remote_bitrate_estimator,
-      const webrtc::AudioReceiveStream::Config& config,
-      VoiceEngine* voice_engine)
-    : remote_bitrate_estimator_(remote_bitrate_estimator),
-      config_(config),
-      voice_engine_(voice_engine),
-      voe_base_(voice_engine),
+    CongestionController* congestion_controller,
+    const webrtc::AudioReceiveStream::Config& config,
+    const rtc::scoped_refptr<webrtc::AudioState>& audio_state)
+    : config_(config),
+      audio_state_(audio_state),
       rtp_header_parser_(RtpHeaderParser::Create()) {
   LOG(LS_INFO) << "AudioReceiveStream: " << config_.ToString();
-  RTC_DCHECK(config.voe_channel_id != -1);
-  RTC_DCHECK(remote_bitrate_estimator_ != nullptr);
-  RTC_DCHECK(voice_engine_ != nullptr);
-  RTC_DCHECK(rtp_header_parser_ != nullptr);
-  for (const auto& ext : config.rtp.extensions) {
-    // One-byte-extension local identifiers are in the range 1-14 inclusive.
-    RTC_DCHECK_GE(ext.id, 1);
-    RTC_DCHECK_LE(ext.id, 14);
-    if (ext.name == RtpExtension::kAudioLevel) {
-      RTC_CHECK(rtp_header_parser_->RegisterRtpHeaderExtension(
-          kRtpExtensionAudioLevel, ext.id));
-    } else if (ext.name == RtpExtension::kAbsSendTime) {
-      RTC_CHECK(rtp_header_parser_->RegisterRtpHeaderExtension(
-          kRtpExtensionAbsoluteSendTime, ext.id));
-    } else if (ext.name == RtpExtension::kTransportSequenceNumber) {
-      RTC_CHECK(rtp_header_parser_->RegisterRtpHeaderExtension(
-          kRtpExtensionTransportSequenceNumber, ext.id));
+  RTC_DCHECK_NE(config_.voe_channel_id, -1);
+  RTC_DCHECK(audio_state_.get());
+  RTC_DCHECK(congestion_controller);
+  RTC_DCHECK(rtp_header_parser_);
+
+  VoiceEngineImpl* voe_impl = static_cast<VoiceEngineImpl*>(voice_engine());
+  channel_proxy_ = voe_impl->GetChannelProxy(config_.voe_channel_id);
+  channel_proxy_->SetLocalSSRC(config.rtp.local_ssrc);
+  for (const auto& extension : config.rtp.extensions) {
+    if (extension.name == RtpExtension::kAudioLevel) {
+      channel_proxy_->SetReceiveAudioLevelIndicationStatus(true, extension.id);
+      bool registered = rtp_header_parser_->RegisterRtpHeaderExtension(
+          kRtpExtensionAudioLevel, extension.id);
+      RTC_DCHECK(registered);
+    } else if (extension.name == RtpExtension::kAbsSendTime) {
+      channel_proxy_->SetReceiveAbsoluteSenderTimeStatus(true, extension.id);
+      bool registered = rtp_header_parser_->RegisterRtpHeaderExtension(
+          kRtpExtensionAbsoluteSendTime, extension.id);
+      RTC_DCHECK(registered);
+    } else if (extension.name == RtpExtension::kTransportSequenceNumber) {
+      bool registered = rtp_header_parser_->RegisterRtpHeaderExtension(
+          kRtpExtensionTransportSequenceNumber, extension.id);
+      RTC_DCHECK(registered);
     } else {
       RTC_NOTREACHED() << "Unsupported RTP extension.";
     }
   }
+  // Configure bandwidth estimation.
+  channel_proxy_->SetCongestionControlObjects(
+      nullptr, nullptr, congestion_controller->packet_router());
+  if (config.combined_audio_video_bwe) {
+    if (UseSendSideBwe(config)) {
+      remote_bitrate_estimator_ =
+          congestion_controller->GetRemoteBitrateEstimator(true);
+    } else {
+      remote_bitrate_estimator_ =
+          congestion_controller->GetRemoteBitrateEstimator(false);
+    }
+    RTC_DCHECK(remote_bitrate_estimator_);
+  }
 }
 
 AudioReceiveStream::~AudioReceiveStream() {
   RTC_DCHECK(thread_checker_.CalledOnValidThread());
   LOG(LS_INFO) << "~AudioReceiveStream: " << config_.ToString();
-}
-
-webrtc::AudioReceiveStream::Stats AudioReceiveStream::GetStats() const {
-  RTC_DCHECK(thread_checker_.CalledOnValidThread());
-  webrtc::AudioReceiveStream::Stats stats;
-  stats.remote_ssrc = config_.rtp.remote_ssrc;
-  ScopedVoEInterface<VoECodec> codec(voice_engine_);
-  ScopedVoEInterface<VoENetEqStats> neteq(voice_engine_);
-  ScopedVoEInterface<VoERTP_RTCP> rtp(voice_engine_);
-  ScopedVoEInterface<VoEVideoSync> sync(voice_engine_);
-  ScopedVoEInterface<VoEVolumeControl> volume(voice_engine_);
-  unsigned int ssrc = 0;
-  webrtc::CallStatistics call_stats = {0};
-  webrtc::CodecInst codec_inst = {0};
-  // Only collect stats if we have seen some traffic with the SSRC.
-  if (rtp->GetRemoteSSRC(config_.voe_channel_id, ssrc) == -1 ||
-      rtp->GetRTCPStatistics(config_.voe_channel_id, call_stats) == -1 ||
-      codec->GetRecCodec(config_.voe_channel_id, codec_inst) == -1) {
-    return stats;
+  channel_proxy_->SetCongestionControlObjects(nullptr, nullptr, nullptr);
+  if (remote_bitrate_estimator_) {
+    remote_bitrate_estimator_->RemoveStream(config_.rtp.remote_ssrc);
   }
-
-  stats.bytes_rcvd = call_stats.bytesReceived;
-  stats.packets_rcvd = call_stats.packetsReceived;
-  stats.packets_lost = call_stats.cumulativeLost;
-  stats.fraction_lost = Q8ToFloat(call_stats.fractionLost);
-  if (codec_inst.pltype != -1) {
-    stats.codec_name = codec_inst.plname;
-  }
-  stats.ext_seqnum = call_stats.extendedMax;
-  if (codec_inst.plfreq / 1000 > 0) {
-    stats.jitter_ms = call_stats.jitterSamples / (codec_inst.plfreq / 1000);
-  }
-  {
-    int jitter_buffer_delay_ms = 0;
-    int playout_buffer_delay_ms = 0;
-    sync->GetDelayEstimate(config_.voe_channel_id, &jitter_buffer_delay_ms,
-                           &playout_buffer_delay_ms);
-    stats.delay_estimate_ms =
-        jitter_buffer_delay_ms + playout_buffer_delay_ms;
-  }
-  {
-    unsigned int level = 0;
-    if (volume->GetSpeechOutputLevelFullRange(config_.voe_channel_id, level)
-        != -1) {
-      stats.audio_level = static_cast<int32_t>(level);
-    }
-  }
-
-  webrtc::NetworkStatistics ns = {0};
-  if (neteq->GetNetworkStatistics(config_.voe_channel_id, ns) != -1) {
-    // Get jitter buffer and total delay (alg + jitter + playout) stats.
-    stats.jitter_buffer_ms = ns.currentBufferSize;
-    stats.jitter_buffer_preferred_ms = ns.preferredBufferSize;
-    stats.expand_rate = Q14ToFloat(ns.currentExpandRate);
-    stats.speech_expand_rate = Q14ToFloat(ns.currentSpeechExpandRate);
-    stats.secondary_decoded_rate = Q14ToFloat(ns.currentSecondaryDecodedRate);
-    stats.accelerate_rate = Q14ToFloat(ns.currentAccelerateRate);
-    stats.preemptive_expand_rate = Q14ToFloat(ns.currentPreemptiveRate);
-  }
-
-  webrtc::AudioDecodingCallStats ds;
-  if (neteq->GetDecodingCallStatistics(config_.voe_channel_id, &ds) != -1) {
-    stats.decoding_calls_to_silence_generator =
-        ds.calls_to_silence_generator;
-    stats.decoding_calls_to_neteq = ds.calls_to_neteq;
-    stats.decoding_normal = ds.decoded_normal;
-    stats.decoding_plc = ds.decoded_plc;
-    stats.decoding_cng = ds.decoded_cng;
-    stats.decoding_plc_cng = ds.decoded_plc_cng;
-  }
-
-  stats.capture_start_ntp_time_ms = call_stats.capture_start_ntp_time_ms_;
-
-  return stats;
-}
-
-const webrtc::AudioReceiveStream::Config& AudioReceiveStream::config() const {
-  RTC_DCHECK(thread_checker_.CalledOnValidThread());
-  return config_;
 }
 
 void AudioReceiveStream::Start() {
@@ -204,15 +167,16 @@ bool AudioReceiveStream::DeliverRtp(const uint8_t* packet,
   // thread. Then this check can be enabled.
   // RTC_DCHECK(!thread_checker_.CalledOnValidThread());
   RTPHeader header;
-
   if (!rtp_header_parser_->Parse(packet, length, &header)) {
     return false;
   }
 
-  // Only forward if the parsed header has absolute sender time. RTP timestamps
-  // may have different rates for audio and video and shouldn't be mixed.
-  if (config_.combined_audio_video_bwe &&
-      header.extension.hasAbsoluteSendTime) {
+  // Only forward if the parsed header has one of the header extensions
+  // necessary for bandwidth estimation. RTP timestamps have different rates
+  // for audio and video and shouldn't be mixed.
+  if (remote_bitrate_estimator_ &&
+      (header.extension.hasAbsoluteSendTime ||
+       header.extension.hasTransportSequenceNumber)) {
     int64_t arrival_time_ms = TickTime::MillisecondTimestamp();
     if (packet_time.timestamp >= 0)
       arrival_time_ms = (packet_time.timestamp + 500) / 1000;
@@ -222,5 +186,71 @@ bool AudioReceiveStream::DeliverRtp(const uint8_t* packet,
   }
   return true;
 }
+
+webrtc::AudioReceiveStream::Stats AudioReceiveStream::GetStats() const {
+  RTC_DCHECK(thread_checker_.CalledOnValidThread());
+  webrtc::AudioReceiveStream::Stats stats;
+  stats.remote_ssrc = config_.rtp.remote_ssrc;
+  ScopedVoEInterface<VoECodec> codec(voice_engine());
+
+  webrtc::CallStatistics call_stats = channel_proxy_->GetRTCPStatistics();
+  webrtc::CodecInst codec_inst = {0};
+  if (codec->GetRecCodec(config_.voe_channel_id, codec_inst) == -1) {
+    return stats;
+  }
+
+  stats.bytes_rcvd = call_stats.bytesReceived;
+  stats.packets_rcvd = call_stats.packetsReceived;
+  stats.packets_lost = call_stats.cumulativeLost;
+  stats.fraction_lost = Q8ToFloat(call_stats.fractionLost);
+  stats.capture_start_ntp_time_ms = call_stats.capture_start_ntp_time_ms_;
+  if (codec_inst.pltype != -1) {
+    stats.codec_name = codec_inst.plname;
+  }
+  stats.ext_seqnum = call_stats.extendedMax;
+  if (codec_inst.plfreq / 1000 > 0) {
+    stats.jitter_ms = call_stats.jitterSamples / (codec_inst.plfreq / 1000);
+  }
+  stats.delay_estimate_ms = channel_proxy_->GetDelayEstimate();
+  stats.audio_level = channel_proxy_->GetSpeechOutputLevelFullRange();
+
+  // Get jitter buffer and total delay (alg + jitter + playout) stats.
+  auto ns = channel_proxy_->GetNetworkStatistics();
+  stats.jitter_buffer_ms = ns.currentBufferSize;
+  stats.jitter_buffer_preferred_ms = ns.preferredBufferSize;
+  stats.expand_rate = Q14ToFloat(ns.currentExpandRate);
+  stats.speech_expand_rate = Q14ToFloat(ns.currentSpeechExpandRate);
+  stats.secondary_decoded_rate = Q14ToFloat(ns.currentSecondaryDecodedRate);
+  stats.accelerate_rate = Q14ToFloat(ns.currentAccelerateRate);
+  stats.preemptive_expand_rate = Q14ToFloat(ns.currentPreemptiveRate);
+
+  auto ds = channel_proxy_->GetDecodingCallStatistics();
+  stats.decoding_calls_to_silence_generator = ds.calls_to_silence_generator;
+  stats.decoding_calls_to_neteq = ds.calls_to_neteq;
+  stats.decoding_normal = ds.decoded_normal;
+  stats.decoding_plc = ds.decoded_plc;
+  stats.decoding_cng = ds.decoded_cng;
+  stats.decoding_plc_cng = ds.decoded_plc_cng;
+
+  return stats;
+}
+
+void AudioReceiveStream::SetSink(rtc::scoped_ptr<AudioSinkInterface> sink) {
+  RTC_DCHECK(thread_checker_.CalledOnValidThread());
+  channel_proxy_->SetSink(std::move(sink));
+}
+
+const webrtc::AudioReceiveStream::Config& AudioReceiveStream::config() const {
+  RTC_DCHECK(thread_checker_.CalledOnValidThread());
+  return config_;
+}
+
+VoiceEngine* AudioReceiveStream::voice_engine() const {
+  internal::AudioState* audio_state =
+      static_cast<internal::AudioState*>(audio_state_.get());
+  VoiceEngine* voice_engine = audio_state->voice_engine();
+  RTC_DCHECK(voice_engine);
+  return voice_engine;
+}
 }  // namespace internal
 }  // namespace webrtc
diff --git a/webrtc/audio/audio_receive_stream.h b/webrtc/audio/audio_receive_stream.h
index 5d02b0e2ae..4940c6a64c 100644
--- a/webrtc/audio/audio_receive_stream.h
+++ b/webrtc/audio/audio_receive_stream.h
@@ -12,23 +12,25 @@
 #define WEBRTC_AUDIO_AUDIO_RECEIVE_STREAM_H_
 
 #include "webrtc/audio_receive_stream.h"
-#include "webrtc/audio/scoped_voe_interface.h"
+#include "webrtc/audio_state.h"
 #include "webrtc/base/thread_checker.h"
-#include "webrtc/modules/rtp_rtcp/interface/rtp_header_parser.h"
-#include "webrtc/voice_engine/include/voe_base.h"
+#include "webrtc/modules/rtp_rtcp/include/rtp_header_parser.h"
 
 namespace webrtc {
-
+class CongestionController;
 class RemoteBitrateEstimator;
-class VoiceEngine;
+
+namespace voe {
+class ChannelProxy;
+}  // namespace voe
 
 namespace internal {
 
 class AudioReceiveStream final : public webrtc::AudioReceiveStream {
  public:
-  AudioReceiveStream(RemoteBitrateEstimator* remote_bitrate_estimator,
+  AudioReceiveStream(CongestionController* congestion_controller,
                      const webrtc::AudioReceiveStream::Config& config,
-                     VoiceEngine* voice_engine);
+                     const rtc::scoped_refptr<webrtc::AudioState>& audio_state);
   ~AudioReceiveStream() override;
 
   // webrtc::ReceiveStream implementation.
@@ -43,16 +45,19 @@ class AudioReceiveStream final : public webrtc::AudioReceiveStream {
   // webrtc::AudioReceiveStream implementation.
   webrtc::AudioReceiveStream::Stats GetStats() const override;
 
+  void SetSink(rtc::scoped_ptr<AudioSinkInterface> sink) override;
+
   const webrtc::AudioReceiveStream::Config& config() const;
 
  private:
+  VoiceEngine* voice_engine() const;
+
   rtc::ThreadChecker thread_checker_;
-  RemoteBitrateEstimator* const remote_bitrate_estimator_;
+  RemoteBitrateEstimator* remote_bitrate_estimator_ = nullptr;
   const webrtc::AudioReceiveStream::Config config_;
-  VoiceEngine* voice_engine_;
-  // We hold one interface pointer to the VoE to make sure it is kept alive.
-  ScopedVoEInterface<VoEBase> voe_base_;
+  rtc::scoped_refptr<webrtc::AudioState> audio_state_;
   rtc::scoped_ptr<RtpHeaderParser> rtp_header_parser_;
+  rtc::scoped_ptr<voe::ChannelProxy> channel_proxy_;
 
   RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(AudioReceiveStream);
 };
diff --git a/webrtc/audio/audio_receive_stream_unittest.cc b/webrtc/audio/audio_receive_stream_unittest.cc
index 4e267f1738..eb008b3045 100644
--- a/webrtc/audio/audio_receive_stream_unittest.cc
+++ b/webrtc/audio/audio_receive_stream_unittest.cc
@@ -8,154 +8,320 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
+#include <string>
+
 #include "testing/gtest/include/gtest/gtest.h"
 
 #include "webrtc/audio/audio_receive_stream.h"
 #include "webrtc/audio/conversion.h"
+#include "webrtc/call/mock/mock_congestion_controller.h"
+#include "webrtc/modules/bitrate_controller/include/mock/mock_bitrate_controller.h"
+#include "webrtc/modules/pacing/packet_router.h"
 #include "webrtc/modules/remote_bitrate_estimator/include/mock/mock_remote_bitrate_estimator.h"
 #include "webrtc/modules/rtp_rtcp/source/byte_io.h"
-#include "webrtc/test/fake_voice_engine.h"
+#include "webrtc/modules/utility/include/mock/mock_process_thread.h"
+#include "webrtc/system_wrappers/include/clock.h"
+#include "webrtc/test/mock_voe_channel_proxy.h"
+#include "webrtc/test/mock_voice_engine.h"
+#include "webrtc/video/call_stats.h"
 
+namespace webrtc {
+namespace test {
 namespace {
 
-using webrtc::ByteWriter;
+using testing::_;
+using testing::Return;
 
-const size_t kAbsoluteSendTimeLength = 4;
+AudioDecodingCallStats MakeAudioDecodeStatsForTest() {
+  AudioDecodingCallStats audio_decode_stats;
+  audio_decode_stats.calls_to_silence_generator = 234;
+  audio_decode_stats.calls_to_neteq = 567;
+  audio_decode_stats.decoded_normal = 890;
+  audio_decode_stats.decoded_plc = 123;
+  audio_decode_stats.decoded_cng = 456;
+  audio_decode_stats.decoded_plc_cng = 789;
+  return audio_decode_stats;
+}
 
-void BuildAbsoluteSendTimeExtension(uint8_t* buffer,
-                                    int id,
-                                    uint32_t abs_send_time) {
-  const size_t kRtpOneByteHeaderLength = 4;
-  const uint16_t kRtpOneByteHeaderExtensionId = 0xBEDE;
-  ByteWriter<uint16_t>::WriteBigEndian(buffer, kRtpOneByteHeaderExtensionId);
+const int kChannelId = 2;
+const uint32_t kRemoteSsrc = 1234;
+const uint32_t kLocalSsrc = 5678;
+const size_t kOneByteExtensionHeaderLength = 4;
+const size_t kOneByteExtensionLength = 4;
+const int kAbsSendTimeId = 2;
+const int kAudioLevelId = 3;
+const int kTransportSequenceNumberId = 4;
+const int kJitterBufferDelay = -7;
+const int kPlayoutBufferDelay = 302;
+const unsigned int kSpeechOutputLevel = 99;
+const CallStatistics kCallStats = {
+    345,  678,  901, 234, -12, 3456, 7890, 567, 890, 123};
+const CodecInst kCodecInst = {
+    123, "codec_name_recv", 96000, -187, 0, -103};
+const NetworkStatistics kNetworkStats = {
+    123, 456, false, 0, 0, 789, 12, 345, 678, 901, -1, -1, -1, -1, -1, 0};
+const AudioDecodingCallStats kAudioDecodeStats = MakeAudioDecodeStatsForTest();
+
+struct ConfigHelper {
+  ConfigHelper()
+      : simulated_clock_(123456),
+        call_stats_(&simulated_clock_),
+        congestion_controller_(&process_thread_,
+                               &call_stats_,
+                               &bitrate_observer_) {
+    using testing::Invoke;
+
+    EXPECT_CALL(voice_engine_,
+        RegisterVoiceEngineObserver(_)).WillOnce(Return(0));
+    EXPECT_CALL(voice_engine_,
+        DeRegisterVoiceEngineObserver()).WillOnce(Return(0));
+    AudioState::Config config;
+    config.voice_engine = &voice_engine_;
+    audio_state_ = AudioState::Create(config);
+
+    EXPECT_CALL(voice_engine_, ChannelProxyFactory(kChannelId))
+        .WillOnce(Invoke([this](int channel_id) {
+          EXPECT_FALSE(channel_proxy_);
+          channel_proxy_ = new testing::StrictMock<MockVoEChannelProxy>();
+          EXPECT_CALL(*channel_proxy_, SetLocalSSRC(kLocalSsrc)).Times(1);
+          EXPECT_CALL(*channel_proxy_,
+              SetReceiveAbsoluteSenderTimeStatus(true, kAbsSendTimeId))
+                  .Times(1);
+          EXPECT_CALL(*channel_proxy_,
+              SetReceiveAudioLevelIndicationStatus(true, kAudioLevelId))
+                  .Times(1);
+          EXPECT_CALL(*channel_proxy_, SetCongestionControlObjects(
+                                           nullptr, nullptr, &packet_router_))
+              .Times(1);
+          EXPECT_CALL(congestion_controller_, packet_router())
+              .WillOnce(Return(&packet_router_));
+          EXPECT_CALL(*channel_proxy_,
+                      SetCongestionControlObjects(nullptr, nullptr, nullptr))
+              .Times(1);
+          return channel_proxy_;
+        }));
+    stream_config_.voe_channel_id = kChannelId;
+    stream_config_.rtp.local_ssrc = kLocalSsrc;
+    stream_config_.rtp.remote_ssrc = kRemoteSsrc;
+    stream_config_.rtp.extensions.push_back(
+        RtpExtension(RtpExtension::kAbsSendTime, kAbsSendTimeId));
+    stream_config_.rtp.extensions.push_back(
+        RtpExtension(RtpExtension::kAudioLevel, kAudioLevelId));
+  }
+
+  MockCongestionController* congestion_controller() {
+    return &congestion_controller_;
+  }
+  MockRemoteBitrateEstimator* remote_bitrate_estimator() {
+    return &remote_bitrate_estimator_;
+  }
+  AudioReceiveStream::Config& config() { return stream_config_; }
+  rtc::scoped_refptr<AudioState> audio_state() { return audio_state_; }
+  MockVoiceEngine& voice_engine() { return voice_engine_; }
 
-  const uint32_t kPosLength = 2;
-  ByteWriter<uint16_t>::WriteBigEndian(buffer + kPosLength,
-                                       kAbsoluteSendTimeLength / 4);
+  void SetupMockForBweFeedback(bool send_side_bwe) {
+    EXPECT_CALL(congestion_controller_,
+                GetRemoteBitrateEstimator(send_side_bwe))
+        .WillOnce(Return(&remote_bitrate_estimator_));
+    EXPECT_CALL(remote_bitrate_estimator_,
+                RemoveStream(stream_config_.rtp.remote_ssrc));
+  }
 
-  const uint8_t kLengthOfData = 3;
-  buffer[kRtpOneByteHeaderLength] = (id << 4) + (kLengthOfData - 1);
-  ByteWriter<uint32_t, kLengthOfData>::WriteBigEndian(
-      buffer + kRtpOneByteHeaderLength + 1, abs_send_time);
+  void SetupMockForGetStats() {
+    using testing::DoAll;
+    using testing::SetArgReferee;
+
+    ASSERT_TRUE(channel_proxy_);
+    EXPECT_CALL(*channel_proxy_, GetRTCPStatistics())
+        .WillOnce(Return(kCallStats));
+    EXPECT_CALL(*channel_proxy_, GetDelayEstimate())
+        .WillOnce(Return(kJitterBufferDelay + kPlayoutBufferDelay));
+    EXPECT_CALL(*channel_proxy_, GetSpeechOutputLevelFullRange())
+        .WillOnce(Return(kSpeechOutputLevel));
+    EXPECT_CALL(*channel_proxy_, GetNetworkStatistics())
+        .WillOnce(Return(kNetworkStats));
+    EXPECT_CALL(*channel_proxy_, GetDecodingCallStatistics())
+        .WillOnce(Return(kAudioDecodeStats));
+
+    EXPECT_CALL(voice_engine_, GetRecCodec(kChannelId, _))
+        .WillOnce(DoAll(SetArgReferee<1>(kCodecInst), Return(0)));
+  }
+
+ private:
+  SimulatedClock simulated_clock_;
+  CallStats call_stats_;
+  PacketRouter packet_router_;
+  testing::NiceMock<MockBitrateObserver> bitrate_observer_;
+  testing::NiceMock<MockProcessThread> process_thread_;
+  MockCongestionController congestion_controller_;
+  MockRemoteBitrateEstimator remote_bitrate_estimator_;
+  testing::StrictMock<MockVoiceEngine> voice_engine_;
+  rtc::scoped_refptr<AudioState> audio_state_;
+  AudioReceiveStream::Config stream_config_;
+  testing::StrictMock<MockVoEChannelProxy>* channel_proxy_ = nullptr;
+};
+
+void BuildOneByteExtension(std::vector<uint8_t>::iterator it,
+                           int id,
+                           uint32_t extension_value,
+                           size_t value_length) {
+  const uint16_t kRtpOneByteHeaderExtensionId = 0xBEDE;
+  ByteWriter<uint16_t>::WriteBigEndian(&(*it), kRtpOneByteHeaderExtensionId);
+  it += 2;
+
+  ByteWriter<uint16_t>::WriteBigEndian(&(*it), kOneByteExtensionLength / 4);
+  it += 2;
+  const size_t kExtensionDataLength = kOneByteExtensionLength - 1;
+  uint32_t shifted_value = extension_value
+                           << (8 * (kExtensionDataLength - value_length));
+  *it = (id << 4) + (value_length - 1);
+  ++it;
+  ByteWriter<uint32_t, kExtensionDataLength>::WriteBigEndian(&(*it),
+                                                             shifted_value);
 }
 
-size_t CreateRtpHeaderWithAbsSendTime(uint8_t* header,
-                                      int extension_id,
-                                      uint32_t abs_send_time) {
+std::vector<uint8_t> CreateRtpHeaderWithOneByteExtension(
+    int extension_id,
+    uint32_t extension_value,
+    size_t value_length) {
+  std::vector<uint8_t> header;
+  header.resize(webrtc::kRtpHeaderSize + kOneByteExtensionHeaderLength +
+                kOneByteExtensionLength);
   header[0] = 0x80;   // Version 2.
   header[0] |= 0x10;  // Set extension bit.
   header[1] = 100;    // Payload type.
   header[1] |= 0x80;  // Marker bit is set.
-  ByteWriter<uint16_t>::WriteBigEndian(header + 2, 0x1234);  // Sequence number.
-  ByteWriter<uint32_t>::WriteBigEndian(header + 4, 0x5678);  // Timestamp.
-  ByteWriter<uint32_t>::WriteBigEndian(header + 8, 0x4321);  // SSRC.
-  int32_t rtp_header_length = webrtc::kRtpHeaderSize;
-
-  BuildAbsoluteSendTimeExtension(header + rtp_header_length, extension_id,
-                                 abs_send_time);
-  rtp_header_length += kAbsoluteSendTimeLength;
-  return rtp_header_length;
+  ByteWriter<uint16_t>::WriteBigEndian(&header[2], 0x1234);  // Sequence number.
+  ByteWriter<uint32_t>::WriteBigEndian(&header[4], 0x5678);  // Timestamp.
+  ByteWriter<uint32_t>::WriteBigEndian(&header[8], 0x4321);  // SSRC.
+
+  BuildOneByteExtension(header.begin() + webrtc::kRtpHeaderSize, extension_id,
+                        extension_value, value_length);
+  return header;
 }
 }  // namespace
 
-namespace webrtc {
-namespace test {
-
 TEST(AudioReceiveStreamTest, ConfigToString) {
-  const int kAbsSendTimeId = 3;
   AudioReceiveStream::Config config;
-  config.rtp.remote_ssrc = 1234;
-  config.rtp.local_ssrc = 5678;
+  config.rtp.remote_ssrc = kRemoteSsrc;
+  config.rtp.local_ssrc = kLocalSsrc;
   config.rtp.extensions.push_back(
       RtpExtension(RtpExtension::kAbsSendTime, kAbsSendTimeId));
-  config.voe_channel_id = 1;
+  config.voe_channel_id = kChannelId;
   config.combined_audio_video_bwe = true;
-  EXPECT_EQ("{rtp: {remote_ssrc: 1234, local_ssrc: 5678, extensions: [{name: "
-      "http://www.webrtc.org/experiments/rtp-hdrext/abs-send-time, id: 3}]}, "
+  EXPECT_EQ(
+      "{rtp: {remote_ssrc: 1234, local_ssrc: 5678, extensions: [{name: "
+      "http://www.webrtc.org/experiments/rtp-hdrext/abs-send-time, id: 2}]}, "
       "receive_transport: nullptr, rtcp_send_transport: nullptr, "
-      "voe_channel_id: 1, combined_audio_video_bwe: true}", config.ToString());
+      "voe_channel_id: 2, combined_audio_video_bwe: true}",
+      config.ToString());
 }
 
 TEST(AudioReceiveStreamTest, ConstructDestruct) {
-  MockRemoteBitrateEstimator remote_bitrate_estimator;
-  FakeVoiceEngine voice_engine;
-  AudioReceiveStream::Config config;
-  config.voe_channel_id = 1;
-  internal::AudioReceiveStream recv_stream(&remote_bitrate_estimator, config,
-                                           &voice_engine);
+  ConfigHelper helper;
+  internal::AudioReceiveStream recv_stream(
+      helper.congestion_controller(), helper.config(), helper.audio_state());
+}
+
+MATCHER_P(VerifyHeaderExtension, expected_extension, "") {
+  return arg.extension.hasAbsoluteSendTime ==
+             expected_extension.hasAbsoluteSendTime &&
+         arg.extension.absoluteSendTime ==
+             expected_extension.absoluteSendTime &&
+         arg.extension.hasTransportSequenceNumber ==
+             expected_extension.hasTransportSequenceNumber &&
+         arg.extension.transportSequenceNumber ==
+             expected_extension.transportSequenceNumber;
 }
 
 TEST(AudioReceiveStreamTest, AudioPacketUpdatesBweWithTimestamp) {
-  MockRemoteBitrateEstimator remote_bitrate_estimator;
-  FakeVoiceEngine voice_engine;
-  AudioReceiveStream::Config config;
-  config.combined_audio_video_bwe = true;
-  config.voe_channel_id = FakeVoiceEngine::kRecvChannelId;
-  const int kAbsSendTimeId = 3;
-  config.rtp.extensions.push_back(
-      RtpExtension(RtpExtension::kAbsSendTime, kAbsSendTimeId));
-  internal::AudioReceiveStream recv_stream(&remote_bitrate_estimator, config,
-                                           &voice_engine);
-  uint8_t rtp_packet[30];
+  ConfigHelper helper;
+  helper.config().combined_audio_video_bwe = true;
+  helper.SetupMockForBweFeedback(false);
+  internal::AudioReceiveStream recv_stream(
+      helper.congestion_controller(), helper.config(), helper.audio_state());
   const int kAbsSendTimeValue = 1234;
-  CreateRtpHeaderWithAbsSendTime(rtp_packet, kAbsSendTimeId, kAbsSendTimeValue);
+  std::vector<uint8_t> rtp_packet =
+      CreateRtpHeaderWithOneByteExtension(kAbsSendTimeId, kAbsSendTimeValue, 3);
   PacketTime packet_time(5678000, 0);
   const size_t kExpectedHeaderLength = 20;
-  EXPECT_CALL(remote_bitrate_estimator,
-      IncomingPacket(packet_time.timestamp / 1000,
-          sizeof(rtp_packet) - kExpectedHeaderLength, testing::_, false))
+  RTPHeaderExtension expected_extension;
+  expected_extension.hasAbsoluteSendTime = true;
+  expected_extension.absoluteSendTime = kAbsSendTimeValue;
+  EXPECT_CALL(*helper.remote_bitrate_estimator(),
+              IncomingPacket(packet_time.timestamp / 1000,
+                             rtp_packet.size() - kExpectedHeaderLength,
+                             VerifyHeaderExtension(expected_extension), false))
       .Times(1);
   EXPECT_TRUE(
-      recv_stream.DeliverRtp(rtp_packet, sizeof(rtp_packet), packet_time));
+      recv_stream.DeliverRtp(&rtp_packet[0], rtp_packet.size(), packet_time));
 }
 
-TEST(AudioReceiveStreamTest, GetStats) {
-  MockRemoteBitrateEstimator remote_bitrate_estimator;
-  FakeVoiceEngine voice_engine;
-  AudioReceiveStream::Config config;
-  config.rtp.remote_ssrc = FakeVoiceEngine::kRecvSsrc;
-  config.voe_channel_id = FakeVoiceEngine::kRecvChannelId;
-  internal::AudioReceiveStream recv_stream(&remote_bitrate_estimator, config,
-                                           &voice_engine);
+TEST(AudioReceiveStreamTest, AudioPacketUpdatesBweFeedback) {
+  ConfigHelper helper;
+  helper.config().combined_audio_video_bwe = true;
+  helper.config().rtp.transport_cc = true;
+  helper.config().rtp.extensions.push_back(RtpExtension(
+      RtpExtension::kTransportSequenceNumber, kTransportSequenceNumberId));
+  helper.SetupMockForBweFeedback(true);
+  internal::AudioReceiveStream recv_stream(
+      helper.congestion_controller(), helper.config(), helper.audio_state());
+  const int kTransportSequenceNumberValue = 1234;
+  std::vector<uint8_t> rtp_packet = CreateRtpHeaderWithOneByteExtension(
+      kTransportSequenceNumberId, kTransportSequenceNumberValue, 2);
+  PacketTime packet_time(5678000, 0);
+  const size_t kExpectedHeaderLength = 20;
+  RTPHeaderExtension expected_extension;
+  expected_extension.hasTransportSequenceNumber = true;
+  expected_extension.transportSequenceNumber = kTransportSequenceNumberValue;
+  EXPECT_CALL(*helper.remote_bitrate_estimator(),
+              IncomingPacket(packet_time.timestamp / 1000,
+                             rtp_packet.size() - kExpectedHeaderLength,
+                             VerifyHeaderExtension(expected_extension), false))
+      .Times(1);
+  EXPECT_TRUE(
+      recv_stream.DeliverRtp(&rtp_packet[0], rtp_packet.size(), packet_time));
+}
 
+TEST(AudioReceiveStreamTest, GetStats) {
+  ConfigHelper helper;
+  internal::AudioReceiveStream recv_stream(
+      helper.congestion_controller(), helper.config(), helper.audio_state());
+  helper.SetupMockForGetStats();
   AudioReceiveStream::Stats stats = recv_stream.GetStats();
-  const CallStatistics& call_stats = FakeVoiceEngine::kRecvCallStats;
-  const CodecInst& codec_inst = FakeVoiceEngine::kRecvCodecInst;
-  const NetworkStatistics& net_stats = FakeVoiceEngine::kRecvNetworkStats;
-  const AudioDecodingCallStats& decode_stats =
-      FakeVoiceEngine::kRecvAudioDecodingCallStats;
-  EXPECT_EQ(FakeVoiceEngine::kRecvSsrc, stats.remote_ssrc);
-  EXPECT_EQ(static_cast<int64_t>(call_stats.bytesReceived), stats.bytes_rcvd);
-  EXPECT_EQ(static_cast<uint32_t>(call_stats.packetsReceived),
+  EXPECT_EQ(kRemoteSsrc, stats.remote_ssrc);
+  EXPECT_EQ(static_cast<int64_t>(kCallStats.bytesReceived), stats.bytes_rcvd);
+  EXPECT_EQ(static_cast<uint32_t>(kCallStats.packetsReceived),
             stats.packets_rcvd);
-  EXPECT_EQ(call_stats.cumulativeLost, stats.packets_lost);
-  EXPECT_EQ(Q8ToFloat(call_stats.fractionLost), stats.fraction_lost);
-  EXPECT_EQ(std::string(codec_inst.plname), stats.codec_name);
-  EXPECT_EQ(call_stats.extendedMax, stats.ext_seqnum);
-  EXPECT_EQ(call_stats.jitterSamples / (codec_inst.plfreq / 1000),
+  EXPECT_EQ(kCallStats.cumulativeLost, stats.packets_lost);
+  EXPECT_EQ(Q8ToFloat(kCallStats.fractionLost), stats.fraction_lost);
+  EXPECT_EQ(std::string(kCodecInst.plname), stats.codec_name);
+  EXPECT_EQ(kCallStats.extendedMax, stats.ext_seqnum);
+  EXPECT_EQ(kCallStats.jitterSamples / (kCodecInst.plfreq / 1000),
             stats.jitter_ms);
-  EXPECT_EQ(net_stats.currentBufferSize, stats.jitter_buffer_ms);
-  EXPECT_EQ(net_stats.preferredBufferSize, stats.jitter_buffer_preferred_ms);
-  EXPECT_EQ(static_cast<uint32_t>(FakeVoiceEngine::kRecvJitterBufferDelay +
-      FakeVoiceEngine::kRecvPlayoutBufferDelay), stats.delay_estimate_ms);
-  EXPECT_EQ(static_cast<int32_t>(FakeVoiceEngine::kRecvSpeechOutputLevel),
-            stats.audio_level);
-  EXPECT_EQ(Q14ToFloat(net_stats.currentExpandRate), stats.expand_rate);
-  EXPECT_EQ(Q14ToFloat(net_stats.currentSpeechExpandRate),
+  EXPECT_EQ(kNetworkStats.currentBufferSize, stats.jitter_buffer_ms);
+  EXPECT_EQ(kNetworkStats.preferredBufferSize,
+            stats.jitter_buffer_preferred_ms);
+  EXPECT_EQ(static_cast<uint32_t>(kJitterBufferDelay + kPlayoutBufferDelay),
+            stats.delay_estimate_ms);
+  EXPECT_EQ(static_cast<int32_t>(kSpeechOutputLevel), stats.audio_level);
+  EXPECT_EQ(Q14ToFloat(kNetworkStats.currentExpandRate), stats.expand_rate);
+  EXPECT_EQ(Q14ToFloat(kNetworkStats.currentSpeechExpandRate),
             stats.speech_expand_rate);
-  EXPECT_EQ(Q14ToFloat(net_stats.currentSecondaryDecodedRate),
+  EXPECT_EQ(Q14ToFloat(kNetworkStats.currentSecondaryDecodedRate),
             stats.secondary_decoded_rate);
-  EXPECT_EQ(Q14ToFloat(net_stats.currentAccelerateRate), stats.accelerate_rate);
-  EXPECT_EQ(Q14ToFloat(net_stats.currentPreemptiveRate),
+  EXPECT_EQ(Q14ToFloat(kNetworkStats.currentAccelerateRate),
+            stats.accelerate_rate);
+  EXPECT_EQ(Q14ToFloat(kNetworkStats.currentPreemptiveRate),
             stats.preemptive_expand_rate);
-  EXPECT_EQ(decode_stats.calls_to_silence_generator,
+  EXPECT_EQ(kAudioDecodeStats.calls_to_silence_generator,
             stats.decoding_calls_to_silence_generator);
-  EXPECT_EQ(decode_stats.calls_to_neteq, stats.decoding_calls_to_neteq);
-  EXPECT_EQ(decode_stats.decoded_normal, stats.decoding_normal);
-  EXPECT_EQ(decode_stats.decoded_plc, stats.decoding_plc);
-  EXPECT_EQ(decode_stats.decoded_cng, stats.decoding_cng);
-  EXPECT_EQ(decode_stats.decoded_plc_cng, stats.decoding_plc_cng);
-  EXPECT_EQ(call_stats.capture_start_ntp_time_ms_,
+  EXPECT_EQ(kAudioDecodeStats.calls_to_neteq, stats.decoding_calls_to_neteq);
+  EXPECT_EQ(kAudioDecodeStats.decoded_normal, stats.decoding_normal);
+  EXPECT_EQ(kAudioDecodeStats.decoded_plc, stats.decoding_plc);
+  EXPECT_EQ(kAudioDecodeStats.decoded_cng, stats.decoding_cng);
+  EXPECT_EQ(kAudioDecodeStats.decoded_plc_cng, stats.decoding_plc_cng);
+  EXPECT_EQ(kCallStats.capture_start_ntp_time_ms_,
             stats.capture_start_ntp_time_ms);
 }
 }  // namespace test
diff --git a/webrtc/audio/audio_send_stream.cc b/webrtc/audio/audio_send_stream.cc
index ccfdca546d..35a65521dd 100644
--- a/webrtc/audio/audio_send_stream.cc
+++ b/webrtc/audio/audio_send_stream.cc
@@ -12,13 +12,20 @@
 
 #include <string>
 
+#include "webrtc/audio/audio_state.h"
 #include "webrtc/audio/conversion.h"
+#include "webrtc/audio/scoped_voe_interface.h"
 #include "webrtc/base/checks.h"
 #include "webrtc/base/logging.h"
+#include "webrtc/call/congestion_controller.h"
+#include "webrtc/modules/pacing/paced_sender.h"
+#include "webrtc/modules/rtp_rtcp/include/rtp_rtcp_defines.h"
+#include "webrtc/voice_engine/channel_proxy.h"
 #include "webrtc/voice_engine/include/voe_audio_processing.h"
 #include "webrtc/voice_engine/include/voe_codec.h"
 #include "webrtc/voice_engine/include/voe_rtp_rtcp.h"
 #include "webrtc/voice_engine/include/voe_volume_control.h"
+#include "webrtc/voice_engine/voice_engine_impl.h"
 
 namespace webrtc {
 std::string AudioSendStream::Config::Rtp::ToString() const {
@@ -32,6 +39,7 @@ std::string AudioSendStream::Config::Rtp::ToString() const {
     }
   }
   ss << ']';
+  ss << ", c_name: " << c_name;
   ss << '}';
   return ss.str();
 }
@@ -48,38 +56,91 @@ std::string AudioSendStream::Config::ToString() const {
 }
 
 namespace internal {
-AudioSendStream::AudioSendStream(const webrtc::AudioSendStream::Config& config,
-                                 VoiceEngine* voice_engine)
-    : config_(config),
-      voice_engine_(voice_engine),
-      voe_base_(voice_engine) {
+AudioSendStream::AudioSendStream(
+    const webrtc::AudioSendStream::Config& config,
+    const rtc::scoped_refptr<webrtc::AudioState>& audio_state,
+    CongestionController* congestion_controller)
+    : config_(config), audio_state_(audio_state) {
   LOG(LS_INFO) << "AudioSendStream: " << config_.ToString();
-  RTC_DCHECK_NE(config.voe_channel_id, -1);
-  RTC_DCHECK(voice_engine_);
+  RTC_DCHECK_NE(config_.voe_channel_id, -1);
+  RTC_DCHECK(audio_state_.get());
+  RTC_DCHECK(congestion_controller);
+
+  VoiceEngineImpl* voe_impl = static_cast<VoiceEngineImpl*>(voice_engine());
+  channel_proxy_ = voe_impl->GetChannelProxy(config_.voe_channel_id);
+  channel_proxy_->SetCongestionControlObjects(
+      congestion_controller->pacer(),
+      congestion_controller->GetTransportFeedbackObserver(),
+      congestion_controller->packet_router());
+  channel_proxy_->SetRTCPStatus(true);
+  channel_proxy_->SetLocalSSRC(config.rtp.ssrc);
+  channel_proxy_->SetRTCP_CNAME(config.rtp.c_name);
+
+  for (const auto& extension : config.rtp.extensions) {
+    if (extension.name == RtpExtension::kAbsSendTime) {
+      channel_proxy_->SetSendAbsoluteSenderTimeStatus(true, extension.id);
+    } else if (extension.name == RtpExtension::kAudioLevel) {
+      channel_proxy_->SetSendAudioLevelIndicationStatus(true, extension.id);
+    } else if (extension.name == RtpExtension::kTransportSequenceNumber) {
+      channel_proxy_->EnableSendTransportSequenceNumber(extension.id);
+    } else {
+      RTC_NOTREACHED() << "Registering unsupported RTP extension.";
+    }
+  }
 }
 
 AudioSendStream::~AudioSendStream() {
   RTC_DCHECK(thread_checker_.CalledOnValidThread());
   LOG(LS_INFO) << "~AudioSendStream: " << config_.ToString();
+  channel_proxy_->SetCongestionControlObjects(nullptr, nullptr, nullptr);
+}
+
+void AudioSendStream::Start() {
+  RTC_DCHECK(thread_checker_.CalledOnValidThread());
+}
+
+void AudioSendStream::Stop() {
+  RTC_DCHECK(thread_checker_.CalledOnValidThread());
+}
+
+void AudioSendStream::SignalNetworkState(NetworkState state) {
+  RTC_DCHECK(thread_checker_.CalledOnValidThread());
+}
+
+bool AudioSendStream::DeliverRtcp(const uint8_t* packet, size_t length) {
+  // TODO(solenberg): Tests call this function on a network thread, libjingle
+  // calls on the worker thread. We should move towards always using a network
+  // thread. Then this check can be enabled.
+  // RTC_DCHECK(!thread_checker_.CalledOnValidThread());
+  return false;
+}
+
+bool AudioSendStream::SendTelephoneEvent(int payload_type, uint8_t event,
+                                         uint32_t duration_ms) {
+  RTC_DCHECK(thread_checker_.CalledOnValidThread());
+  return channel_proxy_->SetSendTelephoneEventPayloadType(payload_type) &&
+         channel_proxy_->SendTelephoneEventOutband(event, duration_ms);
 }
 
 webrtc::AudioSendStream::Stats AudioSendStream::GetStats() const {
   RTC_DCHECK(thread_checker_.CalledOnValidThread());
   webrtc::AudioSendStream::Stats stats;
   stats.local_ssrc = config_.rtp.ssrc;
-  ScopedVoEInterface<VoEAudioProcessing> processing(voice_engine_);
-  ScopedVoEInterface<VoECodec> codec(voice_engine_);
-  ScopedVoEInterface<VoERTP_RTCP> rtp(voice_engine_);
-  ScopedVoEInterface<VoEVolumeControl> volume(voice_engine_);
-  unsigned int ssrc = 0;
-  webrtc::CallStatistics call_stats = {0};
-  if (rtp->GetLocalSSRC(config_.voe_channel_id, ssrc) == -1 ||
-      rtp->GetRTCPStatistics(config_.voe_channel_id, call_stats) == -1) {
-    return stats;
-  }
+  ScopedVoEInterface<VoEAudioProcessing> processing(voice_engine());
+  ScopedVoEInterface<VoECodec> codec(voice_engine());
+  ScopedVoEInterface<VoEVolumeControl> volume(voice_engine());
 
+  webrtc::CallStatistics call_stats = channel_proxy_->GetRTCPStatistics();
   stats.bytes_sent = call_stats.bytesSent;
   stats.packets_sent = call_stats.packetsSent;
+  // RTT isn't known until an RTCP report is received. Until then, VoiceEngine
+  // returns 0 to indicate an error value.
+  if (call_stats.rttMs > 0) {
+    stats.rtt_ms = call_stats.rttMs;
+  }
+  // TODO(solenberg): [was ajm]: Re-enable this metric once we have a reliable
+  //                  implementation.
+  stats.aec_quality_min = -1;
 
   webrtc::CodecInst codec_inst = {0};
   if (codec->GetSendCodec(config_.voe_channel_id, codec_inst) != -1) {
@@ -87,54 +148,43 @@ webrtc::AudioSendStream::Stats AudioSendStream::GetStats() const {
     stats.codec_name = codec_inst.plname;
 
     // Get data from the last remote RTCP report.
-    std::vector<webrtc::ReportBlock> blocks;
-    if (rtp->GetRemoteRTCPReportBlocks(config_.voe_channel_id, &blocks) != -1) {
-      for (const webrtc::ReportBlock& block : blocks) {
-        // Lookup report for send ssrc only.
-        if (block.source_SSRC == stats.local_ssrc) {
-          stats.packets_lost = block.cumulative_num_packets_lost;
-          stats.fraction_lost = Q8ToFloat(block.fraction_lost);
-          stats.ext_seqnum = block.extended_highest_sequence_number;
-          // Convert samples to milliseconds.
-          if (codec_inst.plfreq / 1000 > 0) {
-            stats.jitter_ms =
-                block.interarrival_jitter / (codec_inst.plfreq / 1000);
-          }
-          break;
+    for (const auto& block : channel_proxy_->GetRemoteRTCPReportBlocks()) {
+      // Lookup report for send ssrc only.
+      if (block.source_SSRC == stats.local_ssrc) {
+        stats.packets_lost = block.cumulative_num_packets_lost;
+        stats.fraction_lost = Q8ToFloat(block.fraction_lost);
+        stats.ext_seqnum = block.extended_highest_sequence_number;
+        // Convert samples to milliseconds.
+        if (codec_inst.plfreq / 1000 > 0) {
+          stats.jitter_ms =
+              block.interarrival_jitter / (codec_inst.plfreq / 1000);
         }
+        break;
       }
     }
   }
 
-  // RTT isn't known until a RTCP report is received. Until then, VoiceEngine
-  // returns 0 to indicate an error value.
-  if (call_stats.rttMs > 0) {
-    stats.rtt_ms = call_stats.rttMs;
-  }
-
   // Local speech level.
   {
     unsigned int level = 0;
-    if (volume->GetSpeechInputLevelFullRange(level) != -1) {
-      stats.audio_level = static_cast<int32_t>(level);
-    }
+    int error = volume->GetSpeechInputLevelFullRange(level);
+    RTC_DCHECK_EQ(0, error);
+    stats.audio_level = static_cast<int32_t>(level);
   }
 
-  // TODO(ajm): Re-enable this metric once we have a reliable implementation.
-  stats.aec_quality_min = -1;
-
   bool echo_metrics_on = false;
-  if (processing->GetEcMetricsStatus(echo_metrics_on) != -1 &&
-      echo_metrics_on) {
+  int error = processing->GetEcMetricsStatus(echo_metrics_on);
+  RTC_DCHECK_EQ(0, error);
+  if (echo_metrics_on) {
     // These can also be negative, but in practice -1 is only used to signal
     // insufficient data, since the resolution is limited to multiples of 4 ms.
     int median = -1;
     int std = -1;
     float dummy = 0.0f;
-    if (processing->GetEcDelayMetrics(median, std, dummy) != -1) {
-      stats.echo_delay_median_ms = median;
-      stats.echo_delay_std_ms = std;
-    }
+    error = processing->GetEcDelayMetrics(median, std, dummy);
+    RTC_DCHECK_EQ(0, error);
+    stats.echo_delay_median_ms = median;
+    stats.echo_delay_std_ms = std;
 
     // These can take on valid negative values, so use the lowest possible level
     // as default rather than -1.
@@ -142,14 +192,15 @@ webrtc::AudioSendStream::Stats AudioSendStream::GetStats() const {
     int erle = -100;
     int dummy1 = 0;
     int dummy2 = 0;
-    if (processing->GetEchoMetrics(erl, erle, dummy1, dummy2) != -1) {
-      stats.echo_return_loss = erl;
-      stats.echo_return_loss_enhancement = erle;
-    }
+    error = processing->GetEchoMetrics(erl, erle, dummy1, dummy2);
+    RTC_DCHECK_EQ(0, error);
+    stats.echo_return_loss = erl;
+    stats.echo_return_loss_enhancement = erle;
   }
 
-  // TODO(solenberg): Collect typing noise warnings here too!
-  // bool typing_noise_detected = typing_noise_detected_;
+  internal::AudioState* audio_state =
+      static_cast<internal::AudioState*>(audio_state_.get());
+  stats.typing_noise_detected = audio_state->typing_noise_detected();
 
   return stats;
 }
@@ -159,24 +210,12 @@ const webrtc::AudioSendStream::Config& AudioSendStream::config() const {
   return config_;
 }
 
-void AudioSendStream::Start() {
-  RTC_DCHECK(thread_checker_.CalledOnValidThread());
-}
-
-void AudioSendStream::Stop() {
-  RTC_DCHECK(thread_checker_.CalledOnValidThread());
-}
-
-void AudioSendStream::SignalNetworkState(NetworkState state) {
-  RTC_DCHECK(thread_checker_.CalledOnValidThread());
-}
-
-bool AudioSendStream::DeliverRtcp(const uint8_t* packet, size_t length) {
-  // TODO(solenberg): Tests call this function on a network thread, libjingle
-  // calls on the worker thread. We should move towards always using a network
-  // thread. Then this check can be enabled.
-  // RTC_DCHECK(!thread_checker_.CalledOnValidThread());
-  return false;
+VoiceEngine* AudioSendStream::voice_engine() const {
+  // Assumes |audio_state_| really is an internal::AudioState (unchecked cast).
+  auto* state = static_cast<internal::AudioState*>(audio_state_.get());
+  VoiceEngine* const engine = state->voice_engine();
+  RTC_DCHECK(engine);
+  return engine;
 }
 }  // namespace internal
 }  // namespace webrtc
diff --git a/webrtc/audio/audio_send_stream.h b/webrtc/audio/audio_send_stream.h
index ae81dfc8fc..8b96350590 100644
--- a/webrtc/audio/audio_send_stream.h
+++ b/webrtc/audio/audio_send_stream.h
@@ -12,20 +12,24 @@
 #define WEBRTC_AUDIO_AUDIO_SEND_STREAM_H_
 
 #include "webrtc/audio_send_stream.h"
-#include "webrtc/audio/scoped_voe_interface.h"
+#include "webrtc/audio_state.h"
 #include "webrtc/base/thread_checker.h"
-#include "webrtc/voice_engine/include/voe_base.h"
+#include "webrtc/base/scoped_ptr.h"
 
 namespace webrtc {
-
+class CongestionController;
 class VoiceEngine;
 
-namespace internal {
+namespace voe {
+class ChannelProxy;
+}  // namespace voe
 
+namespace internal {
 class AudioSendStream final : public webrtc::AudioSendStream {
  public:
   AudioSendStream(const webrtc::AudioSendStream::Config& config,
-                  VoiceEngine* voice_engine);
+                  const rtc::scoped_refptr<webrtc::AudioState>& audio_state,
+                  CongestionController* congestion_controller);
   ~AudioSendStream() override;
 
   // webrtc::SendStream implementation.
@@ -35,16 +39,19 @@ class AudioSendStream final : public webrtc::AudioSendStream {
   bool DeliverRtcp(const uint8_t* packet, size_t length) override;
 
   // webrtc::AudioSendStream implementation.
+  bool SendTelephoneEvent(int payload_type, uint8_t event,
+                          uint32_t duration_ms) override;
   webrtc::AudioSendStream::Stats GetStats() const override;
 
   const webrtc::AudioSendStream::Config& config() const;
 
  private:
+  VoiceEngine* voice_engine() const;
+
   rtc::ThreadChecker thread_checker_;
   const webrtc::AudioSendStream::Config config_;
-  VoiceEngine* voice_engine_;
-  // We hold one interface pointer to the VoE to make sure it is kept alive.
-  ScopedVoEInterface<VoEBase> voe_base_;
+  rtc::scoped_refptr<webrtc::AudioState> audio_state_;
+  rtc::scoped_ptr<voe::ChannelProxy> channel_proxy_;
 
   RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(AudioSendStream);
 };
diff --git a/webrtc/audio/audio_send_stream_unittest.cc b/webrtc/audio/audio_send_stream_unittest.cc
index 227ec83799..466c1571ac 100644
--- a/webrtc/audio/audio_send_stream_unittest.cc
+++ b/webrtc/audio/audio_send_stream_unittest.cc
@@ -8,69 +8,238 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
+#include <string>
+#include <vector>
+
 #include "testing/gtest/include/gtest/gtest.h"
 
 #include "webrtc/audio/audio_send_stream.h"
+#include "webrtc/audio/audio_state.h"
 #include "webrtc/audio/conversion.h"
-#include "webrtc/test/fake_voice_engine.h"
+#include "webrtc/call/congestion_controller.h"
+#include "webrtc/modules/bitrate_controller/include/mock/mock_bitrate_controller.h"
+#include "webrtc/modules/pacing/paced_sender.h"
+#include "webrtc/test/mock_voe_channel_proxy.h"
+#include "webrtc/test/mock_voice_engine.h"
+#include "webrtc/video/call_stats.h"
 
 namespace webrtc {
 namespace test {
+namespace {
+
+using testing::_;
+using testing::Return;
+
+const int kChannelId = 1;
+const uint32_t kSsrc = 1234;
+const char kCName[] = "foo_name";  // RTCP CNAME used in the stream config.
+const int kAudioLevelId = 2;
+const int kAbsSendTimeId = 3;
+const int kTransportSequenceNumberId = 4;
+const int kEchoDelayMedian = 254;
+const int kEchoDelayStdDev = -3;
+const int kEchoReturnLoss = -65;
+const int kEchoReturnLossEnhancement = 101;
+const unsigned int kSpeechInputLevel = 96;
+const CallStatistics kCallStats = {
+    1345,  1678,  1901, 1234,  112, 13456, 17890, 1567, -1890, -1123};
+const CodecInst kCodecInst = {-121, "codec_name_send", 48000, -231, 0, -671};
+const ReportBlock kReportBlock = {456, 780, 123, 567, 890, 132, 143, 13354};
+const int kTelephoneEventPayloadType = 123;
+const uint8_t kTelephoneEventCode = 45;
+const uint32_t kTelephoneEventDuration = 6789;
+
+struct ConfigHelper {
+  ConfigHelper()
+      : stream_config_(nullptr),
+        call_stats_(Clock::GetRealTimeClock()),
+        process_thread_(ProcessThread::Create("AudioTestThread")),
+        congestion_controller_(process_thread_.get(),
+                               &call_stats_,
+                               &bitrate_observer_) {
+    using testing::Invoke;
+    using testing::StrEq;
+
+    EXPECT_CALL(voice_engine_,
+        RegisterVoiceEngineObserver(_)).WillOnce(Return(0));
+    EXPECT_CALL(voice_engine_,
+        DeRegisterVoiceEngineObserver()).WillOnce(Return(0));
+    AudioState::Config config;
+    config.voice_engine = &voice_engine_;
+    audio_state_ = AudioState::Create(config);
+
+    EXPECT_CALL(voice_engine_, ChannelProxyFactory(kChannelId))
+        .WillOnce(Invoke([this](int channel_id) {
+          EXPECT_FALSE(channel_proxy_);
+          channel_proxy_ = new testing::StrictMock<MockVoEChannelProxy>();
+          EXPECT_CALL(*channel_proxy_, SetRTCPStatus(true)).Times(1);
+          EXPECT_CALL(*channel_proxy_, SetLocalSSRC(kSsrc)).Times(1);
+          EXPECT_CALL(*channel_proxy_, SetRTCP_CNAME(StrEq(kCName))).Times(1);
+          EXPECT_CALL(*channel_proxy_,
+              SetSendAbsoluteSenderTimeStatus(true, kAbsSendTimeId)).Times(1);
+          EXPECT_CALL(*channel_proxy_,
+              SetSendAudioLevelIndicationStatus(true, kAudioLevelId)).Times(1);
+          EXPECT_CALL(*channel_proxy_, EnableSendTransportSequenceNumber(
+                                           kTransportSequenceNumberId))
+              .Times(1);
+          EXPECT_CALL(*channel_proxy_,
+                      SetCongestionControlObjects(
+                          congestion_controller_.pacer(),
+                          congestion_controller_.GetTransportFeedbackObserver(),
+                          congestion_controller_.packet_router()))
+              .Times(1);
+          EXPECT_CALL(*channel_proxy_,
+                      SetCongestionControlObjects(nullptr, nullptr, nullptr))
+              .Times(1);
+          return channel_proxy_;
+        }));
+    stream_config_.voe_channel_id = kChannelId;
+    stream_config_.rtp.ssrc = kSsrc;
+    stream_config_.rtp.c_name = kCName;
+    stream_config_.rtp.extensions.push_back(
+        RtpExtension(RtpExtension::kAudioLevel, kAudioLevelId));
+    stream_config_.rtp.extensions.push_back(
+        RtpExtension(RtpExtension::kAbsSendTime, kAbsSendTimeId));
+    stream_config_.rtp.extensions.push_back(RtpExtension(
+        RtpExtension::kTransportSequenceNumber, kTransportSequenceNumberId));
+  }
+
+  AudioSendStream::Config& config() { return stream_config_; }
+  rtc::scoped_refptr<AudioState> audio_state() { return audio_state_; }
+  CongestionController* congestion_controller() {
+    return &congestion_controller_;
+  }
+
+  void SetupMockForSendTelephoneEvent() {
+    EXPECT_TRUE(channel_proxy_);
+    EXPECT_CALL(*channel_proxy_,
+        SetSendTelephoneEventPayloadType(kTelephoneEventPayloadType))
+            .WillOnce(Return(true));
+    EXPECT_CALL(*channel_proxy_,
+        SendTelephoneEventOutband(kTelephoneEventCode, kTelephoneEventDuration))
+            .WillOnce(Return(true));
+  }
+
+  void SetupMockForGetStats() {
+    using testing::DoAll;
+    using testing::SetArgReferee;
+
+    std::vector<ReportBlock> report_blocks;
+    webrtc::ReportBlock block = kReportBlock;
+    report_blocks.push_back(block);  // Has wrong SSRC.
+    block.source_SSRC = kSsrc;
+    report_blocks.push_back(block);  // Correct block.
+    block.fraction_lost = 0;
+    report_blocks.push_back(block);  // Duplicate SSRC, bad fraction_lost.
+
+    EXPECT_TRUE(channel_proxy_);
+    EXPECT_CALL(*channel_proxy_, GetRTCPStatistics())
+        .WillRepeatedly(Return(kCallStats));
+    EXPECT_CALL(*channel_proxy_, GetRemoteRTCPReportBlocks())
+        .WillRepeatedly(Return(report_blocks));
+
+    EXPECT_CALL(voice_engine_, GetSendCodec(kChannelId, _))
+        .WillRepeatedly(DoAll(SetArgReferee<1>(kCodecInst), Return(0)));
+    EXPECT_CALL(voice_engine_, GetSpeechInputLevelFullRange(_))
+        .WillRepeatedly(DoAll(SetArgReferee<0>(kSpeechInputLevel), Return(0)));
+    EXPECT_CALL(voice_engine_, GetEcMetricsStatus(_))
+        .WillRepeatedly(DoAll(SetArgReferee<0>(true), Return(0)));
+    EXPECT_CALL(voice_engine_, GetEchoMetrics(_, _, _, _))
+        .WillRepeatedly(DoAll(SetArgReferee<0>(kEchoReturnLoss),
+                        SetArgReferee<1>(kEchoReturnLossEnhancement),
+                        Return(0)));
+    EXPECT_CALL(voice_engine_, GetEcDelayMetrics(_, _, _))
+        .WillRepeatedly(DoAll(SetArgReferee<0>(kEchoDelayMedian),
+                        SetArgReferee<1>(kEchoDelayStdDev), Return(0)));
+  }
+
+ private:
+  testing::StrictMock<MockVoiceEngine> voice_engine_;
+  rtc::scoped_refptr<AudioState> audio_state_;
+  AudioSendStream::Config stream_config_;
+  testing::StrictMock<MockVoEChannelProxy>* channel_proxy_ = nullptr;
+  CallStats call_stats_;
+  testing::NiceMock<MockBitrateObserver> bitrate_observer_;
+  rtc::scoped_ptr<ProcessThread> process_thread_;
+  CongestionController congestion_controller_;
+};
+}  // namespace
 
 TEST(AudioSendStreamTest, ConfigToString) {
-  const int kAbsSendTimeId = 3;
   AudioSendStream::Config config(nullptr);
-  config.rtp.ssrc = 1234;
+  config.rtp.ssrc = kSsrc;
   config.rtp.extensions.push_back(
       RtpExtension(RtpExtension::kAbsSendTime, kAbsSendTimeId));
-  config.voe_channel_id = 1;
+  config.rtp.c_name = kCName;
+  config.voe_channel_id = kChannelId;
   config.cng_payload_type = 42;
   config.red_payload_type = 17;
-  EXPECT_EQ("{rtp: {ssrc: 1234, extensions: [{name: "
-      "http://www.webrtc.org/experiments/rtp-hdrext/abs-send-time, id: 3}]}, "
-      "voe_channel_id: 1, cng_payload_type: 42, red_payload_type: 17}",
+  EXPECT_EQ(
+      "{rtp: {ssrc: 1234, extensions: [{name: "
+      "http://www.webrtc.org/experiments/rtp-hdrext/abs-send-time, id: 3}], "
+      "c_name: foo_name}, voe_channel_id: 1, cng_payload_type: 42, "
+      "red_payload_type: 17}",
       config.ToString());
 }
 
 TEST(AudioSendStreamTest, ConstructDestruct) {
-  FakeVoiceEngine voice_engine;
-  AudioSendStream::Config config(nullptr);
-  config.voe_channel_id = 1;
-  internal::AudioSendStream send_stream(config, &voice_engine);
+  ConfigHelper helper;
+  internal::AudioSendStream send_stream(helper.config(), helper.audio_state(),
+                                        helper.congestion_controller());
 }
 
-TEST(AudioSendStreamTest, GetStats) {
-  FakeVoiceEngine voice_engine;
-  AudioSendStream::Config config(nullptr);
-  config.rtp.ssrc = FakeVoiceEngine::kSendSsrc;
-  config.voe_channel_id = FakeVoiceEngine::kSendChannelId;
-  internal::AudioSendStream send_stream(config, &voice_engine);
+TEST(AudioSendStreamTest, SendTelephoneEvent) {
+  ConfigHelper helper;
+  internal::AudioSendStream send_stream(helper.config(), helper.audio_state(),
+                                        helper.congestion_controller());
+  helper.SetupMockForSendTelephoneEvent();
+  EXPECT_TRUE(send_stream.SendTelephoneEvent(kTelephoneEventPayloadType,
+      kTelephoneEventCode, kTelephoneEventDuration));
+}
 
+TEST(AudioSendStreamTest, GetStats) {
+  ConfigHelper helper;
+  internal::AudioSendStream send_stream(helper.config(), helper.audio_state(),
+                                        helper.congestion_controller());
+  helper.SetupMockForGetStats();
   AudioSendStream::Stats stats = send_stream.GetStats();
-  const CallStatistics& call_stats = FakeVoiceEngine::kSendCallStats;
-  const CodecInst& codec_inst = FakeVoiceEngine::kSendCodecInst;
-  const ReportBlock& report_block = FakeVoiceEngine::kSendReportBlock;
-  EXPECT_EQ(FakeVoiceEngine::kSendSsrc, stats.local_ssrc);
-  EXPECT_EQ(static_cast<int64_t>(call_stats.bytesSent), stats.bytes_sent);
-  EXPECT_EQ(call_stats.packetsSent, stats.packets_sent);
-  EXPECT_EQ(static_cast<int32_t>(report_block.cumulative_num_packets_lost),
+  EXPECT_EQ(kSsrc, stats.local_ssrc);
+  EXPECT_EQ(static_cast<int64_t>(kCallStats.bytesSent), stats.bytes_sent);
+  EXPECT_EQ(kCallStats.packetsSent, stats.packets_sent);
+  EXPECT_EQ(static_cast<int32_t>(kReportBlock.cumulative_num_packets_lost),
             stats.packets_lost);
-  EXPECT_EQ(Q8ToFloat(report_block.fraction_lost), stats.fraction_lost);
-  EXPECT_EQ(std::string(codec_inst.plname), stats.codec_name);
-  EXPECT_EQ(static_cast<int32_t>(report_block.extended_highest_sequence_number),
+  EXPECT_EQ(Q8ToFloat(kReportBlock.fraction_lost), stats.fraction_lost);
+  EXPECT_EQ(std::string(kCodecInst.plname), stats.codec_name);
+  EXPECT_EQ(static_cast<int32_t>(kReportBlock.extended_highest_sequence_number),
             stats.ext_seqnum);
-  EXPECT_EQ(static_cast<int32_t>(report_block.interarrival_jitter /
-                (codec_inst.plfreq / 1000)), stats.jitter_ms);
-  EXPECT_EQ(call_stats.rttMs, stats.rtt_ms);
-  EXPECT_EQ(static_cast<int32_t>(FakeVoiceEngine::kSendSpeechInputLevel),
-            stats.audio_level);
+  EXPECT_EQ(static_cast<int32_t>(kReportBlock.interarrival_jitter /
+                                 (kCodecInst.plfreq / 1000)),
+            stats.jitter_ms);
+  EXPECT_EQ(kCallStats.rttMs, stats.rtt_ms);
+  EXPECT_EQ(static_cast<int32_t>(kSpeechInputLevel), stats.audio_level);
   EXPECT_EQ(-1, stats.aec_quality_min);
-  EXPECT_EQ(FakeVoiceEngine::kSendEchoDelayMedian, stats.echo_delay_median_ms);
-  EXPECT_EQ(FakeVoiceEngine::kSendEchoDelayStdDev, stats.echo_delay_std_ms);
-  EXPECT_EQ(FakeVoiceEngine::kSendEchoReturnLoss, stats.echo_return_loss);
-  EXPECT_EQ(FakeVoiceEngine::kSendEchoReturnLossEnhancement,
-            stats.echo_return_loss_enhancement);
+  EXPECT_EQ(kEchoDelayMedian, stats.echo_delay_median_ms);
+  EXPECT_EQ(kEchoDelayStdDev, stats.echo_delay_std_ms);
+  EXPECT_EQ(kEchoReturnLoss, stats.echo_return_loss);
+  EXPECT_EQ(kEchoReturnLossEnhancement, stats.echo_return_loss_enhancement);
   EXPECT_FALSE(stats.typing_noise_detected);
 }
+
+TEST(AudioSendStreamTest, GetStatsTypingNoiseDetected) {
+  ConfigHelper helper;
+  internal::AudioSendStream send_stream(helper.config(), helper.audio_state(),
+                                        helper.congestion_controller());
+  helper.SetupMockForGetStats();
+  EXPECT_FALSE(send_stream.GetStats().typing_noise_detected);
+
+  internal::AudioState* internal_audio_state =
+      static_cast<internal::AudioState*>(helper.audio_state().get());
+  VoiceEngineObserver* voe_observer =
+      static_cast<VoiceEngineObserver*>(internal_audio_state);
+  voe_observer->CallbackOnError(-1, VE_TYPING_NOISE_WARNING);
+  EXPECT_TRUE(send_stream.GetStats().typing_noise_detected);
+  voe_observer->CallbackOnError(-1, VE_TYPING_NOISE_OFF_WARNING);
+  EXPECT_FALSE(send_stream.GetStats().typing_noise_detected);
+}
 }  // namespace test
 }  // namespace webrtc
diff --git a/webrtc/audio/audio_sink.h b/webrtc/audio/audio_sink.h
new file mode 100644
index 0000000000..999644f4ce
--- /dev/null
+++ b/webrtc/audio/audio_sink.h
@@ -0,0 +1,53 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_AUDIO_AUDIO_SINK_H_
+#define WEBRTC_AUDIO_AUDIO_SINK_H_
+
+#if defined(WEBRTC_POSIX) && !defined(__STDC_FORMAT_MACROS)
+// Avoid conflict with format_macros.h.
+#define __STDC_FORMAT_MACROS
+#endif
+
+#include <inttypes.h>
+#include <stddef.h>
+
+namespace webrtc {
+
+// Interface for a simple push audio sink; audio is handed over via OnData().
+class AudioSinkInterface {
+ public:
+  virtual ~AudioSinkInterface() {}
+
+  struct Data {
+    Data(int16_t* data,
+         size_t samples_per_channel,
+         int sample_rate,
+         size_t channels,
+         uint32_t timestamp)
+        : data(data),
+          samples_per_channel(samples_per_channel),
+          sample_rate(sample_rate),
+          channels(channels),
+          timestamp(timestamp) {}
+
+    int16_t* data;               // The actual 16-bit audio data.
+    size_t samples_per_channel;  // Number of samples per channel (frames).
+    int sample_rate;             // Sample rate in Hz.
+    size_t channels;             // Number of channels in the audio data.
+    uint32_t timestamp;          // The RTP timestamp of the first sample.
+  };
+
+  virtual void OnData(const Data& audio) = 0;
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_AUDIO_AUDIO_SINK_H_
diff --git a/webrtc/audio/audio_state.cc b/webrtc/audio/audio_state.cc
new file mode 100644
index 0000000000..e63f97af2d
--- /dev/null
+++ b/webrtc/audio/audio_state.cc
@@ -0,0 +1,79 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/audio/audio_state.h"
+
+#include "webrtc/base/atomicops.h"
+#include "webrtc/base/checks.h"
+#include "webrtc/base/logging.h"
+#include "webrtc/voice_engine/include/voe_errors.h"
+
+namespace webrtc {
+namespace internal {
+
+AudioState::AudioState(const AudioState::Config& config)
+    : config_(config), voe_base_(config.voice_engine) {
+  process_thread_checker_.DetachFromThread();
+  // Only one AudioState should be created per VoiceEngine.
+  RTC_CHECK(voe_base_->RegisterVoiceEngineObserver(*this) != -1);
+}
+
+AudioState::~AudioState() {
+  RTC_DCHECK(thread_checker_.CalledOnValidThread());
+  voe_base_->DeRegisterVoiceEngineObserver();
+}
+
+VoiceEngine* AudioState::voice_engine() {
+  RTC_DCHECK(thread_checker_.CalledOnValidThread());
+  return config_.voice_engine;
+}
+
+bool AudioState::typing_noise_detected() const {
+  RTC_DCHECK(thread_checker_.CalledOnValidThread());
+  rtc::CritScope lock(&crit_sect_);
+  return typing_noise_detected_;
+}
+
+// Reference count; implementation copied from rtc::RefCountedObject.
+int AudioState::AddRef() const {
+  return rtc::AtomicOps::Increment(&ref_count_);
+}
+
+// Drops a reference, deleting |this| at zero (as in rtc::RefCountedObject).
+int AudioState::Release() const {
+  const int remaining = rtc::AtomicOps::Decrement(&ref_count_);
+  if (remaining == 0) {
+    delete this;
+  }
+  return remaining;
+}
+
+void AudioState::CallbackOnError(int channel_id, int err_code) {
+  // Tracks the typing-noise warnings VoE reports; other codes are only logged.
+  RTC_DCHECK(process_thread_checker_.CalledOnValidThread());
+  // All call sites in VoE, as of this writing, specify -1 as channel_id.
+  RTC_DCHECK_EQ(-1, channel_id);
+  LOG(LS_INFO) << "VoiceEngine error " << err_code << " reported on channel "
+               << channel_id << ".";
+  if (err_code == VE_TYPING_NOISE_WARNING) {
+    rtc::CritScope lock(&crit_sect_);
+    typing_noise_detected_ = true;
+  } else if (err_code == VE_TYPING_NOISE_OFF_WARNING) {
+    rtc::CritScope lock(&crit_sect_);
+    typing_noise_detected_ = false;
+  }
+}
+}  // namespace internal
+
+rtc::scoped_refptr<AudioState> AudioState::Create(
+    const AudioState::Config& config) {
+  return rtc::scoped_refptr<AudioState>(new internal::AudioState(config));
+}
+}  // namespace webrtc
diff --git a/webrtc/audio/audio_state.h b/webrtc/audio/audio_state.h
new file mode 100644
index 0000000000..2cb83e4989
--- /dev/null
+++ b/webrtc/audio/audio_state.h
@@ -0,0 +1,61 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_AUDIO_AUDIO_STATE_H_
+#define WEBRTC_AUDIO_AUDIO_STATE_H_
+
+#include "webrtc/audio_state.h"
+#include "webrtc/audio/scoped_voe_interface.h"
+#include "webrtc/base/constructormagic.h"
+#include "webrtc/base/criticalsection.h"
+#include "webrtc/base/thread_checker.h"
+#include "webrtc/voice_engine/include/voe_base.h"
+
+namespace webrtc {
+namespace internal {
+// Wraps the VoiceEngine given in Config and tracks VoE typing-noise warnings.
+class AudioState final : public webrtc::AudioState,
+                         public webrtc::VoiceEngineObserver {
+ public:
+  explicit AudioState(const AudioState::Config& config);
+  ~AudioState() override;
+
+  VoiceEngine* voice_engine();
+  bool typing_noise_detected() const;
+
+ private:
+  // rtc::RefCountInterface implementation.
+  int AddRef() const override;
+  int Release() const override;
+
+  // webrtc::VoiceEngineObserver implementation.
+  void CallbackOnError(int channel_id, int err_code) override;
+
+  rtc::ThreadChecker thread_checker_;  // Checked in dtor and accessors.
+  rtc::ThreadChecker process_thread_checker_;  // Checked in CallbackOnError().
+  const webrtc::AudioState::Config config_;
+
+  // We hold one interface pointer to the VoE to make sure it is kept alive.
+  ScopedVoEInterface<VoEBase> voe_base_;
+
+  // The critical section isn't strictly needed in this case, but xSAN bots may
+  // trigger on unprotected cross-thread access.
+  mutable rtc::CriticalSection crit_sect_;
+  bool typing_noise_detected_ GUARDED_BY(crit_sect_) = false;
+
+  // Reference count; implementation copied from rtc::RefCountedObject.
+  mutable volatile int ref_count_ = 0;
+
+  RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(AudioState);
+};
+}  // namespace internal
+}  // namespace webrtc
+
+#endif  // WEBRTC_AUDIO_AUDIO_STATE_H_
diff --git a/webrtc/audio/audio_state_unittest.cc b/webrtc/audio/audio_state_unittest.cc
new file mode 100644
index 0000000000..11fbdb4a86
--- /dev/null
+++ b/webrtc/audio/audio_state_unittest.cc
@@ -0,0 +1,80 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "testing/gtest/include/gtest/gtest.h"
+
+#include "webrtc/audio/audio_state.h"
+#include "webrtc/base/scoped_ptr.h"
+#include "webrtc/test/mock_voice_engine.h"
+
+namespace webrtc {
+namespace test {
+namespace {
+
+struct ConfigHelper {  // Config wired to a StrictMock VoiceEngine.
+  ConfigHelper() {
+    audio_config_.voice_engine = &mock_engine_;
+    EXPECT_CALL(mock_engine_, RegisterVoiceEngineObserver(testing::_))
+        .WillOnce(testing::Return(0));
+    EXPECT_CALL(mock_engine_, DeRegisterVoiceEngineObserver())
+        .WillOnce(testing::Return(0));
+  }
+  AudioState::Config& config() { return audio_config_; }
+  MockVoiceEngine& voice_engine() { return mock_engine_; }
+
+ private:
+  testing::StrictMock<MockVoiceEngine> mock_engine_;
+  AudioState::Config audio_config_;
+};
+}  // namespace
+
+TEST(AudioStateTest, Create) {
+  ConfigHelper config_helper;
+  rtc::scoped_refptr<AudioState> state(
+      AudioState::Create(config_helper.config()));
+  EXPECT_TRUE(state.get() != nullptr);
+}
+
+TEST(AudioStateTest, ConstructDestruct) {
+  ConfigHelper config_helper;  // Expects one observer register/deregister.
+  rtc::scoped_ptr<internal::AudioState> state;
+  state.reset(new internal::AudioState(config_helper.config()));
+}
+
+TEST(AudioStateTest, GetVoiceEngine) {
+  ConfigHelper config_helper;
+  rtc::scoped_ptr<internal::AudioState> state(
+      new internal::AudioState(config_helper.config()));
+  EXPECT_EQ(&config_helper.voice_engine(), state->voice_engine());
+}
+
+TEST(AudioStateTest, TypingNoiseDetected) {
+  ConfigHelper config_helper;
+  rtc::scoped_ptr<internal::AudioState> state(
+      new internal::AudioState(config_helper.config()));
+  // CallbackOnError() is private in AudioState; go through the base interface.
+  VoiceEngineObserver* const observer = state.get();
+  EXPECT_FALSE(state->typing_noise_detected());
+  // An unrelated error must not raise the flag.
+  observer->CallbackOnError(-1, VE_NOT_INITED);
+  EXPECT_FALSE(state->typing_noise_detected());
+  // The typing warning sets it; unrelated errors leave it set.
+  observer->CallbackOnError(-1, VE_TYPING_NOISE_WARNING);
+  EXPECT_TRUE(state->typing_noise_detected());
+  observer->CallbackOnError(-1, VE_NOT_INITED);
+  EXPECT_TRUE(state->typing_noise_detected());
+  // The "off" warning clears it; unrelated errors leave it cleared.
+  observer->CallbackOnError(-1, VE_TYPING_NOISE_OFF_WARNING);
+  EXPECT_FALSE(state->typing_noise_detected());
+  observer->CallbackOnError(-1, VE_NOT_INITED);
+  EXPECT_FALSE(state->typing_noise_detected());
+}
+}  // namespace test
+}  // namespace webrtc
diff --git a/webrtc/audio/webrtc_audio.gypi b/webrtc/audio/webrtc_audio.gypi
index b9d45db56d..53b7d16b1a 100644
--- a/webrtc/audio/webrtc_audio.gypi
+++ b/webrtc/audio/webrtc_audio.gypi
@@ -18,6 +18,9 @@
       'audio/audio_receive_stream.h',
       'audio/audio_send_stream.cc',
       'audio/audio_send_stream.h',
+      'audio/audio_sink.h',
+      'audio/audio_state.cc',
+      'audio/audio_state.h',
       'audio/conversion.h',
       'audio/scoped_voe_interface.h',
     ],