10 files changed, 688 insertions, 654 deletions
diff --git a/webrtc/modules/audio_processing/aec/aec_core.c b/webrtc/modules/audio_processing/aec/aec_core.c
index f8eed32372..901e0fde0b 100644
--- a/webrtc/modules/audio_processing/aec/aec_core.c
+++ b/webrtc/modules/audio_processing/aec/aec_core.c
@@ -44,7 +44,6 @@ static const int countLen = 50;
 static const int kDelayMetricsAggregationWindow = 1250;  // 5 seconds at 16 kHz.
 
 // Quantities to control H band scaling for SWB input
-static const int flagHbandCn = 1;  // flag for adding comfort noise in H band
 static const float cnScaleHband =
     (float)0.4;  // scale for comfort noise in H band
 // Initial bin for averaging nlp gain in low band
@@ -135,6 +134,9 @@ WebRtcAecFilterAdaptation WebRtcAec_FilterAdaptation;
 WebRtcAecOverdriveAndSuppress WebRtcAec_OverdriveAndSuppress;
 WebRtcAecComfortNoise WebRtcAec_ComfortNoise;
 WebRtcAecSubBandCoherence WebRtcAec_SubbandCoherence;
+WebRtcAecStoreAsComplex WebRtcAec_StoreAsComplex;
+WebRtcAecPartitionDelay WebRtcAec_PartitionDelay;
+WebRtcAecWindowData WebRtcAec_WindowData;
 
 __inline static float MulRe(float aRe, float aIm, float bRe, float bIm) {
   return aRe * bRe - aIm * bIm;
@@ -151,40 +153,49 @@ static int CmpFloat(const void* a, const void* b) {
   return (*da > *db) - (*da < *db);
 }
 
-static void FilterFar(AecCore* aec, float yf[2][PART_LEN1]) {
+static void FilterFar(
+    int num_partitions,
+    int x_fft_buf_block_pos,
+    float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
+    float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
+    float y_fft[2][PART_LEN1]) {
   int i;
-  for (i = 0; i < aec->num_partitions; i++) {
+  for (i = 0; i < num_partitions; i++) {
     int j;
-    int xPos = (i + aec->xfBufBlockPos) * PART_LEN1;
+    int xPos = (i + x_fft_buf_block_pos) * PART_LEN1;
     int pos = i * PART_LEN1;
     // Check for wrap
-    if (i + aec->xfBufBlockPos >= aec->num_partitions) {
-      xPos -= aec->num_partitions * (PART_LEN1);
+    if (i + x_fft_buf_block_pos >= num_partitions) {
+      xPos -= num_partitions * (PART_LEN1);
     }
 
     for (j = 0; j < PART_LEN1; j++) {
-      yf[0][j] += MulRe(aec->xfBuf[0][xPos + j],
-                        aec->xfBuf[1][xPos + j],
-                        aec->wfBuf[0][pos + j],
-                        aec->wfBuf[1][pos + j]);
-      yf[1][j] += MulIm(aec->xfBuf[0][xPos + j],
-                        aec->xfBuf[1][xPos + j],
-                        aec->wfBuf[0][pos + j],
-                        aec->wfBuf[1][pos + j]);
+      y_fft[0][j] += MulRe(x_fft_buf[0][xPos + j],
+                           x_fft_buf[1][xPos + j],
+                           h_fft_buf[0][pos + j],
+                           h_fft_buf[1][pos + j]);
+      y_fft[1][j] += MulIm(x_fft_buf[0][xPos + j],
+                           x_fft_buf[1][xPos + j],
+                           h_fft_buf[0][pos + j],
+                           h_fft_buf[1][pos + j]);
     }
   }
 }
 
-static void ScaleErrorSignal(AecCore* aec, float ef[2][PART_LEN1]) {
-  const float mu = aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu;
-  const float error_threshold = aec->extended_filter_enabled
+static void ScaleErrorSignal(int extended_filter_enabled,
+                             float normal_mu,
+                             float normal_error_threshold,
+                             float x_pow[PART_LEN1],
+                             float ef[2][PART_LEN1]) {
+  const float mu = extended_filter_enabled ? kExtendedMu : normal_mu;
+  const float error_threshold = extended_filter_enabled
                                     ? kExtendedErrorThreshold
-                                    : aec->normal_error_threshold;
+                                    : normal_error_threshold;
   int i;
   float abs_ef;
   for (i = 0; i < (PART_LEN1); i++) {
-    ef[0][i] /= (aec->xPow[i] + 1e-10f);
-    ef[1][i] /= (aec->xPow[i] + 1e-10f);
+    ef[0][i] /= (x_pow[i] + 1e-10f);
+    ef[1][i] /= (x_pow[i] + 1e-10f);
     abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]);
 
     if (abs_ef > error_threshold) {
@@ -199,59 +210,40 @@ static void ScaleErrorSignal(AecCore* aec, float ef[2][PART_LEN1]) {
   }
 }
 
-// Time-unconstrined filter adaptation.
-// TODO(andrew): consider for a low-complexity mode.
-// static void FilterAdaptationUnconstrained(AecCore* aec, float *fft,
-//                                          float ef[2][PART_LEN1]) {
-//  int i, j;
-//  for (i = 0; i < aec->num_partitions; i++) {
-//    int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1);
-//    int pos;
-//    // Check for wrap
-//    if (i + aec->xfBufBlockPos >= aec->num_partitions) {
-//      xPos -= aec->num_partitions * PART_LEN1;
-//    }
-//
-//    pos = i * PART_LEN1;
-//
-//    for (j = 0; j < PART_LEN1; j++) {
-//      aec->wfBuf[0][pos + j] += MulRe(aec->xfBuf[0][xPos + j],
-//                                      -aec->xfBuf[1][xPos + j],
-//                                      ef[0][j], ef[1][j]);
-//      aec->wfBuf[1][pos + j] += MulIm(aec->xfBuf[0][xPos + j],
-//                                      -aec->xfBuf[1][xPos + j],
-//                                      ef[0][j], ef[1][j]);
-//    }
-//  }
-//}
-
-static void FilterAdaptation(AecCore* aec, float* fft, float ef[2][PART_LEN1]) {
+
+static void FilterAdaptation(
+    int num_partitions,
+    int x_fft_buf_block_pos,
+    float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
+    float e_fft[2][PART_LEN1],
+    float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) {
   int i, j;
-  for (i = 0; i < aec->num_partitions; i++) {
-    int xPos = (i + aec->xfBufBlockPos) * (PART_LEN1);
+  float fft[PART_LEN2];
+  for (i = 0; i < num_partitions; i++) {
+    int xPos = (i + x_fft_buf_block_pos) * (PART_LEN1);
     int pos;
     // Check for wrap
-    if (i + aec->xfBufBlockPos >= aec->num_partitions) {
-      xPos -= aec->num_partitions * PART_LEN1;
+    if (i + x_fft_buf_block_pos >= num_partitions) {
+      xPos -= num_partitions * PART_LEN1;
     }
 
     pos = i * PART_LEN1;
 
     for (j = 0; j < PART_LEN; j++) {
 
-      fft[2 * j] = MulRe(aec->xfBuf[0][xPos + j],
-                         -aec->xfBuf[1][xPos + j],
-                         ef[0][j],
-                         ef[1][j]);
-      fft[2 * j + 1] = MulIm(aec->xfBuf[0][xPos + j],
-                             -aec->xfBuf[1][xPos + j],
-                             ef[0][j],
-                             ef[1][j]);
+      fft[2 * j] = MulRe(x_fft_buf[0][xPos + j],
+                         -x_fft_buf[1][xPos + j],
+                         e_fft[0][j],
+                         e_fft[1][j]);
+      fft[2 * j + 1] = MulIm(x_fft_buf[0][xPos + j],
+                             -x_fft_buf[1][xPos + j],
+                             e_fft[0][j],
+                             e_fft[1][j]);
     }
-    fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN],
-                   -aec->xfBuf[1][xPos + PART_LEN],
-                   ef[0][PART_LEN],
-                   ef[1][PART_LEN]);
+    fft[1] = MulRe(x_fft_buf[0][xPos + PART_LEN],
+                   -x_fft_buf[1][xPos + PART_LEN],
+                   e_fft[0][PART_LEN],
+                   e_fft[1][PART_LEN]);
 
     aec_rdft_inverse_128(fft);
     memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);
@@ -265,12 +257,12 @@ static void FilterAdaptation(AecCore* aec, float* fft, float ef[2][PART_LEN1]) {
     }
     aec_rdft_forward_128(fft);
 
-    aec->wfBuf[0][pos] += fft[0];
-    aec->wfBuf[0][pos + PART_LEN] += fft[1];
+    h_fft_buf[0][pos] += fft[0];
+    h_fft_buf[0][pos + PART_LEN] += fft[1];
 
     for (j = 1; j < PART_LEN; j++) {
-      aec->wfBuf[0][pos + j] += fft[2 * j];
-      aec->wfBuf[1][pos + j] += fft[2 * j + 1];
+      h_fft_buf[0][pos + j] += fft[2 * j];
+      h_fft_buf[1][pos + j] += fft[2 * j + 1];
     }
   }
 }
@@ -334,12 +326,13 @@ const float WebRtcAec_kMinFarendPSD = 15;
 //  - sde : cross-PSD of near-end and residual echo
 //  - sxd : cross-PSD of near-end and far-end
 //
-// In addition to updating the PSDs, also the filter diverge state is determined
-// upon actions are taken.
+// In addition to updating the PSDs, also the filter diverge state is
+// determined.
 static void SmoothedPSD(AecCore* aec,
                         float efw[2][PART_LEN1],
                         float dfw[2][PART_LEN1],
-                        float xfw[2][PART_LEN1]) {
+                        float xfw[2][PART_LEN1],
+                        int* extreme_filter_divergence) {
   // Power estimate smoothing coefficients.
   const float* ptrGCoh = aec->extended_filter_enabled
       ? WebRtcAec_kExtendedSmoothingCoefficients[aec->mult - 1]
@@ -380,15 +373,12 @@ static void SmoothedPSD(AecCore* aec,
     seSum += aec->se[i];
   }
 
-  // Divergent filter safeguard.
+  // Divergent filter safeguard update.
   aec->divergeState = (aec->divergeState ? 1.05f : 1.0f) * seSum > sdSum;
 
-  if (aec->divergeState)
-    memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1);
-
-  // Reset if error is significantly larger than nearend (13 dB).
-  if (!aec->extended_filter_enabled && seSum > (19.95f * sdSum))
-    memset(aec->wfBuf, 0, sizeof(aec->wfBuf));
+  // Signal extreme filter divergence if the error is significantly larger
+  // than the nearend (13 dB).
+  *extreme_filter_divergence = (seSum > (19.95f * sdSum));
 }
 
 // Window time domain data to be used by the fft.
@@ -417,32 +407,15 @@ __inline static void StoreAsComplex(const float* data,
 
 static void SubbandCoherence(AecCore* aec,
                              float efw[2][PART_LEN1],
+                             float dfw[2][PART_LEN1],
                              float xfw[2][PART_LEN1],
                              float* fft,
                              float* cohde,
-                             float* cohxd) {
-  float dfw[2][PART_LEN1];
+                             float* cohxd,
+                             int* extreme_filter_divergence) {
   int i;
 
-  if (aec->delayEstCtr == 0)
-    aec->delayIdx = PartitionDelay(aec);
-
-  // Use delayed far.
-  memcpy(xfw,
-         aec->xfwBuf + aec->delayIdx * PART_LEN1,
-         sizeof(xfw[0][0]) * 2 * PART_LEN1);
-
-  // Windowed near fft
-  WindowData(fft, aec->dBuf);
-  aec_rdft_forward_128(fft);
-  StoreAsComplex(fft, dfw);
-
-  // Windowed error fft
-  WindowData(fft, aec->eBuf);
-  aec_rdft_forward_128(fft);
-  StoreAsComplex(fft, efw);
-
-  SmoothedPSD(aec, efw, dfw, xfw);
+  SmoothedPSD(aec, efw, dfw, xfw, extreme_filter_divergence);
 
   // Subband coherence
   for (i = 0; i < PART_LEN1; i++) {
@@ -458,23 +431,23 @@ static void SubbandCoherence(AecCore* aec,
 static void GetHighbandGain(const float* lambda, float* nlpGainHband) {
   int i;
 
-  nlpGainHband[0] = (float)0.0;
+  *nlpGainHband = (float)0.0;
   for (i = freqAvgIc; i < PART_LEN1 - 1; i++) {
-    nlpGainHband[0] += lambda[i];
+    *nlpGainHband += lambda[i];
   }
-  nlpGainHband[0] /= (float)(PART_LEN1 - 1 - freqAvgIc);
+  *nlpGainHband /= (float)(PART_LEN1 - 1 - freqAvgIc);
 }
 
 static void ComfortNoise(AecCore* aec,
                          float efw[2][PART_LEN1],
-                         complex_t* comfortNoiseHband,
+                         float comfortNoiseHband[2][PART_LEN1],
                          const float* noisePow,
                          const float* lambda) {
   int i, num;
   float rand[PART_LEN];
   float noise, noiseAvg, tmp, tmpAvg;
   int16_t randW16[PART_LEN];
-  complex_t u[PART_LEN1];
+  float u[2][PART_LEN1];
 
   const float pi2 = 6.28318530717959f;
 
@@ -486,22 +459,22 @@ static void ComfortNoise(AecCore* aec,
 
   // Reject LF noise
   u[0][0] = 0;
-  u[0][1] = 0;
+  u[1][0] = 0;
   for (i = 1; i < PART_LEN1; i++) {
     tmp = pi2 * rand[i - 1];
 
     noise = sqrtf(noisePow[i]);
-    u[i][0] = noise * cosf(tmp);
-    u[i][1] = -noise * sinf(tmp);
+    u[0][i] = noise * cosf(tmp);
+    u[1][i] = -noise * sinf(tmp);
   }
-  u[PART_LEN][1] = 0;
+  u[1][PART_LEN] = 0;
 
   for (i = 0; i < PART_LEN1; i++) {
     // This is the proper weighting to match the background noise power
     tmp = sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0));
     // tmp = 1 - lambda[i];
-    efw[0][i] += tmp * u[i][0];
-    efw[1][i] += tmp * u[i][1];
+    efw[0][i] += tmp * u[0][i];
+    efw[1][i] += tmp * u[1][i];
   }
 
   // For H band comfort noise
@@ -509,7 +482,7 @@ static void ComfortNoise(AecCore* aec,
   noiseAvg = 0.0;
   tmpAvg = 0.0;
   num = 0;
-  if (aec->num_bands > 1 && flagHbandCn == 1) {
+  if (aec->num_bands > 1) {
 
     // average noise scale
     // average over second half of freq spectrum (i.e., 4->8khz)
@@ -534,21 +507,24 @@ static void ComfortNoise(AecCore* aec,
     // TODO: we should probably have a new random vector here.
     // Reject LF noise
     u[0][0] = 0;
-    u[0][1] = 0;
+    u[1][0] = 0;
     for (i = 1; i < PART_LEN1; i++) {
       tmp = pi2 * rand[i - 1];
 
       // Use average noise for H band
-      u[i][0] = noiseAvg * (float)cos(tmp);
-      u[i][1] = -noiseAvg * (float)sin(tmp);
+      u[0][i] = noiseAvg * (float)cos(tmp);
+      u[1][i] = -noiseAvg * (float)sin(tmp);
     }
-    u[PART_LEN][1] = 0;
+    u[1][PART_LEN] = 0;
 
     for (i = 0; i < PART_LEN1; i++) {
       // Use average NLP weight for H band
-      comfortNoiseHband[i][0] = tmpAvg * u[i][0];
-      comfortNoiseHband[i][1] = tmpAvg * u[i][1];
+      comfortNoiseHband[0][i] = tmpAvg * u[0][i];
+      comfortNoiseHband[1][i] = tmpAvg * u[1][i];
     }
+  } else {
+    memset(comfortNoiseHband, 0,
+           2 * PART_LEN1 * sizeof(comfortNoiseHband[0][0]));
   }
 }
 
@@ -837,21 +813,29 @@ static void UpdateDelayMetrics(AecCore* self) {
   return;
 }
 
-static void TimeToFrequency(float time_data[PART_LEN2],
-                            float freq_data[2][PART_LEN1],
-                            int window) {
-  int i = 0;
-
-  // TODO(bjornv): Should we have a different function/wrapper for windowed FFT?
-  if (window) {
-    for (i = 0; i < PART_LEN; i++) {
-      time_data[i] *= WebRtcAec_sqrtHanning[i];
-      time_data[PART_LEN + i] *= WebRtcAec_sqrtHanning[PART_LEN - i];
-    }
+static void ScaledInverseFft(float freq_data[2][PART_LEN1],
+                             float time_data[PART_LEN2],
+                             float scale,
+                             int conjugate) {
+  int i;
+  const float normalization = scale / ((float)PART_LEN2);
+  const float sign = (conjugate ? -1 : 1);
+  time_data[0] = freq_data[0][0] * normalization;
+  time_data[1] = freq_data[0][PART_LEN] * normalization;
+  for (i = 1; i < PART_LEN; i++) {
+    time_data[2 * i] = freq_data[0][i] * normalization;
+    time_data[2 * i + 1] = sign * freq_data[1][i] * normalization;
   }
+  aec_rdft_inverse_128(time_data);
+}
+
 
+static void Fft(float time_data[PART_LEN2],
+                float freq_data[2][PART_LEN1]) {
+  int i;
   aec_rdft_forward_128(time_data);
-  // Reorder.
+
+  // Reorder fft output data.
   freq_data[1][0] = 0;
   freq_data[1][PART_LEN] = 0;
   freq_data[0][0] = time_data[0];
@@ -862,13 +846,6 @@ static void TimeToFrequency(float time_data[PART_LEN2],
   }
 }
 
-static int MoveFarReadPtrWithoutSystemDelayUpdate(AecCore* self, int elements) {
-  WebRtc_MoveReadPtr(self->far_buf_windowed, elements);
-#ifdef WEBRTC_AEC_DEBUG_DUMP
-  WebRtc_MoveReadPtr(self->far_time_buf, elements);
-#endif
-  return WebRtc_MoveReadPtr(self->far_buf, elements);
-}
 
 static int SignalBasedDelayCorrection(AecCore* self) {
   int delay_correction = 0;
@@ -909,7 +886,7 @@ static int SignalBasedDelayCorrection(AecCore* self) {
     const int upper_bound = self->num_partitions * 3 / 4;
     const int do_correction = delay <= lower_bound || delay > upper_bound;
     if (do_correction == 1) {
-      int available_read = (int)WebRtc_available_read(self->far_buf);
+      int available_read = (int)WebRtc_available_read(self->far_time_buf);
       // With |shift_offset| we gradually rely on the delay estimates.  For
       // positive delays we reduce the correction by |shift_offset| to lower the
       // risk of pushing the AEC into a non causal state.  For negative delays
@@ -942,13 +919,94 @@ static int SignalBasedDelayCorrection(AecCore* self) {
   return delay_correction;
 }
 
-static void NonLinearProcessing(AecCore* aec,
-                                float* output,
-                                float* const* outputH) {
-  float efw[2][PART_LEN1], xfw[2][PART_LEN1];
-  complex_t comfortNoiseHband[PART_LEN1];
+static void EchoSubtraction(
+    AecCore* aec,
+    int num_partitions,
+    int x_fft_buf_block_pos,
+    int metrics_mode,
+    int extended_filter_enabled,
+    float normal_mu,
+    float normal_error_threshold,
+    float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
+    float* const y,
+    float x_pow[PART_LEN1],
+    float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
+    PowerLevel* linout_level,
+    float echo_subtractor_output[PART_LEN]) {
+  float s_fft[2][PART_LEN1];
+  float e_extended[PART_LEN2];
+  float s_extended[PART_LEN2];
+  float *s;
+  float e[PART_LEN];
+  float e_fft[2][PART_LEN1];
+  int i;
+  memset(s_fft, 0, sizeof(s_fft));
+
+  // Conditionally reset the echo subtraction filter if the filter has diverged
+  // significantly.
+  if (!aec->extended_filter_enabled &&
+      aec->extreme_filter_divergence) {
+    memset(aec->wfBuf, 0, sizeof(aec->wfBuf));
+    aec->extreme_filter_divergence = 0;
+  }
+
+  // Produce echo estimate s_fft.
+  WebRtcAec_FilterFar(num_partitions,
+                      x_fft_buf_block_pos,
+                      x_fft_buf,
+                      h_fft_buf,
+                      s_fft);
+
+  // Compute the time-domain echo estimate s.
+  ScaledInverseFft(s_fft, s_extended, 2.0f, 0);
+  s = &s_extended[PART_LEN];
+
+  // Compute the time-domain echo prediction error.
+  for (i = 0; i < PART_LEN; ++i) {
+    e[i] = y[i] - s[i];
+  }
+
+  // Compute the frequency domain echo prediction error.
+  memset(e_extended, 0, sizeof(float) * PART_LEN);
+  memcpy(e_extended + PART_LEN, e, sizeof(float) * PART_LEN);
+  Fft(e_extended, e_fft);
+
+  RTC_AEC_DEBUG_RAW_WRITE(aec->e_fft_file,
+                          &e_fft[0][0],
+                          sizeof(e_fft[0][0]) * PART_LEN1 * 2);
+
+  if (metrics_mode == 1) {
+    // Note that the first PART_LEN samples in fft (before transformation) are
+    // zero. Hence, the scaling by two in UpdateLevel() should not be
+    // performed. That scaling is taken care of in UpdateMetrics() instead.
+    UpdateLevel(linout_level, e_fft);
+  }
+
+  // Scale error signal inversely with far power.
+  WebRtcAec_ScaleErrorSignal(extended_filter_enabled,
+                             normal_mu,
+                             normal_error_threshold,
+                             x_pow,
+                             e_fft);
+  WebRtcAec_FilterAdaptation(num_partitions,
+                             x_fft_buf_block_pos,
+                             x_fft_buf,
+                             e_fft,
+                             h_fft_buf);
+  memcpy(echo_subtractor_output, e, sizeof(float) * PART_LEN);
+}
+
+
+static void EchoSuppression(AecCore* aec,
+                            float farend[PART_LEN2],
+                            float* echo_subtractor_output,
+                            float* output,
+                            float* const* outputH) {
+  float efw[2][PART_LEN1];
+  float xfw[2][PART_LEN1];
+  float dfw[2][PART_LEN1];
+  float comfortNoiseHband[2][PART_LEN1];
   float fft[PART_LEN2];
-  float scale, dtmp;
   float nlpGainHband;
   int i;
   size_t j;
@@ -972,27 +1030,51 @@ static void NonLinearProcessing(AecCore* aec,
 
   float* xfw_ptr = NULL;
 
-  aec->delayEstCtr++;
-  if (aec->delayEstCtr == delayEstInterval) {
-    aec->delayEstCtr = 0;
-  }
+  // Update eBuf with echo subtractor output.
+  memcpy(aec->eBuf + PART_LEN,
+         echo_subtractor_output,
+         sizeof(float) * PART_LEN);
 
-  // initialize comfort noise for H band
-  memset(comfortNoiseHband, 0, sizeof(comfortNoiseHband));
-  nlpGainHband = (float)0.0;
-  dtmp = (float)0.0;
+  // Analysis filter banks for the echo suppressor.
+  // Windowed near-end ffts.
+  WindowData(fft, aec->dBuf);
+  aec_rdft_forward_128(fft);
+  StoreAsComplex(fft, dfw);
+
+  // Windowed echo suppressor output ffts.
+  WindowData(fft, aec->eBuf);
+  aec_rdft_forward_128(fft);
+  StoreAsComplex(fft, efw);
 
-  // We should always have at least one element stored in |far_buf|.
-  assert(WebRtc_available_read(aec->far_buf_windowed) > 0);
   // NLP
-  WebRtc_ReadBuffer(aec->far_buf_windowed, (void**)&xfw_ptr, &xfw[0][0], 1);
 
-  // TODO(bjornv): Investigate if we can reuse |far_buf_windowed| instead of
-  // |xfwBuf|.
+  // Convert far-end partition to the frequency domain with windowing.
+  WindowData(fft, farend);
+  Fft(fft, xfw);
+  xfw_ptr = &xfw[0][0];
+
   // Buffer far.
   memcpy(aec->xfwBuf, xfw_ptr, sizeof(float) * 2 * PART_LEN1);
 
-  WebRtcAec_SubbandCoherence(aec, efw, xfw, fft, cohde, cohxd);
+  aec->delayEstCtr++;
+  if (aec->delayEstCtr == delayEstInterval) {
+    aec->delayEstCtr = 0;
+    aec->delayIdx = WebRtcAec_PartitionDelay(aec);
+  }
+
+  // Use delayed far.
+  memcpy(xfw,
+         aec->xfwBuf + aec->delayIdx * PART_LEN1,
+         sizeof(xfw[0][0]) * 2 * PART_LEN1);
+
+  WebRtcAec_SubbandCoherence(aec, efw, dfw, xfw, fft, cohde, cohxd,
+                             &aec->extreme_filter_divergence);
+
+  // Select the microphone signal as output if the filter is deemed to have
+  // diverged.
+  if (aec->divergeState) {
+    memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1);
+  }
 
   hNlXdAvg = 0;
   for (i = minPrefBand; i < prefBandSize + minPrefBand; i++) {
@@ -1098,67 +1180,51 @@ static void NonLinearProcessing(AecCore* aec,
     // scaling only in UpdateMetrics().
     UpdateLevel(&aec->nlpoutlevel, efw);
   }
+
   // Inverse error fft.
-  fft[0] = efw[0][0];
-  fft[1] = efw[0][PART_LEN];
-  for (i = 1; i < PART_LEN; i++) {
-    fft[2 * i] = efw[0][i];
-    // Sign change required by Ooura fft.
-    fft[2 * i + 1] = -efw[1][i];
-  }
-  aec_rdft_inverse_128(fft);
+  ScaledInverseFft(efw, fft, 2.0f, 1);
 
   // Overlap and add to obtain output.
-  scale = 2.0f / PART_LEN2;
   for (i = 0; i < PART_LEN; i++) {
-    fft[i] *= scale;  // fft scaling
-    fft[i] = fft[i] * WebRtcAec_sqrtHanning[i] + aec->outBuf[i];
-
-    fft[PART_LEN + i] *= scale;  // fft scaling
-    aec->outBuf[i] = fft[PART_LEN + i] * WebRtcAec_sqrtHanning[PART_LEN - i];
+    output[i] = (fft[i] * WebRtcAec_sqrtHanning[i] +
+                 aec->outBuf[i] * WebRtcAec_sqrtHanning[PART_LEN - i]);
 
     // Saturate output to keep it in the allowed range.
     output[i] = WEBRTC_SPL_SAT(
-        WEBRTC_SPL_WORD16_MAX, fft[i], WEBRTC_SPL_WORD16_MIN);
+        WEBRTC_SPL_WORD16_MAX, output[i], WEBRTC_SPL_WORD16_MIN);
   }
+  memcpy(aec->outBuf, &fft[PART_LEN], PART_LEN * sizeof(aec->outBuf[0]));
 
   // For H band
   if (aec->num_bands > 1) {
-
     // H band gain
     // average nlp over low band: average over second half of freq spectrum
     // (4->8khz)
     GetHighbandGain(hNl, &nlpGainHband);
 
     // Inverse comfort_noise
-    if (flagHbandCn == 1) {
-      fft[0] = comfortNoiseHband[0][0];
-      fft[1] = comfortNoiseHband[PART_LEN][0];
-      for (i = 1; i < PART_LEN; i++) {
-        fft[2 * i] = comfortNoiseHband[i][0];
-        fft[2 * i + 1] = comfortNoiseHband[i][1];
-      }
-      aec_rdft_inverse_128(fft);
-      scale = 2.0f / PART_LEN2;
-    }
+    ScaledInverseFft(comfortNoiseHband, fft, 2.0f, 0);
 
     // compute gain factor
     for (j = 0; j < aec->num_bands - 1; ++j) {
       for (i = 0; i < PART_LEN; i++) {
-        dtmp = aec->dBufH[j][i];
-        dtmp = dtmp * nlpGainHband;  // for variable gain
+        outputH[j][i] = aec->dBufH[j][i] * nlpGainHband;
+      }
+    }
 
-        // add some comfort noise where Hband is attenuated
-        if (flagHbandCn == 1 && j == 0) {
-          fft[i] *= scale;  // fft scaling
-          dtmp += cnScaleHband * fft[i];
-        }
+    // Add some comfort noise where Hband is attenuated.
+    for (i = 0; i < PART_LEN; i++) {
+      outputH[0][i] += cnScaleHband * fft[i];
+    }
 
-        // Saturate output to keep it in the allowed range.
+    // Saturate output to keep it in the allowed range.
+    for (j = 0; j < aec->num_bands - 1; ++j) {
+      for (i = 0; i < PART_LEN; i++) {
         outputH[j][i] = WEBRTC_SPL_SAT(
-            WEBRTC_SPL_WORD16_MAX, dtmp, WEBRTC_SPL_WORD16_MIN);
+            WEBRTC_SPL_WORD16_MAX, outputH[j][i], WEBRTC_SPL_WORD16_MIN);
       }
     }
+
   }
 
   // Copy the current block to the old position.
@@ -1177,11 +1243,9 @@ static void NonLinearProcessing(AecCore* aec,
 
 static void ProcessBlock(AecCore* aec) {
   size_t i;
-  float y[PART_LEN], e[PART_LEN];
-  float scale;
 
   float fft[PART_LEN2];
-  float xf[2][PART_LEN1], yf[2][PART_LEN1], ef[2][PART_LEN1];
+  float xf[2][PART_LEN1];
   float df[2][PART_LEN1];
   float far_spectrum = 0.0f;
   float near_spectrum = 0.0f;
@@ -1198,15 +1262,18 @@ static void ProcessBlock(AecCore* aec) {
 
   float nearend[PART_LEN];
   float* nearend_ptr = NULL;
+  float farend[PART_LEN2];
+  float* farend_ptr = NULL;
+  float echo_subtractor_output[PART_LEN];
   float output[PART_LEN];
   float outputH[NUM_HIGH_BANDS_MAX][PART_LEN];
   float* outputH_ptr[NUM_HIGH_BANDS_MAX];
+  float* xf_ptr = NULL;
+
   for (i = 0; i < NUM_HIGH_BANDS_MAX; ++i) {
     outputH_ptr[i] = outputH[i];
   }
 
-  float* xf_ptr = NULL;
-
   // Concatenate old and new nearend blocks.
   for (i = 0; i < aec->num_bands - 1; ++i) {
     WebRtc_ReadBuffer(aec->nearFrBufH[i],
@@ -1218,25 +1285,28 @@ static void ProcessBlock(AecCore* aec) {
   WebRtc_ReadBuffer(aec->nearFrBuf, (void**)&nearend_ptr, nearend, PART_LEN);
   memcpy(aec->dBuf + PART_LEN, nearend_ptr, sizeof(nearend));
 
-  // ---------- Ooura fft ----------
+  // We should always have at least one element stored in |far_buf|.
+  assert(WebRtc_available_read(aec->far_time_buf) > 0);
+  WebRtc_ReadBuffer(aec->far_time_buf, (void**)&farend_ptr, farend, 1);
 
 #ifdef WEBRTC_AEC_DEBUG_DUMP
   {
-    float farend[PART_LEN];
-    float* farend_ptr = NULL;
-    WebRtc_ReadBuffer(aec->far_time_buf, (void**)&farend_ptr, farend, 1);
-    RTC_AEC_DEBUG_WAV_WRITE(aec->farFile, farend_ptr, PART_LEN);
+    // TODO(minyue): |farend_ptr| starts from buffered samples. This will be
+    // modified when |aec->far_time_buf| is revised.
+    RTC_AEC_DEBUG_WAV_WRITE(aec->farFile, &farend_ptr[PART_LEN], PART_LEN);
+
     RTC_AEC_DEBUG_WAV_WRITE(aec->nearFile, nearend_ptr, PART_LEN);
   }
 #endif
 
-  // We should always have at least one element stored in |far_buf|.
-  assert(WebRtc_available_read(aec->far_buf) > 0);
-  WebRtc_ReadBuffer(aec->far_buf, (void**)&xf_ptr, &xf[0][0], 1);
+  // Convert far-end signal to the frequency domain.
+  memcpy(fft, farend_ptr, sizeof(float) * PART_LEN2);
+  Fft(fft, xf);
+  xf_ptr = &xf[0][0];
 
   // Near fft
   memcpy(fft, aec->dBuf, sizeof(float) * PART_LEN2);
-  TimeToFrequency(fft, df, 0);
+  Fft(fft, df);
 
   // Power smoothing
   for (i = 0; i < PART_LEN1; i++) {
@@ -1314,60 +1384,25 @@ static void ProcessBlock(AecCore* aec) {
          &xf_ptr[PART_LEN1],
          sizeof(float) * PART_LEN1);
 
-  memset(yf, 0, sizeof(yf));
-
-  // Filter far
-  WebRtcAec_FilterFar(aec, yf);
-
-  // Inverse fft to obtain echo estimate and error.
-  fft[0] = yf[0][0];
-  fft[1] = yf[0][PART_LEN];
-  for (i = 1; i < PART_LEN; i++) {
-    fft[2 * i] = yf[0][i];
-    fft[2 * i + 1] = yf[1][i];
-  }
-  aec_rdft_inverse_128(fft);
-
-  scale = 2.0f / PART_LEN2;
-  for (i = 0; i < PART_LEN; i++) {
-    y[i] = fft[PART_LEN + i] * scale;  // fft scaling
-  }
-
-  for (i = 0; i < PART_LEN; i++) {
-    e[i] = nearend_ptr[i] - y[i];
-  }
-
-  // Error fft
-  memcpy(aec->eBuf + PART_LEN, e, sizeof(float) * PART_LEN);
-  memset(fft, 0, sizeof(float) * PART_LEN);
-  memcpy(fft + PART_LEN, e, sizeof(float) * PART_LEN);
-  // TODO(bjornv): Change to use TimeToFrequency().
-  aec_rdft_forward_128(fft);
-
-  ef[1][0] = 0;
-  ef[1][PART_LEN] = 0;
-  ef[0][0] = fft[0];
-  ef[0][PART_LEN] = fft[1];
-  for (i = 1; i < PART_LEN; i++) {
-    ef[0][i] = fft[2 * i];
-    ef[1][i] = fft[2 * i + 1];
-  }
-
-  RTC_AEC_DEBUG_RAW_WRITE(aec->e_fft_file,
-                          &ef[0][0],
-                          sizeof(ef[0][0]) * PART_LEN1 * 2);
-
-  if (aec->metricsMode == 1) {
-    // Note that the first PART_LEN samples in fft (before transformation) are
-    // zero. Hence, the scaling by two in UpdateLevel() should not be
-    // performed. That scaling is taken care of in UpdateMetrics() instead.
-    UpdateLevel(&aec->linoutlevel, ef);
-  }
-
-  // Scale error signal inversely with far power.
-  WebRtcAec_ScaleErrorSignal(aec, ef);
-  WebRtcAec_FilterAdaptation(aec, fft, ef);
-  NonLinearProcessing(aec, output, outputH_ptr);
+  // Perform echo subtraction.
+  EchoSubtraction(aec,
+                  aec->num_partitions,
+                  aec->xfBufBlockPos,
+                  aec->metricsMode,
+                  aec->extended_filter_enabled,
+                  aec->normal_mu,
+                  aec->normal_error_threshold,
+                  aec->xfBuf,
+                  nearend_ptr,
+                  aec->xPow,
+                  aec->wfBuf,
+                  &aec->linoutlevel,
+                  echo_subtractor_output);
+
+  RTC_AEC_DEBUG_WAV_WRITE(aec->outLinearFile, echo_subtractor_output, PART_LEN);
+
+  // Perform echo suppression.
+  EchoSuppression(aec, farend_ptr, echo_subtractor_output, output, outputH_ptr);
 
   if (aec->metricsMode == 1) {
     // Update power levels and echo metrics
@@ -1383,7 +1418,6 @@ static void ProcessBlock(AecCore* aec) {
     WebRtc_WriteBuffer(aec->outFrBufH[i], outputH[i], PART_LEN);
   }
 
-  RTC_AEC_DEBUG_WAV_WRITE(aec->outLinearFile, e, PART_LEN);
   RTC_AEC_DEBUG_WAV_WRITE(aec->outFile, output, PART_LEN);
 }
 
@@ -1422,26 +1456,20 @@ AecCore* WebRtcAec_CreateAec() {
   }
 
   // Create far-end buffers.
-  aec->far_buf =
-      WebRtc_CreateBuffer(kBufSizePartitions, sizeof(float) * 2 * PART_LEN1);
-  if (!aec->far_buf) {
-    WebRtcAec_FreeAec(aec);
-    return NULL;
-  }
-  aec->far_buf_windowed =
-      WebRtc_CreateBuffer(kBufSizePartitions, sizeof(float) * 2 * PART_LEN1);
-  if (!aec->far_buf_windowed) {
-    WebRtcAec_FreeAec(aec);
-    return NULL;
-  }
-#ifdef WEBRTC_AEC_DEBUG_DUMP
-  aec->instance_index = webrtc_aec_instance_count;
+  // For bit exactness with legacy code, each element in |far_time_buf| is
+  // supposed to contain |PART_LEN2| samples with an overlap of |PART_LEN|
+  // samples from the last frame.
+  // TODO(minyue): reduce |far_time_buf| to non-overlapped |PART_LEN| samples.
   aec->far_time_buf =
-      WebRtc_CreateBuffer(kBufSizePartitions, sizeof(float) * PART_LEN);
+      WebRtc_CreateBuffer(kBufSizePartitions, sizeof(float) * PART_LEN2);
   if (!aec->far_time_buf) {
     WebRtcAec_FreeAec(aec);
     return NULL;
   }
+
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+  aec->instance_index = webrtc_aec_instance_count;
+
   aec->farFile = aec->nearFile = aec->outFile = aec->outLinearFile = NULL;
   aec->debug_dump_count = 0;
 #endif
@@ -1477,6 +1505,10 @@ AecCore* WebRtcAec_CreateAec() {
   WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppress;
   WebRtcAec_ComfortNoise = ComfortNoise;
   WebRtcAec_SubbandCoherence = SubbandCoherence;
+  WebRtcAec_StoreAsComplex = StoreAsComplex;
+  WebRtcAec_PartitionDelay = PartitionDelay;
+  WebRtcAec_WindowData = WindowData;
+
 
 #if defined(WEBRTC_ARCH_X86_FAMILY)
   if (WebRtc_GetCPUInfo(kSSE2)) {
@@ -1515,11 +1547,8 @@ void WebRtcAec_FreeAec(AecCore* aec) {
     WebRtc_FreeBuffer(aec->outFrBufH[i]);
   }
 
-  WebRtc_FreeBuffer(aec->far_buf);
-  WebRtc_FreeBuffer(aec->far_buf_windowed);
-#ifdef WEBRTC_AEC_DEBUG_DUMP
   WebRtc_FreeBuffer(aec->far_time_buf);
-#endif
+
   RTC_AEC_DEBUG_WAV_CLOSE(aec->farFile);
   RTC_AEC_DEBUG_WAV_CLOSE(aec->nearFile);
   RTC_AEC_DEBUG_WAV_CLOSE(aec->outFile);
@@ -1555,10 +1584,9 @@ int WebRtcAec_InitAec(AecCore* aec, int sampFreq) {
   }
 
   // Initialize far-end buffers.
-  WebRtc_InitBuffer(aec->far_buf);
-  WebRtc_InitBuffer(aec->far_buf_windowed);
-#ifdef WEBRTC_AEC_DEBUG_DUMP
   WebRtc_InitBuffer(aec->far_time_buf);
+
+#ifdef WEBRTC_AEC_DEBUG_DUMP
   {
     int process_rate = sampFreq > 16000 ? 16000 : sampFreq;
     RTC_AEC_DEBUG_WAV_REOPEN("aec_far", aec->instance_index,
@@ -1693,6 +1721,8 @@ int WebRtcAec_InitAec(AecCore* aec, int sampFreq) {
   aec->seed = 777;
   aec->delayEstCtr = 0;
 
+  aec->extreme_filter_divergence = 0;
+
   // Metrics disabled by default
   aec->metricsMode = 0;
   InitMetrics(aec);
@@ -1700,27 +1730,22 @@ int WebRtcAec_InitAec(AecCore* aec, int sampFreq) {
   return 0;
 }
 
-void WebRtcAec_BufferFarendPartition(AecCore* aec, const float* farend) {
-  float fft[PART_LEN2];
-  float xf[2][PART_LEN1];
 
+// For bit exactness with a legacy code, |farend| is supposed to contain
+// |PART_LEN2| samples with an overlap of |PART_LEN| samples from the last
+// frame.
+// TODO(minyue): reduce |farend| to non-overlapped |PART_LEN| samples.
+void WebRtcAec_BufferFarendPartition(AecCore* aec, const float* farend) {
   // Check if the buffer is full, and in that case flush the oldest data.
-  if (WebRtc_available_write(aec->far_buf) < 1) {
+  if (WebRtc_available_write(aec->far_time_buf) < 1) {
     WebRtcAec_MoveFarReadPtr(aec, 1);
   }
-  // Convert far-end partition to the frequency domain without windowing.
-  memcpy(fft, farend, sizeof(float) * PART_LEN2);
-  TimeToFrequency(fft, xf, 0);
-  WebRtc_WriteBuffer(aec->far_buf, &xf[0][0], 1);
 
-  // Convert far-end partition to the frequency domain with windowing.
-  memcpy(fft, farend, sizeof(float) * PART_LEN2);
-  TimeToFrequency(fft, xf, 1);
-  WebRtc_WriteBuffer(aec->far_buf_windowed, &xf[0][0], 1);
+  WebRtc_WriteBuffer(aec->far_time_buf, farend, 1);
 }
 
 int WebRtcAec_MoveFarReadPtr(AecCore* aec, int elements) {
-  int elements_moved = MoveFarReadPtrWithoutSystemDelayUpdate(aec, elements);
+  int elements_moved = WebRtc_MoveReadPtr(aec->far_time_buf, elements);
   aec->system_delay -= elements_moved * PART_LEN;
   return elements_moved;
 }
@@ -1794,14 +1819,14 @@ void WebRtcAec_ProcessFrames(AecCore* aec,
       // rounding, like -16.
       int move_elements = (aec->knownDelay - knownDelay - 32) / PART_LEN;
       int moved_elements =
-          MoveFarReadPtrWithoutSystemDelayUpdate(aec, move_elements);
+          WebRtc_MoveReadPtr(aec->far_time_buf, move_elements);
       aec->knownDelay -= moved_elements * PART_LEN;
     } else {
       // 2 b) Apply signal based delay correction.
       int move_elements = SignalBasedDelayCorrection(aec);
       int moved_elements =
-          MoveFarReadPtrWithoutSystemDelayUpdate(aec, move_elements);
-      int far_near_buffer_diff = WebRtc_available_read(aec->far_buf) -
+          WebRtc_MoveReadPtr(aec->far_time_buf, move_elements);
+      int far_near_buffer_diff = WebRtc_available_read(aec->far_time_buf) -
           WebRtc_available_read(aec->nearFrBuf) / PART_LEN;
       WebRtc_SoftResetDelayEstimator(aec->delay_estimator, moved_elements);
       WebRtc_SoftResetDelayEstimatorFarend(aec->delay_estimator_farend,
@@ -1880,10 +1905,6 @@ void WebRtcAec_GetEchoStats(AecCore* self,
   *a_nlp = self->aNlp;
 }
 
-#ifdef WEBRTC_AEC_DEBUG_DUMP
-void* WebRtcAec_far_time_buf(AecCore* self) { return self->far_time_buf; }
-#endif
-
 void WebRtcAec_SetConfigCore(AecCore* self,
                              int nlp_mode,
                              int metrics_mode,
diff --git a/webrtc/modules/audio_processing/aec/aec_core_internal.h b/webrtc/modules/audio_processing/aec/aec_core_internal.h
index 2de028379b..3809c82567 100644
--- a/webrtc/modules/audio_processing/aec/aec_core_internal.h
+++ b/webrtc/modules/audio_processing/aec/aec_core_internal.h
@@ -95,8 +95,8 @@ struct AecCore {
 
   int xfBufBlockPos;
 
-  RingBuffer* far_buf;
-  RingBuffer* far_buf_windowed;
+  RingBuffer* far_time_buf;
+
   int system_delay;  // Current system delay buffered in AEC.
 
   int mult;  // sampling frequency multiple
@@ -152,6 +152,10 @@ struct AecCore {
   // Runtime selection of number of filter partitions.
   int num_partitions;
 
+  // Flag that extreme filter divergence has been detected by the Echo
+  // Suppressor.
+  int extreme_filter_divergence;
+
 #ifdef WEBRTC_AEC_DEBUG_DUMP
   // Sequence number of this AEC instance, so that different instances can
   // choose different dump file names.
@@ -161,7 +165,6 @@ struct AecCore {
   // each time.
   int debug_dump_count;
 
-  RingBuffer* far_time_buf;
   rtc_WavWriter* farFile;
   rtc_WavWriter* nearFile;
   rtc_WavWriter* outFile;
@@ -170,13 +173,25 @@ struct AecCore {
 #endif
 };
 
-typedef void (*WebRtcAecFilterFar)(AecCore* aec, float yf[2][PART_LEN1]);
+typedef void (*WebRtcAecFilterFar)(
+    int num_partitions,
+    int x_fft_buf_block_pos,
+    float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
+    float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
+    float y_fft[2][PART_LEN1]);
 extern WebRtcAecFilterFar WebRtcAec_FilterFar;
-typedef void (*WebRtcAecScaleErrorSignal)(AecCore* aec, float ef[2][PART_LEN1]);
-extern WebRtcAecScaleErrorSignal WebRtcAec_ScaleErrorSignal;
-typedef void (*WebRtcAecFilterAdaptation)(AecCore* aec,
-                                          float* fft,
+typedef void (*WebRtcAecScaleErrorSignal)(int extended_filter_enabled,
+                                          float normal_mu,
+                                          float normal_error_threshold,
+                                          float x_pow[PART_LEN1],
                                           float ef[2][PART_LEN1]);
+extern WebRtcAecScaleErrorSignal WebRtcAec_ScaleErrorSignal;
+typedef void (*WebRtcAecFilterAdaptation)(
+    int num_partitions,
+    int x_fft_buf_block_pos,
+    float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
+    float e_fft[2][PART_LEN1],
+    float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]);
 extern WebRtcAecFilterAdaptation WebRtcAec_FilterAdaptation;
 typedef void (*WebRtcAecOverdriveAndSuppress)(AecCore* aec,
                                               float hNl[PART_LEN1],
@@ -186,17 +201,29 @@ extern WebRtcAecOverdriveAndSuppress WebRtcAec_OverdriveAndSuppress;
 
 typedef void (*WebRtcAecComfortNoise)(AecCore* aec,
                                       float efw[2][PART_LEN1],
-                                      complex_t* comfortNoiseHband,
+                                      float comfortNoiseHband[2][PART_LEN1],
                                       const float* noisePow,
                                       const float* lambda);
 extern WebRtcAecComfortNoise WebRtcAec_ComfortNoise;
 
 typedef void (*WebRtcAecSubBandCoherence)(AecCore* aec,
                                           float efw[2][PART_LEN1],
+                                          float dfw[2][PART_LEN1],
                                           float xfw[2][PART_LEN1],
                                           float* fft,
                                           float* cohde,
-                                          float* cohxd);
+                                          float* cohxd,
+                                          int* extreme_filter_divergence);
 extern WebRtcAecSubBandCoherence WebRtcAec_SubbandCoherence;
 
+typedef int (*WebRtcAecPartitionDelay)(const AecCore* aec);
+extern WebRtcAecPartitionDelay WebRtcAec_PartitionDelay;
+
+typedef void (*WebRtcAecStoreAsComplex)(const float* data,
+                                        float data_complex[2][PART_LEN1]);
+extern WebRtcAecStoreAsComplex WebRtcAec_StoreAsComplex;
+
+typedef void (*WebRtcAecWindowData)(float* x_windowed, const float* x);
+extern WebRtcAecWindowData WebRtcAec_WindowData;
+
 #endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_INTERNAL_H_
diff --git a/webrtc/modules/audio_processing/aec/aec_core_mips.c b/webrtc/modules/audio_processing/aec/aec_core_mips.c
index bb33087aee..035a4b76af 100644
--- a/webrtc/modules/audio_processing/aec/aec_core_mips.c
+++ b/webrtc/modules/audio_processing/aec/aec_core_mips.c
@@ -20,13 +20,12 @@
 #include "webrtc/modules/audio_processing/aec/aec_core_internal.h"
 #include "webrtc/modules/audio_processing/aec/aec_rdft.h"
 
-static const int flagHbandCn = 1; // flag for adding comfort noise in H band
 extern const float WebRtcAec_weightCurve[65];
 extern const float WebRtcAec_overDriveCurve[65];
 
 void WebRtcAec_ComfortNoise_mips(AecCore* aec,
                                  float efw[2][PART_LEN1],
-                                 complex_t* comfortNoiseHband,
+                                 float comfortNoiseHband[2][PART_LEN1],
                                  const float* noisePow,
                                  const float* lambda) {
   int i, num;
@@ -274,7 +273,7 @@ void WebRtcAec_ComfortNoise_mips(AecCore* aec,
   noiseAvg = 0.0;
   tmpAvg = 0.0;
   num = 0;
-  if ((aec->sampFreq == 32000 || aec->sampFreq == 48000) && flagHbandCn == 1) {
+  if (aec->num_bands > 1) {
     for (i = 0; i < PART_LEN; i++) {
       rand[i] = ((float)randW16[i]) / 32768;
     }
@@ -314,27 +313,35 @@ void WebRtcAec_ComfortNoise_mips(AecCore* aec,
 
     for (i = 0; i < PART_LEN1; i++) {
       // Use average NLP weight for H band
-      comfortNoiseHband[i][0] = tmpAvg * u[i][0];
-      comfortNoiseHband[i][1] = tmpAvg * u[i][1];
+      comfortNoiseHband[0][i] = tmpAvg * u[i][0];
+      comfortNoiseHband[1][i] = tmpAvg * u[i][1];
     }
+  } else {
+    memset(comfortNoiseHband, 0,
+           2 * PART_LEN1 * sizeof(comfortNoiseHband[0][0]));
   }
 }
 
-void WebRtcAec_FilterFar_mips(AecCore* aec, float yf[2][PART_LEN1]) {
+void WebRtcAec_FilterFar_mips(
+    int num_partitions,
+    int x_fft_buf_block_pos,
+    float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
+    float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
+    float y_fft[2][PART_LEN1]) {
   int i;
-  for (i = 0; i < aec->num_partitions; i++) {
-    int xPos = (i + aec->xfBufBlockPos) * PART_LEN1;
+  for (i = 0; i < num_partitions; i++) {
+    int xPos = (i + x_fft_buf_block_pos) * PART_LEN1;
     int pos = i * PART_LEN1;
     // Check for wrap
-    if (i + aec->xfBufBlockPos >=  aec->num_partitions) {
-      xPos -=  aec->num_partitions * (PART_LEN1);
+    if (i + x_fft_buf_block_pos >=  num_partitions) {
+      xPos -=  num_partitions * (PART_LEN1);
     }
-    float* yf0 = yf[0];
-    float* yf1 = yf[1];
-    float* aRe = aec->xfBuf[0] + xPos;
-    float* aIm = aec->xfBuf[1] + xPos;
-    float* bRe = aec->wfBuf[0] + pos;
-    float* bIm = aec->wfBuf[1] + pos;
+    float* yf0 = y_fft[0];
+    float* yf1 = y_fft[1];
+    float* aRe = x_fft_buf[0] + xPos;
+    float* aIm = x_fft_buf[1] + xPos;
+    float* bRe = h_fft_buf[0] + pos;
+    float* bIm = h_fft_buf[1] + pos;
     float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13;
     int len = PART_LEN1 >> 1;
 
@@ -432,23 +439,27 @@ void WebRtcAec_FilterFar_mips(AecCore* aec, float yf[2][PART_LEN1]) {
   }
 }
 
-void WebRtcAec_FilterAdaptation_mips(AecCore* aec,
-                                     float* fft,
-                                     float ef[2][PART_LEN1]) {
+void WebRtcAec_FilterAdaptation_mips(
+    int num_partitions,
+    int x_fft_buf_block_pos,
+    float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
+    float e_fft[2][PART_LEN1],
+    float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) {
+  float fft[PART_LEN2];
   int i;
-  for (i = 0; i < aec->num_partitions; i++) {
-    int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1);
+  for (i = 0; i < num_partitions; i++) {
+    int xPos = (i + x_fft_buf_block_pos)*(PART_LEN1);
     int pos;
     // Check for wrap
-    if (i + aec->xfBufBlockPos >= aec->num_partitions) {
-      xPos -= aec->num_partitions * PART_LEN1;
+    if (i + x_fft_buf_block_pos >= num_partitions) {
+      xPos -= num_partitions * PART_LEN1;
     }
 
     pos = i * PART_LEN1;
-    float* aRe = aec->xfBuf[0] + xPos;
-    float* aIm = aec->xfBuf[1] + xPos;
-    float* bRe = ef[0];
-    float* bIm = ef[1];
+    float* aRe = x_fft_buf[0] + xPos;
+    float* aIm = x_fft_buf[1] + xPos;
+    float* bRe = e_fft[0];
+    float* bIm = e_fft[1];
     float* fft_tmp;
 
     float f0, f1, f2, f3, f4, f5, f6 ,f7, f8, f9, f10, f11, f12;
@@ -573,8 +584,8 @@ void WebRtcAec_FilterAdaptation_mips(AecCore* aec,
       );
     }
     aec_rdft_forward_128(fft);
-    aRe = aec->wfBuf[0] + pos;
-    aIm = aec->wfBuf[1] + pos;
+    aRe = h_fft_buf[0] + pos;
+    aIm = h_fft_buf[1] + pos;
     __asm __volatile (
       ".set     push                                    \n\t"
       ".set     noreorder                               \n\t"
@@ -699,15 +710,18 @@ void WebRtcAec_OverdriveAndSuppress_mips(AecCore* aec,
   }
 }
 
-void WebRtcAec_ScaleErrorSignal_mips(AecCore* aec, float ef[2][PART_LEN1]) {
-  const float mu = aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu;
-  const float error_threshold = aec->extended_filter_enabled
+void WebRtcAec_ScaleErrorSignal_mips(int extended_filter_enabled,
+                                     float normal_mu,
+                                     float normal_error_threshold,
+                                     float x_pow[PART_LEN1],
+                                     float ef[2][PART_LEN1]) {
+  const float mu = extended_filter_enabled ? kExtendedMu : normal_mu;
+  const float error_threshold = extended_filter_enabled
                                     ? kExtendedErrorThreshold
-                                    : aec->normal_error_threshold;
+                                    : normal_error_threshold;
   int len = (PART_LEN1);
   float* ef0 = ef[0];
   float* ef1 = ef[1];
-  float* xPow = aec->xPow;
   float fac1 = 1e-10f;
   float err_th2 = error_threshold * error_threshold;
   float f0, f1, f2;
@@ -719,7 +733,7 @@ void WebRtcAec_ScaleErrorSignal_mips(AecCore* aec, float ef[2][PART_LEN1]) {
     ".set       push                                   \n\t"
     ".set       noreorder                              \n\t"
    "1:                                                 \n\t"
-    "lwc1       %[f0],     0(%[xPow])                  \n\t"
+    "lwc1       %[f0],     0(%[x_pow])                 \n\t"
     "lwc1       %[f1],     0(%[ef0])                   \n\t"
     "lwc1       %[f2],     0(%[ef1])                   \n\t"
     "add.s      %[f0],     %[f0],       %[fac1]        \n\t"
@@ -747,7 +761,7 @@ void WebRtcAec_ScaleErrorSignal_mips(AecCore* aec, float ef[2][PART_LEN1]) {
     "swc1       %[f1],     0(%[ef0])                   \n\t"
     "swc1       %[f2],     0(%[ef1])                   \n\t"
     "addiu      %[len],    %[len],      -1             \n\t"
-    "addiu      %[xPow],   %[xPow],     4              \n\t"
+    "addiu      %[x_pow],  %[x_pow],    4              \n\t"
     "addiu      %[ef0],    %[ef0],      4              \n\t"
     "bgtz       %[len],    1b                          \n\t"
     " addiu     %[ef1],    %[ef1],      4              \n\t"
@@ -756,7 +770,7 @@ void WebRtcAec_ScaleErrorSignal_mips(AecCore* aec, float ef[2][PART_LEN1]) {
 #if !defined(MIPS32_R2_LE)
       [f3] "=&f" (f3),
 #endif
-      [xPow] "+r" (xPow), [ef0] "+r" (ef0), [ef1] "+r" (ef1),
+      [x_pow] "+r" (x_pow), [ef0] "+r" (ef0), [ef1] "+r" (ef1),
       [len] "+r" (len)
     : [fac1] "f" (fac1), [err_th2] "f" (err_th2), [mu] "f" (mu),
       [err_th] "f" (error_threshold)
@@ -771,4 +785,3 @@ void WebRtcAec_InitAec_mips(void) {
   WebRtcAec_ComfortNoise = WebRtcAec_ComfortNoise_mips;
   WebRtcAec_OverdriveAndSuppress = WebRtcAec_OverdriveAndSuppress_mips;
 }
-
diff --git a/webrtc/modules/audio_processing/aec/aec_core_neon.c b/webrtc/modules/audio_processing/aec/aec_core_neon.c
index 9a677aaa67..7898ab2543 100644
--- a/webrtc/modules/audio_processing/aec/aec_core_neon.c
+++ b/webrtc/modules/audio_processing/aec/aec_core_neon.c
@@ -34,45 +34,49 @@ __inline static float MulIm(float aRe, float aIm, float bRe, float bIm) {
   return aRe * bIm + aIm * bRe;
 }
 
-static void FilterFarNEON(AecCore* aec, float yf[2][PART_LEN1]) {
+static void FilterFarNEON(
+    int num_partitions,
+    int x_fft_buf_block_pos,
+    float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
+    float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
+    float y_fft[2][PART_LEN1]) {
   int i;
-  const int num_partitions = aec->num_partitions;
   for (i = 0; i < num_partitions; i++) {
     int j;
-    int xPos = (i + aec->xfBufBlockPos) * PART_LEN1;
+    int xPos = (i + x_fft_buf_block_pos) * PART_LEN1;
     int pos = i * PART_LEN1;
     // Check for wrap
-    if (i + aec->xfBufBlockPos >= num_partitions) {
+    if (i + x_fft_buf_block_pos >= num_partitions) {
       xPos -= num_partitions * PART_LEN1;
     }
 
     // vectorized code (four at once)
     for (j = 0; j + 3 < PART_LEN1; j += 4) {
-      const float32x4_t xfBuf_re = vld1q_f32(&aec->xfBuf[0][xPos + j]);
-      const float32x4_t xfBuf_im = vld1q_f32(&aec->xfBuf[1][xPos + j]);
-      const float32x4_t wfBuf_re = vld1q_f32(&aec->wfBuf[0][pos + j]);
-      const float32x4_t wfBuf_im = vld1q_f32(&aec->wfBuf[1][pos + j]);
-      const float32x4_t yf_re = vld1q_f32(&yf[0][j]);
-      const float32x4_t yf_im = vld1q_f32(&yf[1][j]);
-      const float32x4_t a = vmulq_f32(xfBuf_re, wfBuf_re);
-      const float32x4_t e = vmlsq_f32(a, xfBuf_im, wfBuf_im);
-      const float32x4_t c = vmulq_f32(xfBuf_re, wfBuf_im);
-      const float32x4_t f = vmlaq_f32(c, xfBuf_im, wfBuf_re);
-      const float32x4_t g = vaddq_f32(yf_re, e);
-      const float32x4_t h = vaddq_f32(yf_im, f);
-      vst1q_f32(&yf[0][j], g);
-      vst1q_f32(&yf[1][j], h);
+      const float32x4_t x_fft_buf_re = vld1q_f32(&x_fft_buf[0][xPos + j]);
+      const float32x4_t x_fft_buf_im = vld1q_f32(&x_fft_buf[1][xPos + j]);
+      const float32x4_t h_fft_buf_re = vld1q_f32(&h_fft_buf[0][pos + j]);
+      const float32x4_t h_fft_buf_im = vld1q_f32(&h_fft_buf[1][pos + j]);
+      const float32x4_t y_fft_re = vld1q_f32(&y_fft[0][j]);
+      const float32x4_t y_fft_im = vld1q_f32(&y_fft[1][j]);
+      const float32x4_t a = vmulq_f32(x_fft_buf_re, h_fft_buf_re);
+      const float32x4_t e = vmlsq_f32(a, x_fft_buf_im, h_fft_buf_im);
+      const float32x4_t c = vmulq_f32(x_fft_buf_re, h_fft_buf_im);
+      const float32x4_t f = vmlaq_f32(c, x_fft_buf_im, h_fft_buf_re);
+      const float32x4_t g = vaddq_f32(y_fft_re, e);
+      const float32x4_t h = vaddq_f32(y_fft_im, f);
+      vst1q_f32(&y_fft[0][j], g);
+      vst1q_f32(&y_fft[1][j], h);
     }
     // scalar code for the remaining items.
     for (; j < PART_LEN1; j++) {
-      yf[0][j] += MulRe(aec->xfBuf[0][xPos + j],
-                        aec->xfBuf[1][xPos + j],
-                        aec->wfBuf[0][pos + j],
-                        aec->wfBuf[1][pos + j]);
-      yf[1][j] += MulIm(aec->xfBuf[0][xPos + j],
-                        aec->xfBuf[1][xPos + j],
-                        aec->wfBuf[0][pos + j],
-                        aec->wfBuf[1][pos + j]);
+      y_fft[0][j] += MulRe(x_fft_buf[0][xPos + j],
+                           x_fft_buf[1][xPos + j],
+                           h_fft_buf[0][pos + j],
+                           h_fft_buf[1][pos + j]);
+      y_fft[1][j] += MulIm(x_fft_buf[0][xPos + j],
+                           x_fft_buf[1][xPos + j],
+                           h_fft_buf[0][pos + j],
+                           h_fft_buf[1][pos + j]);
     }
   }
 }
@@ -122,20 +126,24 @@ static float32x4_t vsqrtq_f32(float32x4_t s) {
 }
 #endif  // WEBRTC_ARCH_ARM64
 
-static void ScaleErrorSignalNEON(AecCore* aec, float ef[2][PART_LEN1]) {
-  const float mu = aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu;
-  const float error_threshold = aec->extended_filter_enabled ?
-      kExtendedErrorThreshold : aec->normal_error_threshold;
+static void ScaleErrorSignalNEON(int extended_filter_enabled,
+                                 float normal_mu,
+                                 float normal_error_threshold,
+                                 float x_pow[PART_LEN1],
+                                 float ef[2][PART_LEN1]) {
+  const float mu = extended_filter_enabled ? kExtendedMu : normal_mu;
+  const float error_threshold = extended_filter_enabled ?
+      kExtendedErrorThreshold : normal_error_threshold;
   const float32x4_t k1e_10f = vdupq_n_f32(1e-10f);
   const float32x4_t kMu = vmovq_n_f32(mu);
   const float32x4_t kThresh = vmovq_n_f32(error_threshold);
   int i;
   // vectorized code (four at once)
   for (i = 0; i + 3 < PART_LEN1; i += 4) {
-    const float32x4_t xPow = vld1q_f32(&aec->xPow[i]);
+    const float32x4_t x_pow_local = vld1q_f32(&x_pow[i]);
     const float32x4_t ef_re_base = vld1q_f32(&ef[0][i]);
     const float32x4_t ef_im_base = vld1q_f32(&ef[1][i]);
-    const float32x4_t xPowPlus = vaddq_f32(xPow, k1e_10f);
+    const float32x4_t xPowPlus = vaddq_f32(x_pow_local, k1e_10f);
     float32x4_t ef_re = vdivq_f32(ef_re_base, xPowPlus);
     float32x4_t ef_im = vdivq_f32(ef_im_base, xPowPlus);
     const float32x4_t ef_re2 = vmulq_f32(ef_re, ef_re);
@@ -162,8 +170,8 @@ static void ScaleErrorSignalNEON(AecCore* aec, float ef[2][PART_LEN1]) {
   // scalar code for the remaining items.
   for (; i < PART_LEN1; i++) {
     float abs_ef;
-    ef[0][i] /= (aec->xPow[i] + 1e-10f);
-    ef[1][i] /= (aec->xPow[i] + 1e-10f);
+    ef[0][i] /= (x_pow[i] + 1e-10f);
+    ef[1][i] /= (x_pow[i] + 1e-10f);
     abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]);
 
     if (abs_ef > error_threshold) {
@@ -178,34 +186,37 @@ static void ScaleErrorSignalNEON(AecCore* aec, float ef[2][PART_LEN1]) {
   }
 }
 
-static void FilterAdaptationNEON(AecCore* aec,
-                                 float* fft,
-                                 float ef[2][PART_LEN1]) {
+static void FilterAdaptationNEON(
+    int num_partitions,
+    int x_fft_buf_block_pos,
+    float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
+    float e_fft[2][PART_LEN1],
+    float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) {
+  float fft[PART_LEN2];
   int i;
-  const int num_partitions = aec->num_partitions;
   for (i = 0; i < num_partitions; i++) {
-    int xPos = (i + aec->xfBufBlockPos) * PART_LEN1;
+    int xPos = (i + x_fft_buf_block_pos) * PART_LEN1;
     int pos = i * PART_LEN1;
     int j;
     // Check for wrap
-    if (i + aec->xfBufBlockPos >= num_partitions) {
+    if (i + x_fft_buf_block_pos >= num_partitions) {
       xPos -= num_partitions * PART_LEN1;
     }
 
     // Process the whole array...
     for (j = 0; j < PART_LEN; j += 4) {
-      // Load xfBuf and ef.
-      const float32x4_t xfBuf_re = vld1q_f32(&aec->xfBuf[0][xPos + j]);
-      const float32x4_t xfBuf_im = vld1q_f32(&aec->xfBuf[1][xPos + j]);
-      const float32x4_t ef_re = vld1q_f32(&ef[0][j]);
-      const float32x4_t ef_im = vld1q_f32(&ef[1][j]);
-      // Calculate the product of conjugate(xfBuf) by ef.
+      // Load x_fft_buf and e_fft.
+      const float32x4_t x_fft_buf_re = vld1q_f32(&x_fft_buf[0][xPos + j]);
+      const float32x4_t x_fft_buf_im = vld1q_f32(&x_fft_buf[1][xPos + j]);
+      const float32x4_t e_fft_re = vld1q_f32(&e_fft[0][j]);
+      const float32x4_t e_fft_im = vld1q_f32(&e_fft[1][j]);
+      // Calculate the product of conjugate(x_fft_buf) by e_fft.
       //   re(conjugate(a) * b) = aRe * bRe + aIm * bIm
       //   im(conjugate(a) * b)=  aRe * bIm - aIm * bRe
-      const float32x4_t a = vmulq_f32(xfBuf_re, ef_re);
-      const float32x4_t e = vmlaq_f32(a, xfBuf_im, ef_im);
-      const float32x4_t c = vmulq_f32(xfBuf_re, ef_im);
-      const float32x4_t f = vmlsq_f32(c, xfBuf_im, ef_re);
+      const float32x4_t a = vmulq_f32(x_fft_buf_re, e_fft_re);
+      const float32x4_t e = vmlaq_f32(a, x_fft_buf_im, e_fft_im);
+      const float32x4_t c = vmulq_f32(x_fft_buf_re, e_fft_im);
+      const float32x4_t f = vmlsq_f32(c, x_fft_buf_im, e_fft_re);
       // Interleave real and imaginary parts.
       const float32x4x2_t g_n_h = vzipq_f32(e, f);
       // Store
@@ -213,10 +224,10 @@ static void FilterAdaptationNEON(AecCore* aec,
       vst1q_f32(&fft[2 * j + 4], g_n_h.val[1]);
     }
     // ... and fixup the first imaginary entry.
-    fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN],
-                   -aec->xfBuf[1][xPos + PART_LEN],
-                   ef[0][PART_LEN],
-                   ef[1][PART_LEN]);
+    fft[1] = MulRe(x_fft_buf[0][xPos + PART_LEN],
+                   -x_fft_buf[1][xPos + PART_LEN],
+                   e_fft[0][PART_LEN],
+                   e_fft[1][PART_LEN]);
 
     aec_rdft_inverse_128(fft);
     memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);
@@ -234,21 +245,21 @@ static void FilterAdaptationNEON(AecCore* aec,
     aec_rdft_forward_128(fft);
 
     {
-      const float wt1 = aec->wfBuf[1][pos];
-      aec->wfBuf[0][pos + PART_LEN] += fft[1];
+      const float wt1 = h_fft_buf[1][pos];
+      h_fft_buf[0][pos + PART_LEN] += fft[1];
       for (j = 0; j < PART_LEN; j += 4) {
-        float32x4_t wtBuf_re = vld1q_f32(&aec->wfBuf[0][pos + j]);
-        float32x4_t wtBuf_im = vld1q_f32(&aec->wfBuf[1][pos + j]);
+        float32x4_t wtBuf_re = vld1q_f32(&h_fft_buf[0][pos + j]);
+        float32x4_t wtBuf_im = vld1q_f32(&h_fft_buf[1][pos + j]);
         const float32x4_t fft0 = vld1q_f32(&fft[2 * j + 0]);
         const float32x4_t fft4 = vld1q_f32(&fft[2 * j + 4]);
         const float32x4x2_t fft_re_im = vuzpq_f32(fft0, fft4);
         wtBuf_re = vaddq_f32(wtBuf_re, fft_re_im.val[0]);
         wtBuf_im = vaddq_f32(wtBuf_im, fft_re_im.val[1]);
 
-        vst1q_f32(&aec->wfBuf[0][pos + j], wtBuf_re);
-        vst1q_f32(&aec->wfBuf[1][pos + j], wtBuf_im);
+        vst1q_f32(&h_fft_buf[0][pos + j], wtBuf_re);
+        vst1q_f32(&h_fft_buf[1][pos + j], wtBuf_im);
       }
-      aec->wfBuf[1][pos] = wt1;
+      h_fft_buf[1][pos] = wt1;
     }
   }
 }
@@ -442,7 +453,7 @@ static void OverdriveAndSuppressNEON(AecCore* aec,
   }
 }
 
-static int PartitionDelay(const AecCore* aec) {
+static int PartitionDelayNEON(const AecCore* aec) {
   // Measures the energy in each filter partition and returns the partition with
   // highest energy.
   // TODO(bjornv): Spread computational cost by computing one partition per
@@ -499,7 +510,8 @@ static int PartitionDelay(const AecCore* aec) {
 static void SmoothedPSD(AecCore* aec,
                         float efw[2][PART_LEN1],
                         float dfw[2][PART_LEN1],
-                        float xfw[2][PART_LEN1]) {
+                        float xfw[2][PART_LEN1],
+                        int* extreme_filter_divergence) {
   // Power estimate smoothing coefficients.
   const float* ptrGCoh = aec->extended_filter_enabled
       ? WebRtcAec_kExtendedSmoothingCoefficients[aec->mult - 1]
@@ -615,19 +627,16 @@ static void SmoothedPSD(AecCore* aec,
     seSum += aec->se[i];
   }
 
-  // Divergent filter safeguard.
+  // Divergent filter safeguard update.
   aec->divergeState = (aec->divergeState ? 1.05f : 1.0f) * seSum > sdSum;
 
-  if (aec->divergeState)
-    memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1);
-
-  // Reset if error is significantly larger than nearend (13 dB).
-  if (!aec->extended_filter_enabled && seSum > (19.95f * sdSum))
-    memset(aec->wfBuf, 0, sizeof(aec->wfBuf));
+  // Signal extreme filter divergence if the error is significantly larger
+  // than the nearend (13 dB).
+  *extreme_filter_divergence = (seSum > (19.95f * sdSum));
 }
 
 // Window time domain data to be used by the fft.
-__inline static void WindowData(float* x_windowed, const float* x) {
+static void WindowDataNEON(float* x_windowed, const float* x) {
   int i;
   for (i = 0; i < PART_LEN; i += 4) {
     const float32x4_t vec_Buf1 = vld1q_f32(&x[i]);
@@ -648,8 +657,8 @@ __inline static void WindowData(float* x_windowed, const float* x) {
 }
 
 // Puts fft output data into a complex valued array.
-__inline static void StoreAsComplex(const float* data,
-                                    float data_complex[2][PART_LEN1]) {
+static void StoreAsComplexNEON(const float* data,
+                               float data_complex[2][PART_LEN1]) {
   int i;
   for (i = 0; i < PART_LEN; i += 4) {
     const float32x4x2_t vec_data = vld2q_f32(&data[2 * i]);
@@ -665,32 +674,15 @@ __inline static void StoreAsComplex(const float* data,
 
 static void SubbandCoherenceNEON(AecCore* aec,
                                  float efw[2][PART_LEN1],
+                                 float dfw[2][PART_LEN1],
                                  float xfw[2][PART_LEN1],
                                  float* fft,
                                  float* cohde,
-                                 float* cohxd) {
-  float dfw[2][PART_LEN1];
+                                 float* cohxd,
+                                 int* extreme_filter_divergence) {
   int i;
 
-  if (aec->delayEstCtr == 0)
-    aec->delayIdx = PartitionDelay(aec);
-
-  // Use delayed far.
-  memcpy(xfw,
-         aec->xfwBuf + aec->delayIdx * PART_LEN1,
-         sizeof(xfw[0][0]) * 2 * PART_LEN1);
-
-  // Windowed near fft
-  WindowData(fft, aec->dBuf);
-  aec_rdft_forward_128(fft);
-  StoreAsComplex(fft, dfw);
-
-  // Windowed error fft
-  WindowData(fft, aec->eBuf);
-  aec_rdft_forward_128(fft);
-  StoreAsComplex(fft, efw);
-
-  SmoothedPSD(aec, efw, dfw, xfw);
+  SmoothedPSD(aec, efw, dfw, xfw, extreme_filter_divergence);
 
   {
     const float32x4_t vec_1eminus10 =  vdupq_n_f32(1e-10f);
@@ -732,5 +724,7 @@ void WebRtcAec_InitAec_neon(void) {
   WebRtcAec_FilterAdaptation = FilterAdaptationNEON;
   WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON;
   WebRtcAec_SubbandCoherence = SubbandCoherenceNEON;
+  WebRtcAec_StoreAsComplex = StoreAsComplexNEON;
+  WebRtcAec_PartitionDelay = PartitionDelayNEON;
+  WebRtcAec_WindowData = WindowDataNEON;
 }
-
diff --git a/webrtc/modules/audio_processing/aec/aec_core_sse2.c b/webrtc/modules/audio_processing/aec/aec_core_sse2.c
index b1bffcbb9f..f897a4c0c7 100644
--- a/webrtc/modules/audio_processing/aec/aec_core_sse2.c
+++ b/webrtc/modules/audio_processing/aec/aec_core_sse2.c
@@ -29,67 +29,76 @@ __inline static float MulIm(float aRe, float aIm, float bRe, float bIm) {
   return aRe * bIm + aIm * bRe;
 }
 
-static void FilterFarSSE2(AecCore* aec, float yf[2][PART_LEN1]) {
+static void FilterFarSSE2(
+    int num_partitions,
+    int x_fft_buf_block_pos,
+    float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
+    float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
+    float y_fft[2][PART_LEN1]) {
+
   int i;
-  const int num_partitions = aec->num_partitions;
   for (i = 0; i < num_partitions; i++) {
     int j;
-    int xPos = (i + aec->xfBufBlockPos) * PART_LEN1;
+    int xPos = (i + x_fft_buf_block_pos) * PART_LEN1;
     int pos = i * PART_LEN1;
     // Check for wrap
-    if (i + aec->xfBufBlockPos >= num_partitions) {
+    if (i + x_fft_buf_block_pos >= num_partitions) {
       xPos -= num_partitions * (PART_LEN1);
     }
 
     // vectorized code (four at once)
     for (j = 0; j + 3 < PART_LEN1; j += 4) {
-      const __m128 xfBuf_re = _mm_loadu_ps(&aec->xfBuf[0][xPos + j]);
-      const __m128 xfBuf_im = _mm_loadu_ps(&aec->xfBuf[1][xPos + j]);
-      const __m128 wfBuf_re = _mm_loadu_ps(&aec->wfBuf[0][pos + j]);
-      const __m128 wfBuf_im = _mm_loadu_ps(&aec->wfBuf[1][pos + j]);
-      const __m128 yf_re = _mm_loadu_ps(&yf[0][j]);
-      const __m128 yf_im = _mm_loadu_ps(&yf[1][j]);
-      const __m128 a = _mm_mul_ps(xfBuf_re, wfBuf_re);
-      const __m128 b = _mm_mul_ps(xfBuf_im, wfBuf_im);
-      const __m128 c = _mm_mul_ps(xfBuf_re, wfBuf_im);
-      const __m128 d = _mm_mul_ps(xfBuf_im, wfBuf_re);
+      const __m128 x_fft_buf_re = _mm_loadu_ps(&x_fft_buf[0][xPos + j]);
+      const __m128 x_fft_buf_im = _mm_loadu_ps(&x_fft_buf[1][xPos + j]);
+      const __m128 h_fft_buf_re = _mm_loadu_ps(&h_fft_buf[0][pos + j]);
+      const __m128 h_fft_buf_im = _mm_loadu_ps(&h_fft_buf[1][pos + j]);
+      const __m128 y_fft_re = _mm_loadu_ps(&y_fft[0][j]);
+      const __m128 y_fft_im = _mm_loadu_ps(&y_fft[1][j]);
+      const __m128 a = _mm_mul_ps(x_fft_buf_re, h_fft_buf_re);
+      const __m128 b = _mm_mul_ps(x_fft_buf_im, h_fft_buf_im);
+      const __m128 c = _mm_mul_ps(x_fft_buf_re, h_fft_buf_im);
+      const __m128 d = _mm_mul_ps(x_fft_buf_im, h_fft_buf_re);
       const __m128 e = _mm_sub_ps(a, b);
       const __m128 f = _mm_add_ps(c, d);
-      const __m128 g = _mm_add_ps(yf_re, e);
-      const __m128 h = _mm_add_ps(yf_im, f);
-      _mm_storeu_ps(&yf[0][j], g);
-      _mm_storeu_ps(&yf[1][j], h);
+      const __m128 g = _mm_add_ps(y_fft_re, e);
+      const __m128 h = _mm_add_ps(y_fft_im, f);
+      _mm_storeu_ps(&y_fft[0][j], g);
+      _mm_storeu_ps(&y_fft[1][j], h);
     }
     // scalar code for the remaining items.
     for (; j < PART_LEN1; j++) {
-      yf[0][j] += MulRe(aec->xfBuf[0][xPos + j],
-                        aec->xfBuf[1][xPos + j],
-                        aec->wfBuf[0][pos + j],
-                        aec->wfBuf[1][pos + j]);
-      yf[1][j] += MulIm(aec->xfBuf[0][xPos + j],
-                        aec->xfBuf[1][xPos + j],
-                        aec->wfBuf[0][pos + j],
-                        aec->wfBuf[1][pos + j]);
+      y_fft[0][j] += MulRe(x_fft_buf[0][xPos + j],
+                           x_fft_buf[1][xPos + j],
+                           h_fft_buf[0][pos + j],
+                           h_fft_buf[1][pos + j]);
+      y_fft[1][j] += MulIm(x_fft_buf[0][xPos + j],
+                           x_fft_buf[1][xPos + j],
+                           h_fft_buf[0][pos + j],
+                           h_fft_buf[1][pos + j]);
     }
   }
 }
 
-static void ScaleErrorSignalSSE2(AecCore* aec, float ef[2][PART_LEN1]) {
+static void ScaleErrorSignalSSE2(int extended_filter_enabled,
+                                 float normal_mu,
+                                 float normal_error_threshold,
+                                 float x_pow[PART_LEN1],
+                                 float ef[2][PART_LEN1]) {
   const __m128 k1e_10f = _mm_set1_ps(1e-10f);
-  const __m128 kMu = aec->extended_filter_enabled ? _mm_set1_ps(kExtendedMu)
-                                                  : _mm_set1_ps(aec->normal_mu);
-  const __m128 kThresh = aec->extended_filter_enabled
+  const __m128 kMu = extended_filter_enabled ? _mm_set1_ps(kExtendedMu)
+      : _mm_set1_ps(normal_mu);
+  const __m128 kThresh = extended_filter_enabled
                              ? _mm_set1_ps(kExtendedErrorThreshold)
-                             : _mm_set1_ps(aec->normal_error_threshold);
+                             : _mm_set1_ps(normal_error_threshold);
 
   int i;
   // vectorized code (four at once)
   for (i = 0; i + 3 < PART_LEN1; i += 4) {
-    const __m128 xPow = _mm_loadu_ps(&aec->xPow[i]);
+    const __m128 x_pow_local = _mm_loadu_ps(&x_pow[i]);
     const __m128 ef_re_base = _mm_loadu_ps(&ef[0][i]);
     const __m128 ef_im_base = _mm_loadu_ps(&ef[1][i]);
 
-    const __m128 xPowPlus = _mm_add_ps(xPow, k1e_10f);
+    const __m128 xPowPlus = _mm_add_ps(x_pow_local, k1e_10f);
     __m128 ef_re = _mm_div_ps(ef_re_base, xPowPlus);
     __m128 ef_im = _mm_div_ps(ef_im_base, xPowPlus);
     const __m128 ef_re2 = _mm_mul_ps(ef_re, ef_re);
@@ -116,14 +125,14 @@ static void ScaleErrorSignalSSE2(AecCore* aec, float ef[2][PART_LEN1]) {
   // scalar code for the remaining items.
   {
     const float mu =
-        aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu;
-    const float error_threshold = aec->extended_filter_enabled
+        extended_filter_enabled ? kExtendedMu : normal_mu;
+    const float error_threshold = extended_filter_enabled
                                       ? kExtendedErrorThreshold
-                                      : aec->normal_error_threshold;
+                                      : normal_error_threshold;
     for (; i < (PART_LEN1); i++) {
       float abs_ef;
-      ef[0][i] /= (aec->xPow[i] + 1e-10f);
-      ef[1][i] /= (aec->xPow[i] + 1e-10f);
+      ef[0][i] /= (x_pow[i] + 1e-10f);
+      ef[1][i] /= (x_pow[i] + 1e-10f);
       abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]);
 
       if (abs_ef > error_threshold) {
@@ -139,33 +148,36 @@ static void ScaleErrorSignalSSE2(AecCore* aec, float ef[2][PART_LEN1]) {
   }
 }
 
-static void FilterAdaptationSSE2(AecCore* aec,
-                                 float* fft,
-                                 float ef[2][PART_LEN1]) {
+static void FilterAdaptationSSE2(
+    int num_partitions,
+    int x_fft_buf_block_pos,
+    float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
+    float e_fft[2][PART_LEN1],
+    float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) {
+  float fft[PART_LEN2];
   int i, j;
-  const int num_partitions = aec->num_partitions;
   for (i = 0; i < num_partitions; i++) {
-    int xPos = (i + aec->xfBufBlockPos) * (PART_LEN1);
+    int xPos = (i + x_fft_buf_block_pos) * (PART_LEN1);
     int pos = i * PART_LEN1;
     // Check for wrap
-    if (i + aec->xfBufBlockPos >= num_partitions) {
+    if (i + x_fft_buf_block_pos >= num_partitions) {
       xPos -= num_partitions * PART_LEN1;
     }
 
     // Process the whole array...
     for (j = 0; j < PART_LEN; j += 4) {
-      // Load xfBuf and ef.
-      const __m128 xfBuf_re = _mm_loadu_ps(&aec->xfBuf[0][xPos + j]);
-      const __m128 xfBuf_im = _mm_loadu_ps(&aec->xfBuf[1][xPos + j]);
-      const __m128 ef_re = _mm_loadu_ps(&ef[0][j]);
-      const __m128 ef_im = _mm_loadu_ps(&ef[1][j]);
-      // Calculate the product of conjugate(xfBuf) by ef.
+      // Load x_fft_buf and e_fft.
+      const __m128 x_fft_buf_re = _mm_loadu_ps(&x_fft_buf[0][xPos + j]);
+      const __m128 x_fft_buf_im = _mm_loadu_ps(&x_fft_buf[1][xPos + j]);
+      const __m128 e_fft_re = _mm_loadu_ps(&e_fft[0][j]);
+      const __m128 e_fft_im = _mm_loadu_ps(&e_fft[1][j]);
+      // Calculate the product of conjugate(x_fft_buf) by e_fft.
       //   re(conjugate(a) * b) = aRe * bRe + aIm * bIm
       //   im(conjugate(a) * b)=  aRe * bIm - aIm * bRe
-      const __m128 a = _mm_mul_ps(xfBuf_re, ef_re);
-      const __m128 b = _mm_mul_ps(xfBuf_im, ef_im);
-      const __m128 c = _mm_mul_ps(xfBuf_re, ef_im);
-      const __m128 d = _mm_mul_ps(xfBuf_im, ef_re);
+      const __m128 a = _mm_mul_ps(x_fft_buf_re, e_fft_re);
+      const __m128 b = _mm_mul_ps(x_fft_buf_im, e_fft_im);
+      const __m128 c = _mm_mul_ps(x_fft_buf_re, e_fft_im);
+      const __m128 d = _mm_mul_ps(x_fft_buf_im, e_fft_re);
       const __m128 e = _mm_add_ps(a, b);
       const __m128 f = _mm_sub_ps(c, d);
       // Interleave real and imaginary parts.
@@ -176,10 +188,10 @@ static void FilterAdaptationSSE2(AecCore* aec,
       _mm_storeu_ps(&fft[2 * j + 4], h);
     }
     // ... and fixup the first imaginary entry.
-    fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN],
-                   -aec->xfBuf[1][xPos + PART_LEN],
-                   ef[0][PART_LEN],
-                   ef[1][PART_LEN]);
+    fft[1] = MulRe(x_fft_buf[0][xPos + PART_LEN],
+                   -x_fft_buf[1][xPos + PART_LEN],
+                   e_fft[0][PART_LEN],
+                   e_fft[1][PART_LEN]);
 
     aec_rdft_inverse_128(fft);
     memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);
@@ -197,11 +209,11 @@ static void FilterAdaptationSSE2(AecCore* aec,
     aec_rdft_forward_128(fft);
 
     {
-      float wt1 = aec->wfBuf[1][pos];
-      aec->wfBuf[0][pos + PART_LEN] += fft[1];
+      float wt1 = h_fft_buf[1][pos];
+      h_fft_buf[0][pos + PART_LEN] += fft[1];
       for (j = 0; j < PART_LEN; j += 4) {
-        __m128 wtBuf_re = _mm_loadu_ps(&aec->wfBuf[0][pos + j]);
-        __m128 wtBuf_im = _mm_loadu_ps(&aec->wfBuf[1][pos + j]);
+        __m128 wtBuf_re = _mm_loadu_ps(&h_fft_buf[0][pos + j]);
+        __m128 wtBuf_im = _mm_loadu_ps(&h_fft_buf[1][pos + j]);
         const __m128 fft0 = _mm_loadu_ps(&fft[2 * j + 0]);
         const __m128 fft4 = _mm_loadu_ps(&fft[2 * j + 4]);
         const __m128 fft_re =
@@ -210,10 +222,10 @@ static void FilterAdaptationSSE2(AecCore* aec,
             _mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(3, 1, 3, 1));
         wtBuf_re = _mm_add_ps(wtBuf_re, fft_re);
         wtBuf_im = _mm_add_ps(wtBuf_im, fft_im);
-        _mm_storeu_ps(&aec->wfBuf[0][pos + j], wtBuf_re);
-        _mm_storeu_ps(&aec->wfBuf[1][pos + j], wtBuf_im);
+        _mm_storeu_ps(&h_fft_buf[0][pos + j], wtBuf_re);
+        _mm_storeu_ps(&h_fft_buf[1][pos + j], wtBuf_im);
       }
-      aec->wfBuf[1][pos] = wt1;
+      h_fft_buf[1][pos] = wt1;
     }
   }
 }
@@ -427,7 +439,8 @@ __inline static void _mm_add_ps_4x1(__m128 sum, float *dst) {
   sum = _mm_add_ps(sum, _mm_shuffle_ps(sum, sum, _MM_SHUFFLE(1, 1, 1, 1)));
   _mm_store_ss(dst, sum);
 }
-static int PartitionDelay(const AecCore* aec) {
+
+static int PartitionDelaySSE2(const AecCore* aec) {
   // Measures the energy in each filter partition and returns the partition with
   // highest energy.
   // TODO(bjornv): Spread computational cost by computing one partition per
@@ -476,7 +489,8 @@ static int PartitionDelay(const AecCore* aec) {
 static void SmoothedPSD(AecCore* aec,
                         float efw[2][PART_LEN1],
                         float dfw[2][PART_LEN1],
-                        float xfw[2][PART_LEN1]) {
+                        float xfw[2][PART_LEN1],
+                        int* extreme_filter_divergence) {
   // Power estimate smoothing coefficients.
   const float* ptrGCoh = aec->extended_filter_enabled
       ? WebRtcAec_kExtendedSmoothingCoefficients[aec->mult - 1]
@@ -595,19 +609,16 @@ static void SmoothedPSD(AecCore* aec,
     seSum += aec->se[i];
   }
 
-  // Divergent filter safeguard.
+  // Divergent filter safeguard update.
   aec->divergeState = (aec->divergeState ? 1.05f : 1.0f) * seSum > sdSum;
 
-  if (aec->divergeState)
-    memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1);
-
-  // Reset if error is significantly larger than nearend (13 dB).
-  if (!aec->extended_filter_enabled && seSum > (19.95f * sdSum))
-    memset(aec->wfBuf, 0, sizeof(aec->wfBuf));
+  // Signal extreme filter divergence if the error is significantly larger
+  // than the nearend (13 dB).
+  *extreme_filter_divergence = (seSum > (19.95f * sdSum));
 }
 
 // Window time domain data to be used by the fft.
-__inline static void WindowData(float* x_windowed, const float* x) {
+static void WindowDataSSE2(float* x_windowed, const float* x) {
   int i;
   for (i = 0; i < PART_LEN; i += 4) {
     const __m128 vec_Buf1 = _mm_loadu_ps(&x[i]);
@@ -627,8 +638,8 @@ __inline static void WindowData(float* x_windowed, const float* x) {
 }
 
 // Puts fft output data into a complex valued array.
-__inline static void StoreAsComplex(const float* data,
-                                    float data_complex[2][PART_LEN1]) {
+static void StoreAsComplexSSE2(const float* data,
+                               float data_complex[2][PART_LEN1]) {
   int i;
   for (i = 0; i < PART_LEN; i += 4) {
     const __m128 vec_fft0 = _mm_loadu_ps(&data[2 * i]);
@@ -649,32 +660,15 @@ __inline static void StoreAsComplex(const float* data,
 
 static void SubbandCoherenceSSE2(AecCore* aec,
                                  float efw[2][PART_LEN1],
+                                 float dfw[2][PART_LEN1],
                                  float xfw[2][PART_LEN1],
                                  float* fft,
                                  float* cohde,
-                                 float* cohxd) {
-  float dfw[2][PART_LEN1];
+                                 float* cohxd,
+                                 int* extreme_filter_divergence) {
   int i;
 
-  if (aec->delayEstCtr == 0)
-    aec->delayIdx = PartitionDelay(aec);
-
-  // Use delayed far.
-  memcpy(xfw,
-         aec->xfwBuf + aec->delayIdx * PART_LEN1,
-         sizeof(xfw[0][0]) * 2 * PART_LEN1);
-
-  // Windowed near fft
-  WindowData(fft, aec->dBuf);
-  aec_rdft_forward_128(fft);
-  StoreAsComplex(fft, dfw);
-
-  // Windowed error fft
-  WindowData(fft, aec->eBuf);
-  aec_rdft_forward_128(fft);
-  StoreAsComplex(fft, efw);
-
-  SmoothedPSD(aec, efw, dfw, xfw);
+  SmoothedPSD(aec, efw, dfw, xfw, extreme_filter_divergence);
 
   {
     const __m128 vec_1eminus10 =  _mm_set1_ps(1e-10f);
@@ -728,4 +722,7 @@ void WebRtcAec_InitAec_SSE2(void) {
   WebRtcAec_FilterAdaptation = FilterAdaptationSSE2;
   WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2;
   WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2;
+  WebRtcAec_StoreAsComplex = StoreAsComplexSSE2;
+  WebRtcAec_PartitionDelay = PartitionDelaySSE2;
+  WebRtcAec_WindowData = WindowDataSSE2;
 }
diff --git a/webrtc/modules/audio_processing/aec/echo_cancellation.c b/webrtc/modules/audio_processing/aec/echo_cancellation.c
index 0f5cd31ddb..aab1718b24 100644
--- a/webrtc/modules/audio_processing/aec/echo_cancellation.c
+++ b/webrtc/modules/audio_processing/aec/echo_cancellation.c
@@ -11,7 +11,7 @@
 /*
  * Contains the API functions for the AEC.
  */
-#include "webrtc/modules/audio_processing/aec/include/echo_cancellation.h"
+#include "webrtc/modules/audio_processing/aec/echo_cancellation.h"
 
 #include <math.h>
 #ifdef WEBRTC_AEC_DEBUG_DUMP
@@ -146,7 +146,6 @@ void* WebRtcAec_Create() {
   }
 
   aecpc->initFlag = 0;
-  aecpc->lastError = 0;
 
 #ifdef WEBRTC_AEC_DEBUG_DUMP
   {
@@ -192,26 +191,22 @@ int32_t WebRtcAec_Init(void* aecInst, int32_t sampFreq, int32_t scSampFreq) {
       sampFreq != 16000 &&
       sampFreq != 32000 &&
       sampFreq != 48000) {
-    aecpc->lastError = AEC_BAD_PARAMETER_ERROR;
-    return -1;
+    return AEC_BAD_PARAMETER_ERROR;
   }
   aecpc->sampFreq = sampFreq;
 
   if (scSampFreq < 1 || scSampFreq > 96000) {
-    aecpc->lastError = AEC_BAD_PARAMETER_ERROR;
-    return -1;
+    return AEC_BAD_PARAMETER_ERROR;
   }
   aecpc->scSampFreq = scSampFreq;
 
   // Initialize echo canceller core
   if (WebRtcAec_InitAec(aecpc->aec, aecpc->sampFreq) == -1) {
-    aecpc->lastError = AEC_UNSPECIFIED_ERROR;
-    return -1;
+    return AEC_UNSPECIFIED_ERROR;
   }
 
   if (WebRtcAec_InitResampler(aecpc->resampler, aecpc->scSampFreq) == -1) {
-    aecpc->lastError = AEC_UNSPECIFIED_ERROR;
-    return -1;
+    return AEC_UNSPECIFIED_ERROR;
   }
 
   WebRtc_InitBuffer(aecpc->far_pre_buf);
@@ -261,13 +256,32 @@ int32_t WebRtcAec_Init(void* aecInst, int32_t sampFreq, int32_t scSampFreq) {
   aecConfig.delay_logging = kAecFalse;
 
   if (WebRtcAec_set_config(aecpc, aecConfig) == -1) {
-    aecpc->lastError = AEC_UNSPECIFIED_ERROR;
-    return -1;
+    return AEC_UNSPECIFIED_ERROR;
   }
 
   return 0;
 }
 
+// Returns any error that is caused when buffering the
+// far-end signal.
+int32_t WebRtcAec_GetBufferFarendError(void* aecInst,
+                                       const float* farend,
+                                       size_t nrOfSamples) {
+  Aec* aecpc = aecInst;
+
+  if (!farend)
+    return AEC_NULL_POINTER_ERROR;
+
+  if (aecpc->initFlag != initCheck)
+    return AEC_UNINITIALIZED_ERROR;
+
+  // number of samples == 160 for SWB input
+  if (nrOfSamples != 80 && nrOfSamples != 160)
+    return AEC_BAD_PARAMETER_ERROR;
+
+  return 0;
+}
+
 // only buffer L band for farend
 int32_t WebRtcAec_BufferFarend(void* aecInst,
                                const float* farend,
@@ -277,21 +291,13 @@ int32_t WebRtcAec_BufferFarend(void* aecInst,
   float new_farend[MAX_RESAMP_LEN];
   const float* farend_ptr = farend;
 
-  if (farend == NULL) {
-    aecpc->lastError = AEC_NULL_POINTER_ERROR;
-    return -1;
-  }
+  // Get any error caused by buffering the farend signal.
+  int32_t error_code = WebRtcAec_GetBufferFarendError(aecInst, farend,
+                                                      nrOfSamples);
 
-  if (aecpc->initFlag != initCheck) {
-    aecpc->lastError = AEC_UNINITIALIZED_ERROR;
-    return -1;
-  }
+  if (error_code != 0)
+    return error_code;
 
-  // number of samples == 160 for SWB input
-  if (nrOfSamples != 80 && nrOfSamples != 160) {
-    aecpc->lastError = AEC_BAD_PARAMETER_ERROR;
-    return -1;
-  }
 
   if (aecpc->skewMode == kAecTrue && aecpc->resample == kAecTrue) {
     // Resample and get a new number of samples
@@ -311,7 +317,8 @@ int32_t WebRtcAec_BufferFarend(void* aecInst,
   // Write the time-domain data to |far_pre_buf|.
   WebRtc_WriteBuffer(aecpc->far_pre_buf, farend_ptr, newNrOfSamples);
 
-  // Transform to frequency domain if we have enough data.
+  // TODO(minyue): reduce to |PART_LEN| samples for each buffering, when
+  // WebRtcAec_BufferFarendPartition() is changed to take |PART_LEN| samples.
   while (WebRtc_available_read(aecpc->far_pre_buf) >= PART_LEN2) {
     // We have enough data to pass to the FFT, hence read PART_LEN2 samples.
     {
@@ -319,10 +326,6 @@ int32_t WebRtcAec_BufferFarend(void* aecInst,
       float tmp[PART_LEN2];
       WebRtc_ReadBuffer(aecpc->far_pre_buf, (void**)&ptmp, tmp, PART_LEN2);
       WebRtcAec_BufferFarendPartition(aecpc->aec, ptmp);
-#ifdef WEBRTC_AEC_DEBUG_DUMP
-      WebRtc_WriteBuffer(
-          WebRtcAec_far_time_buf(aecpc->aec), &ptmp[PART_LEN], 1);
-#endif
     }
 
     // Rewind |far_pre_buf| PART_LEN samples for overlap before continuing.
@@ -343,29 +346,24 @@ int32_t WebRtcAec_Process(void* aecInst,
   int32_t retVal = 0;
 
   if (out == NULL) {
-    aecpc->lastError = AEC_NULL_POINTER_ERROR;
-    return -1;
+    return AEC_NULL_POINTER_ERROR;
   }
 
   if (aecpc->initFlag != initCheck) {
-    aecpc->lastError = AEC_UNINITIALIZED_ERROR;
-    return -1;
+    return AEC_UNINITIALIZED_ERROR;
   }
 
   // number of samples == 160 for SWB input
   if (nrOfSamples != 80 && nrOfSamples != 160) {
-    aecpc->lastError = AEC_BAD_PARAMETER_ERROR;
-    return -1;
+    return AEC_BAD_PARAMETER_ERROR;
   }
 
   if (msInSndCardBuf < 0) {
     msInSndCardBuf = 0;
-    aecpc->lastError = AEC_BAD_PARAMETER_WARNING;
-    retVal = -1;
+    retVal = AEC_BAD_PARAMETER_WARNING;
   } else if (msInSndCardBuf > kMaxTrustedDelayMs) {
     // The clamping is now done in ProcessExtended/Normal().
-    aecpc->lastError = AEC_BAD_PARAMETER_WARNING;
-    retVal = -1;
+    retVal = AEC_BAD_PARAMETER_WARNING;
   }
 
   // This returns the value of aec->extended_filter_enabled.
@@ -378,15 +376,13 @@ int32_t WebRtcAec_Process(void* aecInst,
                     msInSndCardBuf,
                     skew);
   } else {
-    if (ProcessNormal(aecpc,
-                      nearend,
-                      num_bands,
-                      out,
-                      nrOfSamples,
-                      msInSndCardBuf,
-                      skew) != 0) {
-      retVal = -1;
-    }
+    retVal = ProcessNormal(aecpc,
+                           nearend,
+                           num_bands,
+                           out,
+                           nrOfSamples,
+                           msInSndCardBuf,
+                           skew);
   }
 
 #ifdef WEBRTC_AEC_DEBUG_DUMP
@@ -405,31 +401,26 @@ int32_t WebRtcAec_Process(void* aecInst,
 int WebRtcAec_set_config(void* handle, AecConfig config) {
   Aec* self = (Aec*)handle;
   if (self->initFlag != initCheck) {
-    self->lastError = AEC_UNINITIALIZED_ERROR;
-    return -1;
+    return AEC_UNINITIALIZED_ERROR;
   }
 
   if (config.skewMode != kAecFalse && config.skewMode != kAecTrue) {
-    self->lastError = AEC_BAD_PARAMETER_ERROR;
-    return -1;
+    return AEC_BAD_PARAMETER_ERROR;
   }
   self->skewMode = config.skewMode;
 
   if (config.nlpMode != kAecNlpConservative &&
       config.nlpMode != kAecNlpModerate &&
       config.nlpMode != kAecNlpAggressive) {
-    self->lastError = AEC_BAD_PARAMETER_ERROR;
-    return -1;
+    return AEC_BAD_PARAMETER_ERROR;
   }
 
   if (config.metricsMode != kAecFalse && config.metricsMode != kAecTrue) {
-    self->lastError = AEC_BAD_PARAMETER_ERROR;
-    return -1;
+    return AEC_BAD_PARAMETER_ERROR;
   }
 
   if (config.delay_logging != kAecFalse && config.delay_logging != kAecTrue) {
-    self->lastError = AEC_BAD_PARAMETER_ERROR;
-    return -1;
+    return AEC_BAD_PARAMETER_ERROR;
   }
 
   WebRtcAec_SetConfigCore(
@@ -440,12 +431,10 @@ int WebRtcAec_set_config(void* handle, AecConfig config) {
 int WebRtcAec_get_echo_status(void* handle, int* status) {
   Aec* self = (Aec*)handle;
   if (status == NULL) {
-    self->lastError = AEC_NULL_POINTER_ERROR;
-    return -1;
+    return AEC_NULL_POINTER_ERROR;
   }
   if (self->initFlag != initCheck) {
-    self->lastError = AEC_UNINITIALIZED_ERROR;
-    return -1;
+    return AEC_UNINITIALIZED_ERROR;
   }
 
   *status = WebRtcAec_echo_state(self->aec);
@@ -466,12 +455,10 @@ int WebRtcAec_GetMetrics(void* handle, AecMetrics* metrics) {
     return -1;
   }
   if (metrics == NULL) {
-    self->lastError = AEC_NULL_POINTER_ERROR;
-    return -1;
+    return AEC_NULL_POINTER_ERROR;
   }
   if (self->initFlag != initCheck) {
-    self->lastError = AEC_UNINITIALIZED_ERROR;
-    return -1;
+    return AEC_UNINITIALIZED_ERROR;
   }
 
   WebRtcAec_GetEchoStats(self->aec, &erl, &erle, &a_nlp);
@@ -556,32 +543,24 @@ int WebRtcAec_GetDelayMetrics(void* handle,
                               float* fraction_poor_delays) {
   Aec* self = handle;
   if (median == NULL) {
-    self->lastError = AEC_NULL_POINTER_ERROR;
-    return -1;
+    return AEC_NULL_POINTER_ERROR;
   }
   if (std == NULL) {
-    self->lastError = AEC_NULL_POINTER_ERROR;
-    return -1;
+    return AEC_NULL_POINTER_ERROR;
   }
   if (self->initFlag != initCheck) {
-    self->lastError = AEC_UNINITIALIZED_ERROR;
-    return -1;
+    return AEC_UNINITIALIZED_ERROR;
   }
   if (WebRtcAec_GetDelayMetricsCore(self->aec, median, std,
                                     fraction_poor_delays) ==
       -1) {
     // Logging disabled.
-    self->lastError = AEC_UNSUPPORTED_FUNCTION_ERROR;
-    return -1;
+    return AEC_UNSUPPORTED_FUNCTION_ERROR;
   }
 
   return 0;
 }
 
-int32_t WebRtcAec_get_error_code(void* aecInst) {
-  Aec* aecpc = aecInst;
-  return aecpc->lastError;
-}
 
 AecCore* WebRtcAec_aec_core(void* handle) {
   if (!handle) {
@@ -617,7 +596,7 @@ static int ProcessNormal(Aec* aecpc,
       retVal = WebRtcAec_GetSkew(aecpc->resampler, skew, &aecpc->skew);
       if (retVal == -1) {
         aecpc->skew = 0;
-        aecpc->lastError = AEC_BAD_PARAMETER_WARNING;
+        retVal = AEC_BAD_PARAMETER_WARNING;
       }
 
       aecpc->skew /= aecpc->sampFactor * nrOfSamples;
diff --git a/webrtc/modules/audio_processing/aec/include/echo_cancellation.h b/webrtc/modules/audio_processing/aec/echo_cancellation.h
index a340cf84d0..de84b2e6d1 100644
--- a/webrtc/modules/audio_processing/aec/include/echo_cancellation.h
+++ b/webrtc/modules/audio_processing/aec/echo_cancellation.h
@@ -8,8 +8,8 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_INCLUDE_ECHO_CANCELLATION_H_
-#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_INCLUDE_ECHO_CANCELLATION_H_
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_ECHO_CANCELLATION_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_ECHO_CANCELLATION_H_
 
 #include <stddef.h>
 
@@ -109,13 +109,32 @@ int32_t WebRtcAec_Init(void* aecInst, int32_t sampFreq, int32_t scSampFreq);
  * Outputs                      Description
  * -------------------------------------------------------------------
  * int32_t        return        0: OK
- *                             -1: error
+ *                              12000-12050: error code
  */
 int32_t WebRtcAec_BufferFarend(void* aecInst,
                                const float* farend,
                                size_t nrOfSamples);
 
 /*
+ * Reports any errors that would arise if buffering a farend buffer
+ *
+ * Inputs                       Description
+ * -------------------------------------------------------------------
+ * void*          aecInst       Pointer to the AEC instance
+ * const float*   farend        In buffer containing one frame of
+ *                              farend signal for L band
+ * int16_t        nrOfSamples   Number of samples in farend buffer
+ *
+ * Outputs                      Description
+ * -------------------------------------------------------------------
+ * int32_t        return        0: OK
+ *                              12000-12050: error code
+ */
+int32_t WebRtcAec_GetBufferFarendError(void* aecInst,
+                                       const float* farend,
+                                       size_t nrOfSamples);
+
+/*
  * Runs the echo canceller on an 80 or 160 sample blocks of data.
  *
  * Inputs                       Description
@@ -136,7 +155,7 @@ int32_t WebRtcAec_BufferFarend(void* aecInst,
  * float* const* out            Out buffer, one frame of processed nearend
  *                              for each band
  * int32_t       return         0: OK
- *                             -1: error
+ *                              12000-12050: error code
  */
 int32_t WebRtcAec_Process(void* aecInst,
                           const float* const* nearend,
@@ -157,8 +176,8 @@ int32_t WebRtcAec_Process(void* aecInst,
  *
  * Outputs                      Description
  * -------------------------------------------------------------------
- * int            return         0: OK
- *                              -1: error
+ * int            return        0: OK
+ *                              12000-12050: error code
  */
 int WebRtcAec_set_config(void* handle, AecConfig config);
 
@@ -173,8 +192,8 @@ int WebRtcAec_set_config(void* handle, AecConfig config);
  * -------------------------------------------------------------------
  * int*           status        0: Almost certainly nearend single-talk
  *                              1: Might not be neared single-talk
- * int            return         0: OK
- *                              -1: error
+ * int            return        0: OK
+ *                              12000-12050: error code
  */
 int WebRtcAec_get_echo_status(void* handle, int* status);
 
@@ -189,8 +208,8 @@ int WebRtcAec_get_echo_status(void* handle, int* status);
  * -------------------------------------------------------------------
  * AecMetrics*    metrics       Struct which will be filled out with the
  *                              current echo metrics.
- * int            return         0: OK
- *                              -1: error
+ * int            return        0: OK
+ *                              12000-12050: error code
  */
 int WebRtcAec_GetMetrics(void* handle, AecMetrics* metrics);
 
@@ -208,27 +227,14 @@ int WebRtcAec_GetMetrics(void* handle, AecMetrics* metrics);
  * float*  fraction_poor_delays Fraction of the delay estimates that may
  *                              cause the AEC to perform poorly.
  *
- * int     return                0: OK
- *                              -1: error
+ * int            return        0: OK
+ *                              12000-12050: error code
  */
 int WebRtcAec_GetDelayMetrics(void* handle,
                               int* median,
                               int* std,
                               float* fraction_poor_delays);
 
-/*
- * Gets the last error code.
- *
- * Inputs                       Description
- * -------------------------------------------------------------------
- * void*          aecInst       Pointer to the AEC instance
- *
- * Outputs                      Description
- * -------------------------------------------------------------------
- * int32_t        return        11000-11100: error code
- */
-int32_t WebRtcAec_get_error_code(void* aecInst);
-
 // Returns a pointer to the low level AEC handle.
 //
 // Input:
@@ -242,4 +248,4 @@ struct AecCore* WebRtcAec_aec_core(void* handle);
 #ifdef __cplusplus
 }
 #endif
-#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_INCLUDE_ECHO_CANCELLATION_H_
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_ECHO_CANCELLATION_H_
diff --git a/webrtc/modules/audio_processing/aec/echo_cancellation_internal.h b/webrtc/modules/audio_processing/aec/echo_cancellation_internal.h
index 95a6cf3324..e87219f33d 100644
--- a/webrtc/modules/audio_processing/aec/echo_cancellation_internal.h
+++ b/webrtc/modules/audio_processing/aec/echo_cancellation_internal.h
@@ -57,8 +57,6 @@ typedef struct {
 
   RingBuffer* far_pre_buf;  // Time domain far-end pre-buffer.
 
-  int lastError;
-
   int farend_started;
 
   AecCore* aec;
diff --git a/webrtc/modules/audio_processing/aec/echo_cancellation_unittest.cc b/webrtc/modules/audio_processing/aec/echo_cancellation_unittest.cc
index 315ac3e9f9..42db082ff9 100644
--- a/webrtc/modules/audio_processing/aec/echo_cancellation_unittest.cc
+++ b/webrtc/modules/audio_processing/aec/echo_cancellation_unittest.cc
@@ -10,7 +10,7 @@
 
 // TODO(bjornv): Make this a comprehensive test.
 
-#include "webrtc/modules/audio_processing/aec/include/echo_cancellation.h"
+#include "webrtc/modules/audio_processing/aec/echo_cancellation.h"
 
 #include <stdlib.h>
 #include <time.h>
diff --git a/webrtc/modules/audio_processing/aec/system_delay_unittest.cc b/webrtc/modules/audio_processing/aec/system_delay_unittest.cc
index 07e3cf8add..567118d828 100644
--- a/webrtc/modules/audio_processing/aec/system_delay_unittest.cc
+++ b/webrtc/modules/audio_processing/aec/system_delay_unittest.cc
@@ -13,8 +13,7 @@ extern "C" {
 #include "webrtc/modules/audio_processing/aec/aec_core.h"
 }
 #include "webrtc/modules/audio_processing/aec/echo_cancellation_internal.h"
-#include "webrtc/modules/audio_processing/aec/include/echo_cancellation.h"
-#include "webrtc/test/testsupport/gtest_disable.h"
+#include "webrtc/modules/audio_processing/aec/echo_cancellation.h"
 #include "webrtc/typedefs.h"
 
 namespace {