1 files changed, 323 insertions, 302 deletions
diff --git a/webrtc/modules/audio_processing/aec/aec_core.c b/webrtc/modules/audio_processing/aec/aec_core.c
index f8eed32372..901e0fde0b 100644
--- a/webrtc/modules/audio_processing/aec/aec_core.c
+++ b/webrtc/modules/audio_processing/aec/aec_core.c
@@ -44,7 +44,6 @@ static const int countLen = 50;
 static const int kDelayMetricsAggregationWindow = 1250;  // 5 seconds at 16 kHz.
 
 // Quantities to control H band scaling for SWB input
-static const int flagHbandCn = 1;  // flag for adding comfort noise in H band
 static const float cnScaleHband =
     (float)0.4;  // scale for comfort noise in H band
 // Initial bin for averaging nlp gain in low band
@@ -135,6 +134,9 @@ WebRtcAecFilterAdaptation WebRtcAec_FilterAdaptation;
 WebRtcAecOverdriveAndSuppress WebRtcAec_OverdriveAndSuppress;
 WebRtcAecComfortNoise WebRtcAec_ComfortNoise;
 WebRtcAecSubBandCoherence WebRtcAec_SubbandCoherence;
+WebRtcAecStoreAsComplex WebRtcAec_StoreAsComplex;
+WebRtcAecPartitionDelay WebRtcAec_PartitionDelay;
+WebRtcAecWindowData WebRtcAec_WindowData;
 
 __inline static float MulRe(float aRe, float aIm, float bRe, float bIm) {
   return aRe * bRe - aIm * bIm;
@@ -151,40 +153,49 @@ static int CmpFloat(const void* a, const void* b) {
   return (*da > *db) - (*da < *db);
 }
 
-static void FilterFar(AecCore* aec, float yf[2][PART_LEN1]) {
+static void FilterFar(
+    int num_partitions,
+    int x_fft_buf_block_pos,
+    float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
+    float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
+    float y_fft[2][PART_LEN1]) {
   int i;
-  for (i = 0; i < aec->num_partitions; i++) {
+  for (i = 0; i < num_partitions; i++) {
     int j;
-    int xPos = (i + aec->xfBufBlockPos) * PART_LEN1;
+    int xPos = (i + x_fft_buf_block_pos) * PART_LEN1;
     int pos = i * PART_LEN1;
     // Check for wrap
-    if (i + aec->xfBufBlockPos >= aec->num_partitions) {
-      xPos -= aec->num_partitions * (PART_LEN1);
+    if (i + x_fft_buf_block_pos >= num_partitions) {
+      xPos -= num_partitions * (PART_LEN1);
     }
 
     for (j = 0; j < PART_LEN1; j++) {
-      yf[0][j] += MulRe(aec->xfBuf[0][xPos + j],
-                        aec->xfBuf[1][xPos + j],
-                        aec->wfBuf[0][pos + j],
-                        aec->wfBuf[1][pos + j]);
-      yf[1][j] += MulIm(aec->xfBuf[0][xPos + j],
-                        aec->xfBuf[1][xPos + j],
-                        aec->wfBuf[0][pos + j],
-                        aec->wfBuf[1][pos + j]);
+      y_fft[0][j] += MulRe(x_fft_buf[0][xPos + j],
+                           x_fft_buf[1][xPos + j],
+                           h_fft_buf[0][pos + j],
+                           h_fft_buf[1][pos + j]);
+      y_fft[1][j] += MulIm(x_fft_buf[0][xPos + j],
+                           x_fft_buf[1][xPos + j],
+                           h_fft_buf[0][pos + j],
+                           h_fft_buf[1][pos + j]);
     }
   }
 }
 
-static void ScaleErrorSignal(AecCore* aec, float ef[2][PART_LEN1]) {
-  const float mu = aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu;
-  const float error_threshold = aec->extended_filter_enabled
+static void ScaleErrorSignal(int extended_filter_enabled,
+                             float normal_mu,
+                             float normal_error_threshold,
+                             float x_pow[PART_LEN1],
+                             float ef[2][PART_LEN1]) {
+  const float mu = extended_filter_enabled ? kExtendedMu : normal_mu;
+  const float error_threshold = extended_filter_enabled
                                     ? kExtendedErrorThreshold
-                                    : aec->normal_error_threshold;
+                                    : normal_error_threshold;
   int i;
   float abs_ef;
   for (i = 0; i < (PART_LEN1); i++) {
-    ef[0][i] /= (aec->xPow[i] + 1e-10f);
-    ef[1][i] /= (aec->xPow[i] + 1e-10f);
+    ef[0][i] /= (x_pow[i] + 1e-10f);
+    ef[1][i] /= (x_pow[i] + 1e-10f);
     abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]);
 
     if (abs_ef > error_threshold) {
@@ -199,59 +210,40 @@ static void ScaleErrorSignal(AecCore* aec, float ef[2][PART_LEN1]) {
   }
 }
 
-// Time-unconstrined filter adaptation.
-// TODO(andrew): consider for a low-complexity mode.
-// static void FilterAdaptationUnconstrained(AecCore* aec, float *fft,
-//                                          float ef[2][PART_LEN1]) {
-//  int i, j;
-//  for (i = 0; i < aec->num_partitions; i++) {
-//    int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1);
-//    int pos;
-//    // Check for wrap
-//    if (i + aec->xfBufBlockPos >= aec->num_partitions) {
-//      xPos -= aec->num_partitions * PART_LEN1;
-//    }
-//
-//    pos = i * PART_LEN1;
-//
-//    for (j = 0; j < PART_LEN1; j++) {
-//      aec->wfBuf[0][pos + j] += MulRe(aec->xfBuf[0][xPos + j],
-//                                      -aec->xfBuf[1][xPos + j],
-//                                      ef[0][j], ef[1][j]);
-//      aec->wfBuf[1][pos + j] += MulIm(aec->xfBuf[0][xPos + j],
-//                                      -aec->xfBuf[1][xPos + j],
-//                                      ef[0][j], ef[1][j]);
-//    }
-//  }
-//}
-
-static void FilterAdaptation(AecCore* aec, float* fft, float ef[2][PART_LEN1]) {
+
+static void FilterAdaptation(
+    int num_partitions,
+    int x_fft_buf_block_pos,
+    float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
+    float e_fft[2][PART_LEN1],
+    float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) {
   int i, j;
-  for (i = 0; i < aec->num_partitions; i++) {
-    int xPos = (i + aec->xfBufBlockPos) * (PART_LEN1);
+  float fft[PART_LEN2];
+  for (i = 0; i < num_partitions; i++) {
+    int xPos = (i + x_fft_buf_block_pos) * (PART_LEN1);
     int pos;
     // Check for wrap
-    if (i + aec->xfBufBlockPos >= aec->num_partitions) {
-      xPos -= aec->num_partitions * PART_LEN1;
+    if (i + x_fft_buf_block_pos >= num_partitions) {
+      xPos -= num_partitions * PART_LEN1;
     }
 
     pos = i * PART_LEN1;
 
     for (j = 0; j < PART_LEN; j++) {
 
-      fft[2 * j] = MulRe(aec->xfBuf[0][xPos + j],
-                         -aec->xfBuf[1][xPos + j],
-                         ef[0][j],
-                         ef[1][j]);
-      fft[2 * j + 1] = MulIm(aec->xfBuf[0][xPos + j],
-                             -aec->xfBuf[1][xPos + j],
-                             ef[0][j],
-                             ef[1][j]);
+      fft[2 * j] = MulRe(x_fft_buf[0][xPos + j],
+                         -x_fft_buf[1][xPos + j],
+                         e_fft[0][j],
+                         e_fft[1][j]);
+      fft[2 * j + 1] = MulIm(x_fft_buf[0][xPos + j],
+                             -x_fft_buf[1][xPos + j],
+                             e_fft[0][j],
+                             e_fft[1][j]);
     }
-    fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN],
-                   -aec->xfBuf[1][xPos + PART_LEN],
-                   ef[0][PART_LEN],
-                   ef[1][PART_LEN]);
+    fft[1] = MulRe(x_fft_buf[0][xPos + PART_LEN],
+                   -x_fft_buf[1][xPos + PART_LEN],
+                   e_fft[0][PART_LEN],
+                   e_fft[1][PART_LEN]);
 
     aec_rdft_inverse_128(fft);
     memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);
@@ -265,12 +257,12 @@ static void FilterAdaptation(AecCore* aec, float* fft, float ef[2][PART_LEN1]) {
     }
     aec_rdft_forward_128(fft);
 
-    aec->wfBuf[0][pos] += fft[0];
-    aec->wfBuf[0][pos + PART_LEN] += fft[1];
+    h_fft_buf[0][pos] += fft[0];
+    h_fft_buf[0][pos + PART_LEN] += fft[1];
 
     for (j = 1; j < PART_LEN; j++) {
-      aec->wfBuf[0][pos + j] += fft[2 * j];
-      aec->wfBuf[1][pos + j] += fft[2 * j + 1];
+      h_fft_buf[0][pos + j] += fft[2 * j];
+      h_fft_buf[1][pos + j] += fft[2 * j + 1];
     }
   }
 }
@@ -334,12 +326,13 @@ const float WebRtcAec_kMinFarendPSD = 15;
 //  - sde : cross-PSD of near-end and residual echo
 //  - sxd : cross-PSD of near-end and far-end
 //
-// In addition to updating the PSDs, also the filter diverge state is determined
-// upon actions are taken.
+// In addition to updating the PSDs, also the filter diverge state is
+// determined.
 static void SmoothedPSD(AecCore* aec,
                         float efw[2][PART_LEN1],
                         float dfw[2][PART_LEN1],
-                        float xfw[2][PART_LEN1]) {
+                        float xfw[2][PART_LEN1],
+                        int* extreme_filter_divergence) {
   // Power estimate smoothing coefficients.
   const float* ptrGCoh = aec->extended_filter_enabled
       ? WebRtcAec_kExtendedSmoothingCoefficients[aec->mult - 1]
@@ -380,15 +373,12 @@ static void SmoothedPSD(AecCore* aec,
     seSum += aec->se[i];
   }
 
-  // Divergent filter safeguard.
+  // Divergent filter safeguard update.
   aec->divergeState = (aec->divergeState ? 1.05f : 1.0f) * seSum > sdSum;
 
-  if (aec->divergeState)
-    memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1);
-
-  // Reset if error is significantly larger than nearend (13 dB).
-  if (!aec->extended_filter_enabled && seSum > (19.95f * sdSum))
-    memset(aec->wfBuf, 0, sizeof(aec->wfBuf));
+  // Signal extreme filter divergence if the error is significantly larger
+  // than the nearend (13 dB).
+  *extreme_filter_divergence = (seSum > (19.95f * sdSum));
 }
 
 // Window time domain data to be used by the fft.
@@ -417,32 +407,15 @@ __inline static void StoreAsComplex(const float* data,
 
 static void SubbandCoherence(AecCore* aec,
                              float efw[2][PART_LEN1],
+                             float dfw[2][PART_LEN1],
                              float xfw[2][PART_LEN1],
                              float* fft,
                              float* cohde,
-                             float* cohxd) {
-  float dfw[2][PART_LEN1];
+                             float* cohxd,
+                             int* extreme_filter_divergence) {
   int i;
 
-  if (aec->delayEstCtr == 0)
-    aec->delayIdx = PartitionDelay(aec);
-
-  // Use delayed far.
-  memcpy(xfw,
-         aec->xfwBuf + aec->delayIdx * PART_LEN1,
-         sizeof(xfw[0][0]) * 2 * PART_LEN1);
-
-  // Windowed near fft
-  WindowData(fft, aec->dBuf);
-  aec_rdft_forward_128(fft);
-  StoreAsComplex(fft, dfw);
-
-  // Windowed error fft
-  WindowData(fft, aec->eBuf);
-  aec_rdft_forward_128(fft);
-  StoreAsComplex(fft, efw);
-
-  SmoothedPSD(aec, efw, dfw, xfw);
+  SmoothedPSD(aec, efw, dfw, xfw, extreme_filter_divergence);
 
   // Subband coherence
   for (i = 0; i < PART_LEN1; i++) {
@@ -458,23 +431,23 @@ static void SubbandCoherence(AecCore* aec,
 static void GetHighbandGain(const float* lambda, float* nlpGainHband) {
   int i;
 
-  nlpGainHband[0] = (float)0.0;
+  *nlpGainHband = (float)0.0;
   for (i = freqAvgIc; i < PART_LEN1 - 1; i++) {
-    nlpGainHband[0] += lambda[i];
+    *nlpGainHband += lambda[i];
   }
-  nlpGainHband[0] /= (float)(PART_LEN1 - 1 - freqAvgIc);
+  *nlpGainHband /= (float)(PART_LEN1 - 1 - freqAvgIc);
 }
 
 static void ComfortNoise(AecCore* aec,
                          float efw[2][PART_LEN1],
-                         complex_t* comfortNoiseHband,
+                         float comfortNoiseHband[2][PART_LEN1],
                          const float* noisePow,
                          const float* lambda) {
   int i, num;
   float rand[PART_LEN];
   float noise, noiseAvg, tmp, tmpAvg;
   int16_t randW16[PART_LEN];
-  complex_t u[PART_LEN1];
+  float u[2][PART_LEN1];
 
   const float pi2 = 6.28318530717959f;
 
@@ -486,22 +459,22 @@ static void ComfortNoise(AecCore* aec,
 
   // Reject LF noise
   u[0][0] = 0;
-  u[0][1] = 0;
+  u[1][0] = 0;
   for (i = 1; i < PART_LEN1; i++) {
     tmp = pi2 * rand[i - 1];
 
     noise = sqrtf(noisePow[i]);
-    u[i][0] = noise * cosf(tmp);
-    u[i][1] = -noise * sinf(tmp);
+    u[0][i] = noise * cosf(tmp);
+    u[1][i] = -noise * sinf(tmp);
   }
-  u[PART_LEN][1] = 0;
+  u[1][PART_LEN] = 0;
 
   for (i = 0; i < PART_LEN1; i++) {
     // This is the proper weighting to match the background noise power
     tmp = sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0));
     // tmp = 1 - lambda[i];
-    efw[0][i] += tmp * u[i][0];
-    efw[1][i] += tmp * u[i][1];
+    efw[0][i] += tmp * u[0][i];
+    efw[1][i] += tmp * u[1][i];
   }
 
   // For H band comfort noise
@@ -509,7 +482,7 @@ static void ComfortNoise(AecCore* aec,
   noiseAvg = 0.0;
   tmpAvg = 0.0;
   num = 0;
-  if (aec->num_bands > 1 && flagHbandCn == 1) {
+  if (aec->num_bands > 1) {
 
     // average noise scale
     // average over second half of freq spectrum (i.e., 4->8khz)
@@ -534,21 +507,24 @@ static void ComfortNoise(AecCore* aec,
     // TODO: we should probably have a new random vector here.
     // Reject LF noise
     u[0][0] = 0;
-    u[0][1] = 0;
+    u[1][0] = 0;
     for (i = 1; i < PART_LEN1; i++) {
       tmp = pi2 * rand[i - 1];
 
       // Use average noise for H band
-      u[i][0] = noiseAvg * (float)cos(tmp);
-      u[i][1] = -noiseAvg * (float)sin(tmp);
+      u[0][i] = noiseAvg * (float)cos(tmp);
+      u[1][i] = -noiseAvg * (float)sin(tmp);
     }
-    u[PART_LEN][1] = 0;
+    u[1][PART_LEN] = 0;
 
     for (i = 0; i < PART_LEN1; i++) {
       // Use average NLP weight for H band
-      comfortNoiseHband[i][0] = tmpAvg * u[i][0];
-      comfortNoiseHband[i][1] = tmpAvg * u[i][1];
+      comfortNoiseHband[0][i] = tmpAvg * u[0][i];
+      comfortNoiseHband[1][i] = tmpAvg * u[1][i];
     }
+  } else {
+    memset(comfortNoiseHband, 0,
+           2 * PART_LEN1 * sizeof(comfortNoiseHband[0][0]));
   }
 }
 
@@ -837,21 +813,29 @@ static void UpdateDelayMetrics(AecCore* self) {
   return;
 }
 
-static void TimeToFrequency(float time_data[PART_LEN2],
-                            float freq_data[2][PART_LEN1],
-                            int window) {
-  int i = 0;
-
-  // TODO(bjornv): Should we have a different function/wrapper for windowed FFT?
-  if (window) {
-    for (i = 0; i < PART_LEN; i++) {
-      time_data[i] *= WebRtcAec_sqrtHanning[i];
-      time_data[PART_LEN + i] *= WebRtcAec_sqrtHanning[PART_LEN - i];
-    }
+static void ScaledInverseFft(float freq_data[2][PART_LEN1],
+                             float time_data[PART_LEN2],
+                             float scale,
+                             int conjugate) {
+  int i;
+  const float normalization = scale / ((float)PART_LEN2);
+  const float sign = (conjugate ? -1 : 1);
+  time_data[0] = freq_data[0][0] * normalization;
+  time_data[1] = freq_data[0][PART_LEN] * normalization;
+  for (i = 1; i < PART_LEN; i++) {
+    time_data[2 * i] = freq_data[0][i] * normalization;
+    time_data[2 * i + 1] = sign * freq_data[1][i] * normalization;
   }
+  aec_rdft_inverse_128(time_data);
+}
+
 
+static void Fft(float time_data[PART_LEN2],
+                float freq_data[2][PART_LEN1]) {
+  int i;
   aec_rdft_forward_128(time_data);
-  // Reorder.
+
+  // Reorder fft output data.
   freq_data[1][0] = 0;
   freq_data[1][PART_LEN] = 0;
   freq_data[0][0] = time_data[0];
@@ -862,13 +846,6 @@ static void TimeToFrequency(float time_data[PART_LEN2],
   }
 }
 
-static int MoveFarReadPtrWithoutSystemDelayUpdate(AecCore* self, int elements) {
-  WebRtc_MoveReadPtr(self->far_buf_windowed, elements);
-#ifdef WEBRTC_AEC_DEBUG_DUMP
-  WebRtc_MoveReadPtr(self->far_time_buf, elements);
-#endif
-  return WebRtc_MoveReadPtr(self->far_buf, elements);
-}
 
 static int SignalBasedDelayCorrection(AecCore* self) {
   int delay_correction = 0;
@@ -909,7 +886,7 @@ static int SignalBasedDelayCorrection(AecCore* self) {
     const int upper_bound = self->num_partitions * 3 / 4;
     const int do_correction = delay <= lower_bound || delay > upper_bound;
     if (do_correction == 1) {
-      int available_read = (int)WebRtc_available_read(self->far_buf);
+      int available_read = (int)WebRtc_available_read(self->far_time_buf);
       // With |shift_offset| we gradually rely on the delay estimates.  For
       // positive delays we reduce the correction by |shift_offset| to lower the
       // risk of pushing the AEC into a non causal state.  For negative delays
@@ -942,13 +919,94 @@ static int SignalBasedDelayCorrection(AecCore* self) {
   return delay_correction;
 }
 
-static void NonLinearProcessing(AecCore* aec,
-                                float* output,
-                                float* const* outputH) {
-  float efw[2][PART_LEN1], xfw[2][PART_LEN1];
-  complex_t comfortNoiseHband[PART_LEN1];
+static void EchoSubtraction(
+    AecCore* aec,
+    int num_partitions,
+    int x_fft_buf_block_pos,
+    int metrics_mode,
+    int extended_filter_enabled,
+    float normal_mu,
+    float normal_error_threshold,
+    float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
+    float* const y,
+    float x_pow[PART_LEN1],
+    float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
+    PowerLevel* linout_level,
+    float echo_subtractor_output[PART_LEN]) {
+  float s_fft[2][PART_LEN1];
+  float e_extended[PART_LEN2];
+  float s_extended[PART_LEN2];
+  float *s;
+  float e[PART_LEN];
+  float e_fft[2][PART_LEN1];
+  int i;
+  memset(s_fft, 0, sizeof(s_fft));
+
+  // Conditionally reset the echo subtraction filter if the filter has diverged
+  // significantly.
+  if (!aec->extended_filter_enabled &&
+      aec->extreme_filter_divergence) {
+    memset(aec->wfBuf, 0, sizeof(aec->wfBuf));
+    aec->extreme_filter_divergence = 0;
+  }
+
+  // Produce echo estimate s_fft.
+  WebRtcAec_FilterFar(num_partitions,
+                      x_fft_buf_block_pos,
+                      x_fft_buf,
+                      h_fft_buf,
+                      s_fft);
+
+  // Compute the time-domain echo estimate s.
+  ScaledInverseFft(s_fft, s_extended, 2.0f, 0);
+  s = &s_extended[PART_LEN];
+
+  // Compute the time-domain echo prediction error.
+  for (i = 0; i < PART_LEN; ++i) {
+    e[i] = y[i] - s[i];
+  }
+
+  // Compute the frequency domain echo prediction error.
+  memset(e_extended, 0, sizeof(float) * PART_LEN);
+  memcpy(e_extended + PART_LEN, e, sizeof(float) * PART_LEN);
+  Fft(e_extended, e_fft);
+
+  RTC_AEC_DEBUG_RAW_WRITE(aec->e_fft_file,
+                          &e_fft[0][0],
+                          sizeof(e_fft[0][0]) * PART_LEN1 * 2);
+
+  if (metrics_mode == 1) {
+    // Note that the first PART_LEN samples in fft (before transformation) are
+    // zero. Hence, the scaling by two in UpdateLevel() should not be
+    // performed. That scaling is taken care of in UpdateMetrics() instead.
+    UpdateLevel(linout_level, e_fft);
+  }
+
+  // Scale error signal inversely with far power.
+  WebRtcAec_ScaleErrorSignal(extended_filter_enabled,
+                             normal_mu,
+                             normal_error_threshold,
+                             x_pow,
+                             e_fft);
+  WebRtcAec_FilterAdaptation(num_partitions,
+                             x_fft_buf_block_pos,
+                             x_fft_buf,
+                             e_fft,
+                             h_fft_buf);
+  memcpy(echo_subtractor_output, e, sizeof(float) * PART_LEN);
+}
+
+
+static void EchoSuppression(AecCore* aec,
+                            float farend[PART_LEN2],
+                            float* echo_subtractor_output,
+                            float* output,
+                            float* const* outputH) {
+  float efw[2][PART_LEN1];
+  float xfw[2][PART_LEN1];
+  float dfw[2][PART_LEN1];
+  float comfortNoiseHband[2][PART_LEN1];
   float fft[PART_LEN2];
-  float scale, dtmp;
   float nlpGainHband;
   int i;
   size_t j;
@@ -972,27 +1030,51 @@ static void NonLinearProcessing(AecCore* aec,
 
   float* xfw_ptr = NULL;
 
-  aec->delayEstCtr++;
-  if (aec->delayEstCtr == delayEstInterval) {
-    aec->delayEstCtr = 0;
-  }
+  // Update eBuf with echo subtractor output.
+  memcpy(aec->eBuf + PART_LEN,
+         echo_subtractor_output,
+         sizeof(float) * PART_LEN);
 
-  // initialize comfort noise for H band
-  memset(comfortNoiseHband, 0, sizeof(comfortNoiseHband));
-  nlpGainHband = (float)0.0;
-  dtmp = (float)0.0;
+  // Analysis filter banks for the echo suppressor.
+  // Windowed near-end ffts.
+  WindowData(fft, aec->dBuf);
+  aec_rdft_forward_128(fft);
+  StoreAsComplex(fft, dfw);
+
+  // Windowed echo suppressor output ffts.
+  WindowData(fft, aec->eBuf);
+  aec_rdft_forward_128(fft);
+  StoreAsComplex(fft, efw);
 
-  // We should always have at least one element stored in |far_buf|.
-  assert(WebRtc_available_read(aec->far_buf_windowed) > 0);
   // NLP
-  WebRtc_ReadBuffer(aec->far_buf_windowed, (void**)&xfw_ptr, &xfw[0][0], 1);
 
-  // TODO(bjornv): Investigate if we can reuse |far_buf_windowed| instead of
-  // |xfwBuf|.
+  // Convert far-end partition to the frequency domain with windowing.
+  WindowData(fft, farend);
+  Fft(fft, xfw);
+  xfw_ptr = &xfw[0][0];
+
   // Buffer far.
   memcpy(aec->xfwBuf, xfw_ptr, sizeof(float) * 2 * PART_LEN1);
 
-  WebRtcAec_SubbandCoherence(aec, efw, xfw, fft, cohde, cohxd);
+  aec->delayEstCtr++;
+  if (aec->delayEstCtr == delayEstInterval) {
+    aec->delayEstCtr = 0;
+    aec->delayIdx = WebRtcAec_PartitionDelay(aec);
+  }
+
+  // Use delayed far.
+  memcpy(xfw,
+         aec->xfwBuf + aec->delayIdx * PART_LEN1,
+         sizeof(xfw[0][0]) * 2 * PART_LEN1);
+
+  WebRtcAec_SubbandCoherence(aec, efw, dfw, xfw, fft, cohde, cohxd,
+                             &aec->extreme_filter_divergence);
+
+  // Select the microphone signal as output if the filter is deemed to have
+  // diverged.
+  if (aec->divergeState) {
+    memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1);
+  }
 
   hNlXdAvg = 0;
   for (i = minPrefBand; i < prefBandSize + minPrefBand; i++) {
@@ -1098,67 +1180,51 @@ static void NonLinearProcessing(AecCore* aec,
     // scaling only in UpdateMetrics().
     UpdateLevel(&aec->nlpoutlevel, efw);
   }
+
   // Inverse error fft.
-  fft[0] = efw[0][0];
-  fft[1] = efw[0][PART_LEN];
-  for (i = 1; i < PART_LEN; i++) {
-    fft[2 * i] = efw[0][i];
-    // Sign change required by Ooura fft.
-    fft[2 * i + 1] = -efw[1][i];
-  }
-  aec_rdft_inverse_128(fft);
+  ScaledInverseFft(efw, fft, 2.0f, 1);
 
   // Overlap and add to obtain output.
-  scale = 2.0f / PART_LEN2;
   for (i = 0; i < PART_LEN; i++) {
-    fft[i] *= scale;  // fft scaling
-    fft[i] = fft[i] * WebRtcAec_sqrtHanning[i] + aec->outBuf[i];
-
-    fft[PART_LEN + i] *= scale;  // fft scaling
-    aec->outBuf[i] = fft[PART_LEN + i] * WebRtcAec_sqrtHanning[PART_LEN - i];
+    output[i] = (fft[i] * WebRtcAec_sqrtHanning[i] +
+                 aec->outBuf[i] * WebRtcAec_sqrtHanning[PART_LEN - i]);
 
     // Saturate output to keep it in the allowed range.
     output[i] = WEBRTC_SPL_SAT(
-        WEBRTC_SPL_WORD16_MAX, fft[i], WEBRTC_SPL_WORD16_MIN);
+        WEBRTC_SPL_WORD16_MAX, output[i], WEBRTC_SPL_WORD16_MIN);
   }
+  memcpy(aec->outBuf, &fft[PART_LEN], PART_LEN * sizeof(aec->outBuf[0]));
 
   // For H band
   if (aec->num_bands > 1) {
-
     // H band gain
     // average nlp over low band: average over second half of freq spectrum
     // (4->8khz)
     GetHighbandGain(hNl, &nlpGainHband);
 
     // Inverse comfort_noise
-    if (flagHbandCn == 1) {
-      fft[0] = comfortNoiseHband[0][0];
-      fft[1] = comfortNoiseHband[PART_LEN][0];
-      for (i = 1; i < PART_LEN; i++) {
-        fft[2 * i] = comfortNoiseHband[i][0];
-        fft[2 * i + 1] = comfortNoiseHband[i][1];
-      }
-      aec_rdft_inverse_128(fft);
-      scale = 2.0f / PART_LEN2;
-    }
+    ScaledInverseFft(comfortNoiseHband, fft, 2.0f, 0);
 
     // compute gain factor
     for (j = 0; j < aec->num_bands - 1; ++j) {
       for (i = 0; i < PART_LEN; i++) {
-        dtmp = aec->dBufH[j][i];
-        dtmp = dtmp * nlpGainHband;  // for variable gain
+        outputH[j][i] = aec->dBufH[j][i] * nlpGainHband;
+      }
+    }
 
-        // add some comfort noise where Hband is attenuated
-        if (flagHbandCn == 1 && j == 0) {
-          fft[i] *= scale;  // fft scaling
-          dtmp += cnScaleHband * fft[i];
-        }
+    // Add some comfort noise where Hband is attenuated.
+    for (i = 0; i < PART_LEN; i++) {
+      outputH[0][i] += cnScaleHband * fft[i];
+    }
 
-        // Saturate output to keep it in the allowed range.
+    // Saturate output to keep it in the allowed range.
+    for (j = 0; j < aec->num_bands - 1; ++j) {
+      for (i = 0; i < PART_LEN; i++) {
         outputH[j][i] = WEBRTC_SPL_SAT(
-            WEBRTC_SPL_WORD16_MAX, dtmp, WEBRTC_SPL_WORD16_MIN);
+            WEBRTC_SPL_WORD16_MAX, outputH[j][i], WEBRTC_SPL_WORD16_MIN);
       }
     }
+
   }
 
   // Copy the current block to the old position.
@@ -1177,11 +1243,9 @@ static void NonLinearProcessing(AecCore* aec,
 
 static void ProcessBlock(AecCore* aec) {
   size_t i;
-  float y[PART_LEN], e[PART_LEN];
-  float scale;
 
   float fft[PART_LEN2];
-  float xf[2][PART_LEN1], yf[2][PART_LEN1], ef[2][PART_LEN1];
+  float xf[2][PART_LEN1];
   float df[2][PART_LEN1];
   float far_spectrum = 0.0f;
   float near_spectrum = 0.0f;
@@ -1198,15 +1262,18 @@ static void ProcessBlock(AecCore* aec) {
 
   float nearend[PART_LEN];
   float* nearend_ptr = NULL;
+  float farend[PART_LEN2];
+  float* farend_ptr = NULL;
+  float echo_subtractor_output[PART_LEN];
   float output[PART_LEN];
   float outputH[NUM_HIGH_BANDS_MAX][PART_LEN];
   float* outputH_ptr[NUM_HIGH_BANDS_MAX];
+  float* xf_ptr = NULL;
+
   for (i = 0; i < NUM_HIGH_BANDS_MAX; ++i) {
     outputH_ptr[i] = outputH[i];
   }
 
-  float* xf_ptr = NULL;
-
   // Concatenate old and new nearend blocks.
   for (i = 0; i < aec->num_bands - 1; ++i) {
     WebRtc_ReadBuffer(aec->nearFrBufH[i],
@@ -1218,25 +1285,28 @@ static void ProcessBlock(AecCore* aec) {
   WebRtc_ReadBuffer(aec->nearFrBuf, (void**)&nearend_ptr, nearend, PART_LEN);
   memcpy(aec->dBuf + PART_LEN, nearend_ptr, sizeof(nearend));
 
-  // ---------- Ooura fft ----------
+  // We should always have at least one element stored in |far_buf|.
+  assert(WebRtc_available_read(aec->far_time_buf) > 0);
+  WebRtc_ReadBuffer(aec->far_time_buf, (void**)&farend_ptr, farend, 1);
 
 #ifdef WEBRTC_AEC_DEBUG_DUMP
   {
-    float farend[PART_LEN];
-    float* farend_ptr = NULL;
-    WebRtc_ReadBuffer(aec->far_time_buf, (void**)&farend_ptr, farend, 1);
-    RTC_AEC_DEBUG_WAV_WRITE(aec->farFile, farend_ptr, PART_LEN);
+    // TODO(minyue): |farend_ptr| starts from buffered samples. This will be
+    // modified when |aec->far_time_buf| is revised.
+    RTC_AEC_DEBUG_WAV_WRITE(aec->farFile, &farend_ptr[PART_LEN], PART_LEN);
+
     RTC_AEC_DEBUG_WAV_WRITE(aec->nearFile, nearend_ptr, PART_LEN);
   }
 #endif
 
-  // We should always have at least one element stored in |far_buf|.
-  assert(WebRtc_available_read(aec->far_buf) > 0);
-  WebRtc_ReadBuffer(aec->far_buf, (void**)&xf_ptr, &xf[0][0], 1);
+  // Convert far-end signal to the frequency domain.
+  memcpy(fft, farend_ptr, sizeof(float) * PART_LEN2);
+  Fft(fft, xf);
+  xf_ptr = &xf[0][0];
 
   // Near fft
   memcpy(fft, aec->dBuf, sizeof(float) * PART_LEN2);
-  TimeToFrequency(fft, df, 0);
+  Fft(fft, df);
 
   // Power smoothing
   for (i = 0; i < PART_LEN1; i++) {
@@ -1314,60 +1384,25 @@ static void ProcessBlock(AecCore* aec) {
          &xf_ptr[PART_LEN1],
          sizeof(float) * PART_LEN1);
 
-  memset(yf, 0, sizeof(yf));
-
-  // Filter far
-  WebRtcAec_FilterFar(aec, yf);
-
-  // Inverse fft to obtain echo estimate and error.
-  fft[0] = yf[0][0];
-  fft[1] = yf[0][PART_LEN];
-  for (i = 1; i < PART_LEN; i++) {
-    fft[2 * i] = yf[0][i];
-    fft[2 * i + 1] = yf[1][i];
-  }
-  aec_rdft_inverse_128(fft);
-
-  scale = 2.0f / PART_LEN2;
-  for (i = 0; i < PART_LEN; i++) {
-    y[i] = fft[PART_LEN + i] * scale;  // fft scaling
-  }
-
-  for (i = 0; i < PART_LEN; i++) {
-    e[i] = nearend_ptr[i] - y[i];
-  }
-
-  // Error fft
-  memcpy(aec->eBuf + PART_LEN, e, sizeof(float) * PART_LEN);
-  memset(fft, 0, sizeof(float) * PART_LEN);
-  memcpy(fft + PART_LEN, e, sizeof(float) * PART_LEN);
-  // TODO(bjornv): Change to use TimeToFrequency().
-  aec_rdft_forward_128(fft);
-
-  ef[1][0] = 0;
-  ef[1][PART_LEN] = 0;
-  ef[0][0] = fft[0];
-  ef[0][PART_LEN] = fft[1];
-  for (i = 1; i < PART_LEN; i++) {
-    ef[0][i] = fft[2 * i];
-    ef[1][i] = fft[2 * i + 1];
-  }
-
-  RTC_AEC_DEBUG_RAW_WRITE(aec->e_fft_file,
-                          &ef[0][0],
-                          sizeof(ef[0][0]) * PART_LEN1 * 2);
-
-  if (aec->metricsMode == 1) {
-    // Note that the first PART_LEN samples in fft (before transformation) are
-    // zero. Hence, the scaling by two in UpdateLevel() should not be
-    // performed. That scaling is taken care of in UpdateMetrics() instead.
-    UpdateLevel(&aec->linoutlevel, ef);
-  }
-
-  // Scale error signal inversely with far power.
-  WebRtcAec_ScaleErrorSignal(aec, ef);
-  WebRtcAec_FilterAdaptation(aec, fft, ef);
-  NonLinearProcessing(aec, output, outputH_ptr);
+  // Perform echo subtraction.
+  EchoSubtraction(aec,
+                  aec->num_partitions,
+                  aec->xfBufBlockPos,
+                  aec->metricsMode,
+                  aec->extended_filter_enabled,
+                  aec->normal_mu,
+                  aec->normal_error_threshold,
+                  aec->xfBuf,
+                  nearend_ptr,
+                  aec->xPow,
+                  aec->wfBuf,
+                  &aec->linoutlevel,
+                  echo_subtractor_output);
+
+  RTC_AEC_DEBUG_WAV_WRITE(aec->outLinearFile, echo_subtractor_output, PART_LEN);
+
+  // Perform echo suppression.
+  EchoSuppression(aec, farend_ptr, echo_subtractor_output, output, outputH_ptr);
 
   if (aec->metricsMode == 1) {
     // Update power levels and echo metrics
@@ -1383,7 +1418,6 @@ static void ProcessBlock(AecCore* aec) {
     WebRtc_WriteBuffer(aec->outFrBufH[i], outputH[i], PART_LEN);
   }
 
-  RTC_AEC_DEBUG_WAV_WRITE(aec->outLinearFile, e, PART_LEN);
   RTC_AEC_DEBUG_WAV_WRITE(aec->outFile, output, PART_LEN);
 }
 
@@ -1422,26 +1456,20 @@ AecCore* WebRtcAec_CreateAec() {
   }
 
   // Create far-end buffers.
-  aec->far_buf =
-      WebRtc_CreateBuffer(kBufSizePartitions, sizeof(float) * 2 * PART_LEN1);
-  if (!aec->far_buf) {
-    WebRtcAec_FreeAec(aec);
-    return NULL;
-  }
-  aec->far_buf_windowed =
-      WebRtc_CreateBuffer(kBufSizePartitions, sizeof(float) * 2 * PART_LEN1);
-  if (!aec->far_buf_windowed) {
-    WebRtcAec_FreeAec(aec);
-    return NULL;
-  }
-#ifdef WEBRTC_AEC_DEBUG_DUMP
-  aec->instance_index = webrtc_aec_instance_count;
+  // For bit exactness with legacy code, each element in |far_time_buf| is
+  // supposed to contain |PART_LEN2| samples with an overlap of |PART_LEN|
+  // samples from the last frame.
+  // TODO(minyue): reduce |far_time_buf| to non-overlapped |PART_LEN| samples.
   aec->far_time_buf =
-      WebRtc_CreateBuffer(kBufSizePartitions, sizeof(float) * PART_LEN);
+      WebRtc_CreateBuffer(kBufSizePartitions, sizeof(float) * PART_LEN2);
   if (!aec->far_time_buf) {
     WebRtcAec_FreeAec(aec);
     return NULL;
   }
+
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+  aec->instance_index = webrtc_aec_instance_count;
+
   aec->farFile = aec->nearFile = aec->outFile = aec->outLinearFile = NULL;
   aec->debug_dump_count = 0;
 #endif
@@ -1477,6 +1505,10 @@ AecCore* WebRtcAec_CreateAec() {
   WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppress;
   WebRtcAec_ComfortNoise = ComfortNoise;
   WebRtcAec_SubbandCoherence = SubbandCoherence;
+  WebRtcAec_StoreAsComplex = StoreAsComplex;
+  WebRtcAec_PartitionDelay = PartitionDelay;
+  WebRtcAec_WindowData = WindowData;
+
 
 #if defined(WEBRTC_ARCH_X86_FAMILY)
   if (WebRtc_GetCPUInfo(kSSE2)) {
@@ -1515,11 +1547,8 @@ void WebRtcAec_FreeAec(AecCore* aec) {
     WebRtc_FreeBuffer(aec->outFrBufH[i]);
   }
 
-  WebRtc_FreeBuffer(aec->far_buf);
-  WebRtc_FreeBuffer(aec->far_buf_windowed);
-#ifdef WEBRTC_AEC_DEBUG_DUMP
   WebRtc_FreeBuffer(aec->far_time_buf);
-#endif
+
   RTC_AEC_DEBUG_WAV_CLOSE(aec->farFile);
   RTC_AEC_DEBUG_WAV_CLOSE(aec->nearFile);
   RTC_AEC_DEBUG_WAV_CLOSE(aec->outFile);
@@ -1555,10 +1584,9 @@ int WebRtcAec_InitAec(AecCore* aec, int sampFreq) {
   }
 
   // Initialize far-end buffers.
-  WebRtc_InitBuffer(aec->far_buf);
-  WebRtc_InitBuffer(aec->far_buf_windowed);
-#ifdef WEBRTC_AEC_DEBUG_DUMP
   WebRtc_InitBuffer(aec->far_time_buf);
+
+#ifdef WEBRTC_AEC_DEBUG_DUMP
   {
     int process_rate = sampFreq > 16000 ? 16000 : sampFreq;
     RTC_AEC_DEBUG_WAV_REOPEN("aec_far", aec->instance_index,
@@ -1693,6 +1721,8 @@ int WebRtcAec_InitAec(AecCore* aec, int sampFreq) {
   aec->seed = 777;
   aec->delayEstCtr = 0;
 
+  aec->extreme_filter_divergence = 0;
+
   // Metrics disabled by default
   aec->metricsMode = 0;
   InitMetrics(aec);
@@ -1700,27 +1730,22 @@ int WebRtcAec_InitAec(AecCore* aec, int sampFreq) {
   return 0;
 }
 
-void WebRtcAec_BufferFarendPartition(AecCore* aec, const float* farend) {
-  float fft[PART_LEN2];
-  float xf[2][PART_LEN1];
 
+// For bit exactness with a legacy code, |farend| is supposed to contain
+// |PART_LEN2| samples with an overlap of |PART_LEN| samples from the last
+// frame.
+// TODO(minyue): reduce |farend| to non-overlapped |PART_LEN| samples.
+void WebRtcAec_BufferFarendPartition(AecCore* aec, const float* farend) {
   // Check if the buffer is full, and in that case flush the oldest data.
-  if (WebRtc_available_write(aec->far_buf) < 1) {
+  if (WebRtc_available_write(aec->far_time_buf) < 1) {
     WebRtcAec_MoveFarReadPtr(aec, 1);
   }
-  // Convert far-end partition to the frequency domain without windowing.
-  memcpy(fft, farend, sizeof(float) * PART_LEN2);
-  TimeToFrequency(fft, xf, 0);
-  WebRtc_WriteBuffer(aec->far_buf, &xf[0][0], 1);
 
-  // Convert far-end partition to the frequency domain with windowing.
-  memcpy(fft, farend, sizeof(float) * PART_LEN2);
-  TimeToFrequency(fft, xf, 1);
-  WebRtc_WriteBuffer(aec->far_buf_windowed, &xf[0][0], 1);
+  WebRtc_WriteBuffer(aec->far_time_buf, farend, 1);
 }
 
 int WebRtcAec_MoveFarReadPtr(AecCore* aec, int elements) {
-  int elements_moved = MoveFarReadPtrWithoutSystemDelayUpdate(aec, elements);
+  int elements_moved = WebRtc_MoveReadPtr(aec->far_time_buf, elements);
   aec->system_delay -= elements_moved * PART_LEN;
   return elements_moved;
 }
@@ -1794,14 +1819,14 @@ void WebRtcAec_ProcessFrames(AecCore* aec,
       // rounding, like -16.
       int move_elements = (aec->knownDelay - knownDelay - 32) / PART_LEN;
       int moved_elements =
-          MoveFarReadPtrWithoutSystemDelayUpdate(aec, move_elements);
+          WebRtc_MoveReadPtr(aec->far_time_buf, move_elements);
       aec->knownDelay -= moved_elements * PART_LEN;
     } else {
       // 2 b) Apply signal based delay correction.
       int move_elements = SignalBasedDelayCorrection(aec);
       int moved_elements =
-          MoveFarReadPtrWithoutSystemDelayUpdate(aec, move_elements);
-      int far_near_buffer_diff = WebRtc_available_read(aec->far_buf) -
+          WebRtc_MoveReadPtr(aec->far_time_buf, move_elements);
+      int far_near_buffer_diff = WebRtc_available_read(aec->far_time_buf) -
           WebRtc_available_read(aec->nearFrBuf) / PART_LEN;
       WebRtc_SoftResetDelayEstimator(aec->delay_estimator, moved_elements);
       WebRtc_SoftResetDelayEstimatorFarend(aec->delay_estimator_farend,
@@ -1880,10 +1905,6 @@ void WebRtcAec_GetEchoStats(AecCore* self,
   *a_nlp = self->aNlp;
 }
 
-#ifdef WEBRTC_AEC_DEBUG_DUMP
-void* WebRtcAec_far_time_buf(AecCore* self) { return self->far_time_buf; }
-#endif
-
 void WebRtcAec_SetConfigCore(AecCore* self,
                              int nlp_mode,
                              int metrics_mode,