3 files changed, 1023 insertions, 1134 deletions
diff --git a/modules/audio_processing/aec/aec_core.c b/modules/audio_processing/aec/aec_core.c
index 139d37d7..37ae6216 100644
--- a/modules/audio_processing/aec/aec_core.c
+++ b/modules/audio_processing/aec/aec_core.c
@@ -114,30 +114,12 @@ enum {
 extern int webrtc_aec_instance_count;
 #endif
 
-// "Private" function prototypes.
-static void ProcessBlock(AecCore* aec);
-
-static void NonLinearProcessing(AecCore* aec, float* output, float* outputH);
-
-static void GetHighbandGain(const float* lambda, float* nlpGainHband);
-
-// Comfort_noise also computes noise for H band returned in comfortNoiseHband
-static void ComfortNoise(AecCore* aec,
-                         float efw[2][PART_LEN1],
-                         complex_t* comfortNoiseHband,
-                         const float* noisePow,
-                         const float* lambda);
-
-static void InitLevel(PowerLevel* level);
-static void InitStats(Stats* stats);
-static void InitMetrics(AecCore* aec);
-static void UpdateLevel(PowerLevel* level, float in[2][PART_LEN1]);
-static void UpdateMetrics(AecCore* aec);
-// Convert from time domain to frequency domain. Note that |time_data| are
-// overwritten.
-static void TimeToFrequency(float time_data[PART_LEN2],
-                            float freq_data[2][PART_LEN1],
-                            int window);
+WebRtcAec_FilterFar_t WebRtcAec_FilterFar;
+WebRtcAec_ScaleErrorSignal_t WebRtcAec_ScaleErrorSignal;
+WebRtcAec_FilterAdaptation_t WebRtcAec_FilterAdaptation;
+WebRtcAec_OverdriveAndSuppress_t WebRtcAec_OverdriveAndSuppress;
+WebRtcAec_ComfortNoise_t WebRtcAec_ComfortNoise;
+WebRtcAec_SubbandCoherence_t WebRtcAec_SubbandCoherence;
 
 __inline static float MulRe(float aRe, float aIm, float bRe, float bIm) {
   return aRe * bRe - aIm * bIm;
@@ -154,121 +136,6 @@ static int CmpFloat(const void* a, const void* b) {
   return (*da > *db) - (*da < *db);
 }
 
-int WebRtcAec_CreateAec(AecCore** aecInst) {
-  AecCore* aec = malloc(sizeof(AecCore));
-  *aecInst = aec;
-  if (aec == NULL) {
-    return -1;
-  }
-
-  aec->nearFrBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(float));
-  if (!aec->nearFrBuf) {
-    WebRtcAec_FreeAec(aec);
-    aec = NULL;
-    return -1;
-  }
-
-  aec->outFrBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(float));
-  if (!aec->outFrBuf) {
-    WebRtcAec_FreeAec(aec);
-    aec = NULL;
-    return -1;
-  }
-
-  aec->nearFrBufH = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(float));
-  if (!aec->nearFrBufH) {
-    WebRtcAec_FreeAec(aec);
-    aec = NULL;
-    return -1;
-  }
-
-  aec->outFrBufH = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(float));
-  if (!aec->outFrBufH) {
-    WebRtcAec_FreeAec(aec);
-    aec = NULL;
-    return -1;
-  }
-
-  // Create far-end buffers.
-  aec->far_buf =
-      WebRtc_CreateBuffer(kBufSizePartitions, sizeof(float) * 2 * PART_LEN1);
-  if (!aec->far_buf) {
-    WebRtcAec_FreeAec(aec);
-    aec = NULL;
-    return -1;
-  }
-  aec->far_buf_windowed =
-      WebRtc_CreateBuffer(kBufSizePartitions, sizeof(float) * 2 * PART_LEN1);
-  if (!aec->far_buf_windowed) {
-    WebRtcAec_FreeAec(aec);
-    aec = NULL;
-    return -1;
-  }
-#ifdef WEBRTC_AEC_DEBUG_DUMP
-  aec->far_time_buf =
-      WebRtc_CreateBuffer(kBufSizePartitions, sizeof(int16_t) * PART_LEN);
-  if (!aec->far_time_buf) {
-    WebRtcAec_FreeAec(aec);
-    aec = NULL;
-    return -1;
-  }
-  {
-    char filename[64];
-    sprintf(filename, "aec_far%d.pcm", webrtc_aec_instance_count);
-    aec->farFile = fopen(filename, "wb");
-    sprintf(filename, "aec_near%d.pcm", webrtc_aec_instance_count);
-    aec->nearFile = fopen(filename, "wb");
-    sprintf(filename, "aec_out%d.pcm", webrtc_aec_instance_count);
-    aec->outFile = fopen(filename, "wb");
-    sprintf(filename, "aec_out_linear%d.pcm", webrtc_aec_instance_count);
-    aec->outLinearFile = fopen(filename, "wb");
-  }
-#endif
-  aec->delay_estimator_farend =
-      WebRtc_CreateDelayEstimatorFarend(PART_LEN1, kHistorySizeBlocks);
-  if (aec->delay_estimator_farend == NULL) {
-    WebRtcAec_FreeAec(aec);
-    aec = NULL;
-    return -1;
-  }
-  aec->delay_estimator = WebRtc_CreateDelayEstimator(
-      aec->delay_estimator_farend, kLookaheadBlocks);
-  if (aec->delay_estimator == NULL) {
-    WebRtcAec_FreeAec(aec);
-    aec = NULL;
-    return -1;
-  }
-
-  return 0;
-}
-
-int WebRtcAec_FreeAec(AecCore* aec) {
-  if (aec == NULL) {
-    return -1;
-  }
-
-  WebRtc_FreeBuffer(aec->nearFrBuf);
-  WebRtc_FreeBuffer(aec->outFrBuf);
-
-  WebRtc_FreeBuffer(aec->nearFrBufH);
-  WebRtc_FreeBuffer(aec->outFrBufH);
-
-  WebRtc_FreeBuffer(aec->far_buf);
-  WebRtc_FreeBuffer(aec->far_buf_windowed);
-#ifdef WEBRTC_AEC_DEBUG_DUMP
-  WebRtc_FreeBuffer(aec->far_time_buf);
-  fclose(aec->farFile);
-  fclose(aec->nearFile);
-  fclose(aec->outFile);
-  fclose(aec->outLinearFile);
-#endif
-  WebRtc_FreeDelayEstimator(aec->delay_estimator);
-  WebRtc_FreeDelayEstimatorFarend(aec->delay_estimator_farend);
-
-  free(aec);
-  return 0;
-}
-
 static void FilterFar(AecCore* aec, float yf[2][PART_LEN1]) {
   int i;
   for (i = 0; i < aec->num_partitions; i++) {
@@ -573,614 +440,350 @@ static void SubbandCoherence(AecCore* aec,
   }
 }
 
-WebRtcAec_FilterFar_t WebRtcAec_FilterFar;
-WebRtcAec_ScaleErrorSignal_t WebRtcAec_ScaleErrorSignal;
-WebRtcAec_FilterAdaptation_t WebRtcAec_FilterAdaptation;
-WebRtcAec_OverdriveAndSuppress_t WebRtcAec_OverdriveAndSuppress;
-WebRtcAec_ComfortNoise_t WebRtcAec_ComfortNoise;
-WebRtcAec_SubbandCoherence_t WebRtcAec_SubbandCoherence;
-
-int WebRtcAec_InitAec(AecCore* aec, int sampFreq) {
+static void GetHighbandGain(const float* lambda, float* nlpGainHband) {
   int i;
 
-  aec->sampFreq = sampFreq;
-
-  if (sampFreq == 8000) {
-    aec->normal_mu = 0.6f;
-    aec->normal_error_threshold = 2e-6f;
-  } else {
-    aec->normal_mu = 0.5f;
-    aec->normal_error_threshold = 1.5e-6f;
-  }
-
-  if (WebRtc_InitBuffer(aec->nearFrBuf) == -1) {
-    return -1;
-  }
-
-  if (WebRtc_InitBuffer(aec->outFrBuf) == -1) {
-    return -1;
-  }
-
-  if (WebRtc_InitBuffer(aec->nearFrBufH) == -1) {
-    return -1;
-  }
-
-  if (WebRtc_InitBuffer(aec->outFrBufH) == -1) {
-    return -1;
-  }
-
-  // Initialize far-end buffers.
-  if (WebRtc_InitBuffer(aec->far_buf) == -1) {
-    return -1;
-  }
-  if (WebRtc_InitBuffer(aec->far_buf_windowed) == -1) {
-    return -1;
-  }
-#ifdef WEBRTC_AEC_DEBUG_DUMP
-  if (WebRtc_InitBuffer(aec->far_time_buf) == -1) {
-    return -1;
-  }
-#endif
-  aec->system_delay = 0;
-
-  if (WebRtc_InitDelayEstimatorFarend(aec->delay_estimator_farend) != 0) {
-    return -1;
-  }
-  if (WebRtc_InitDelayEstimator(aec->delay_estimator) != 0) {
-    return -1;
+  nlpGainHband[0] = (float)0.0;
+  for (i = freqAvgIc; i < PART_LEN1 - 1; i++) {
+    nlpGainHband[0] += lambda[i];
   }
-  aec->delay_logging_enabled = 0;
-  memset(aec->delay_histogram, 0, sizeof(aec->delay_histogram));
-
-  aec->reported_delay_enabled = 1;
-  aec->extended_filter_enabled = 0;
-  aec->num_partitions = kNormalNumPartitions;
+  nlpGainHband[0] /= (float)(PART_LEN1 - 1 - freqAvgIc);
+}
 
-  // Update the delay estimator with filter length.  We use half the
-  // |num_partitions| to take the echo path into account.  In practice we say
-  // that the echo has a duration of maximum half |num_partitions|, which is not
-  // true, but serves as a crude measure.
-  WebRtc_set_allowed_offset(aec->delay_estimator, aec->num_partitions / 2);
-  // TODO(bjornv): I currently hard coded the enable.  Once we've established
-  // that AECM has no performance regression, robust_validation will be enabled
-  // all the time and the APIs to turn it on/off will be removed.  Hence, remove
-  // this line then.
-  WebRtc_enable_robust_validation(aec->delay_estimator, 1);
+static void ComfortNoise(AecCore* aec,
+                         float efw[2][PART_LEN1],
+                         complex_t* comfortNoiseHband,
+                         const float* noisePow,
+                         const float* lambda) {
+  int i, num;
+  float rand[PART_LEN];
+  float noise, noiseAvg, tmp, tmpAvg;
+  int16_t randW16[PART_LEN];
+  complex_t u[PART_LEN1];
 
-  // Default target suppression mode.
-  aec->nlp_mode = 1;
+  const float pi2 = 6.28318530717959f;
 
-  // Sampling frequency multiplier
-  // SWB is processed as 160 frame size
-  if (aec->sampFreq == 32000) {
-    aec->mult = (short)aec->sampFreq / 16000;
-  } else {
-    aec->mult = (short)aec->sampFreq / 8000;
+  // Generate a uniform random array on [0 1]
+  WebRtcSpl_RandUArray(randW16, PART_LEN, &aec->seed);
+  for (i = 0; i < PART_LEN; i++) {
+    rand[i] = ((float)randW16[i]) / 32768;
   }
 
-  aec->farBufWritePos = 0;
-  aec->farBufReadPos = 0;
-
-  aec->inSamples = 0;
-  aec->outSamples = 0;
-  aec->knownDelay = 0;
-
-  // Initialize buffers
-  memset(aec->dBuf, 0, sizeof(aec->dBuf));
-  memset(aec->eBuf, 0, sizeof(aec->eBuf));
-  // For H band
-  memset(aec->dBufH, 0, sizeof(aec->dBufH));
-
-  memset(aec->xPow, 0, sizeof(aec->xPow));
-  memset(aec->dPow, 0, sizeof(aec->dPow));
-  memset(aec->dInitMinPow, 0, sizeof(aec->dInitMinPow));
-  aec->noisePow = aec->dInitMinPow;
-  aec->noiseEstCtr = 0;
+  // Reject LF noise
+  u[0][0] = 0;
+  u[0][1] = 0;
+  for (i = 1; i < PART_LEN1; i++) {
+    tmp = pi2 * rand[i - 1];
 
-  // Initial comfort noise power
-  for (i = 0; i < PART_LEN1; i++) {
-    aec->dMinPow[i] = 1.0e6f;
+    noise = sqrtf(noisePow[i]);
+    u[i][0] = noise * cosf(tmp);
+    u[i][1] = -noise * sinf(tmp);
   }
+  u[PART_LEN][1] = 0;
 
-  // Holds the last block written to
-  aec->xfBufBlockPos = 0;
-  // TODO: Investigate need for these initializations. Deleting them doesn't
-  //       change the output at all and yields 0.4% overall speedup.
-  memset(aec->xfBuf, 0, sizeof(complex_t) * kExtendedNumPartitions * PART_LEN1);
-  memset(aec->wfBuf, 0, sizeof(complex_t) * kExtendedNumPartitions * PART_LEN1);
-  memset(aec->sde, 0, sizeof(complex_t) * PART_LEN1);
-  memset(aec->sxd, 0, sizeof(complex_t) * PART_LEN1);
-  memset(
-      aec->xfwBuf, 0, sizeof(complex_t) * kExtendedNumPartitions * PART_LEN1);
-  memset(aec->se, 0, sizeof(float) * PART_LEN1);
-
-  // To prevent numerical instability in the first block.
-  for (i = 0; i < PART_LEN1; i++) {
-    aec->sd[i] = 1;
-  }
   for (i = 0; i < PART_LEN1; i++) {
-    aec->sx[i] = 1;
+    // This is the proper weighting to match the background noise power
+    tmp = sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0));
+    // tmp = 1 - lambda[i];
+    efw[0][i] += tmp * u[i][0];
+    efw[1][i] += tmp * u[i][1];
   }
 
-  memset(aec->hNs, 0, sizeof(aec->hNs));
-  memset(aec->outBuf, 0, sizeof(float) * PART_LEN);
+  // For H band comfort noise
+  // TODO: don't compute noise and "tmp" twice. Use the previous results.
+  noiseAvg = 0.0;
+  tmpAvg = 0.0;
+  num = 0;
+  if (aec->sampFreq == 32000 && flagHbandCn == 1) {
 
-  aec->hNlFbMin = 1;
-  aec->hNlFbLocalMin = 1;
-  aec->hNlXdAvgMin = 1;
-  aec->hNlNewMin = 0;
-  aec->hNlMinCtr = 0;
-  aec->overDrive = 2;
-  aec->overDriveSm = 2;
-  aec->delayIdx = 0;
-  aec->stNearState = 0;
-  aec->echoState = 0;
-  aec->divergeState = 0;
+    // average noise scale
+    // average over second half of freq spectrum (i.e., 4->8khz)
+    // TODO: we shouldn't need num. We know how many elements we're summing.
+    for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {
+      num++;
+      noiseAvg += sqrtf(noisePow[i]);
+    }
+    noiseAvg /= (float)num;
 
-  aec->seed = 777;
-  aec->delayEstCtr = 0;
+    // average nlp scale
+    // average over second half of freq spectrum (i.e., 4->8khz)
+    // TODO: we shouldn't need num. We know how many elements we're summing.
+    num = 0;
+    for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {
+      num++;
+      tmpAvg += sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0));
+    }
+    tmpAvg /= (float)num;
 
-  // Metrics disabled by default
-  aec->metricsMode = 0;
-  InitMetrics(aec);
+    // Use average noise for H band
+    // TODO: we should probably have a new random vector here.
+    // Reject LF noise
+    u[0][0] = 0;
+    u[0][1] = 0;
+    for (i = 1; i < PART_LEN1; i++) {
+      tmp = pi2 * rand[i - 1];
 
-  // Assembly optimization
-  WebRtcAec_FilterFar = FilterFar;
-  WebRtcAec_ScaleErrorSignal = ScaleErrorSignal;
-  WebRtcAec_FilterAdaptation = FilterAdaptation;
-  WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppress;
-  WebRtcAec_ComfortNoise = ComfortNoise;
-  WebRtcAec_SubbandCoherence = SubbandCoherence;
+      // Use average noise for H band
+      u[i][0] = noiseAvg * (float)cos(tmp);
+      u[i][1] = -noiseAvg * (float)sin(tmp);
+    }
+    u[PART_LEN][1] = 0;
 
-#if defined(WEBRTC_ARCH_X86_FAMILY)
-  if (WebRtc_GetCPUInfo(kSSE2)) {
-    WebRtcAec_InitAec_SSE2();
+    for (i = 0; i < PART_LEN1; i++) {
+      // Use average NLP weight for H band
+      comfortNoiseHband[i][0] = tmpAvg * u[i][0];
+      comfortNoiseHband[i][1] = tmpAvg * u[i][1];
+    }
   }
-#endif
-
-#if defined(MIPS_FPU_LE)
-  WebRtcAec_InitAec_mips();
-#endif
-
-#if defined(WEBRTC_DETECT_ARM_NEON) || defined(WEBRTC_ARCH_ARM_NEON)
-  WebRtcAec_InitAec_neon();
-#endif
-
-  aec_rdft_init();
-
-  return 0;
 }
 
-void WebRtcAec_BufferFarendPartition(AecCore* aec, const float* farend) {
-  float fft[PART_LEN2];
-  float xf[2][PART_LEN1];
-
-  // Check if the buffer is full, and in that case flush the oldest data.
-  if (WebRtc_available_write(aec->far_buf) < 1) {
-    WebRtcAec_MoveFarReadPtr(aec, 1);
-  }
-  // Convert far-end partition to the frequency domain without windowing.
-  memcpy(fft, farend, sizeof(float) * PART_LEN2);
-  TimeToFrequency(fft, xf, 0);
-  WebRtc_WriteBuffer(aec->far_buf, &xf[0][0], 1);
+static void InitLevel(PowerLevel* level) {
+  const float kBigFloat = 1E17f;
 
-  // Convert far-end partition to the frequency domain with windowing.
-  memcpy(fft, farend, sizeof(float) * PART_LEN2);
-  TimeToFrequency(fft, xf, 1);
-  WebRtc_WriteBuffer(aec->far_buf_windowed, &xf[0][0], 1);
+  level->averagelevel = 0;
+  level->framelevel = 0;
+  level->minlevel = kBigFloat;
+  level->frsum = 0;
+  level->sfrsum = 0;
+  level->frcounter = 0;
+  level->sfrcounter = 0;
 }
 
-int WebRtcAec_MoveFarReadPtr(AecCore* aec, int elements) {
-  int elements_moved = WebRtc_MoveReadPtr(aec->far_buf_windowed, elements);
-  WebRtc_MoveReadPtr(aec->far_buf, elements);
-#ifdef WEBRTC_AEC_DEBUG_DUMP
-  WebRtc_MoveReadPtr(aec->far_time_buf, elements);
-#endif
-  aec->system_delay -= elements_moved * PART_LEN;
-  return elements_moved;
+static void InitStats(Stats* stats) {
+  stats->instant = kOffsetLevel;
+  stats->average = kOffsetLevel;
+  stats->max = kOffsetLevel;
+  stats->min = kOffsetLevel * (-1);
+  stats->sum = 0;
+  stats->hisum = 0;
+  stats->himean = kOffsetLevel;
+  stats->counter = 0;
+  stats->hicounter = 0;
 }
 
-void WebRtcAec_ProcessFrame(AecCore* aec,
-                            const float* nearend,
-                            const float* nearendH,
-                            int knownDelay,
-                            float* out,
-                            float* outH) {
-  int out_elements = 0;
-
-  // For each frame the process is as follows:
-  // 1) If the system_delay indicates on being too small for processing a
-  //    frame we stuff the buffer with enough data for 10 ms.
-  // 2) Adjust the buffer to the system delay, by moving the read pointer.
-  // 3) TODO(bjornv): Investigate if we need to add this:
-  //    If we can't move read pointer due to buffer size limitations we
-  //    flush/stuff the buffer.
-  // 4) Process as many partitions as possible.
-  // 5) Update the |system_delay| with respect to a full frame of FRAME_LEN
-  //    samples. Even though we will have data left to process (we work with
-  //    partitions) we consider updating a whole frame, since that's the
-  //    amount of data we input and output in audio_processing.
-  // 6) Update the outputs.
-
-  // TODO(bjornv): Investigate how we should round the delay difference; right
-  // now we know that incoming |knownDelay| is underestimated when it's less
-  // than |aec->knownDelay|. We therefore, round (-32) in that direction. In
-  // the other direction, we don't have this situation, but might flush one
-  // partition too little. This can cause non-causality, which should be
-  // investigated. Maybe, allow for a non-symmetric rounding, like -16.
-  int move_elements = (aec->knownDelay - knownDelay - 32) / PART_LEN;
-  int moved_elements = 0;
-
-  // TODO(bjornv): Change the near-end buffer handling to be the same as for
-  // far-end, that is, with a near_pre_buf.
-  // Buffer the near-end frame.
-  WebRtc_WriteBuffer(aec->nearFrBuf, nearend, FRAME_LEN);
-  // For H band
-  if (aec->sampFreq == 32000) {
-    WebRtc_WriteBuffer(aec->nearFrBufH, nearendH, FRAME_LEN);
-  }
-
-  // 1) At most we process |aec->mult|+1 partitions in 10 ms. Make sure we
-  // have enough far-end data for that by stuffing the buffer if the
-  // |system_delay| indicates others.
-  if (aec->system_delay < FRAME_LEN) {
-    // We don't have enough data so we rewind 10 ms.
-    WebRtcAec_MoveFarReadPtr(aec, -(aec->mult + 1));
-  }
-
-  // 2) Compensate for a possible change in the system delay.
-  WebRtc_MoveReadPtr(aec->far_buf_windowed, move_elements);
-  moved_elements = WebRtc_MoveReadPtr(aec->far_buf, move_elements);
-  aec->knownDelay -= moved_elements * PART_LEN;
-#ifdef WEBRTC_AEC_DEBUG_DUMP
-  WebRtc_MoveReadPtr(aec->far_time_buf, move_elements);
-#endif
-
-  // 4) Process as many blocks as possible.
-  while (WebRtc_available_read(aec->nearFrBuf) >= PART_LEN) {
-    ProcessBlock(aec);
-  }
-
-  // 5) Update system delay with respect to the entire frame.
-  aec->system_delay -= FRAME_LEN;
+static void InitMetrics(AecCore* self) {
+  self->stateCounter = 0;
+  InitLevel(&self->farlevel);
+  InitLevel(&self->nearlevel);
+  InitLevel(&self->linoutlevel);
+  InitLevel(&self->nlpoutlevel);
 
-  // 6) Update output frame.
-  // Stuff the out buffer if we have less than a frame to output.
-  // This should only happen for the first frame.
-  out_elements = (int)WebRtc_available_read(aec->outFrBuf);
-  if (out_elements < FRAME_LEN) {
-    WebRtc_MoveReadPtr(aec->outFrBuf, out_elements - FRAME_LEN);
-    if (aec->sampFreq == 32000) {
-      WebRtc_MoveReadPtr(aec->outFrBufH, out_elements - FRAME_LEN);
-    }
-  }
-  // Obtain an output frame.
-  WebRtc_ReadBuffer(aec->outFrBuf, NULL, out, FRAME_LEN);
-  // For H band.
-  if (aec->sampFreq == 32000) {
-    WebRtc_ReadBuffer(aec->outFrBufH, NULL, outH, FRAME_LEN);
-  }
+  InitStats(&self->erl);
+  InitStats(&self->erle);
+  InitStats(&self->aNlp);
+  InitStats(&self->rerl);
 }
 
-int WebRtcAec_GetDelayMetricsCore(AecCore* self, int* median, int* std) {
-  int i = 0;
-  int delay_values = 0;
-  int num_delay_values = 0;
-  int my_median = 0;
-  const int kMsPerBlock = PART_LEN / (self->mult * 8);
-  float l1_norm = 0;
+static void UpdateLevel(PowerLevel* level, float in[2][PART_LEN1]) {
+  // Do the energy calculation in the frequency domain. The FFT is performed on
+  // a segment of PART_LEN2 samples due to overlap, but we only want the energy
+  // of half that data (the last PART_LEN samples). Parseval's relation states
+  // that the energy is preserved according to
+  //
+  // \sum_{n=0}^{N-1} |x(n)|^2 = 1/N * \sum_{n=0}^{N-1} |X(n)|^2
+  //                           = ENERGY,
+  //
+  // where N = PART_LEN2. Since we are only interested in calculating the energy
+  // for the last PART_LEN samples we approximate by calculating ENERGY and
+  // divide by 2,
+  //
+  // \sum_{n=N/2}^{N-1} |x(n)|^2 ~= ENERGY / 2
+  //
+  // Since we deal with real valued time domain signals we only store frequency
+  // bins [0, PART_LEN], which is what |in| consists of. To calculate ENERGY we
+  // need to add the contribution from the missing part in
+  // [PART_LEN+1, PART_LEN2-1]. These values are, up to a phase shift, identical
+  // with the values in [1, PART_LEN-1], hence multiply those values by 2. This
+  // is the values in the for loop below, but multiplication by 2 and division
+  // by 2 cancel.
 
-  assert(self != NULL);
-  assert(median != NULL);
-  assert(std != NULL);
+  // TODO(bjornv): Investigate reusing energy calculations performed at other
+  // places in the code.
+  int k = 1;
+  // Imaginary parts are zero at end points and left out of the calculation.
+  float energy = (in[0][0] * in[0][0]) / 2;
+  energy += (in[0][PART_LEN] * in[0][PART_LEN]) / 2;
 
-  if (self->delay_logging_enabled == 0) {
-    // Logging disabled.
-    return -1;
+  for (k = 1; k < PART_LEN; k++) {
+    energy += (in[0][k] * in[0][k] + in[1][k] * in[1][k]);
   }
+  energy /= PART_LEN2;
 
-  // Get number of delay values since last update.
-  for (i = 0; i < kHistorySizeBlocks; i++) {
-    num_delay_values += self->delay_histogram[i];
-  }
-  if (num_delay_values == 0) {
-    // We have no new delay value data. Even though -1 is a valid estimate, it
-    // will practically never be used since multiples of |kMsPerBlock| will
-    // always be returned.
-    *median = -1;
-    *std = -1;
-    return 0;
-  }
+  level->sfrsum += energy;
+  level->sfrcounter++;
 
-  delay_values = num_delay_values >> 1;  // Start value for median count down.
-  // Get median of delay values since last update.
-  for (i = 0; i < kHistorySizeBlocks; i++) {
-    delay_values -= self->delay_histogram[i];
-    if (delay_values < 0) {
-      my_median = i;
-      break;
+  if (level->sfrcounter > subCountLen) {
+    level->framelevel = level->sfrsum / (subCountLen * PART_LEN);
+    level->sfrsum = 0;
+    level->sfrcounter = 0;
+    if (level->framelevel > 0) {
+      if (level->framelevel < level->minlevel) {
+        level->minlevel = level->framelevel;  // New minimum.
+      } else {
+        level->minlevel *= (1 + 0.001f);  // Small increase.
+      }
+    }
+    level->frcounter++;
+    level->frsum += level->framelevel;
+    if (level->frcounter > countLen) {
+      level->averagelevel = level->frsum / countLen;
+      level->frsum = 0;
+      level->frcounter = 0;
     }
   }
-  // Account for lookahead.
-  *median = (my_median - kLookaheadBlocks) * kMsPerBlock;
-
-  // Calculate the L1 norm, with median value as central moment.
-  for (i = 0; i < kHistorySizeBlocks; i++) {
-    l1_norm += (float)abs(i - my_median) * self->delay_histogram[i];
-  }
-  *std = (int)(l1_norm / (float)num_delay_values + 0.5f) * kMsPerBlock;
-
-  // Reset histogram.
-  memset(self->delay_histogram, 0, sizeof(self->delay_histogram));
-
-  return 0;
 }
 
-int WebRtcAec_echo_state(AecCore* self) { return self->echoState; }
+static void UpdateMetrics(AecCore* aec) {
+  float dtmp, dtmp2;
 
-void WebRtcAec_GetEchoStats(AecCore* self,
-                            Stats* erl,
-                            Stats* erle,
-                            Stats* a_nlp) {
-  assert(erl != NULL);
-  assert(erle != NULL);
-  assert(a_nlp != NULL);
-  *erl = self->erl;
-  *erle = self->erle;
-  *a_nlp = self->aNlp;
-}
+  const float actThresholdNoisy = 8.0f;
+  const float actThresholdClean = 40.0f;
+  const float safety = 0.99995f;
+  const float noisyPower = 300000.0f;
 
-#ifdef WEBRTC_AEC_DEBUG_DUMP
-void* WebRtcAec_far_time_buf(AecCore* self) { return self->far_time_buf; }
-#endif
+  float actThreshold;
+  float echo, suppressedEcho;
 
-void WebRtcAec_SetConfigCore(AecCore* self,
-                             int nlp_mode,
-                             int metrics_mode,
-                             int delay_logging) {
-  assert(nlp_mode >= 0 && nlp_mode < 3);
-  self->nlp_mode = nlp_mode;
-  self->metricsMode = metrics_mode;
-  if (self->metricsMode) {
-    InitMetrics(self);
-  }
-  self->delay_logging_enabled = delay_logging;
-  if (self->delay_logging_enabled) {
-    memset(self->delay_histogram, 0, sizeof(self->delay_histogram));
+  if (aec->echoState) {  // Check if echo is likely present
+    aec->stateCounter++;
   }
-}
 
-void WebRtcAec_enable_reported_delay(AecCore* self, int enable) {
-  self->reported_delay_enabled = enable;
-}
-
-int WebRtcAec_reported_delay_enabled(AecCore* self) {
-  return self->reported_delay_enabled;
-}
-
-void WebRtcAec_enable_delay_correction(AecCore* self, int enable) {
-  self->extended_filter_enabled = enable;
-  self->num_partitions = enable ? kExtendedNumPartitions : kNormalNumPartitions;
-  // Update the delay estimator with filter length.  See InitAEC() for details.
-  WebRtc_set_allowed_offset(self->delay_estimator, self->num_partitions / 2);
-}
-
-int WebRtcAec_delay_correction_enabled(AecCore* self) {
-  return self->extended_filter_enabled;
-}
-
-int WebRtcAec_system_delay(AecCore* self) { return self->system_delay; }
-
-void WebRtcAec_SetSystemDelay(AecCore* self, int delay) {
-  assert(delay >= 0);
-  self->system_delay = delay;
-}
-
-static void ProcessBlock(AecCore* aec) {
-  int i;
-  float y[PART_LEN], e[PART_LEN];
-  float scale;
-
-  float fft[PART_LEN2];
-  float xf[2][PART_LEN1], yf[2][PART_LEN1], ef[2][PART_LEN1];
-  float df[2][PART_LEN1];
-  float far_spectrum = 0.0f;
-  float near_spectrum = 0.0f;
-  float abs_far_spectrum[PART_LEN1];
-  float abs_near_spectrum[PART_LEN1];
+  if (aec->farlevel.frcounter == 0) {
 
-  const float gPow[2] = {0.9f, 0.1f};
+    if (aec->farlevel.minlevel < noisyPower) {
+      actThreshold = actThresholdClean;
+    } else {
+      actThreshold = actThresholdNoisy;
+    }
 
-  // Noise estimate constants.
-  const int noiseInitBlocks = 500 * aec->mult;
-  const float step = 0.1f;
-  const float ramp = 1.0002f;
-  const float gInitNoise[2] = {0.999f, 0.001f};
+    if ((aec->stateCounter > (0.5f * countLen * subCountLen)) &&
+        (aec->farlevel.sfrcounter == 0)
 
-  float nearend[PART_LEN];
-  float* nearend_ptr = NULL;
-  float output[PART_LEN];
-  float outputH[PART_LEN];
+        // Estimate in active far-end segments only
+        &&
+        (aec->farlevel.averagelevel >
+         (actThreshold * aec->farlevel.minlevel))) {
 
-  float* xf_ptr = NULL;
+      // Subtract noise power
+      echo = aec->nearlevel.averagelevel - safety * aec->nearlevel.minlevel;
 
-  // Concatenate old and new nearend blocks.
-  if (aec->sampFreq == 32000) {
-    WebRtc_ReadBuffer(aec->nearFrBufH, (void**)&nearend_ptr, nearend, PART_LEN);
-    memcpy(aec->dBufH + PART_LEN, nearend_ptr, sizeof(nearend));
-  }
-  WebRtc_ReadBuffer(aec->nearFrBuf, (void**)&nearend_ptr, nearend, PART_LEN);
-  memcpy(aec->dBuf + PART_LEN, nearend_ptr, sizeof(nearend));
+      // ERL
+      dtmp = 10 * (float)log10(aec->farlevel.averagelevel /
+                                   aec->nearlevel.averagelevel +
+                               1e-10f);
+      dtmp2 = 10 * (float)log10(aec->farlevel.averagelevel / echo + 1e-10f);
 
-  // ---------- Ooura fft ----------
+      aec->erl.instant = dtmp;
+      if (dtmp > aec->erl.max) {
+        aec->erl.max = dtmp;
+      }
 
-#ifdef WEBRTC_AEC_DEBUG_DUMP
-  {
-    int16_t farend[PART_LEN];
-    int16_t* farend_ptr = NULL;
-    WebRtc_ReadBuffer(aec->far_time_buf, (void**)&farend_ptr, farend, 1);
-    (void)fwrite(farend_ptr, sizeof(int16_t), PART_LEN, aec->farFile);
-    (void)fwrite(nearend_ptr, sizeof(int16_t), PART_LEN, aec->nearFile);
-  }
-#endif
+      if (dtmp < aec->erl.min) {
+        aec->erl.min = dtmp;
+      }
 
-  // We should always have at least one element stored in |far_buf|.
-  assert(WebRtc_available_read(aec->far_buf) > 0);
-  WebRtc_ReadBuffer(aec->far_buf, (void**)&xf_ptr, &xf[0][0], 1);
+      aec->erl.counter++;
+      aec->erl.sum += dtmp;
+      aec->erl.average = aec->erl.sum / aec->erl.counter;
 
-  // Near fft
-  memcpy(fft, aec->dBuf, sizeof(float) * PART_LEN2);
-  TimeToFrequency(fft, df, 0);
+      // Upper mean
+      if (dtmp > aec->erl.average) {
+        aec->erl.hicounter++;
+        aec->erl.hisum += dtmp;
+        aec->erl.himean = aec->erl.hisum / aec->erl.hicounter;
+      }
 
-  // Power smoothing
-  for (i = 0; i < PART_LEN1; i++) {
-    far_spectrum = (xf_ptr[i] * xf_ptr[i]) +
-                   (xf_ptr[PART_LEN1 + i] * xf_ptr[PART_LEN1 + i]);
-    aec->xPow[i] =
-        gPow[0] * aec->xPow[i] + gPow[1] * aec->num_partitions * far_spectrum;
-    // Calculate absolute spectra
-    abs_far_spectrum[i] = sqrtf(far_spectrum);
+      // A_NLP
+      dtmp = 10 * (float)log10(aec->nearlevel.averagelevel /
+                                   (2 * aec->linoutlevel.averagelevel) +
+                               1e-10f);
 
-    near_spectrum = df[0][i] * df[0][i] + df[1][i] * df[1][i];
-    aec->dPow[i] = gPow[0] * aec->dPow[i] + gPow[1] * near_spectrum;
-    // Calculate absolute spectra
-    abs_near_spectrum[i] = sqrtf(near_spectrum);
-  }
+      // subtract noise power
+      suppressedEcho = 2 * (aec->linoutlevel.averagelevel -
+                            safety * aec->linoutlevel.minlevel);
 
-  // Estimate noise power. Wait until dPow is more stable.
-  if (aec->noiseEstCtr > 50) {
-    for (i = 0; i < PART_LEN1; i++) {
-      if (aec->dPow[i] < aec->dMinPow[i]) {
-        aec->dMinPow[i] =
-            (aec->dPow[i] + step * (aec->dMinPow[i] - aec->dPow[i])) * ramp;
-      } else {
-        aec->dMinPow[i] *= ramp;
-      }
-    }
-  }
+      dtmp2 = 10 * (float)log10(echo / suppressedEcho + 1e-10f);
 
-  // Smooth increasing noise power from zero at the start,
-  // to avoid a sudden burst of comfort noise.
-  if (aec->noiseEstCtr < noiseInitBlocks) {
-    aec->noiseEstCtr++;
-    for (i = 0; i < PART_LEN1; i++) {
-      if (aec->dMinPow[i] > aec->dInitMinPow[i]) {
-        aec->dInitMinPow[i] = gInitNoise[0] * aec->dInitMinPow[i] +
-                              gInitNoise[1] * aec->dMinPow[i];
-      } else {
-        aec->dInitMinPow[i] = aec->dMinPow[i];
+      aec->aNlp.instant = dtmp2;
+      if (dtmp > aec->aNlp.max) {
+        aec->aNlp.max = dtmp;
       }
-    }
-    aec->noisePow = aec->dInitMinPow;
-  } else {
-    aec->noisePow = aec->dMinPow;
-  }
 
-  // Block wise delay estimation used for logging
-  if (aec->delay_logging_enabled) {
-    int delay_estimate = 0;
-    if (WebRtc_AddFarSpectrumFloat(
-            aec->delay_estimator_farend, abs_far_spectrum, PART_LEN1) == 0) {
-      delay_estimate = WebRtc_DelayEstimatorProcessFloat(
-          aec->delay_estimator, abs_near_spectrum, PART_LEN1);
-      if (delay_estimate >= 0) {
-        // Update delay estimate buffer.
-        aec->delay_histogram[delay_estimate]++;
+      if (dtmp < aec->aNlp.min) {
+        aec->aNlp.min = dtmp;
       }
-    }
-  }
 
-  // Update the xfBuf block position.
-  aec->xfBufBlockPos--;
-  if (aec->xfBufBlockPos == -1) {
-    aec->xfBufBlockPos = aec->num_partitions - 1;
-  }
-
-  // Buffer xf
-  memcpy(aec->xfBuf[0] + aec->xfBufBlockPos * PART_LEN1,
-         xf_ptr,
-         sizeof(float) * PART_LEN1);
-  memcpy(aec->xfBuf[1] + aec->xfBufBlockPos * PART_LEN1,
-         &xf_ptr[PART_LEN1],
-         sizeof(float) * PART_LEN1);
-
-  memset(yf, 0, sizeof(yf));
+      aec->aNlp.counter++;
+      aec->aNlp.sum += dtmp;
+      aec->aNlp.average = aec->aNlp.sum / aec->aNlp.counter;
 
-  // Filter far
-  WebRtcAec_FilterFar(aec, yf);
+      // Upper mean
+      if (dtmp > aec->aNlp.average) {
+        aec->aNlp.hicounter++;
+        aec->aNlp.hisum += dtmp;
+        aec->aNlp.himean = aec->aNlp.hisum / aec->aNlp.hicounter;
+      }
 
-  // Inverse fft to obtain echo estimate and error.
-  fft[0] = yf[0][0];
-  fft[1] = yf[0][PART_LEN];
-  for (i = 1; i < PART_LEN; i++) {
-    fft[2 * i] = yf[0][i];
-    fft[2 * i + 1] = yf[1][i];
-  }
-  aec_rdft_inverse_128(fft);
+      // ERLE
 
-  scale = 2.0f / PART_LEN2;
-  for (i = 0; i < PART_LEN; i++) {
-    y[i] = fft[PART_LEN + i] * scale;  // fft scaling
-  }
+      // subtract noise power
+      suppressedEcho = 2 * (aec->nlpoutlevel.averagelevel -
+                            safety * aec->nlpoutlevel.minlevel);
 
-  for (i = 0; i < PART_LEN; i++) {
-    e[i] = nearend_ptr[i] - y[i];
-  }
+      dtmp = 10 * (float)log10(aec->nearlevel.averagelevel /
+                                   (2 * aec->nlpoutlevel.averagelevel) +
+                               1e-10f);
+      dtmp2 = 10 * (float)log10(echo / suppressedEcho + 1e-10f);
 
-  // Error fft
-  memcpy(aec->eBuf + PART_LEN, e, sizeof(float) * PART_LEN);
-  memset(fft, 0, sizeof(float) * PART_LEN);
-  memcpy(fft + PART_LEN, e, sizeof(float) * PART_LEN);
-  // TODO(bjornv): Change to use TimeToFrequency().
-  aec_rdft_forward_128(fft);
+      dtmp = dtmp2;
+      aec->erle.instant = dtmp;
+      if (dtmp > aec->erle.max) {
+        aec->erle.max = dtmp;
+      }
 
-  ef[1][0] = 0;
-  ef[1][PART_LEN] = 0;
-  ef[0][0] = fft[0];
-  ef[0][PART_LEN] = fft[1];
-  for (i = 1; i < PART_LEN; i++) {
-    ef[0][i] = fft[2 * i];
-    ef[1][i] = fft[2 * i + 1];
-  }
+      if (dtmp < aec->erle.min) {
+        aec->erle.min = dtmp;
+      }
 
-  if (aec->metricsMode == 1) {
-    // Note that the first PART_LEN samples in fft (before transformation) are
-    // zero. Hence, the scaling by two in UpdateLevel() should not be
-    // performed. That scaling is taken care of in UpdateMetrics() instead.
-    UpdateLevel(&aec->linoutlevel, ef);
-  }
+      aec->erle.counter++;
+      aec->erle.sum += dtmp;
+      aec->erle.average = aec->erle.sum / aec->erle.counter;
 
-  // Scale error signal inversely with far power.
-  WebRtcAec_ScaleErrorSignal(aec, ef);
-  WebRtcAec_FilterAdaptation(aec, fft, ef);
-  NonLinearProcessing(aec, output, outputH);
+      // Upper mean
+      if (dtmp > aec->erle.average) {
+        aec->erle.hicounter++;
+        aec->erle.hisum += dtmp;
+        aec->erle.himean = aec->erle.hisum / aec->erle.hicounter;
+      }
+    }
 
-  if (aec->metricsMode == 1) {
-    // Update power levels and echo metrics
-    UpdateLevel(&aec->farlevel, (float(*)[PART_LEN1])xf_ptr);
-    UpdateLevel(&aec->nearlevel, df);
-    UpdateMetrics(aec);
+    aec->stateCounter = 0;
   }
+}
 
-  // Store the output block.
-  WebRtc_WriteBuffer(aec->outFrBuf, output, PART_LEN);
-  // For H band
-  if (aec->sampFreq == 32000) {
-    WebRtc_WriteBuffer(aec->outFrBufH, outputH, PART_LEN);
-  }
+static void TimeToFrequency(float time_data[PART_LEN2],
+                            float freq_data[2][PART_LEN1],
+                            int window) {
+  int i = 0;
 
-#ifdef WEBRTC_AEC_DEBUG_DUMP
-  {
-    int16_t eInt16[PART_LEN];
+  // TODO(bjornv): Should we have a different function/wrapper for windowed FFT?
+  if (window) {
     for (i = 0; i < PART_LEN; i++) {
-      eInt16[i] = (int16_t)WEBRTC_SPL_SAT(
-          WEBRTC_SPL_WORD16_MAX, e[i], WEBRTC_SPL_WORD16_MIN);
+      time_data[i] *= WebRtcAec_sqrtHanning[i];
+      time_data[PART_LEN + i] *= WebRtcAec_sqrtHanning[PART_LEN - i];
     }
+  }
 
-    (void)fwrite(eInt16, sizeof(int16_t), PART_LEN, aec->outLinearFile);
-    (void)fwrite(output, sizeof(int16_t), PART_LEN, aec->outFile);
+  aec_rdft_forward_128(time_data);
+  // Reorder.
+  freq_data[1][0] = 0;
+  freq_data[1][PART_LEN] = 0;
+  freq_data[0][0] = time_data[0];
+  freq_data[0][PART_LEN] = time_data[1];
+  for (i = 1; i < PART_LEN; i++) {
+    freq_data[0][i] = time_data[2 * i];
+    freq_data[1][i] = time_data[2 * i + 1];
   }
-#endif
 }
 
 static void NonLinearProcessing(AecCore* aec, float* output, float* outputH) {
@@ -1411,348 +1014,721 @@ static void NonLinearProcessing(AecCore* aec, float* output, float* outputH) {
           sizeof(aec->xfwBuf) - sizeof(complex_t) * PART_LEN1);
 }
 
-static void GetHighbandGain(const float* lambda, float* nlpGainHband) {
+static void ProcessBlock(AecCore* aec) {
   int i;
+  float y[PART_LEN], e[PART_LEN];
+  float scale;
 
-  nlpGainHband[0] = (float)0.0;
-  for (i = freqAvgIc; i < PART_LEN1 - 1; i++) {
-    nlpGainHband[0] += lambda[i];
-  }
-  nlpGainHband[0] /= (float)(PART_LEN1 - 1 - freqAvgIc);
-}
+  float fft[PART_LEN2];
+  float xf[2][PART_LEN1], yf[2][PART_LEN1], ef[2][PART_LEN1];
+  float df[2][PART_LEN1];
+  float far_spectrum = 0.0f;
+  float near_spectrum = 0.0f;
+  float abs_far_spectrum[PART_LEN1];
+  float abs_near_spectrum[PART_LEN1];
 
-static void ComfortNoise(AecCore* aec,
-                         float efw[2][PART_LEN1],
-                         complex_t* comfortNoiseHband,
-                         const float* noisePow,
-                         const float* lambda) {
-  int i, num;
-  float rand[PART_LEN];
-  float noise, noiseAvg, tmp, tmpAvg;
-  int16_t randW16[PART_LEN];
-  complex_t u[PART_LEN1];
+  const float gPow[2] = {0.9f, 0.1f};
 
-  const float pi2 = 6.28318530717959f;
+  // Noise estimate constants.
+  const int noiseInitBlocks = 500 * aec->mult;
+  const float step = 0.1f;
+  const float ramp = 1.0002f;
+  const float gInitNoise[2] = {0.999f, 0.001f};
 
-  // Generate a uniform random array on [0 1]
-  WebRtcSpl_RandUArray(randW16, PART_LEN, &aec->seed);
-  for (i = 0; i < PART_LEN; i++) {
-    rand[i] = ((float)randW16[i]) / 32768;
+  float nearend[PART_LEN];
+  float* nearend_ptr = NULL;
+  float output[PART_LEN];
+  float outputH[PART_LEN];
+
+  float* xf_ptr = NULL;
+
+  // Concatenate old and new nearend blocks.
+  if (aec->sampFreq == 32000) {
+    WebRtc_ReadBuffer(aec->nearFrBufH, (void**)&nearend_ptr, nearend, PART_LEN);
+    memcpy(aec->dBufH + PART_LEN, nearend_ptr, sizeof(nearend));
   }
+  WebRtc_ReadBuffer(aec->nearFrBuf, (void**)&nearend_ptr, nearend, PART_LEN);
+  memcpy(aec->dBuf + PART_LEN, nearend_ptr, sizeof(nearend));
 
-  // Reject LF noise
-  u[0][0] = 0;
-  u[0][1] = 0;
-  for (i = 1; i < PART_LEN1; i++) {
-    tmp = pi2 * rand[i - 1];
+  // ---------- Ooura fft ----------
 
-    noise = sqrtf(noisePow[i]);
-    u[i][0] = noise * cosf(tmp);
-    u[i][1] = -noise * sinf(tmp);
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+  {
+    int16_t farend[PART_LEN];
+    int16_t* farend_ptr = NULL;
+    WebRtc_ReadBuffer(aec->far_time_buf, (void**)&farend_ptr, farend, 1);
+    (void)fwrite(farend_ptr, sizeof(int16_t), PART_LEN, aec->farFile);
+    (void)fwrite(nearend_ptr, sizeof(int16_t), PART_LEN, aec->nearFile);
   }
-  u[PART_LEN][1] = 0;
+#endif
+
+  // We should always have at least one element stored in |far_buf|.
+  assert(WebRtc_available_read(aec->far_buf) > 0);
+  WebRtc_ReadBuffer(aec->far_buf, (void**)&xf_ptr, &xf[0][0], 1);
 
+  // Near fft
+  memcpy(fft, aec->dBuf, sizeof(float) * PART_LEN2);
+  TimeToFrequency(fft, df, 0);
+
+  // Power smoothing
   for (i = 0; i < PART_LEN1; i++) {
-    // This is the proper weighting to match the background noise power
-    tmp = sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0));
-    // tmp = 1 - lambda[i];
-    efw[0][i] += tmp * u[i][0];
-    efw[1][i] += tmp * u[i][1];
+    far_spectrum = (xf_ptr[i] * xf_ptr[i]) +
+                   (xf_ptr[PART_LEN1 + i] * xf_ptr[PART_LEN1 + i]);
+    aec->xPow[i] =
+        gPow[0] * aec->xPow[i] + gPow[1] * aec->num_partitions * far_spectrum;
+    // Calculate absolute spectra
+    abs_far_spectrum[i] = sqrtf(far_spectrum);
+
+    near_spectrum = df[0][i] * df[0][i] + df[1][i] * df[1][i];
+    aec->dPow[i] = gPow[0] * aec->dPow[i] + gPow[1] * near_spectrum;
+    // Calculate absolute spectra
+    abs_near_spectrum[i] = sqrtf(near_spectrum);
   }
 
-  // For H band comfort noise
-  // TODO: don't compute noise and "tmp" twice. Use the previous results.
-  noiseAvg = 0.0;
-  tmpAvg = 0.0;
-  num = 0;
-  if (aec->sampFreq == 32000 && flagHbandCn == 1) {
+  // Estimate noise power. Wait until dPow is more stable.
+  if (aec->noiseEstCtr > 50) {
+    for (i = 0; i < PART_LEN1; i++) {
+      if (aec->dPow[i] < aec->dMinPow[i]) {
+        aec->dMinPow[i] =
+            (aec->dPow[i] + step * (aec->dMinPow[i] - aec->dPow[i])) * ramp;
+      } else {
+        aec->dMinPow[i] *= ramp;
+      }
+    }
+  }
 
-    // average noise scale
-    // average over second half of freq spectrum (i.e., 4->8khz)
-    // TODO: we shouldn't need num. We know how many elements we're summing.
-    for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {
-      num++;
-      noiseAvg += sqrtf(noisePow[i]);
+  // Smooth increasing noise power from zero at the start,
+  // to avoid a sudden burst of comfort noise.
+  if (aec->noiseEstCtr < noiseInitBlocks) {
+    aec->noiseEstCtr++;
+    for (i = 0; i < PART_LEN1; i++) {
+      if (aec->dMinPow[i] > aec->dInitMinPow[i]) {
+        aec->dInitMinPow[i] = gInitNoise[0] * aec->dInitMinPow[i] +
+                              gInitNoise[1] * aec->dMinPow[i];
+      } else {
+        aec->dInitMinPow[i] = aec->dMinPow[i];
+      }
     }
-    noiseAvg /= (float)num;
+    aec->noisePow = aec->dInitMinPow;
+  } else {
+    aec->noisePow = aec->dMinPow;
+  }
 
-    // average nlp scale
-    // average over second half of freq spectrum (i.e., 4->8khz)
-    // TODO: we shouldn't need num. We know how many elements we're summing.
-    num = 0;
-    for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {
-      num++;
-      tmpAvg += sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0));
+  // Block wise delay estimation used for logging
+  if (aec->delay_logging_enabled) {
+    int delay_estimate = 0;
+    if (WebRtc_AddFarSpectrumFloat(
+            aec->delay_estimator_farend, abs_far_spectrum, PART_LEN1) == 0) {
+      delay_estimate = WebRtc_DelayEstimatorProcessFloat(
+          aec->delay_estimator, abs_near_spectrum, PART_LEN1);
+      if (delay_estimate >= 0) {
+        // Update delay estimate buffer.
+        aec->delay_histogram[delay_estimate]++;
+      }
     }
-    tmpAvg /= (float)num;
+  }
 
-    // Use average noise for H band
-    // TODO: we should probably have a new random vector here.
-    // Reject LF noise
-    u[0][0] = 0;
-    u[0][1] = 0;
-    for (i = 1; i < PART_LEN1; i++) {
-      tmp = pi2 * rand[i - 1];
+  // Update the xfBuf block position.
+  aec->xfBufBlockPos--;
+  if (aec->xfBufBlockPos == -1) {
+    aec->xfBufBlockPos = aec->num_partitions - 1;
+  }
 
-      // Use average noise for H band
-      u[i][0] = noiseAvg * (float)cos(tmp);
-      u[i][1] = -noiseAvg * (float)sin(tmp);
-    }
-    u[PART_LEN][1] = 0;
+  // Buffer xf
+  memcpy(aec->xfBuf[0] + aec->xfBufBlockPos * PART_LEN1,
+         xf_ptr,
+         sizeof(float) * PART_LEN1);
+  memcpy(aec->xfBuf[1] + aec->xfBufBlockPos * PART_LEN1,
+         &xf_ptr[PART_LEN1],
+         sizeof(float) * PART_LEN1);
 
-    for (i = 0; i < PART_LEN1; i++) {
-      // Use average NLP weight for H band
-      comfortNoiseHband[i][0] = tmpAvg * u[i][0];
-      comfortNoiseHband[i][1] = tmpAvg * u[i][1];
+  memset(yf, 0, sizeof(yf));
+
+  // Filter far
+  WebRtcAec_FilterFar(aec, yf);
+
+  // Inverse fft to obtain echo estimate and error.
+  fft[0] = yf[0][0];
+  fft[1] = yf[0][PART_LEN];
+  for (i = 1; i < PART_LEN; i++) {
+    fft[2 * i] = yf[0][i];
+    fft[2 * i + 1] = yf[1][i];
+  }
+  aec_rdft_inverse_128(fft);
+
+  scale = 2.0f / PART_LEN2;
+  for (i = 0; i < PART_LEN; i++) {
+    y[i] = fft[PART_LEN + i] * scale;  // fft scaling
+  }
+
+  for (i = 0; i < PART_LEN; i++) {
+    e[i] = nearend_ptr[i] - y[i];
+  }
+
+  // Error fft
+  memcpy(aec->eBuf + PART_LEN, e, sizeof(float) * PART_LEN);
+  memset(fft, 0, sizeof(float) * PART_LEN);
+  memcpy(fft + PART_LEN, e, sizeof(float) * PART_LEN);
+  // TODO(bjornv): Change to use TimeToFrequency().
+  aec_rdft_forward_128(fft);
+
+  ef[1][0] = 0;
+  ef[1][PART_LEN] = 0;
+  ef[0][0] = fft[0];
+  ef[0][PART_LEN] = fft[1];
+  for (i = 1; i < PART_LEN; i++) {
+    ef[0][i] = fft[2 * i];
+    ef[1][i] = fft[2 * i + 1];
+  }
+
+  if (aec->metricsMode == 1) {
+    // Note that the first PART_LEN samples in fft (before transformation) are
+    // zero. Hence, the scaling by two in UpdateLevel() should not be
+    // performed. That scaling is taken care of in UpdateMetrics() instead.
+    UpdateLevel(&aec->linoutlevel, ef);
+  }
+
+  // Scale error signal inversely with far power.
+  WebRtcAec_ScaleErrorSignal(aec, ef);
+  WebRtcAec_FilterAdaptation(aec, fft, ef);
+  NonLinearProcessing(aec, output, outputH);
+
+  if (aec->metricsMode == 1) {
+    // Update power levels and echo metrics
+    UpdateLevel(&aec->farlevel, (float(*)[PART_LEN1])xf_ptr);
+    UpdateLevel(&aec->nearlevel, df);
+    UpdateMetrics(aec);
+  }
+
+  // Store the output block.
+  WebRtc_WriteBuffer(aec->outFrBuf, output, PART_LEN);
+  // For H band
+  if (aec->sampFreq == 32000) {
+    WebRtc_WriteBuffer(aec->outFrBufH, outputH, PART_LEN);
+  }
+
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+  {
+    int16_t eInt16[PART_LEN];
+    for (i = 0; i < PART_LEN; i++) {
+      eInt16[i] = (int16_t)WEBRTC_SPL_SAT(
+          WEBRTC_SPL_WORD16_MAX, e[i], WEBRTC_SPL_WORD16_MIN);
     }
+
+    (void)fwrite(eInt16, sizeof(int16_t), PART_LEN, aec->outLinearFile);
+    (void)fwrite(output, sizeof(int16_t), PART_LEN, aec->outFile);
   }
+#endif
 }
 
-static void InitLevel(PowerLevel* level) {
-  const float kBigFloat = 1E17f;
+int WebRtcAec_CreateAec(AecCore** aecInst) {
+  AecCore* aec = malloc(sizeof(AecCore));
+  *aecInst = aec;
+  if (aec == NULL) {
+    return -1;
+  }
 
-  level->averagelevel = 0;
-  level->framelevel = 0;
-  level->minlevel = kBigFloat;
-  level->frsum = 0;
-  level->sfrsum = 0;
-  level->frcounter = 0;
-  level->sfrcounter = 0;
-}
+  aec->nearFrBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(float));
+  if (!aec->nearFrBuf) {
+    WebRtcAec_FreeAec(aec);
+    aec = NULL;
+    return -1;
+  }
 
-static void InitStats(Stats* stats) {
-  stats->instant = kOffsetLevel;
-  stats->average = kOffsetLevel;
-  stats->max = kOffsetLevel;
-  stats->min = kOffsetLevel * (-1);
-  stats->sum = 0;
-  stats->hisum = 0;
-  stats->himean = kOffsetLevel;
-  stats->counter = 0;
-  stats->hicounter = 0;
+  aec->outFrBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(float));
+  if (!aec->outFrBuf) {
+    WebRtcAec_FreeAec(aec);
+    aec = NULL;
+    return -1;
+  }
+
+  aec->nearFrBufH = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(float));
+  if (!aec->nearFrBufH) {
+    WebRtcAec_FreeAec(aec);
+    aec = NULL;
+    return -1;
+  }
+
+  aec->outFrBufH = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(float));
+  if (!aec->outFrBufH) {
+    WebRtcAec_FreeAec(aec);
+    aec = NULL;
+    return -1;
+  }
+
+  // Create far-end buffers.
+  aec->far_buf =
+      WebRtc_CreateBuffer(kBufSizePartitions, sizeof(float) * 2 * PART_LEN1);
+  if (!aec->far_buf) {
+    WebRtcAec_FreeAec(aec);
+    aec = NULL;
+    return -1;
+  }
+  aec->far_buf_windowed =
+      WebRtc_CreateBuffer(kBufSizePartitions, sizeof(float) * 2 * PART_LEN1);
+  if (!aec->far_buf_windowed) {
+    WebRtcAec_FreeAec(aec);
+    aec = NULL;
+    return -1;
+  }
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+  aec->far_time_buf =
+      WebRtc_CreateBuffer(kBufSizePartitions, sizeof(int16_t) * PART_LEN);
+  if (!aec->far_time_buf) {
+    WebRtcAec_FreeAec(aec);
+    aec = NULL;
+    return -1;
+  }
+  {
+    char filename[64];
+    sprintf(filename, "aec_far%d.pcm", webrtc_aec_instance_count);
+    aec->farFile = fopen(filename, "wb");
+    sprintf(filename, "aec_near%d.pcm", webrtc_aec_instance_count);
+    aec->nearFile = fopen(filename, "wb");
+    sprintf(filename, "aec_out%d.pcm", webrtc_aec_instance_count);
+    aec->outFile = fopen(filename, "wb");
+    sprintf(filename, "aec_out_linear%d.pcm", webrtc_aec_instance_count);
+    aec->outLinearFile = fopen(filename, "wb");
+  }
+#endif
+  aec->delay_estimator_farend =
+      WebRtc_CreateDelayEstimatorFarend(PART_LEN1, kHistorySizeBlocks);
+  if (aec->delay_estimator_farend == NULL) {
+    WebRtcAec_FreeAec(aec);
+    aec = NULL;
+    return -1;
+  }
+  aec->delay_estimator = WebRtc_CreateDelayEstimator(
+      aec->delay_estimator_farend, kLookaheadBlocks);
+  if (aec->delay_estimator == NULL) {
+    WebRtcAec_FreeAec(aec);
+    aec = NULL;
+    return -1;
+  }
+
+  // Assembly optimization
+  WebRtcAec_FilterFar = FilterFar;
+  WebRtcAec_ScaleErrorSignal = ScaleErrorSignal;
+  WebRtcAec_FilterAdaptation = FilterAdaptation;
+  WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppress;
+  WebRtcAec_ComfortNoise = ComfortNoise;
+  WebRtcAec_SubbandCoherence = SubbandCoherence;
+
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+  if (WebRtc_GetCPUInfo(kSSE2)) {
+    WebRtcAec_InitAec_SSE2();
+  }
+#endif
+
+#if defined(MIPS_FPU_LE)
+  WebRtcAec_InitAec_mips();
+#endif
+
+#if defined(WEBRTC_DETECT_ARM_NEON) || defined(WEBRTC_ARCH_ARM_NEON)
+  WebRtcAec_InitAec_neon();
+#endif
+
+  aec_rdft_init();
+
+  return 0;
 }
 
-static void InitMetrics(AecCore* self) {
-  self->stateCounter = 0;
-  InitLevel(&self->farlevel);
-  InitLevel(&self->nearlevel);
-  InitLevel(&self->linoutlevel);
-  InitLevel(&self->nlpoutlevel);
+int WebRtcAec_FreeAec(AecCore* aec) {
+  if (aec == NULL) {
+    return -1;
+  }
 
-  InitStats(&self->erl);
-  InitStats(&self->erle);
-  InitStats(&self->aNlp);
-  InitStats(&self->rerl);
+  WebRtc_FreeBuffer(aec->nearFrBuf);
+  WebRtc_FreeBuffer(aec->outFrBuf);
+
+  WebRtc_FreeBuffer(aec->nearFrBufH);
+  WebRtc_FreeBuffer(aec->outFrBufH);
+
+  WebRtc_FreeBuffer(aec->far_buf);
+  WebRtc_FreeBuffer(aec->far_buf_windowed);
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+  WebRtc_FreeBuffer(aec->far_time_buf);
+  fclose(aec->farFile);
+  fclose(aec->nearFile);
+  fclose(aec->outFile);
+  fclose(aec->outLinearFile);
+#endif
+  WebRtc_FreeDelayEstimator(aec->delay_estimator);
+  WebRtc_FreeDelayEstimatorFarend(aec->delay_estimator_farend);
+
+  free(aec);
+  return 0;
 }
 
-static void UpdateLevel(PowerLevel* level, float in[2][PART_LEN1]) {
-  // Do the energy calculation in the frequency domain. The FFT is performed on
-  // a segment of PART_LEN2 samples due to overlap, but we only want the energy
-  // of half that data (the last PART_LEN samples). Parseval's relation states
-  // that the energy is preserved according to
-  //
-  // \sum_{n=0}^{N-1} |x(n)|^2 = 1/N * \sum_{n=0}^{N-1} |X(n)|^2
-  //                           = ENERGY,
-  //
-  // where N = PART_LEN2. Since we are only interested in calculating the energy
-  // for the last PART_LEN samples we approximate by calculating ENERGY and
-  // divide by 2,
-  //
-  // \sum_{n=N/2}^{N-1} |x(n)|^2 ~= ENERGY / 2
-  //
-  // Since we deal with real valued time domain signals we only store frequency
-  // bins [0, PART_LEN], which is what |in| consists of. To calculate ENERGY we
-  // need to add the contribution from the missing part in
-  // [PART_LEN+1, PART_LEN2-1]. These values are, up to a phase shift, identical
-  // with the values in [1, PART_LEN-1], hence multiply those values by 2. This
-  // is the values in the for loop below, but multiplication by 2 and division
-  // by 2 cancel.
+int WebRtcAec_InitAec(AecCore* aec, int sampFreq) {
+  int i;
 
-  // TODO(bjornv): Investigate reusing energy calculations performed at other
-  // places in the code.
-  int k = 1;
-  // Imaginary parts are zero at end points and left out of the calculation.
-  float energy = (in[0][0] * in[0][0]) / 2;
-  energy += (in[0][PART_LEN] * in[0][PART_LEN]) / 2;
+  aec->sampFreq = sampFreq;
 
-  for (k = 1; k < PART_LEN; k++) {
-    energy += (in[0][k] * in[0][k] + in[1][k] * in[1][k]);
+  if (sampFreq == 8000) {
+    aec->normal_mu = 0.6f;
+    aec->normal_error_threshold = 2e-6f;
+  } else {
+    aec->normal_mu = 0.5f;
+    aec->normal_error_threshold = 1.5e-6f;
   }
-  energy /= PART_LEN2;
 
-  level->sfrsum += energy;
-  level->sfrcounter++;
+  if (WebRtc_InitBuffer(aec->nearFrBuf) == -1) {
+    return -1;
+  }
 
-  if (level->sfrcounter > subCountLen) {
-    level->framelevel = level->sfrsum / (subCountLen * PART_LEN);
-    level->sfrsum = 0;
-    level->sfrcounter = 0;
-    if (level->framelevel > 0) {
-      if (level->framelevel < level->minlevel) {
-        level->minlevel = level->framelevel;  // New minimum.
-      } else {
-        level->minlevel *= (1 + 0.001f);  // Small increase.
-      }
-    }
-    level->frcounter++;
-    level->frsum += level->framelevel;
-    if (level->frcounter > countLen) {
-      level->averagelevel = level->frsum / countLen;
-      level->frsum = 0;
-      level->frcounter = 0;
-    }
+  if (WebRtc_InitBuffer(aec->outFrBuf) == -1) {
+    return -1;
   }
-}
 
-static void UpdateMetrics(AecCore* aec) {
-  float dtmp, dtmp2;
+  if (WebRtc_InitBuffer(aec->nearFrBufH) == -1) {
+    return -1;
+  }
 
-  const float actThresholdNoisy = 8.0f;
-  const float actThresholdClean = 40.0f;
-  const float safety = 0.99995f;
-  const float noisyPower = 300000.0f;
+  if (WebRtc_InitBuffer(aec->outFrBufH) == -1) {
+    return -1;
+  }
 
-  float actThreshold;
-  float echo, suppressedEcho;
+  // Initialize far-end buffers.
+  if (WebRtc_InitBuffer(aec->far_buf) == -1) {
+    return -1;
+  }
+  if (WebRtc_InitBuffer(aec->far_buf_windowed) == -1) {
+    return -1;
+  }
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+  if (WebRtc_InitBuffer(aec->far_time_buf) == -1) {
+    return -1;
+  }
+#endif
+  aec->system_delay = 0;
 
-  if (aec->echoState) {  // Check if echo is likely present
-    aec->stateCounter++;
+  if (WebRtc_InitDelayEstimatorFarend(aec->delay_estimator_farend) != 0) {
+    return -1;
   }
+  if (WebRtc_InitDelayEstimator(aec->delay_estimator) != 0) {
+    return -1;
+  }
+  aec->delay_logging_enabled = 0;
+  memset(aec->delay_histogram, 0, sizeof(aec->delay_histogram));
 
-  if (aec->farlevel.frcounter == 0) {
+  aec->reported_delay_enabled = 1;
+  aec->extended_filter_enabled = 0;
+  aec->num_partitions = kNormalNumPartitions;
 
-    if (aec->farlevel.minlevel < noisyPower) {
-      actThreshold = actThresholdClean;
-    } else {
-      actThreshold = actThresholdNoisy;
-    }
+  // Update the delay estimator with filter length.  We use half the
+  // |num_partitions| to take the echo path into account.  In practice we say
+  // that the echo has a duration of maximum half |num_partitions|, which is not
+  // true, but serves as a crude measure.
+  WebRtc_set_allowed_offset(aec->delay_estimator, aec->num_partitions / 2);
+  // TODO(bjornv): I currently hard coded the enable.  Once we've established
+  // that AECM has no performance regression, robust_validation will be enabled
+  // all the time and the APIs to turn it on/off will be removed.  Hence, remove
+  // this line then.
+  WebRtc_enable_robust_validation(aec->delay_estimator, 1);
 
-    if ((aec->stateCounter > (0.5f * countLen * subCountLen)) &&
-        (aec->farlevel.sfrcounter == 0)
+  // Default target suppression mode.
+  aec->nlp_mode = 1;
 
-        // Estimate in active far-end segments only
-        &&
-        (aec->farlevel.averagelevel >
-         (actThreshold * aec->farlevel.minlevel))) {
+  // Sampling frequency multiplier
+  // SWB is processed as 160 frame size
+  if (aec->sampFreq == 32000) {
+    aec->mult = (short)aec->sampFreq / 16000;
+  } else {
+    aec->mult = (short)aec->sampFreq / 8000;
+  }
 
-      // Subtract noise power
-      echo = aec->nearlevel.averagelevel - safety * aec->nearlevel.minlevel;
+  aec->farBufWritePos = 0;
+  aec->farBufReadPos = 0;
 
-      // ERL
-      dtmp = 10 * (float)log10(aec->farlevel.averagelevel /
-                                   aec->nearlevel.averagelevel +
-                               1e-10f);
-      dtmp2 = 10 * (float)log10(aec->farlevel.averagelevel / echo + 1e-10f);
+  aec->inSamples = 0;
+  aec->outSamples = 0;
+  aec->knownDelay = 0;
 
-      aec->erl.instant = dtmp;
-      if (dtmp > aec->erl.max) {
-        aec->erl.max = dtmp;
-      }
+  // Initialize buffers
+  memset(aec->dBuf, 0, sizeof(aec->dBuf));
+  memset(aec->eBuf, 0, sizeof(aec->eBuf));
+  // For H band
+  memset(aec->dBufH, 0, sizeof(aec->dBufH));
 
-      if (dtmp < aec->erl.min) {
-        aec->erl.min = dtmp;
-      }
+  memset(aec->xPow, 0, sizeof(aec->xPow));
+  memset(aec->dPow, 0, sizeof(aec->dPow));
+  memset(aec->dInitMinPow, 0, sizeof(aec->dInitMinPow));
+  aec->noisePow = aec->dInitMinPow;
+  aec->noiseEstCtr = 0;
 
-      aec->erl.counter++;
-      aec->erl.sum += dtmp;
-      aec->erl.average = aec->erl.sum / aec->erl.counter;
+  // Initial comfort noise power
+  for (i = 0; i < PART_LEN1; i++) {
+    aec->dMinPow[i] = 1.0e6f;
+  }
 
-      // Upper mean
-      if (dtmp > aec->erl.average) {
-        aec->erl.hicounter++;
-        aec->erl.hisum += dtmp;
-        aec->erl.himean = aec->erl.hisum / aec->erl.hicounter;
-      }
+  // Holds the last block written to
+  aec->xfBufBlockPos = 0;
+  // TODO: Investigate need for these initializations. Deleting them doesn't
+  //       change the output at all and yields 0.4% overall speedup.
+  memset(aec->xfBuf, 0, sizeof(complex_t) * kExtendedNumPartitions * PART_LEN1);
+  memset(aec->wfBuf, 0, sizeof(complex_t) * kExtendedNumPartitions * PART_LEN1);
+  memset(aec->sde, 0, sizeof(complex_t) * PART_LEN1);
+  memset(aec->sxd, 0, sizeof(complex_t) * PART_LEN1);
+  memset(
+      aec->xfwBuf, 0, sizeof(complex_t) * kExtendedNumPartitions * PART_LEN1);
+  memset(aec->se, 0, sizeof(float) * PART_LEN1);
 
-      // A_NLP
-      dtmp = 10 * (float)log10(aec->nearlevel.averagelevel /
-                                   (2 * aec->linoutlevel.averagelevel) +
-                               1e-10f);
+  // To prevent numerical instability in the first block.
+  for (i = 0; i < PART_LEN1; i++) {
+    aec->sd[i] = 1;
+  }
+  for (i = 0; i < PART_LEN1; i++) {
+    aec->sx[i] = 1;
+  }
 
-      // subtract noise power
-      suppressedEcho = 2 * (aec->linoutlevel.averagelevel -
-                            safety * aec->linoutlevel.minlevel);
+  memset(aec->hNs, 0, sizeof(aec->hNs));
+  memset(aec->outBuf, 0, sizeof(float) * PART_LEN);
 
-      dtmp2 = 10 * (float)log10(echo / suppressedEcho + 1e-10f);
+  aec->hNlFbMin = 1;
+  aec->hNlFbLocalMin = 1;
+  aec->hNlXdAvgMin = 1;
+  aec->hNlNewMin = 0;
+  aec->hNlMinCtr = 0;
+  aec->overDrive = 2;
+  aec->overDriveSm = 2;
+  aec->delayIdx = 0;
+  aec->stNearState = 0;
+  aec->echoState = 0;
+  aec->divergeState = 0;
 
-      aec->aNlp.instant = dtmp2;
-      if (dtmp > aec->aNlp.max) {
-        aec->aNlp.max = dtmp;
-      }
+  aec->seed = 777;
+  aec->delayEstCtr = 0;
 
-      if (dtmp < aec->aNlp.min) {
-        aec->aNlp.min = dtmp;
-      }
+  // Metrics disabled by default
+  aec->metricsMode = 0;
+  InitMetrics(aec);
 
-      aec->aNlp.counter++;
-      aec->aNlp.sum += dtmp;
-      aec->aNlp.average = aec->aNlp.sum / aec->aNlp.counter;
+  return 0;
+}
 
-      // Upper mean
-      if (dtmp > aec->aNlp.average) {
-        aec->aNlp.hicounter++;
-        aec->aNlp.hisum += dtmp;
-        aec->aNlp.himean = aec->aNlp.hisum / aec->aNlp.hicounter;
-      }
+void WebRtcAec_BufferFarendPartition(AecCore* aec, const float* farend) {
+  float fft[PART_LEN2];
+  float xf[2][PART_LEN1];
 
-      // ERLE
+  // Check if the buffer is full, and in that case flush the oldest data.
+  if (WebRtc_available_write(aec->far_buf) < 1) {
+    WebRtcAec_MoveFarReadPtr(aec, 1);
+  }
+  // Convert far-end partition to the frequency domain without windowing.
+  memcpy(fft, farend, sizeof(float) * PART_LEN2);
+  TimeToFrequency(fft, xf, 0);
+  WebRtc_WriteBuffer(aec->far_buf, &xf[0][0], 1);
 
-      // subtract noise power
-      suppressedEcho = 2 * (aec->nlpoutlevel.averagelevel -
-                            safety * aec->nlpoutlevel.minlevel);
+  // Convert far-end partition to the frequency domain with windowing.
+  memcpy(fft, farend, sizeof(float) * PART_LEN2);
+  TimeToFrequency(fft, xf, 1);
+  WebRtc_WriteBuffer(aec->far_buf_windowed, &xf[0][0], 1);
+}
 
-      dtmp = 10 * (float)log10(aec->nearlevel.averagelevel /
-                                   (2 * aec->nlpoutlevel.averagelevel) +
-                               1e-10f);
-      dtmp2 = 10 * (float)log10(echo / suppressedEcho + 1e-10f);
+int WebRtcAec_MoveFarReadPtr(AecCore* aec, int elements) {
+  int elements_moved = WebRtc_MoveReadPtr(aec->far_buf_windowed, elements);
+  WebRtc_MoveReadPtr(aec->far_buf, elements);
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+  WebRtc_MoveReadPtr(aec->far_time_buf, elements);
+#endif
+  aec->system_delay -= elements_moved * PART_LEN;
+  return elements_moved;
+}
 
-      dtmp = dtmp2;
-      aec->erle.instant = dtmp;
-      if (dtmp > aec->erle.max) {
-        aec->erle.max = dtmp;
-      }
+void WebRtcAec_ProcessFrame(AecCore* aec,
+                            const float* nearend,
+                            const float* nearendH,
+                            int knownDelay,
+                            float* out,
+                            float* outH) {
+  int out_elements = 0;
 
-      if (dtmp < aec->erle.min) {
-        aec->erle.min = dtmp;
-      }
+  // For each frame the process is as follows:
+  // 1) If the system_delay indicates on being too small for processing a
+  //    frame we stuff the buffer with enough data for 10 ms.
+  // 2) Adjust the buffer to the system delay, by moving the read pointer.
+  // 3) TODO(bjornv): Investigate if we need to add this:
+  //    If we can't move read pointer due to buffer size limitations we
+  //    flush/stuff the buffer.
+  // 4) Process as many partitions as possible.
+  // 5) Update the |system_delay| with respect to a full frame of FRAME_LEN
+  //    samples. Even though we will have data left to process (we work with
+  //    partitions) we consider updating a whole frame, since that's the
+  //    amount of data we input and output in audio_processing.
+  // 6) Update the outputs.
 
-      aec->erle.counter++;
-      aec->erle.sum += dtmp;
-      aec->erle.average = aec->erle.sum / aec->erle.counter;
+  // TODO(bjornv): Investigate how we should round the delay difference; right
+  // now we know that incoming |knownDelay| is underestimated when it's less
+  // than |aec->knownDelay|. We therefore, round (-32) in that direction. In
+  // the other direction, we don't have this situation, but might flush one
+  // partition too little. This can cause non-causality, which should be
+  // investigated. Maybe, allow for a non-symmetric rounding, like -16.
+  int move_elements = (aec->knownDelay - knownDelay - 32) / PART_LEN;
+  int moved_elements = 0;
 
-      // Upper mean
-      if (dtmp > aec->erle.average) {
-        aec->erle.hicounter++;
-        aec->erle.hisum += dtmp;
-        aec->erle.himean = aec->erle.hisum / aec->erle.hicounter;
-      }
-    }
+  // TODO(bjornv): Change the near-end buffer handling to be the same as for
+  // far-end, that is, with a near_pre_buf.
+  // Buffer the near-end frame.
+  WebRtc_WriteBuffer(aec->nearFrBuf, nearend, FRAME_LEN);
+  // For H band
+  if (aec->sampFreq == 32000) {
+    WebRtc_WriteBuffer(aec->nearFrBufH, nearendH, FRAME_LEN);
+  }
 
-    aec->stateCounter = 0;
+  // 1) At most we process |aec->mult|+1 partitions in 10 ms. Make sure we
+  // have enough far-end data for that by stuffing the buffer if the
+  // |system_delay| indicates others.
+  if (aec->system_delay < FRAME_LEN) {
+    // We don't have enough data so we rewind 10 ms.
+    WebRtcAec_MoveFarReadPtr(aec, -(aec->mult + 1));
+  }
+
+  // 2) Compensate for a possible change in the system delay.
+  WebRtc_MoveReadPtr(aec->far_buf_windowed, move_elements);
+  moved_elements = WebRtc_MoveReadPtr(aec->far_buf, move_elements);
+  aec->knownDelay -= moved_elements * PART_LEN;
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+  WebRtc_MoveReadPtr(aec->far_time_buf, move_elements);
+#endif
+
+  // 4) Process as many blocks as possible.
+  while (WebRtc_available_read(aec->nearFrBuf) >= PART_LEN) {
+    ProcessBlock(aec);
+  }
+
+  // 5) Update system delay with respect to the entire frame.
+  aec->system_delay -= FRAME_LEN;
+
+  // 6) Update output frame.
+  // Stuff the out buffer if we have less than a frame to output.
+  // This should only happen for the first frame.
+  out_elements = (int)WebRtc_available_read(aec->outFrBuf);
+  if (out_elements < FRAME_LEN) {
+    WebRtc_MoveReadPtr(aec->outFrBuf, out_elements - FRAME_LEN);
+    if (aec->sampFreq == 32000) {
+      WebRtc_MoveReadPtr(aec->outFrBufH, out_elements - FRAME_LEN);
+    }
+  }
+  // Obtain an output frame.
+  WebRtc_ReadBuffer(aec->outFrBuf, NULL, out, FRAME_LEN);
+  // For H band.
+  if (aec->sampFreq == 32000) {
+    WebRtc_ReadBuffer(aec->outFrBufH, NULL, outH, FRAME_LEN);
   }
 }
 
-static void TimeToFrequency(float time_data[PART_LEN2],
-                            float freq_data[2][PART_LEN1],
-                            int window) {
+int WebRtcAec_GetDelayMetricsCore(AecCore* self, int* median, int* std) {
   int i = 0;
+  int delay_values = 0;
+  int num_delay_values = 0;
+  int my_median = 0;
+  const int kMsPerBlock = PART_LEN / (self->mult * 8);
+  float l1_norm = 0;
 
-  // TODO(bjornv): Should we have a different function/wrapper for windowed FFT?
-  if (window) {
-    for (i = 0; i < PART_LEN; i++) {
-      time_data[i] *= WebRtcAec_sqrtHanning[i];
-      time_data[PART_LEN + i] *= WebRtcAec_sqrtHanning[PART_LEN - i];
+  assert(self != NULL);
+  assert(median != NULL);
+  assert(std != NULL);
+
+  if (self->delay_logging_enabled == 0) {
+    // Logging disabled.
+    return -1;
+  }
+
+  // Get number of delay values since last update.
+  for (i = 0; i < kHistorySizeBlocks; i++) {
+    num_delay_values += self->delay_histogram[i];
+  }
+  if (num_delay_values == 0) {
+    // We have no new delay value data. Even though -1 is a valid estimate, it
+    // will practically never be used since multiples of |kMsPerBlock| will
+    // always be returned.
+    *median = -1;
+    *std = -1;
+    return 0;
+  }
+
+  delay_values = num_delay_values >> 1;  // Start value for median count down.
+  // Get median of delay values since last update.
+  for (i = 0; i < kHistorySizeBlocks; i++) {
+    delay_values -= self->delay_histogram[i];
+    if (delay_values < 0) {
+      my_median = i;
+      break;
     }
   }
+  // Account for lookahead.
+  *median = (my_median - kLookaheadBlocks) * kMsPerBlock;
 
-  aec_rdft_forward_128(time_data);
-  // Reorder.
-  freq_data[1][0] = 0;
-  freq_data[1][PART_LEN] = 0;
-  freq_data[0][0] = time_data[0];
-  freq_data[0][PART_LEN] = time_data[1];
-  for (i = 1; i < PART_LEN; i++) {
-    freq_data[0][i] = time_data[2 * i];
-    freq_data[1][i] = time_data[2 * i + 1];
+  // Calculate the L1 norm, with median value as central moment.
+  for (i = 0; i < kHistorySizeBlocks; i++) {
+    l1_norm += (float)abs(i - my_median) * self->delay_histogram[i];
+  }
+  *std = (int)(l1_norm / (float)num_delay_values + 0.5f) * kMsPerBlock;
+
+  // Reset histogram.
+  memset(self->delay_histogram, 0, sizeof(self->delay_histogram));
+
+  return 0;
+}
+
+int WebRtcAec_echo_state(AecCore* self) { return self->echoState; }
+
+void WebRtcAec_GetEchoStats(AecCore* self,
+                            Stats* erl,
+                            Stats* erle,
+                            Stats* a_nlp) {
+  assert(erl != NULL);
+  assert(erle != NULL);
+  assert(a_nlp != NULL);
+  *erl = self->erl;
+  *erle = self->erle;
+  *a_nlp = self->aNlp;
+}
+
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+void* WebRtcAec_far_time_buf(AecCore* self) { return self->far_time_buf; }
+#endif
+
+void WebRtcAec_SetConfigCore(AecCore* self,
+                             int nlp_mode,
+                             int metrics_mode,
+                             int delay_logging) {
+  assert(nlp_mode >= 0 && nlp_mode < 3);
+  self->nlp_mode = nlp_mode;
+  self->metricsMode = metrics_mode;
+  if (self->metricsMode) {
+    InitMetrics(self);
   }
+  self->delay_logging_enabled = delay_logging;
+  if (self->delay_logging_enabled) {
+    memset(self->delay_histogram, 0, sizeof(self->delay_histogram));
+  }
+}
+
+void WebRtcAec_enable_reported_delay(AecCore* self, int enable) {
+  self->reported_delay_enabled = enable;
+}
+
+int WebRtcAec_reported_delay_enabled(AecCore* self) {
+  return self->reported_delay_enabled;
+}
+
+void WebRtcAec_enable_delay_correction(AecCore* self, int enable) {
+  self->extended_filter_enabled = enable;
+  self->num_partitions = enable ? kExtendedNumPartitions : kNormalNumPartitions;
+  // Update the delay estimator with filter length.  See InitAEC() for details.
+  WebRtc_set_allowed_offset(self->delay_estimator, self->num_partitions / 2);
+}
+
+int WebRtcAec_delay_correction_enabled(AecCore* self) {
+  return self->extended_filter_enabled;
+}
+
+int WebRtcAec_system_delay(AecCore* self) { return self->system_delay; }
+
+void WebRtcAec_SetSystemDelay(AecCore* self, int delay) {
+  assert(delay >= 0);
+  self->system_delay = delay;
 }
+
diff --git a/modules/audio_processing/aec/aec_rdft.c b/modules/audio_processing/aec/aec_rdft.c
index 5b1c2210..51be0347 100644
--- a/modules/audio_processing/aec/aec_rdft.c
+++ b/modules/audio_processing/aec/aec_rdft.c
@@ -26,95 +26,102 @@
 #include "webrtc/system_wrappers/interface/cpu_features_wrapper.h"
 #include "webrtc/typedefs.h"
 
-// constants shared by all paths (C, SSE2).
-float rdft_w[64];
-// constants used by the C path.
-float rdft_wk3ri_first[32];
-float rdft_wk3ri_second[32];
-// constants used by SSE2 but initialized in C path.
-ALIGN16_BEG float ALIGN16_END rdft_wk1r[32];
-ALIGN16_BEG float ALIGN16_END rdft_wk2r[32];
-ALIGN16_BEG float ALIGN16_END rdft_wk3r[32];
-ALIGN16_BEG float ALIGN16_END rdft_wk1i[32];
-ALIGN16_BEG float ALIGN16_END rdft_wk2i[32];
-ALIGN16_BEG float ALIGN16_END rdft_wk3i[32];
-ALIGN16_BEG float ALIGN16_END cftmdl_wk1r[4];
-
-static int ip[16];
-
-static void bitrv2_32(int* ip, float* a) {
-  const int n = 32;
-  int j, j1, k, k1, m, m2;
-  float xr, xi, yr, yi;
-
-  ip[0] = 0;
-  {
-    int l = n;
-    m = 1;
-    while ((m << 3) < l) {
-      l >>= 1;
-      for (j = 0; j < m; j++) {
-        ip[m + j] = ip[j] + l;
-      }
-      m <<= 1;
-    }
-  }
-  m2 = 2 * m;
-  for (k = 0; k < m; k++) {
-    for (j = 0; j < k; j++) {
-      j1 = 2 * j + ip[k];
-      k1 = 2 * k + ip[j];
-      xr = a[j1];
-      xi = a[j1 + 1];
-      yr = a[k1];
-      yi = a[k1 + 1];
-      a[j1] = yr;
-      a[j1 + 1] = yi;
-      a[k1] = xr;
-      a[k1 + 1] = xi;
-      j1 += m2;
-      k1 += 2 * m2;
-      xr = a[j1];
-      xi = a[j1 + 1];
-      yr = a[k1];
-      yi = a[k1 + 1];
-      a[j1] = yr;
-      a[j1 + 1] = yi;
-      a[k1] = xr;
-      a[k1 + 1] = xi;
-      j1 += m2;
-      k1 -= m2;
-      xr = a[j1];
-      xi = a[j1 + 1];
-      yr = a[k1];
-      yi = a[k1 + 1];
-      a[j1] = yr;
-      a[j1 + 1] = yi;
-      a[k1] = xr;
-      a[k1 + 1] = xi;
-      j1 += m2;
-      k1 += 2 * m2;
-      xr = a[j1];
-      xi = a[j1 + 1];
-      yr = a[k1];
-      yi = a[k1 + 1];
-      a[j1] = yr;
-      a[j1 + 1] = yi;
-      a[k1] = xr;
-      a[k1 + 1] = xi;
-    }
-    j1 = 2 * k + m2 + ip[k];
-    k1 = j1 + m2;
-    xr = a[j1];
-    xi = a[j1 + 1];
-    yr = a[k1];
-    yi = a[k1 + 1];
-    a[j1] = yr;
-    a[j1 + 1] = yi;
-    a[k1] = xr;
-    a[k1 + 1] = xi;
-  }
-}
+// These tables used to be computed at run-time. For example, refer to:
+// https://code.google.com/p/webrtc/source/browse/trunk/webrtc/modules/audio_processing/aec/aec_rdft.c?r=6564
+// to see the initialization code.
+const float rdft_w[64] = {
+    1.0000000000f, 0.0000000000f, 0.7071067691f, 0.7071067691f,
+    0.9238795638f, 0.3826834559f, 0.3826834559f, 0.9238795638f,
+    0.9807852507f, 0.1950903237f, 0.5555702448f, 0.8314695954f,
+    0.8314695954f, 0.5555702448f, 0.1950903237f, 0.9807852507f,
+    0.9951847196f, 0.0980171412f, 0.6343933344f, 0.7730104327f,
+    0.8819212914f, 0.4713967443f, 0.2902846634f, 0.9569403529f,
+    0.9569403529f, 0.2902846634f, 0.4713967443f, 0.8819212914f,
+    0.7730104327f, 0.6343933344f, 0.0980171412f, 0.9951847196f,
+    0.7071067691f, 0.4993977249f, 0.4975923598f, 0.4945882559f,
+    0.4903926253f, 0.4850156307f, 0.4784701765f, 0.4707720280f,
+    0.4619397819f, 0.4519946277f, 0.4409606457f, 0.4288643003f,
+    0.4157347977f, 0.4016037583f, 0.3865052164f, 0.3704755902f,
+    0.3535533845f, 0.3357794881f, 0.3171966672f, 0.2978496552f,
+    0.2777851224f, 0.2570513785f, 0.2356983721f, 0.2137775421f,
+    0.1913417280f, 0.1684449315f, 0.1451423317f, 0.1214900985f,
+    0.0975451618f, 0.0733652338f, 0.0490085706f, 0.0245338380f,
+};
+const float rdft_wk3ri_first[16] = {
+    1.000000000f, 0.000000000f, 0.382683456f, 0.923879564f,
+    0.831469536f, 0.555570245f, -0.195090353f, 0.980785251f,
+    0.956940353f, 0.290284693f, 0.098017156f, 0.995184720f,
+    0.634393334f, 0.773010492f, -0.471396863f, 0.881921172f,
+};
+const float rdft_wk3ri_second[16] = {
+    -0.707106769f, 0.707106769f, -0.923879564f, -0.382683456f,
+    -0.980785251f, 0.195090353f, -0.555570245f, -0.831469536f,
+    -0.881921172f, 0.471396863f, -0.773010492f, -0.634393334f,
+    -0.995184720f, -0.098017156f, -0.290284693f, -0.956940353f,
+};
+ALIGN16_BEG const float ALIGN16_END rdft_wk1r[32] = {
+    1.000000000f, 1.000000000f, 0.707106769f, 0.707106769f,
+    0.923879564f, 0.923879564f, 0.382683456f, 0.382683456f,
+    0.980785251f, 0.980785251f, 0.555570245f, 0.555570245f,
+    0.831469595f, 0.831469595f, 0.195090324f, 0.195090324f,
+    0.995184720f, 0.995184720f, 0.634393334f, 0.634393334f,
+    0.881921291f, 0.881921291f, 0.290284663f, 0.290284663f,
+    0.956940353f, 0.956940353f, 0.471396744f, 0.471396744f,
+    0.773010433f, 0.773010433f, 0.098017141f, 0.098017141f,
+};
+ALIGN16_BEG const float ALIGN16_END rdft_wk2r[32] = {
+    1.000000000f, 1.000000000f, -0.000000000f, -0.000000000f,
+    0.707106769f, 0.707106769f, -0.707106769f, -0.707106769f,
+    0.923879564f, 0.923879564f, -0.382683456f, -0.382683456f,
+    0.382683456f, 0.382683456f, -0.923879564f, -0.923879564f,
+    0.980785251f, 0.980785251f, -0.195090324f, -0.195090324f,
+    0.555570245f, 0.555570245f, -0.831469595f, -0.831469595f,
+    0.831469595f, 0.831469595f, -0.555570245f, -0.555570245f,
+    0.195090324f, 0.195090324f, -0.980785251f, -0.980785251f,
+};
+ALIGN16_BEG const float ALIGN16_END rdft_wk3r[32] = {
+    1.000000000f, 1.000000000f, -0.707106769f, -0.707106769f,
+    0.382683456f, 0.382683456f, -0.923879564f, -0.923879564f,
+    0.831469536f, 0.831469536f, -0.980785251f, -0.980785251f,
+    -0.195090353f, -0.195090353f, -0.555570245f, -0.555570245f,
+    0.956940353f, 0.956940353f, -0.881921172f, -0.881921172f,
+    0.098017156f, 0.098017156f, -0.773010492f, -0.773010492f,
+    0.634393334f, 0.634393334f, -0.995184720f, -0.995184720f,
+    -0.471396863f, -0.471396863f, -0.290284693f, -0.290284693f,
+};
+ALIGN16_BEG const float ALIGN16_END rdft_wk1i[32] = {
+    -0.000000000f, 0.000000000f, -0.707106769f, 0.707106769f,
+    -0.382683456f, 0.382683456f, -0.923879564f, 0.923879564f,
+    -0.195090324f, 0.195090324f, -0.831469595f, 0.831469595f,
+    -0.555570245f, 0.555570245f, -0.980785251f, 0.980785251f,
+    -0.098017141f, 0.098017141f, -0.773010433f, 0.773010433f,
+    -0.471396744f, 0.471396744f, -0.956940353f, 0.956940353f,
+    -0.290284663f, 0.290284663f, -0.881921291f, 0.881921291f,
+    -0.634393334f, 0.634393334f, -0.995184720f, 0.995184720f,
+};
+ALIGN16_BEG const float ALIGN16_END rdft_wk2i[32] = {
+    -0.000000000f, 0.000000000f, -1.000000000f, 1.000000000f,
+    -0.707106769f, 0.707106769f, -0.707106769f, 0.707106769f,
+    -0.382683456f, 0.382683456f, -0.923879564f, 0.923879564f,
+    -0.923879564f, 0.923879564f, -0.382683456f, 0.382683456f,
+    -0.195090324f, 0.195090324f, -0.980785251f, 0.980785251f,
+    -0.831469595f, 0.831469595f, -0.555570245f, 0.555570245f,
+    -0.555570245f, 0.555570245f, -0.831469595f, 0.831469595f,
+    -0.980785251f, 0.980785251f, -0.195090324f, 0.195090324f,
+};
+ALIGN16_BEG const float ALIGN16_END rdft_wk3i[32] = {
+    -0.000000000f, 0.000000000f, -0.707106769f, 0.707106769f,
+    -0.923879564f, 0.923879564f, 0.382683456f, -0.382683456f,
+    -0.555570245f, 0.555570245f, -0.195090353f, 0.195090353f,
+    -0.980785251f, 0.980785251f, 0.831469536f, -0.831469536f,
+    -0.290284693f, 0.290284693f, -0.471396863f, 0.471396863f,
+    -0.995184720f, 0.995184720f, 0.634393334f, -0.634393334f,
+    -0.773010492f, 0.773010492f, 0.098017156f, -0.098017156f,
+    -0.881921172f, 0.881921172f, 0.956940353f, -0.956940353f,
+};
+ALIGN16_BEG const float ALIGN16_END cftmdl_wk1r[4] = {
+    0.707106769f, 0.707106769f, 0.707106769f, -0.707106769f,
+};
 
 static void bitrv2_128_C(float* a) {
   /*
@@ -190,97 +197,6 @@ static void bitrv2_128_C(float* a) {
   }
 }
 
-static void makewt_32(void) {
-  const int nw = 32;
-  int j, nwh;
-  float delta, x, y;
-
-  ip[0] = nw;
-  ip[1] = 1;
-  nwh = nw >> 1;
-  delta = atanf(1.0f) / nwh;
-  rdft_w[0] = 1;
-  rdft_w[1] = 0;
-  rdft_w[nwh] = cosf(delta * nwh);
-  rdft_w[nwh + 1] = rdft_w[nwh];
-  for (j = 2; j < nwh; j += 2) {
-    x = cosf(delta * j);
-    y = sinf(delta * j);
-    rdft_w[j] = x;
-    rdft_w[j + 1] = y;
-    rdft_w[nw - j] = y;
-    rdft_w[nw - j + 1] = x;
-  }
-  bitrv2_32(ip + 2, rdft_w);
-
-  // pre-calculate constants used by cft1st_128 and cftmdl_128...
-  cftmdl_wk1r[0] = rdft_w[2];
-  cftmdl_wk1r[1] = rdft_w[2];
-  cftmdl_wk1r[2] = rdft_w[2];
-  cftmdl_wk1r[3] = -rdft_w[2];
-  {
-    int k1;
-
-    for (k1 = 0, j = 0; j < 128; j += 16, k1 += 2) {
-      const int k2 = 2 * k1;
-      const float wk2r = rdft_w[k1 + 0];
-      const float wk2i = rdft_w[k1 + 1];
-      float wk1r, wk1i;
-      // ... scalar version.
-      wk1r = rdft_w[k2 + 0];
-      wk1i = rdft_w[k2 + 1];
-      rdft_wk3ri_first[k1 + 0] = wk1r - 2 * wk2i * wk1i;
-      rdft_wk3ri_first[k1 + 1] = 2 * wk2i * wk1r - wk1i;
-      wk1r = rdft_w[k2 + 2];
-      wk1i = rdft_w[k2 + 3];
-      rdft_wk3ri_second[k1 + 0] = wk1r - 2 * wk2r * wk1i;
-      rdft_wk3ri_second[k1 + 1] = 2 * wk2r * wk1r - wk1i;
-      // ... vector version.
-      rdft_wk1r[k2 + 0] = rdft_w[k2 + 0];
-      rdft_wk1r[k2 + 1] = rdft_w[k2 + 0];
-      rdft_wk1r[k2 + 2] = rdft_w[k2 + 2];
-      rdft_wk1r[k2 + 3] = rdft_w[k2 + 2];
-      rdft_wk2r[k2 + 0] = rdft_w[k1 + 0];
-      rdft_wk2r[k2 + 1] = rdft_w[k1 + 0];
-      rdft_wk2r[k2 + 2] = -rdft_w[k1 + 1];
-      rdft_wk2r[k2 + 3] = -rdft_w[k1 + 1];
-      rdft_wk3r[k2 + 0] = rdft_wk3ri_first[k1 + 0];
-      rdft_wk3r[k2 + 1] = rdft_wk3ri_first[k1 + 0];
-      rdft_wk3r[k2 + 2] = rdft_wk3ri_second[k1 + 0];
-      rdft_wk3r[k2 + 3] = rdft_wk3ri_second[k1 + 0];
-      rdft_wk1i[k2 + 0] = -rdft_w[k2 + 1];
-      rdft_wk1i[k2 + 1] = rdft_w[k2 + 1];
-      rdft_wk1i[k2 + 2] = -rdft_w[k2 + 3];
-      rdft_wk1i[k2 + 3] = rdft_w[k2 + 3];
-      rdft_wk2i[k2 + 0] = -rdft_w[k1 + 1];
-      rdft_wk2i[k2 + 1] = rdft_w[k1 + 1];
-      rdft_wk2i[k2 + 2] = -rdft_w[k1 + 0];
-      rdft_wk2i[k2 + 3] = rdft_w[k1 + 0];
-      rdft_wk3i[k2 + 0] = -rdft_wk3ri_first[k1 + 1];
-      rdft_wk3i[k2 + 1] = rdft_wk3ri_first[k1 + 1];
-      rdft_wk3i[k2 + 2] = -rdft_wk3ri_second[k1 + 1];
-      rdft_wk3i[k2 + 3] = rdft_wk3ri_second[k1 + 1];
-    }
-  }
-}
-
-static void makect_32(void) {
-  float* c = rdft_w + 32;
-  const int nc = 32;
-  int j, nch;
-  float delta;
-
-  ip[1] = nc;
-  nch = nc >> 1;
-  delta = atanf(1.0f) / nch;
-  c[0] = cosf(delta * nch);
-  c[nch] = 0.5f * c[0];
-  for (j = 1; j < nch; j++) {
-    c[j] = 0.5f * cosf(delta * j);
-    c[nc - j] = 0.5f * sinf(delta * j);
-  }
-}
-
 static void cft1st_128_C(float* a) {
   const int n = 128;
   int j, k1, k2;
@@ -666,7 +582,4 @@ void aec_rdft_init(void) {
 #if defined(WEBRTC_DETECT_ARM_NEON) || defined(WEBRTC_ARCH_ARM_NEON)
   aec_rdft_init_neon();
 #endif
-  // init library constants.
-  makewt_32();
-  makect_32();
 }
diff --git a/modules/audio_processing/aec/aec_rdft.h b/modules/audio_processing/aec/aec_rdft.h
index 3b339a05..e72c7216 100644
--- a/modules/audio_processing/aec/aec_rdft.h
+++ b/modules/audio_processing/aec/aec_rdft.h
@@ -21,19 +21,19 @@ static __inline __m128 _mm_castsi128_ps(__m128i a) { return *(__m128*)&a; }
 static __inline __m128i _mm_castps_si128(__m128 a) { return *(__m128i*)&a; }
 #endif
 
-// constants shared by all paths (C, SSE2).
-extern float rdft_w[64];
-// constants used by the C path.
-extern float rdft_wk3ri_first[32];
-extern float rdft_wk3ri_second[32];
-// constants used by SSE2 but initialized in C path.
-extern ALIGN16_BEG float ALIGN16_END rdft_wk1r[32];
-extern ALIGN16_BEG float ALIGN16_END rdft_wk2r[32];
-extern ALIGN16_BEG float ALIGN16_END rdft_wk3r[32];
-extern ALIGN16_BEG float ALIGN16_END rdft_wk1i[32];
-extern ALIGN16_BEG float ALIGN16_END rdft_wk2i[32];
-extern ALIGN16_BEG float ALIGN16_END rdft_wk3i[32];
-extern ALIGN16_BEG float ALIGN16_END cftmdl_wk1r[4];
+// Constants shared by all paths (C, SSE2, NEON).
+extern const float rdft_w[64];
+// Constants used by the C path.
+extern const float rdft_wk3ri_first[16];
+extern const float rdft_wk3ri_second[16];
+// Constants used by SSE2 and NEON but initialized in the C path.
+extern ALIGN16_BEG const float ALIGN16_END rdft_wk1r[32];
+extern ALIGN16_BEG const float ALIGN16_END rdft_wk2r[32];
+extern ALIGN16_BEG const float ALIGN16_END rdft_wk3r[32];
+extern ALIGN16_BEG const float ALIGN16_END rdft_wk1i[32];
+extern ALIGN16_BEG const float ALIGN16_END rdft_wk2i[32];
+extern ALIGN16_BEG const float ALIGN16_END rdft_wk3i[32];
+extern ALIGN16_BEG const float ALIGN16_END cftmdl_wk1r[4];
 
 // code path selection function pointers
 typedef void (*rft_sub_128_t)(float* a);