diff options
Diffstat (limited to 'webrtc/modules/audio_processing/aec')
-rw-r--r-- | webrtc/modules/audio_processing/aec/aec_core.c | 625 | ||||
-rw-r--r-- | webrtc/modules/audio_processing/aec/aec_core_internal.h | 47 | ||||
-rw-r--r-- | webrtc/modules/audio_processing/aec/aec_core_mips.c | 89 | ||||
-rw-r--r-- | webrtc/modules/audio_processing/aec/aec_core_neon.c | 184 | ||||
-rw-r--r-- | webrtc/modules/audio_processing/aec/aec_core_sse2.c | 193 | ||||
-rw-r--r-- | webrtc/modules/audio_processing/aec/echo_cancellation.c | 139 | ||||
-rw-r--r-- | webrtc/modules/audio_processing/aec/echo_cancellation.h (renamed from webrtc/modules/audio_processing/aec/include/echo_cancellation.h) | 58 | ||||
-rw-r--r-- | webrtc/modules/audio_processing/aec/echo_cancellation_internal.h | 2 | ||||
-rw-r--r-- | webrtc/modules/audio_processing/aec/echo_cancellation_unittest.cc | 2 | ||||
-rw-r--r-- | webrtc/modules/audio_processing/aec/system_delay_unittest.cc | 3 |
10 files changed, 688 insertions, 654 deletions
diff --git a/webrtc/modules/audio_processing/aec/aec_core.c b/webrtc/modules/audio_processing/aec/aec_core.c index f8eed32372..901e0fde0b 100644 --- a/webrtc/modules/audio_processing/aec/aec_core.c +++ b/webrtc/modules/audio_processing/aec/aec_core.c @@ -44,7 +44,6 @@ static const int countLen = 50; static const int kDelayMetricsAggregationWindow = 1250; // 5 seconds at 16 kHz. // Quantities to control H band scaling for SWB input -static const int flagHbandCn = 1; // flag for adding comfort noise in H band static const float cnScaleHband = (float)0.4; // scale for comfort noise in H band // Initial bin for averaging nlp gain in low band @@ -135,6 +134,9 @@ WebRtcAecFilterAdaptation WebRtcAec_FilterAdaptation; WebRtcAecOverdriveAndSuppress WebRtcAec_OverdriveAndSuppress; WebRtcAecComfortNoise WebRtcAec_ComfortNoise; WebRtcAecSubBandCoherence WebRtcAec_SubbandCoherence; +WebRtcAecStoreAsComplex WebRtcAec_StoreAsComplex; +WebRtcAecPartitionDelay WebRtcAec_PartitionDelay; +WebRtcAecWindowData WebRtcAec_WindowData; __inline static float MulRe(float aRe, float aIm, float bRe, float bIm) { return aRe * bRe - aIm * bIm; @@ -151,40 +153,49 @@ static int CmpFloat(const void* a, const void* b) { return (*da > *db) - (*da < *db); } -static void FilterFar(AecCore* aec, float yf[2][PART_LEN1]) { +static void FilterFar( + int num_partitions, + int x_fft_buf_block_pos, + float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], + float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1], + float y_fft[2][PART_LEN1]) { int i; - for (i = 0; i < aec->num_partitions; i++) { + for (i = 0; i < num_partitions; i++) { int j; - int xPos = (i + aec->xfBufBlockPos) * PART_LEN1; + int xPos = (i + x_fft_buf_block_pos) * PART_LEN1; int pos = i * PART_LEN1; // Check for wrap - if (i + aec->xfBufBlockPos >= aec->num_partitions) { - xPos -= aec->num_partitions * (PART_LEN1); + if (i + x_fft_buf_block_pos >= num_partitions) { + xPos -= num_partitions * (PART_LEN1); } for (j = 0; j < PART_LEN1; j++) { - yf[0][j] += MulRe(aec->xfBuf[0][xPos + j], - aec->xfBuf[1][xPos + j], - aec->wfBuf[0][pos + j], - aec->wfBuf[1][pos + j]); - yf[1][j] += MulIm(aec->xfBuf[0][xPos + j], - aec->xfBuf[1][xPos + j], - aec->wfBuf[0][pos + j], - aec->wfBuf[1][pos + j]); + y_fft[0][j] += MulRe(x_fft_buf[0][xPos + j], + x_fft_buf[1][xPos + j], + h_fft_buf[0][pos + j], + h_fft_buf[1][pos + j]); + y_fft[1][j] += MulIm(x_fft_buf[0][xPos + j], + x_fft_buf[1][xPos + j], + h_fft_buf[0][pos + j], + h_fft_buf[1][pos + j]); } } } -static void ScaleErrorSignal(AecCore* aec, float ef[2][PART_LEN1]) { - const float mu = aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu; - const float error_threshold = aec->extended_filter_enabled +static void ScaleErrorSignal(int extended_filter_enabled, + float normal_mu, + float normal_error_threshold, + float x_pow[PART_LEN1], + float ef[2][PART_LEN1]) { + const float mu = extended_filter_enabled ? kExtendedMu : normal_mu; + const float error_threshold = extended_filter_enabled ? kExtendedErrorThreshold - : aec->normal_error_threshold; + : normal_error_threshold; int i; float abs_ef; for (i = 0; i < (PART_LEN1); i++) { - ef[0][i] /= (aec->xPow[i] + 1e-10f); - ef[1][i] /= (aec->xPow[i] + 1e-10f); + ef[0][i] /= (x_pow[i] + 1e-10f); + ef[1][i] /= (x_pow[i] + 1e-10f); abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]); if (abs_ef > error_threshold) { @@ -199,59 +210,40 @@ static void ScaleErrorSignal(AecCore* aec, float ef[2][PART_LEN1]) { } } -// Time-unconstrined filter adaptation. -// TODO(andrew): consider for a low-complexity mode. -// static void FilterAdaptationUnconstrained(AecCore* aec, float *fft, -// float ef[2][PART_LEN1]) { -// int i, j; -// for (i = 0; i < aec->num_partitions; i++) { -// int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1); -// int pos; -// // Check for wrap -// if (i + aec->xfBufBlockPos >= aec->num_partitions) { -// xPos -= aec->num_partitions * PART_LEN1; -// } -// -// pos = i * PART_LEN1; -// -// for (j = 0; j < PART_LEN1; j++) { -// aec->wfBuf[0][pos + j] += MulRe(aec->xfBuf[0][xPos + j], -// -aec->xfBuf[1][xPos + j], -// ef[0][j], ef[1][j]); -// aec->wfBuf[1][pos + j] += MulIm(aec->xfBuf[0][xPos + j], -// -aec->xfBuf[1][xPos + j], -// ef[0][j], ef[1][j]); -// } -// } -//} - -static void FilterAdaptation(AecCore* aec, float* fft, float ef[2][PART_LEN1]) { + +static void FilterAdaptation( + int num_partitions, + int x_fft_buf_block_pos, + float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], + float e_fft[2][PART_LEN1], + float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) { int i, j; - for (i = 0; i < aec->num_partitions; i++) { - int xPos = (i + aec->xfBufBlockPos) * (PART_LEN1); + float fft[PART_LEN2]; + for (i = 0; i < num_partitions; i++) { + int xPos = (i + x_fft_buf_block_pos) * (PART_LEN1); int pos; // Check for wrap - if (i + aec->xfBufBlockPos >= aec->num_partitions) { - xPos -= aec->num_partitions * PART_LEN1; + if (i + x_fft_buf_block_pos >= num_partitions) { + xPos -= num_partitions * PART_LEN1; } pos = i * PART_LEN1; for (j = 0; j < PART_LEN; j++) { - fft[2 * j] = MulRe(aec->xfBuf[0][xPos + j], - -aec->xfBuf[1][xPos + j], - ef[0][j], - ef[1][j]); - fft[2 * j + 1] = MulIm(aec->xfBuf[0][xPos + j], - -aec->xfBuf[1][xPos + j], - ef[0][j], - ef[1][j]); + fft[2 * j] = MulRe(x_fft_buf[0][xPos + j], + -x_fft_buf[1][xPos + j], + e_fft[0][j], + e_fft[1][j]); + fft[2 * j + 1] = MulIm(x_fft_buf[0][xPos + j], + -x_fft_buf[1][xPos + j], + e_fft[0][j], + e_fft[1][j]); } - fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN], - -aec->xfBuf[1][xPos + PART_LEN], - ef[0][PART_LEN], - ef[1][PART_LEN]); + fft[1] = MulRe(x_fft_buf[0][xPos + PART_LEN], + -x_fft_buf[1][xPos + PART_LEN], + e_fft[0][PART_LEN], + e_fft[1][PART_LEN]); aec_rdft_inverse_128(fft); memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN); @@ -265,12 +257,12 @@ static void FilterAdaptation(AecCore* aec, float* fft, float ef[2][PART_LEN1]) { } aec_rdft_forward_128(fft); - aec->wfBuf[0][pos] += fft[0]; - aec->wfBuf[0][pos + PART_LEN] += fft[1]; + h_fft_buf[0][pos] += fft[0]; + h_fft_buf[0][pos + PART_LEN] += fft[1]; for (j = 1; j < PART_LEN; j++) { - aec->wfBuf[0][pos + j] += fft[2 * j]; - aec->wfBuf[1][pos + j] += fft[2 * j + 1]; + h_fft_buf[0][pos + j] += fft[2 * j]; + h_fft_buf[1][pos + j] += fft[2 * j + 1]; } } } @@ -334,12 +326,13 @@ const float WebRtcAec_kMinFarendPSD = 15; // - sde : cross-PSD of near-end and residual echo // - sxd : cross-PSD of near-end and far-end // -// In addition to updating the PSDs, also the filter diverge state is determined -// upon actions are taken. +// In addition to updating the PSDs, also the filter diverge state is +// determined. static void SmoothedPSD(AecCore* aec, float efw[2][PART_LEN1], float dfw[2][PART_LEN1], - float xfw[2][PART_LEN1]) { + float xfw[2][PART_LEN1], + int* extreme_filter_divergence) { // Power estimate smoothing coefficients. const float* ptrGCoh = aec->extended_filter_enabled ? WebRtcAec_kExtendedSmoothingCoefficients[aec->mult - 1] @@ -380,15 +373,12 @@ static void SmoothedPSD(AecCore* aec, seSum += aec->se[i]; } - // Divergent filter safeguard. + // Divergent filter safeguard update. aec->divergeState = (aec->divergeState ? 1.05f : 1.0f) * seSum > sdSum; - if (aec->divergeState) - memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1); - - // Reset if error is significantly larger than nearend (13 dB). - if (!aec->extended_filter_enabled && seSum > (19.95f * sdSum)) - memset(aec->wfBuf, 0, sizeof(aec->wfBuf)); + // Signal extreme filter divergence if the error is significantly larger + // than the nearend (13 dB). + *extreme_filter_divergence = (seSum > (19.95f * sdSum)); } // Window time domain data to be used by the fft. @@ -417,32 +407,15 @@ __inline static void StoreAsComplex(const float* data, static void SubbandCoherence(AecCore* aec, float efw[2][PART_LEN1], + float dfw[2][PART_LEN1], float xfw[2][PART_LEN1], float* fft, float* cohde, - float* cohxd) { - float dfw[2][PART_LEN1]; + float* cohxd, + int* extreme_filter_divergence) { int i; - if (aec->delayEstCtr == 0) - aec->delayIdx = PartitionDelay(aec); - - // Use delayed far. - memcpy(xfw, - aec->xfwBuf + aec->delayIdx * PART_LEN1, - sizeof(xfw[0][0]) * 2 * PART_LEN1); - - // Windowed near fft - WindowData(fft, aec->dBuf); - aec_rdft_forward_128(fft); - StoreAsComplex(fft, dfw); - - // Windowed error fft - WindowData(fft, aec->eBuf); - aec_rdft_forward_128(fft); - StoreAsComplex(fft, efw); - - SmoothedPSD(aec, efw, dfw, xfw); + SmoothedPSD(aec, efw, dfw, xfw, extreme_filter_divergence); // Subband coherence for (i = 0; i < PART_LEN1; i++) { @@ -458,23 +431,23 @@ static void SubbandCoherence(AecCore* aec, static void GetHighbandGain(const float* lambda, float* nlpGainHband) { int i; - nlpGainHband[0] = (float)0.0; + *nlpGainHband = (float)0.0; for (i = freqAvgIc; i < PART_LEN1 - 1; i++) { - nlpGainHband[0] += lambda[i]; + *nlpGainHband += lambda[i]; } - nlpGainHband[0] /= (float)(PART_LEN1 - 1 - freqAvgIc); + *nlpGainHband /= (float)(PART_LEN1 - 1 - freqAvgIc); } static void ComfortNoise(AecCore* aec, float efw[2][PART_LEN1], - complex_t* comfortNoiseHband, + float comfortNoiseHband[2][PART_LEN1], const float* noisePow, const float* lambda) { int i, num; float rand[PART_LEN]; float noise, noiseAvg, tmp, tmpAvg; int16_t randW16[PART_LEN]; - complex_t u[PART_LEN1]; + float u[2][PART_LEN1]; const float pi2 = 6.28318530717959f; @@ -486,22 +459,22 @@ static void ComfortNoise(AecCore* aec, // Reject LF noise u[0][0] = 0; - u[0][1] = 0; + u[1][0] = 0; for (i = 1; i < PART_LEN1; i++) { tmp = pi2 * rand[i - 1]; noise = sqrtf(noisePow[i]); - u[i][0] = noise * cosf(tmp); - u[i][1] = -noise * sinf(tmp); + u[0][i] = noise * cosf(tmp); + u[1][i] = -noise * sinf(tmp); } - u[PART_LEN][1] = 0; + u[1][PART_LEN] = 0; for (i = 0; i < PART_LEN1; i++) { // This is the proper weighting to match the background noise power tmp = sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0)); // tmp = 1 - lambda[i]; - efw[0][i] += tmp * u[i][0]; - efw[1][i] += tmp * u[i][1]; + efw[0][i] += tmp * u[0][i]; + efw[1][i] += tmp * u[1][i]; } // For H band comfort noise @@ -509,7 +482,7 @@ static void ComfortNoise(AecCore* aec, noiseAvg = 0.0; tmpAvg = 0.0; num = 0; - if (aec->num_bands > 1 && flagHbandCn == 1) { + if (aec->num_bands > 1) { // average noise scale // average over second half of freq spectrum (i.e., 4->8khz) @@ -534,21 +507,24 @@ static void ComfortNoise(AecCore* aec, // TODO: we should probably have a new random vector here. // Reject LF noise u[0][0] = 0; - u[0][1] = 0; + u[1][0] = 0; for (i = 1; i < PART_LEN1; i++) { tmp = pi2 * rand[i - 1]; // Use average noise for H band - u[i][0] = noiseAvg * (float)cos(tmp); - u[i][1] = -noiseAvg * (float)sin(tmp); + u[0][i] = noiseAvg * (float)cos(tmp); + u[1][i] = -noiseAvg * (float)sin(tmp); } - u[PART_LEN][1] = 0; + u[1][PART_LEN] = 0; for (i = 0; i < PART_LEN1; i++) { // Use average NLP weight for H band - comfortNoiseHband[i][0] = tmpAvg * u[i][0]; - comfortNoiseHband[i][1] = tmpAvg * u[i][1]; + comfortNoiseHband[0][i] = tmpAvg * u[0][i]; + comfortNoiseHband[1][i] = tmpAvg * u[1][i]; } + } else { + memset(comfortNoiseHband, 0, + 2 * PART_LEN1 * sizeof(comfortNoiseHband[0][0])); } } @@ -837,21 +813,29 @@ static void UpdateDelayMetrics(AecCore* self) { return; } -static void TimeToFrequency(float time_data[PART_LEN2], - float freq_data[2][PART_LEN1], - int window) { - int i = 0; - - // TODO(bjornv): Should we have a different function/wrapper for windowed FFT? - if (window) { - for (i = 0; i < PART_LEN; i++) { - time_data[i] *= WebRtcAec_sqrtHanning[i]; - time_data[PART_LEN + i] *= WebRtcAec_sqrtHanning[PART_LEN - i]; - } +static void ScaledInverseFft(float freq_data[2][PART_LEN1], + float time_data[PART_LEN2], + float scale, + int conjugate) { + int i; + const float normalization = scale / ((float)PART_LEN2); + const float sign = (conjugate ? -1 : 1); + time_data[0] = freq_data[0][0] * normalization; + time_data[1] = freq_data[0][PART_LEN] * normalization; + for (i = 1; i < PART_LEN; i++) { + time_data[2 * i] = freq_data[0][i] * normalization; + time_data[2 * i + 1] = sign * freq_data[1][i] * normalization; } + aec_rdft_inverse_128(time_data); +} + +static void Fft(float time_data[PART_LEN2], + float freq_data[2][PART_LEN1]) { + int i; aec_rdft_forward_128(time_data); - // Reorder. + + // Reorder fft output data. freq_data[1][0] = 0; freq_data[1][PART_LEN] = 0; freq_data[0][0] = time_data[0]; @@ -862,13 +846,6 @@ static void TimeToFrequency(float time_data[PART_LEN2], } } -static int MoveFarReadPtrWithoutSystemDelayUpdate(AecCore* self, int elements) { - WebRtc_MoveReadPtr(self->far_buf_windowed, elements); -#ifdef WEBRTC_AEC_DEBUG_DUMP - WebRtc_MoveReadPtr(self->far_time_buf, elements); -#endif - return WebRtc_MoveReadPtr(self->far_buf, elements); -} static int SignalBasedDelayCorrection(AecCore* self) { int delay_correction = 0; @@ -909,7 +886,7 @@ static int SignalBasedDelayCorrection(AecCore* self) { const int upper_bound = self->num_partitions * 3 / 4; const int do_correction = delay <= lower_bound || delay > upper_bound; if (do_correction == 1) { - int available_read = (int)WebRtc_available_read(self->far_buf); + int available_read = (int)WebRtc_available_read(self->far_time_buf); // With |shift_offset| we gradually rely on the delay estimates. For // positive delays we reduce the correction by |shift_offset| to lower the // risk of pushing the AEC into a non causal state. For negative delays @@ -942,13 +919,94 @@ static int SignalBasedDelayCorrection(AecCore* self) { return delay_correction; } -static void NonLinearProcessing(AecCore* aec, - float* output, - float* const* outputH) { - float efw[2][PART_LEN1], xfw[2][PART_LEN1]; - complex_t comfortNoiseHband[PART_LEN1]; +static void EchoSubtraction( + AecCore* aec, + int num_partitions, + int x_fft_buf_block_pos, + int metrics_mode, + int extended_filter_enabled, + float normal_mu, + float normal_error_threshold, + float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], + float* const y, + float x_pow[PART_LEN1], + float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1], + PowerLevel* linout_level, + float echo_subtractor_output[PART_LEN]) { + float s_fft[2][PART_LEN1]; + float e_extended[PART_LEN2]; + float s_extended[PART_LEN2]; + float *s; + float e[PART_LEN]; + float e_fft[2][PART_LEN1]; + int i; + memset(s_fft, 0, sizeof(s_fft)); + + // Conditionally reset the echo subtraction filter if the filter has diverged + // significantly. + if (!aec->extended_filter_enabled && + aec->extreme_filter_divergence) { + memset(aec->wfBuf, 0, sizeof(aec->wfBuf)); + aec->extreme_filter_divergence = 0; + } + + // Produce echo estimate s_fft. + WebRtcAec_FilterFar(num_partitions, + x_fft_buf_block_pos, + x_fft_buf, + h_fft_buf, + s_fft); + + // Compute the time-domain echo estimate s. + ScaledInverseFft(s_fft, s_extended, 2.0f, 0); + s = &s_extended[PART_LEN]; + + // Compute the time-domain echo prediction error. + for (i = 0; i < PART_LEN; ++i) { + e[i] = y[i] - s[i]; + } + + // Compute the frequency domain echo prediction error. + memset(e_extended, 0, sizeof(float) * PART_LEN); + memcpy(e_extended + PART_LEN, e, sizeof(float) * PART_LEN); + Fft(e_extended, e_fft); + + RTC_AEC_DEBUG_RAW_WRITE(aec->e_fft_file, + &e_fft[0][0], + sizeof(e_fft[0][0]) * PART_LEN1 * 2); + + if (metrics_mode == 1) { + // Note that the first PART_LEN samples in fft (before transformation) are + // zero. Hence, the scaling by two in UpdateLevel() should not be + // performed. That scaling is taken care of in UpdateMetrics() instead. + UpdateLevel(linout_level, e_fft); + } + + // Scale error signal inversely with far power. + WebRtcAec_ScaleErrorSignal(extended_filter_enabled, + normal_mu, + normal_error_threshold, + x_pow, + e_fft); + WebRtcAec_FilterAdaptation(num_partitions, + x_fft_buf_block_pos, + x_fft_buf, + e_fft, + h_fft_buf); + memcpy(echo_subtractor_output, e, sizeof(float) * PART_LEN); +} + + +static void EchoSuppression(AecCore* aec, + float farend[PART_LEN2], + float* echo_subtractor_output, + float* output, + float* const* outputH) { + float efw[2][PART_LEN1]; + float xfw[2][PART_LEN1]; + float dfw[2][PART_LEN1]; + float comfortNoiseHband[2][PART_LEN1]; float fft[PART_LEN2]; - float scale, dtmp; float nlpGainHband; int i; size_t j; @@ -972,27 +1030,51 @@ static void NonLinearProcessing(AecCore* aec, float* xfw_ptr = NULL; - aec->delayEstCtr++; - if (aec->delayEstCtr == delayEstInterval) { - aec->delayEstCtr = 0; - } + // Update eBuf with echo subtractor output. + memcpy(aec->eBuf + PART_LEN, + echo_subtractor_output, + sizeof(float) * PART_LEN); - // initialize comfort noise for H band - memset(comfortNoiseHband, 0, sizeof(comfortNoiseHband)); - nlpGainHband = (float)0.0; - dtmp = (float)0.0; + // Analysis filter banks for the echo suppressor. + // Windowed near-end ffts. + WindowData(fft, aec->dBuf); + aec_rdft_forward_128(fft); + StoreAsComplex(fft, dfw); + + // Windowed echo suppressor output ffts. + WindowData(fft, aec->eBuf); + aec_rdft_forward_128(fft); + StoreAsComplex(fft, efw); - // We should always have at least one element stored in |far_buf|. - assert(WebRtc_available_read(aec->far_buf_windowed) > 0); // NLP - WebRtc_ReadBuffer(aec->far_buf_windowed, (void**)&xfw_ptr, &xfw[0][0], 1); - // TODO(bjornv): Investigate if we can reuse |far_buf_windowed| instead of - // |xfwBuf|. + // Convert far-end partition to the frequency domain with windowing. + WindowData(fft, farend); + Fft(fft, xfw); + xfw_ptr = &xfw[0][0]; + // Buffer far. memcpy(aec->xfwBuf, xfw_ptr, sizeof(float) * 2 * PART_LEN1); - WebRtcAec_SubbandCoherence(aec, efw, xfw, fft, cohde, cohxd); + aec->delayEstCtr++; + if (aec->delayEstCtr == delayEstInterval) { + aec->delayEstCtr = 0; + aec->delayIdx = WebRtcAec_PartitionDelay(aec); + } + + // Use delayed far. + memcpy(xfw, + aec->xfwBuf + aec->delayIdx * PART_LEN1, + sizeof(xfw[0][0]) * 2 * PART_LEN1); + + WebRtcAec_SubbandCoherence(aec, efw, dfw, xfw, fft, cohde, cohxd, + &aec->extreme_filter_divergence); + + // Select the microphone signal as output if the filter is deemed to have + // diverged. + if (aec->divergeState) { + memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1); + } hNlXdAvg = 0; for (i = minPrefBand; i < prefBandSize + minPrefBand; i++) { @@ -1098,67 +1180,51 @@ static void NonLinearProcessing(AecCore* aec, // scaling only in UpdateMetrics(). UpdateLevel(&aec->nlpoutlevel, efw); } + // Inverse error fft. - fft[0] = efw[0][0]; - fft[1] = efw[0][PART_LEN]; - for (i = 1; i < PART_LEN; i++) { - fft[2 * i] = efw[0][i]; - // Sign change required by Ooura fft. - fft[2 * i + 1] = -efw[1][i]; - } - aec_rdft_inverse_128(fft); + ScaledInverseFft(efw, fft, 2.0f, 1); // Overlap and add to obtain output. - scale = 2.0f / PART_LEN2; for (i = 0; i < PART_LEN; i++) { - fft[i] *= scale; // fft scaling - fft[i] = fft[i] * WebRtcAec_sqrtHanning[i] + aec->outBuf[i]; - - fft[PART_LEN + i] *= scale; // fft scaling - aec->outBuf[i] = fft[PART_LEN + i] * WebRtcAec_sqrtHanning[PART_LEN - i]; + output[i] = (fft[i] * WebRtcAec_sqrtHanning[i] + + aec->outBuf[i] * WebRtcAec_sqrtHanning[PART_LEN - i]); // Saturate output to keep it in the allowed range. output[i] = WEBRTC_SPL_SAT( - WEBRTC_SPL_WORD16_MAX, fft[i], WEBRTC_SPL_WORD16_MIN); + WEBRTC_SPL_WORD16_MAX, output[i], WEBRTC_SPL_WORD16_MIN); } + memcpy(aec->outBuf, &fft[PART_LEN], PART_LEN * sizeof(aec->outBuf[0])); // For H band if (aec->num_bands > 1) { - // H band gain // average nlp over low band: average over second half of freq spectrum // (4->8khz) GetHighbandGain(hNl, &nlpGainHband); // Inverse comfort_noise - if (flagHbandCn == 1) { - fft[0] = comfortNoiseHband[0][0]; - fft[1] = comfortNoiseHband[PART_LEN][0]; - for (i = 1; i < PART_LEN; i++) { - fft[2 * i] = comfortNoiseHband[i][0]; - fft[2 * i + 1] = comfortNoiseHband[i][1]; - } - aec_rdft_inverse_128(fft); - scale = 2.0f / PART_LEN2; - } + ScaledInverseFft(comfortNoiseHband, fft, 2.0f, 0); // compute gain factor for (j = 0; j < aec->num_bands - 1; ++j) { for (i = 0; i < PART_LEN; i++) { - dtmp = aec->dBufH[j][i]; - dtmp = dtmp * nlpGainHband; // for variable gain + outputH[j][i] = aec->dBufH[j][i] * nlpGainHband; + } + } - // add some comfort noise where Hband is attenuated - if (flagHbandCn == 1 && j == 0) { - fft[i] *= scale; // fft scaling - dtmp += cnScaleHband * fft[i]; - } + // Add some comfort noise where Hband is attenuated. + for (i = 0; i < PART_LEN; i++) { + outputH[0][i] += cnScaleHband * fft[i]; + } - // Saturate output to keep it in the allowed range. + // Saturate output to keep it in the allowed range. + for (j = 0; j < aec->num_bands - 1; ++j) { + for (i = 0; i < PART_LEN; i++) { outputH[j][i] = WEBRTC_SPL_SAT( - WEBRTC_SPL_WORD16_MAX, dtmp, WEBRTC_SPL_WORD16_MIN); + WEBRTC_SPL_WORD16_MAX, outputH[j][i], WEBRTC_SPL_WORD16_MIN); } } + } // Copy the current block to the old position. @@ -1177,11 +1243,9 @@ static void NonLinearProcessing(AecCore* aec, static void ProcessBlock(AecCore* aec) { size_t i; - float y[PART_LEN], e[PART_LEN]; - float scale; float fft[PART_LEN2]; - float xf[2][PART_LEN1], yf[2][PART_LEN1], ef[2][PART_LEN1]; + float xf[2][PART_LEN1]; float df[2][PART_LEN1]; float far_spectrum = 0.0f; float near_spectrum = 0.0f; @@ -1198,15 +1262,18 @@ static void ProcessBlock(AecCore* aec) { float nearend[PART_LEN]; float* nearend_ptr = NULL; + float farend[PART_LEN2]; + float* farend_ptr = NULL; + float echo_subtractor_output[PART_LEN]; float output[PART_LEN]; float outputH[NUM_HIGH_BANDS_MAX][PART_LEN]; float* outputH_ptr[NUM_HIGH_BANDS_MAX]; + float* xf_ptr = NULL; + for (i = 0; i < NUM_HIGH_BANDS_MAX; ++i) { outputH_ptr[i] = outputH[i]; } - float* xf_ptr = NULL; - // Concatenate old and new nearend blocks. for (i = 0; i < aec->num_bands - 1; ++i) { WebRtc_ReadBuffer(aec->nearFrBufH[i], @@ -1218,25 +1285,28 @@ static void ProcessBlock(AecCore* aec) { WebRtc_ReadBuffer(aec->nearFrBuf, (void**)&nearend_ptr, nearend, PART_LEN); memcpy(aec->dBuf + PART_LEN, nearend_ptr, sizeof(nearend)); - // ---------- Ooura fft ---------- + // We should always have at least one element stored in |far_buf|. + assert(WebRtc_available_read(aec->far_time_buf) > 0); + WebRtc_ReadBuffer(aec->far_time_buf, (void**)&farend_ptr, farend, 1); #ifdef WEBRTC_AEC_DEBUG_DUMP { - float farend[PART_LEN]; - float* farend_ptr = NULL; - WebRtc_ReadBuffer(aec->far_time_buf, (void**)&farend_ptr, farend, 1); - RTC_AEC_DEBUG_WAV_WRITE(aec->farFile, farend_ptr, PART_LEN); + // TODO(minyue): |farend_ptr| starts from buffered samples. This will be + // modified when |aec->far_time_buf| is revised. + RTC_AEC_DEBUG_WAV_WRITE(aec->farFile, &farend_ptr[PART_LEN], PART_LEN); + RTC_AEC_DEBUG_WAV_WRITE(aec->nearFile, nearend_ptr, PART_LEN); } #endif - // We should always have at least one element stored in |far_buf|. - assert(WebRtc_available_read(aec->far_buf) > 0); - WebRtc_ReadBuffer(aec->far_buf, (void**)&xf_ptr, &xf[0][0], 1); + // Convert far-end signal to the frequency domain. + memcpy(fft, farend_ptr, sizeof(float) * PART_LEN2); + Fft(fft, xf); + xf_ptr = &xf[0][0]; // Near fft memcpy(fft, aec->dBuf, sizeof(float) * PART_LEN2); - TimeToFrequency(fft, df, 0); + Fft(fft, df); // Power smoothing for (i = 0; i < PART_LEN1; i++) { @@ -1314,60 +1384,25 @@ static void ProcessBlock(AecCore* aec) { &xf_ptr[PART_LEN1], sizeof(float) * PART_LEN1); - memset(yf, 0, sizeof(yf)); - - // Filter far - WebRtcAec_FilterFar(aec, yf); - - // Inverse fft to obtain echo estimate and error. - fft[0] = yf[0][0]; - fft[1] = yf[0][PART_LEN]; - for (i = 1; i < PART_LEN; i++) { - fft[2 * i] = yf[0][i]; - fft[2 * i + 1] = yf[1][i]; - } - aec_rdft_inverse_128(fft); - - scale = 2.0f / PART_LEN2; - for (i = 0; i < PART_LEN; i++) { - y[i] = fft[PART_LEN + i] * scale; // fft scaling - } - - for (i = 0; i < PART_LEN; i++) { - e[i] = nearend_ptr[i] - y[i]; - } - - // Error fft - memcpy(aec->eBuf + PART_LEN, e, sizeof(float) * PART_LEN); - memset(fft, 0, sizeof(float) * PART_LEN); - memcpy(fft + PART_LEN, e, sizeof(float) * PART_LEN); - // TODO(bjornv): Change to use TimeToFrequency(). - aec_rdft_forward_128(fft); - - ef[1][0] = 0; - ef[1][PART_LEN] = 0; - ef[0][0] = fft[0]; - ef[0][PART_LEN] = fft[1]; - for (i = 1; i < PART_LEN; i++) { - ef[0][i] = fft[2 * i]; - ef[1][i] = fft[2 * i + 1]; - } - - RTC_AEC_DEBUG_RAW_WRITE(aec->e_fft_file, - &ef[0][0], - sizeof(ef[0][0]) * PART_LEN1 * 2); - - if (aec->metricsMode == 1) { - // Note that the first PART_LEN samples in fft (before transformation) are - // zero. Hence, the scaling by two in UpdateLevel() should not be - // performed. That scaling is taken care of in UpdateMetrics() instead. - UpdateLevel(&aec->linoutlevel, ef); - } - - // Scale error signal inversely with far power. - WebRtcAec_ScaleErrorSignal(aec, ef); - WebRtcAec_FilterAdaptation(aec, fft, ef); - NonLinearProcessing(aec, output, outputH_ptr); + // Perform echo subtraction. + EchoSubtraction(aec, + aec->num_partitions, + aec->xfBufBlockPos, + aec->metricsMode, + aec->extended_filter_enabled, + aec->normal_mu, + aec->normal_error_threshold, + aec->xfBuf, + nearend_ptr, + aec->xPow, + aec->wfBuf, + &aec->linoutlevel, + echo_subtractor_output); + + RTC_AEC_DEBUG_WAV_WRITE(aec->outLinearFile, echo_subtractor_output, PART_LEN); + + // Perform echo suppression. + EchoSuppression(aec, farend_ptr, echo_subtractor_output, output, outputH_ptr); if (aec->metricsMode == 1) { // Update power levels and echo metrics @@ -1383,7 +1418,6 @@ static void ProcessBlock(AecCore* aec) { WebRtc_WriteBuffer(aec->outFrBufH[i], outputH[i], PART_LEN); } - RTC_AEC_DEBUG_WAV_WRITE(aec->outLinearFile, e, PART_LEN); RTC_AEC_DEBUG_WAV_WRITE(aec->outFile, output, PART_LEN); } @@ -1422,26 +1456,20 @@ AecCore* WebRtcAec_CreateAec() { } // Create far-end buffers. - aec->far_buf = - WebRtc_CreateBuffer(kBufSizePartitions, sizeof(float) * 2 * PART_LEN1); - if (!aec->far_buf) { - WebRtcAec_FreeAec(aec); - return NULL; - } - aec->far_buf_windowed = - WebRtc_CreateBuffer(kBufSizePartitions, sizeof(float) * 2 * PART_LEN1); - if (!aec->far_buf_windowed) { - WebRtcAec_FreeAec(aec); - return NULL; - } -#ifdef WEBRTC_AEC_DEBUG_DUMP - aec->instance_index = webrtc_aec_instance_count; + // For bit exactness with legacy code, each element in |far_time_buf| is + // supposed to contain |PART_LEN2| samples with an overlap of |PART_LEN| + // samples from the last frame. + // TODO(minyue): reduce |far_time_buf| to non-overlapped |PART_LEN| samples. aec->far_time_buf = - WebRtc_CreateBuffer(kBufSizePartitions, sizeof(float) * PART_LEN); + WebRtc_CreateBuffer(kBufSizePartitions, sizeof(float) * PART_LEN2); if (!aec->far_time_buf) { WebRtcAec_FreeAec(aec); return NULL; } + +#ifdef WEBRTC_AEC_DEBUG_DUMP + aec->instance_index = webrtc_aec_instance_count; + aec->farFile = aec->nearFile = aec->outFile = aec->outLinearFile = NULL; aec->debug_dump_count = 0; #endif @@ -1477,6 +1505,10 @@ AecCore* WebRtcAec_CreateAec() { WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppress; WebRtcAec_ComfortNoise = ComfortNoise; WebRtcAec_SubbandCoherence = SubbandCoherence; + WebRtcAec_StoreAsComplex = StoreAsComplex; + WebRtcAec_PartitionDelay = PartitionDelay; + WebRtcAec_WindowData = WindowData; + #if defined(WEBRTC_ARCH_X86_FAMILY) if (WebRtc_GetCPUInfo(kSSE2)) { @@ -1515,11 +1547,8 @@ void WebRtcAec_FreeAec(AecCore* aec) { WebRtc_FreeBuffer(aec->outFrBufH[i]); } - WebRtc_FreeBuffer(aec->far_buf); - WebRtc_FreeBuffer(aec->far_buf_windowed); -#ifdef WEBRTC_AEC_DEBUG_DUMP WebRtc_FreeBuffer(aec->far_time_buf); -#endif + RTC_AEC_DEBUG_WAV_CLOSE(aec->farFile); RTC_AEC_DEBUG_WAV_CLOSE(aec->nearFile); RTC_AEC_DEBUG_WAV_CLOSE(aec->outFile); @@ -1555,10 +1584,9 @@ int WebRtcAec_InitAec(AecCore* aec, int sampFreq) { } // Initialize far-end buffers. - WebRtc_InitBuffer(aec->far_buf); - WebRtc_InitBuffer(aec->far_buf_windowed); -#ifdef WEBRTC_AEC_DEBUG_DUMP WebRtc_InitBuffer(aec->far_time_buf); + +#ifdef WEBRTC_AEC_DEBUG_DUMP { int process_rate = sampFreq > 16000 ? 16000 : sampFreq; RTC_AEC_DEBUG_WAV_REOPEN("aec_far", aec->instance_index, @@ -1693,6 +1721,8 @@ int WebRtcAec_InitAec(AecCore* aec, int sampFreq) { aec->seed = 777; aec->delayEstCtr = 0; + aec->extreme_filter_divergence = 0; + // Metrics disabled by default aec->metricsMode = 0; InitMetrics(aec); @@ -1700,27 +1730,22 @@ int WebRtcAec_InitAec(AecCore* aec, int sampFreq) { return 0; } -void WebRtcAec_BufferFarendPartition(AecCore* aec, const float* farend) { - float fft[PART_LEN2]; - float xf[2][PART_LEN1]; +// For bit exactness with a legacy code, |farend| is supposed to contain +// |PART_LEN2| samples with an overlap of |PART_LEN| samples from the last +// frame. +// TODO(minyue): reduce |farend| to non-overlapped |PART_LEN| samples. +void WebRtcAec_BufferFarendPartition(AecCore* aec, const float* farend) { // Check if the buffer is full, and in that case flush the oldest data. - if (WebRtc_available_write(aec->far_buf) < 1) { + if (WebRtc_available_write(aec->far_time_buf) < 1) { WebRtcAec_MoveFarReadPtr(aec, 1); } - // Convert far-end partition to the frequency domain without windowing. - memcpy(fft, farend, sizeof(float) * PART_LEN2); - TimeToFrequency(fft, xf, 0); - WebRtc_WriteBuffer(aec->far_buf, &xf[0][0], 1); - // Convert far-end partition to the frequency domain with windowing. - memcpy(fft, farend, sizeof(float) * PART_LEN2); - TimeToFrequency(fft, xf, 1); - WebRtc_WriteBuffer(aec->far_buf_windowed, &xf[0][0], 1); + WebRtc_WriteBuffer(aec->far_time_buf, farend, 1); } int WebRtcAec_MoveFarReadPtr(AecCore* aec, int elements) { - int elements_moved = MoveFarReadPtrWithoutSystemDelayUpdate(aec, elements); + int elements_moved = WebRtc_MoveReadPtr(aec->far_time_buf, elements); aec->system_delay -= elements_moved * PART_LEN; return elements_moved; } @@ -1794,14 +1819,14 @@ void WebRtcAec_ProcessFrames(AecCore* aec, // rounding, like -16. int move_elements = (aec->knownDelay - knownDelay - 32) / PART_LEN; int moved_elements = - MoveFarReadPtrWithoutSystemDelayUpdate(aec, move_elements); + WebRtc_MoveReadPtr(aec->far_time_buf, move_elements); aec->knownDelay -= moved_elements * PART_LEN; } else { // 2 b) Apply signal based delay correction. int move_elements = SignalBasedDelayCorrection(aec); int moved_elements = - MoveFarReadPtrWithoutSystemDelayUpdate(aec, move_elements); - int far_near_buffer_diff = WebRtc_available_read(aec->far_buf) - + WebRtc_MoveReadPtr(aec->far_time_buf, move_elements); + int far_near_buffer_diff = WebRtc_available_read(aec->far_time_buf) - WebRtc_available_read(aec->nearFrBuf) / PART_LEN; WebRtc_SoftResetDelayEstimator(aec->delay_estimator, moved_elements); WebRtc_SoftResetDelayEstimatorFarend(aec->delay_estimator_farend, @@ -1880,10 +1905,6 @@ void WebRtcAec_GetEchoStats(AecCore* self, *a_nlp = self->aNlp; } -#ifdef WEBRTC_AEC_DEBUG_DUMP -void* WebRtcAec_far_time_buf(AecCore* self) { return self->far_time_buf; } -#endif - void WebRtcAec_SetConfigCore(AecCore* self, int nlp_mode, int metrics_mode, diff --git a/webrtc/modules/audio_processing/aec/aec_core_internal.h b/webrtc/modules/audio_processing/aec/aec_core_internal.h index 2de028379b..3809c82567 100644 --- a/webrtc/modules/audio_processing/aec/aec_core_internal.h +++ b/webrtc/modules/audio_processing/aec/aec_core_internal.h @@ -95,8 +95,8 @@ struct AecCore { int xfBufBlockPos; - RingBuffer* far_buf; - RingBuffer* far_buf_windowed; + RingBuffer* far_time_buf; + int system_delay; // Current system delay buffered in AEC. int mult; // sampling frequency multiple @@ -152,6 +152,10 @@ struct AecCore { // Runtime selection of number of filter partitions. int num_partitions; + // Flag that extreme filter divergence has been detected by the Echo + // Suppressor. + int extreme_filter_divergence; + #ifdef WEBRTC_AEC_DEBUG_DUMP // Sequence number of this AEC instance, so that different instances can // choose different dump file names. @@ -161,7 +165,6 @@ struct AecCore { // each time. int debug_dump_count; - RingBuffer* far_time_buf; rtc_WavWriter* farFile; rtc_WavWriter* nearFile; rtc_WavWriter* outFile; @@ -170,13 +173,25 @@ struct AecCore { #endif }; -typedef void (*WebRtcAecFilterFar)(AecCore* aec, float yf[2][PART_LEN1]); +typedef void (*WebRtcAecFilterFar)( + int num_partitions, + int x_fft_buf_block_pos, + float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], + float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1], + float y_fft[2][PART_LEN1]); extern WebRtcAecFilterFar WebRtcAec_FilterFar; -typedef void (*WebRtcAecScaleErrorSignal)(AecCore* aec, float ef[2][PART_LEN1]); -extern WebRtcAecScaleErrorSignal WebRtcAec_ScaleErrorSignal; -typedef void (*WebRtcAecFilterAdaptation)(AecCore* aec, - float* fft, +typedef void (*WebRtcAecScaleErrorSignal)(int extended_filter_enabled, + float normal_mu, + float normal_error_threshold, + float x_pow[PART_LEN1], float ef[2][PART_LEN1]); +extern WebRtcAecScaleErrorSignal WebRtcAec_ScaleErrorSignal; +typedef void (*WebRtcAecFilterAdaptation)( + int num_partitions, + int x_fft_buf_block_pos, + float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], + float e_fft[2][PART_LEN1], + float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]); extern WebRtcAecFilterAdaptation WebRtcAec_FilterAdaptation; typedef void (*WebRtcAecOverdriveAndSuppress)(AecCore* aec, float hNl[PART_LEN1], @@ -186,17 +201,29 @@ extern WebRtcAecOverdriveAndSuppress WebRtcAec_OverdriveAndSuppress; typedef void (*WebRtcAecComfortNoise)(AecCore* aec, float efw[2][PART_LEN1], - complex_t* comfortNoiseHband, + float comfortNoiseHband[2][PART_LEN1], const float* noisePow, const float* lambda); extern WebRtcAecComfortNoise WebRtcAec_ComfortNoise; typedef void (*WebRtcAecSubBandCoherence)(AecCore* aec, float efw[2][PART_LEN1], + float dfw[2][PART_LEN1], float xfw[2][PART_LEN1], float* fft, float* cohde, - float* cohxd); + float* cohxd, + int* extreme_filter_divergence); extern WebRtcAecSubBandCoherence WebRtcAec_SubbandCoherence; +typedef int (*WebRtcAecPartitionDelay)(const AecCore* aec); +extern WebRtcAecPartitionDelay WebRtcAec_PartitionDelay; + +typedef void (*WebRtcAecStoreAsComplex)(const float* data, + float data_complex[2][PART_LEN1]); +extern WebRtcAecStoreAsComplex WebRtcAec_StoreAsComplex; + +typedef void (*WebRtcAecWindowData)(float* x_windowed, const float* x); +extern WebRtcAecWindowData WebRtcAec_WindowData; + #endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_INTERNAL_H_ diff --git a/webrtc/modules/audio_processing/aec/aec_core_mips.c b/webrtc/modules/audio_processing/aec/aec_core_mips.c index bb33087aee..035a4b76af 100644 --- a/webrtc/modules/audio_processing/aec/aec_core_mips.c +++ b/webrtc/modules/audio_processing/aec/aec_core_mips.c @@ -20,13 +20,12 @@ #include "webrtc/modules/audio_processing/aec/aec_core_internal.h" #include "webrtc/modules/audio_processing/aec/aec_rdft.h" -static const int flagHbandCn = 1; // flag for adding comfort noise in H band extern const float WebRtcAec_weightCurve[65]; extern const float WebRtcAec_overDriveCurve[65]; void WebRtcAec_ComfortNoise_mips(AecCore* aec, float efw[2][PART_LEN1], - complex_t* comfortNoiseHband, + float comfortNoiseHband[2][PART_LEN1], const float* noisePow, const float* lambda) { int i, num; @@ -274,7 +273,7 @@ void WebRtcAec_ComfortNoise_mips(AecCore* aec, noiseAvg = 0.0; tmpAvg = 0.0; num = 0; - if ((aec->sampFreq == 32000 || aec->sampFreq == 48000) && flagHbandCn == 1) { + if (aec->num_bands > 1) { for (i = 0; i < PART_LEN; i++) { rand[i] = ((float)randW16[i]) / 32768; } @@ -314,27 +313,35 @@ void WebRtcAec_ComfortNoise_mips(AecCore* aec, for (i = 0; i < PART_LEN1; i++) { // Use average NLP weight for H band - comfortNoiseHband[i][0] = tmpAvg * u[i][0]; - comfortNoiseHband[i][1] = tmpAvg * u[i][1]; + comfortNoiseHband[0][i] = tmpAvg * u[i][0]; + comfortNoiseHband[1][i] = tmpAvg * u[i][1]; } + } else { + memset(comfortNoiseHband, 0, + 2 * PART_LEN1 * sizeof(comfortNoiseHband[0][0])); } } -void WebRtcAec_FilterFar_mips(AecCore* aec, float yf[2][PART_LEN1]) { +void WebRtcAec_FilterFar_mips( + int num_partitions, + int x_fft_buf_block_pos, + float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], + float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1], + float y_fft[2][PART_LEN1]) { int i; - for (i = 0; i < aec->num_partitions; i++) { - int xPos = (i + aec->xfBufBlockPos) * PART_LEN1; + for (i = 0; i < num_partitions; i++) { + int xPos = (i + x_fft_buf_block_pos) * PART_LEN1; int pos = i * PART_LEN1; // Check for wrap - if (i + aec->xfBufBlockPos >= aec->num_partitions) { - xPos -= aec->num_partitions * (PART_LEN1); + if (i + x_fft_buf_block_pos >= num_partitions) { + xPos -= num_partitions * (PART_LEN1); } - float* yf0 = yf[0]; - float* yf1 = yf[1]; - float* aRe = aec->xfBuf[0] + xPos; - float* aIm = aec->xfBuf[1] + xPos; - float* bRe = aec->wfBuf[0] + pos; - float* bIm = aec->wfBuf[1] + pos; + float* yf0 = y_fft[0]; + float* yf1 = y_fft[1]; + float* aRe = x_fft_buf[0] + xPos; + float* aIm = x_fft_buf[1] + xPos; + float* bRe = h_fft_buf[0] + pos; + float* bIm = h_fft_buf[1] + pos; float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13; int len = PART_LEN1 >> 1; @@ -432,23 +439,27 @@ void WebRtcAec_FilterFar_mips(AecCore* aec, float yf[2][PART_LEN1]) { } } -void WebRtcAec_FilterAdaptation_mips(AecCore* aec, - float* fft, - float ef[2][PART_LEN1]) { +void WebRtcAec_FilterAdaptation_mips( + int num_partitions, + int x_fft_buf_block_pos, + float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], + float e_fft[2][PART_LEN1], + float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) { + float fft[PART_LEN2]; int i; - for (i = 0; i < aec->num_partitions; i++) { - int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1); + for (i = 0; i < num_partitions; i++) { + int xPos = (i + x_fft_buf_block_pos)*(PART_LEN1); int pos; // Check for wrap - if (i + aec->xfBufBlockPos >= aec->num_partitions) { - xPos -= aec->num_partitions * PART_LEN1; + if (i + x_fft_buf_block_pos >= num_partitions) { + xPos -= num_partitions * PART_LEN1; } pos = i * PART_LEN1; - float* aRe = aec->xfBuf[0] + xPos; - float* aIm = aec->xfBuf[1] + xPos; - float* bRe = ef[0]; - float* bIm = ef[1]; + float* aRe = x_fft_buf[0] + xPos; + float* aIm = x_fft_buf[1] + xPos; + float* bRe = e_fft[0]; + float* bIm = e_fft[1]; float* fft_tmp; float f0, f1, f2, f3, f4, f5, f6 ,f7, f8, f9, f10, f11, f12; @@ -573,8 +584,8 @@ void WebRtcAec_FilterAdaptation_mips(AecCore* aec, ); } aec_rdft_forward_128(fft); - aRe = aec->wfBuf[0] + pos; - aIm = aec->wfBuf[1] + pos; + aRe = h_fft_buf[0] + pos; + aIm = h_fft_buf[1] + pos; __asm __volatile ( ".set push \n\t" ".set noreorder \n\t" @@ -699,15 +710,18 @@ void WebRtcAec_OverdriveAndSuppress_mips(AecCore* aec, } } -void WebRtcAec_ScaleErrorSignal_mips(AecCore* aec, float ef[2][PART_LEN1]) { - const float mu = aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu; - const float error_threshold = aec->extended_filter_enabled +void WebRtcAec_ScaleErrorSignal_mips(int extended_filter_enabled, + float normal_mu, + float normal_error_threshold, + float x_pow[PART_LEN1], + float ef[2][PART_LEN1]) { + const float mu = extended_filter_enabled ? kExtendedMu : normal_mu; + const float error_threshold = extended_filter_enabled ? kExtendedErrorThreshold - : aec->normal_error_threshold; + : normal_error_threshold; int len = (PART_LEN1); float* ef0 = ef[0]; float* ef1 = ef[1]; - float* xPow = aec->xPow; float fac1 = 1e-10f; float err_th2 = error_threshold * error_threshold; float f0, f1, f2; @@ -719,7 +733,7 @@ void WebRtcAec_ScaleErrorSignal_mips(AecCore* aec, float ef[2][PART_LEN1]) { ".set push \n\t" ".set noreorder \n\t" "1: \n\t" - "lwc1 %[f0], 0(%[xPow]) \n\t" + "lwc1 %[f0], 0(%[x_pow]) \n\t" "lwc1 %[f1], 0(%[ef0]) \n\t" "lwc1 %[f2], 0(%[ef1]) \n\t" "add.s %[f0], %[f0], %[fac1] \n\t" @@ -747,7 +761,7 @@ void WebRtcAec_ScaleErrorSignal_mips(AecCore* aec, float ef[2][PART_LEN1]) { "swc1 %[f1], 0(%[ef0]) \n\t" "swc1 %[f2], 0(%[ef1]) \n\t" "addiu %[len], %[len], -1 \n\t" - "addiu %[xPow], %[xPow], 4 \n\t" + "addiu %[x_pow], %[x_pow], 4 \n\t" "addiu %[ef0], %[ef0], 4 \n\t" "bgtz %[len], 1b \n\t" " addiu %[ef1], %[ef1], 4 \n\t" @@ -756,7 +770,7 @@ void WebRtcAec_ScaleErrorSignal_mips(AecCore* aec, float ef[2][PART_LEN1]) { #if !defined(MIPS32_R2_LE) [f3] "=&f" (f3), #endif - [xPow] "+r" (xPow), [ef0] "+r" (ef0), [ef1] "+r" (ef1), + [x_pow] "+r" (x_pow), [ef0] "+r" (ef0), [ef1] "+r" (ef1), [len] "+r" (len) : [fac1] "f" (fac1), [err_th2] "f" (err_th2), [mu] "f" (mu), [err_th] "f" (error_threshold) @@ -771,4 +785,3 @@ void WebRtcAec_InitAec_mips(void) { WebRtcAec_ComfortNoise = WebRtcAec_ComfortNoise_mips; WebRtcAec_OverdriveAndSuppress = WebRtcAec_OverdriveAndSuppress_mips; } - diff --git a/webrtc/modules/audio_processing/aec/aec_core_neon.c b/webrtc/modules/audio_processing/aec/aec_core_neon.c index 9a677aaa67..7898ab2543 100644 --- a/webrtc/modules/audio_processing/aec/aec_core_neon.c +++ b/webrtc/modules/audio_processing/aec/aec_core_neon.c @@ -34,45 +34,49 @@ __inline static float MulIm(float aRe, float aIm, float bRe, float bIm) { return aRe * bIm + aIm * bRe; } -static void FilterFarNEON(AecCore* aec, float yf[2][PART_LEN1]) { +static void FilterFarNEON( + int num_partitions, + int x_fft_buf_block_pos, + float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], + float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1], + float y_fft[2][PART_LEN1]) { int i; - const int num_partitions = aec->num_partitions; for (i = 0; i < num_partitions; i++) { int j; - int xPos = (i + aec->xfBufBlockPos) * PART_LEN1; + int xPos = (i + x_fft_buf_block_pos) * PART_LEN1; int pos = i * PART_LEN1; // Check for wrap - if (i + aec->xfBufBlockPos >= num_partitions) { + if (i + x_fft_buf_block_pos >= num_partitions) { xPos -= num_partitions * PART_LEN1; } // vectorized code (four at once) for (j = 0; j + 3 < PART_LEN1; j += 4) { - const float32x4_t xfBuf_re = vld1q_f32(&aec->xfBuf[0][xPos + j]); - const float32x4_t xfBuf_im = vld1q_f32(&aec->xfBuf[1][xPos + j]); - const float32x4_t wfBuf_re = vld1q_f32(&aec->wfBuf[0][pos + j]); - const float32x4_t wfBuf_im = vld1q_f32(&aec->wfBuf[1][pos + j]); - const float32x4_t yf_re = vld1q_f32(&yf[0][j]); - const float32x4_t yf_im = vld1q_f32(&yf[1][j]); - const float32x4_t a = vmulq_f32(xfBuf_re, wfBuf_re); - const float32x4_t e = vmlsq_f32(a, xfBuf_im, wfBuf_im); - const float32x4_t c = vmulq_f32(xfBuf_re, wfBuf_im); - const float32x4_t f = vmlaq_f32(c, xfBuf_im, wfBuf_re); - const float32x4_t g = vaddq_f32(yf_re, e); - const float32x4_t h = vaddq_f32(yf_im, f); - vst1q_f32(&yf[0][j], g); - vst1q_f32(&yf[1][j], h); + const float32x4_t x_fft_buf_re = vld1q_f32(&x_fft_buf[0][xPos + j]); + const float32x4_t x_fft_buf_im = vld1q_f32(&x_fft_buf[1][xPos + j]); + const float32x4_t h_fft_buf_re = vld1q_f32(&h_fft_buf[0][pos + j]); + const float32x4_t h_fft_buf_im = vld1q_f32(&h_fft_buf[1][pos + j]); + const float32x4_t y_fft_re = vld1q_f32(&y_fft[0][j]); + const float32x4_t y_fft_im = vld1q_f32(&y_fft[1][j]); + const float32x4_t a = vmulq_f32(x_fft_buf_re, h_fft_buf_re); + const float32x4_t e = vmlsq_f32(a, x_fft_buf_im, h_fft_buf_im); + const float32x4_t c = vmulq_f32(x_fft_buf_re, h_fft_buf_im); + const float32x4_t f = vmlaq_f32(c, x_fft_buf_im, h_fft_buf_re); + const float32x4_t g = vaddq_f32(y_fft_re, e); + const float32x4_t h = vaddq_f32(y_fft_im, f); + vst1q_f32(&y_fft[0][j], g); + vst1q_f32(&y_fft[1][j], h); } // scalar code for the remaining items. for (; j < PART_LEN1; j++) { - yf[0][j] += MulRe(aec->xfBuf[0][xPos + j], - aec->xfBuf[1][xPos + j], - aec->wfBuf[0][pos + j], - aec->wfBuf[1][pos + j]); - yf[1][j] += MulIm(aec->xfBuf[0][xPos + j], - aec->xfBuf[1][xPos + j], - aec->wfBuf[0][pos + j], - aec->wfBuf[1][pos + j]); + y_fft[0][j] += MulRe(x_fft_buf[0][xPos + j], + x_fft_buf[1][xPos + j], + h_fft_buf[0][pos + j], + h_fft_buf[1][pos + j]); + y_fft[1][j] += MulIm(x_fft_buf[0][xPos + j], + x_fft_buf[1][xPos + j], + h_fft_buf[0][pos + j], + h_fft_buf[1][pos + j]); } } } @@ -122,20 +126,24 @@ static float32x4_t vsqrtq_f32(float32x4_t s) { } #endif // WEBRTC_ARCH_ARM64 -static void ScaleErrorSignalNEON(AecCore* aec, float ef[2][PART_LEN1]) { - const float mu = aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu; - const float error_threshold = aec->extended_filter_enabled ? - kExtendedErrorThreshold : aec->normal_error_threshold; +static void ScaleErrorSignalNEON(int extended_filter_enabled, + float normal_mu, + float normal_error_threshold, + float x_pow[PART_LEN1], + float ef[2][PART_LEN1]) { + const float mu = extended_filter_enabled ? kExtendedMu : normal_mu; + const float error_threshold = extended_filter_enabled ? + kExtendedErrorThreshold : normal_error_threshold; const float32x4_t k1e_10f = vdupq_n_f32(1e-10f); const float32x4_t kMu = vmovq_n_f32(mu); const float32x4_t kThresh = vmovq_n_f32(error_threshold); int i; // vectorized code (four at once) for (i = 0; i + 3 < PART_LEN1; i += 4) { - const float32x4_t xPow = vld1q_f32(&aec->xPow[i]); + const float32x4_t x_pow_local = vld1q_f32(&x_pow[i]); const float32x4_t ef_re_base = vld1q_f32(&ef[0][i]); const float32x4_t ef_im_base = vld1q_f32(&ef[1][i]); - const float32x4_t xPowPlus = vaddq_f32(xPow, k1e_10f); + const float32x4_t xPowPlus = vaddq_f32(x_pow_local, k1e_10f); float32x4_t ef_re = vdivq_f32(ef_re_base, xPowPlus); float32x4_t ef_im = vdivq_f32(ef_im_base, xPowPlus); const float32x4_t ef_re2 = vmulq_f32(ef_re, ef_re); @@ -162,8 +170,8 @@ static void ScaleErrorSignalNEON(AecCore* aec, float ef[2][PART_LEN1]) { // scalar code for the remaining items. for (; i < PART_LEN1; i++) { float abs_ef; - ef[0][i] /= (aec->xPow[i] + 1e-10f); - ef[1][i] /= (aec->xPow[i] + 1e-10f); + ef[0][i] /= (x_pow[i] + 1e-10f); + ef[1][i] /= (x_pow[i] + 1e-10f); abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]); if (abs_ef > error_threshold) { @@ -178,34 +186,37 @@ static void ScaleErrorSignalNEON(AecCore* aec, float ef[2][PART_LEN1]) { } } -static void FilterAdaptationNEON(AecCore* aec, - float* fft, - float ef[2][PART_LEN1]) { +static void FilterAdaptationNEON( + int num_partitions, + int x_fft_buf_block_pos, + float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], + float e_fft[2][PART_LEN1], + float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) { + float fft[PART_LEN2]; int i; - const int num_partitions = aec->num_partitions; for (i = 0; i < num_partitions; i++) { - int xPos = (i + aec->xfBufBlockPos) * PART_LEN1; + int xPos = (i + x_fft_buf_block_pos) * PART_LEN1; int pos = i * PART_LEN1; int j; // Check for wrap - if (i + aec->xfBufBlockPos >= num_partitions) { + if (i + x_fft_buf_block_pos >= num_partitions) { xPos -= num_partitions * PART_LEN1; } // Process the whole array... for (j = 0; j < PART_LEN; j += 4) { - // Load xfBuf and ef. - const float32x4_t xfBuf_re = vld1q_f32(&aec->xfBuf[0][xPos + j]); - const float32x4_t xfBuf_im = vld1q_f32(&aec->xfBuf[1][xPos + j]); - const float32x4_t ef_re = vld1q_f32(&ef[0][j]); - const float32x4_t ef_im = vld1q_f32(&ef[1][j]); - // Calculate the product of conjugate(xfBuf) by ef. + // Load x_fft_buf and e_fft. + const float32x4_t x_fft_buf_re = vld1q_f32(&x_fft_buf[0][xPos + j]); + const float32x4_t x_fft_buf_im = vld1q_f32(&x_fft_buf[1][xPos + j]); + const float32x4_t e_fft_re = vld1q_f32(&e_fft[0][j]); + const float32x4_t e_fft_im = vld1q_f32(&e_fft[1][j]); + // Calculate the product of conjugate(x_fft_buf) by e_fft. // re(conjugate(a) * b) = aRe * bRe + aIm * bIm // im(conjugate(a) * b)= aRe * bIm - aIm * bRe - const float32x4_t a = vmulq_f32(xfBuf_re, ef_re); - const float32x4_t e = vmlaq_f32(a, xfBuf_im, ef_im); - const float32x4_t c = vmulq_f32(xfBuf_re, ef_im); - const float32x4_t f = vmlsq_f32(c, xfBuf_im, ef_re); + const float32x4_t a = vmulq_f32(x_fft_buf_re, e_fft_re); + const float32x4_t e = vmlaq_f32(a, x_fft_buf_im, e_fft_im); + const float32x4_t c = vmulq_f32(x_fft_buf_re, e_fft_im); + const float32x4_t f = vmlsq_f32(c, x_fft_buf_im, e_fft_re); // Interleave real and imaginary parts. const float32x4x2_t g_n_h = vzipq_f32(e, f); // Store @@ -213,10 +224,10 @@ static void FilterAdaptationNEON(AecCore* aec, vst1q_f32(&fft[2 * j + 4], g_n_h.val[1]); } // ... and fixup the first imaginary entry. - fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN], - -aec->xfBuf[1][xPos + PART_LEN], - ef[0][PART_LEN], - ef[1][PART_LEN]); + fft[1] = MulRe(x_fft_buf[0][xPos + PART_LEN], + -x_fft_buf[1][xPos + PART_LEN], + e_fft[0][PART_LEN], + e_fft[1][PART_LEN]); aec_rdft_inverse_128(fft); memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN); @@ -234,21 +245,21 @@ static void FilterAdaptationNEON(AecCore* aec, aec_rdft_forward_128(fft); { - const float wt1 = aec->wfBuf[1][pos]; - aec->wfBuf[0][pos + PART_LEN] += fft[1]; + const float wt1 = h_fft_buf[1][pos]; + h_fft_buf[0][pos + PART_LEN] += fft[1]; for (j = 0; j < PART_LEN; j += 4) { - float32x4_t wtBuf_re = vld1q_f32(&aec->wfBuf[0][pos + j]); - float32x4_t wtBuf_im = vld1q_f32(&aec->wfBuf[1][pos + j]); + float32x4_t wtBuf_re = vld1q_f32(&h_fft_buf[0][pos + j]); + float32x4_t wtBuf_im = vld1q_f32(&h_fft_buf[1][pos + j]); const float32x4_t fft0 = vld1q_f32(&fft[2 * j + 0]); const float32x4_t fft4 = vld1q_f32(&fft[2 * j + 4]); const float32x4x2_t fft_re_im = vuzpq_f32(fft0, fft4); wtBuf_re = vaddq_f32(wtBuf_re, fft_re_im.val[0]); wtBuf_im = vaddq_f32(wtBuf_im, fft_re_im.val[1]); - vst1q_f32(&aec->wfBuf[0][pos + j], wtBuf_re); - vst1q_f32(&aec->wfBuf[1][pos + j], wtBuf_im); + vst1q_f32(&h_fft_buf[0][pos + j], wtBuf_re); + vst1q_f32(&h_fft_buf[1][pos + j], wtBuf_im); } - aec->wfBuf[1][pos] = wt1; + h_fft_buf[1][pos] = wt1; } } } @@ -442,7 +453,7 @@ static void OverdriveAndSuppressNEON(AecCore* aec, } } -static int PartitionDelay(const AecCore* aec) { +static int PartitionDelayNEON(const AecCore* aec) { // Measures the energy in each filter partition and returns the partition with // highest energy. // TODO(bjornv): Spread computational cost by computing one partition per @@ -499,7 +510,8 @@ static int PartitionDelay(const AecCore* aec) { static void SmoothedPSD(AecCore* aec, float efw[2][PART_LEN1], float dfw[2][PART_LEN1], - float xfw[2][PART_LEN1]) { + float xfw[2][PART_LEN1], + int* extreme_filter_divergence) { // Power estimate smoothing coefficients. const float* ptrGCoh = aec->extended_filter_enabled ? WebRtcAec_kExtendedSmoothingCoefficients[aec->mult - 1] @@ -615,19 +627,16 @@ static void SmoothedPSD(AecCore* aec, seSum += aec->se[i]; } - // Divergent filter safeguard. + // Divergent filter safeguard update. aec->divergeState = (aec->divergeState ? 1.05f : 1.0f) * seSum > sdSum; - if (aec->divergeState) - memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1); - - // Reset if error is significantly larger than nearend (13 dB). - if (!aec->extended_filter_enabled && seSum > (19.95f * sdSum)) - memset(aec->wfBuf, 0, sizeof(aec->wfBuf)); + // Signal extreme filter divergence if the error is significantly larger + // than the nearend (13 dB). + *extreme_filter_divergence = (seSum > (19.95f * sdSum)); } // Window time domain data to be used by the fft. -__inline static void WindowData(float* x_windowed, const float* x) { +static void WindowDataNEON(float* x_windowed, const float* x) { int i; for (i = 0; i < PART_LEN; i += 4) { const float32x4_t vec_Buf1 = vld1q_f32(&x[i]); @@ -648,8 +657,8 @@ __inline static void WindowData(float* x_windowed, const float* x) { } // Puts fft output data into a complex valued array. -__inline static void StoreAsComplex(const float* data, - float data_complex[2][PART_LEN1]) { +static void StoreAsComplexNEON(const float* data, + float data_complex[2][PART_LEN1]) { int i; for (i = 0; i < PART_LEN; i += 4) { const float32x4x2_t vec_data = vld2q_f32(&data[2 * i]); @@ -665,32 +674,15 @@ __inline static void StoreAsComplex(const float* data, static void SubbandCoherenceNEON(AecCore* aec, float efw[2][PART_LEN1], + float dfw[2][PART_LEN1], float xfw[2][PART_LEN1], float* fft, float* cohde, - float* cohxd) { - float dfw[2][PART_LEN1]; + float* cohxd, + int* extreme_filter_divergence) { int i; - if (aec->delayEstCtr == 0) - aec->delayIdx = PartitionDelay(aec); - - // Use delayed far. - memcpy(xfw, - aec->xfwBuf + aec->delayIdx * PART_LEN1, - sizeof(xfw[0][0]) * 2 * PART_LEN1); - - // Windowed near fft - WindowData(fft, aec->dBuf); - aec_rdft_forward_128(fft); - StoreAsComplex(fft, dfw); - - // Windowed error fft - WindowData(fft, aec->eBuf); - aec_rdft_forward_128(fft); - StoreAsComplex(fft, efw); - - SmoothedPSD(aec, efw, dfw, xfw); + SmoothedPSD(aec, efw, dfw, xfw, extreme_filter_divergence); { const float32x4_t vec_1eminus10 = vdupq_n_f32(1e-10f); @@ -732,5 +724,7 @@ void WebRtcAec_InitAec_neon(void) { WebRtcAec_FilterAdaptation = FilterAdaptationNEON; WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON; WebRtcAec_SubbandCoherence = SubbandCoherenceNEON; + WebRtcAec_StoreAsComplex = StoreAsComplexNEON; + WebRtcAec_PartitionDelay = PartitionDelayNEON; + WebRtcAec_WindowData = WindowDataNEON; } - diff --git a/webrtc/modules/audio_processing/aec/aec_core_sse2.c b/webrtc/modules/audio_processing/aec/aec_core_sse2.c index b1bffcbb9f..f897a4c0c7 100644 --- a/webrtc/modules/audio_processing/aec/aec_core_sse2.c +++ b/webrtc/modules/audio_processing/aec/aec_core_sse2.c @@ -29,67 +29,76 @@ __inline static float MulIm(float aRe, float aIm, float bRe, float bIm) { return aRe * bIm + aIm * bRe; } -static void FilterFarSSE2(AecCore* aec, float yf[2][PART_LEN1]) { +static void FilterFarSSE2( + int num_partitions, + int x_fft_buf_block_pos, + float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], + float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1], + float y_fft[2][PART_LEN1]) { + int i; - const int num_partitions = aec->num_partitions; for (i = 0; i < num_partitions; i++) { int j; - int xPos = (i + aec->xfBufBlockPos) * PART_LEN1; + int xPos = (i + x_fft_buf_block_pos) * PART_LEN1; int pos = i * PART_LEN1; // Check for wrap - if (i + aec->xfBufBlockPos >= num_partitions) { + if (i + x_fft_buf_block_pos >= num_partitions) { xPos -= num_partitions * (PART_LEN1); } // vectorized code (four at once) for (j = 0; j + 3 < PART_LEN1; j += 4) { - const __m128 xfBuf_re = _mm_loadu_ps(&aec->xfBuf[0][xPos + j]); - const __m128 xfBuf_im = _mm_loadu_ps(&aec->xfBuf[1][xPos + j]); - const __m128 wfBuf_re = _mm_loadu_ps(&aec->wfBuf[0][pos + j]); - const __m128 wfBuf_im = _mm_loadu_ps(&aec->wfBuf[1][pos + j]); - const __m128 yf_re = _mm_loadu_ps(&yf[0][j]); - const __m128 yf_im = _mm_loadu_ps(&yf[1][j]); - const __m128 a = _mm_mul_ps(xfBuf_re, wfBuf_re); - const __m128 b = _mm_mul_ps(xfBuf_im, wfBuf_im); - const __m128 c = _mm_mul_ps(xfBuf_re, wfBuf_im); - const __m128 d = _mm_mul_ps(xfBuf_im, wfBuf_re); + const __m128 x_fft_buf_re = _mm_loadu_ps(&x_fft_buf[0][xPos + j]); + const __m128 x_fft_buf_im = _mm_loadu_ps(&x_fft_buf[1][xPos + j]); + const __m128 h_fft_buf_re = _mm_loadu_ps(&h_fft_buf[0][pos + j]); + const __m128 h_fft_buf_im = _mm_loadu_ps(&h_fft_buf[1][pos + j]); + const __m128 y_fft_re = _mm_loadu_ps(&y_fft[0][j]); + const __m128 y_fft_im = _mm_loadu_ps(&y_fft[1][j]); + const __m128 a = _mm_mul_ps(x_fft_buf_re, h_fft_buf_re); + const __m128 b = _mm_mul_ps(x_fft_buf_im, h_fft_buf_im); + const __m128 c = _mm_mul_ps(x_fft_buf_re, h_fft_buf_im); + const __m128 d = _mm_mul_ps(x_fft_buf_im, h_fft_buf_re); const __m128 e = _mm_sub_ps(a, b); const __m128 f = _mm_add_ps(c, d); - const __m128 g = _mm_add_ps(yf_re, e); - const __m128 h = _mm_add_ps(yf_im, f); - _mm_storeu_ps(&yf[0][j], g); - _mm_storeu_ps(&yf[1][j], h); + const __m128 g = _mm_add_ps(y_fft_re, e); + const __m128 h = _mm_add_ps(y_fft_im, f); + _mm_storeu_ps(&y_fft[0][j], g); + _mm_storeu_ps(&y_fft[1][j], h); } // scalar code for the remaining items. for (; j < PART_LEN1; j++) { - yf[0][j] += MulRe(aec->xfBuf[0][xPos + j], - aec->xfBuf[1][xPos + j], - aec->wfBuf[0][pos + j], - aec->wfBuf[1][pos + j]); - yf[1][j] += MulIm(aec->xfBuf[0][xPos + j], - aec->xfBuf[1][xPos + j], - aec->wfBuf[0][pos + j], - aec->wfBuf[1][pos + j]); + y_fft[0][j] += MulRe(x_fft_buf[0][xPos + j], + x_fft_buf[1][xPos + j], + h_fft_buf[0][pos + j], + h_fft_buf[1][pos + j]); + y_fft[1][j] += MulIm(x_fft_buf[0][xPos + j], + x_fft_buf[1][xPos + j], + h_fft_buf[0][pos + j], + h_fft_buf[1][pos + j]); } } } -static void ScaleErrorSignalSSE2(AecCore* aec, float ef[2][PART_LEN1]) { +static void ScaleErrorSignalSSE2(int extended_filter_enabled, + float normal_mu, + float normal_error_threshold, + float x_pow[PART_LEN1], + float ef[2][PART_LEN1]) { const __m128 k1e_10f = _mm_set1_ps(1e-10f); - const __m128 kMu = aec->extended_filter_enabled ? _mm_set1_ps(kExtendedMu) - : _mm_set1_ps(aec->normal_mu); - const __m128 kThresh = aec->extended_filter_enabled + const __m128 kMu = extended_filter_enabled ? _mm_set1_ps(kExtendedMu) + : _mm_set1_ps(normal_mu); + const __m128 kThresh = extended_filter_enabled ? _mm_set1_ps(kExtendedErrorThreshold) - : _mm_set1_ps(aec->normal_error_threshold); + : _mm_set1_ps(normal_error_threshold); int i; // vectorized code (four at once) for (i = 0; i + 3 < PART_LEN1; i += 4) { - const __m128 xPow = _mm_loadu_ps(&aec->xPow[i]); + const __m128 x_pow_local = _mm_loadu_ps(&x_pow[i]); const __m128 ef_re_base = _mm_loadu_ps(&ef[0][i]); const __m128 ef_im_base = _mm_loadu_ps(&ef[1][i]); - const __m128 xPowPlus = _mm_add_ps(xPow, k1e_10f); + const __m128 xPowPlus = _mm_add_ps(x_pow_local, k1e_10f); __m128 ef_re = _mm_div_ps(ef_re_base, xPowPlus); __m128 ef_im = _mm_div_ps(ef_im_base, xPowPlus); const __m128 ef_re2 = _mm_mul_ps(ef_re, ef_re); @@ -116,14 +125,14 @@ static void ScaleErrorSignalSSE2(AecCore* aec, float ef[2][PART_LEN1]) { // scalar code for the remaining items. { const float mu = - aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu; - const float error_threshold = aec->extended_filter_enabled + extended_filter_enabled ? kExtendedMu : normal_mu; + const float error_threshold = extended_filter_enabled ? kExtendedErrorThreshold - : aec->normal_error_threshold; + : normal_error_threshold; for (; i < (PART_LEN1); i++) { float abs_ef; - ef[0][i] /= (aec->xPow[i] + 1e-10f); - ef[1][i] /= (aec->xPow[i] + 1e-10f); + ef[0][i] /= (x_pow[i] + 1e-10f); + ef[1][i] /= (x_pow[i] + 1e-10f); abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]); if (abs_ef > error_threshold) { @@ -139,33 +148,36 @@ static void ScaleErrorSignalSSE2(AecCore* aec, float ef[2][PART_LEN1]) { } } -static void FilterAdaptationSSE2(AecCore* aec, - float* fft, - float ef[2][PART_LEN1]) { +static void FilterAdaptationSSE2( + int num_partitions, + int x_fft_buf_block_pos, + float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], + float e_fft[2][PART_LEN1], + float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) { + float fft[PART_LEN2]; int i, j; - const int num_partitions = aec->num_partitions; for (i = 0; i < num_partitions; i++) { - int xPos = (i + aec->xfBufBlockPos) * (PART_LEN1); + int xPos = (i + x_fft_buf_block_pos) * (PART_LEN1); int pos = i * PART_LEN1; // Check for wrap - if (i + aec->xfBufBlockPos >= num_partitions) { + if (i + x_fft_buf_block_pos >= num_partitions) { xPos -= num_partitions * PART_LEN1; } // Process the whole array... for (j = 0; j < PART_LEN; j += 4) { - // Load xfBuf and ef. - const __m128 xfBuf_re = _mm_loadu_ps(&aec->xfBuf[0][xPos + j]); - const __m128 xfBuf_im = _mm_loadu_ps(&aec->xfBuf[1][xPos + j]); - const __m128 ef_re = _mm_loadu_ps(&ef[0][j]); - const __m128 ef_im = _mm_loadu_ps(&ef[1][j]); - // Calculate the product of conjugate(xfBuf) by ef. + // Load x_fft_buf and e_fft. + const __m128 x_fft_buf_re = _mm_loadu_ps(&x_fft_buf[0][xPos + j]); + const __m128 x_fft_buf_im = _mm_loadu_ps(&x_fft_buf[1][xPos + j]); + const __m128 e_fft_re = _mm_loadu_ps(&e_fft[0][j]); + const __m128 e_fft_im = _mm_loadu_ps(&e_fft[1][j]); + // Calculate the product of conjugate(x_fft_buf) by e_fft. // re(conjugate(a) * b) = aRe * bRe + aIm * bIm // im(conjugate(a) * b)= aRe * bIm - aIm * bRe - const __m128 a = _mm_mul_ps(xfBuf_re, ef_re); - const __m128 b = _mm_mul_ps(xfBuf_im, ef_im); - const __m128 c = _mm_mul_ps(xfBuf_re, ef_im); - const __m128 d = _mm_mul_ps(xfBuf_im, ef_re); + const __m128 a = _mm_mul_ps(x_fft_buf_re, e_fft_re); + const __m128 b = _mm_mul_ps(x_fft_buf_im, e_fft_im); + const __m128 c = _mm_mul_ps(x_fft_buf_re, e_fft_im); + const __m128 d = _mm_mul_ps(x_fft_buf_im, e_fft_re); const __m128 e = _mm_add_ps(a, b); const __m128 f = _mm_sub_ps(c, d); // Interleave real and imaginary parts. @@ -176,10 +188,10 @@ static void FilterAdaptationSSE2(AecCore* aec, _mm_storeu_ps(&fft[2 * j + 4], h); } // ... and fixup the first imaginary entry. - fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN], - -aec->xfBuf[1][xPos + PART_LEN], - ef[0][PART_LEN], - ef[1][PART_LEN]); + fft[1] = MulRe(x_fft_buf[0][xPos + PART_LEN], + -x_fft_buf[1][xPos + PART_LEN], + e_fft[0][PART_LEN], + e_fft[1][PART_LEN]); aec_rdft_inverse_128(fft); memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN); @@ -197,11 +209,11 @@ static void FilterAdaptationSSE2(AecCore* aec, aec_rdft_forward_128(fft); { - float wt1 = aec->wfBuf[1][pos]; - aec->wfBuf[0][pos + PART_LEN] += fft[1]; + float wt1 = h_fft_buf[1][pos]; + h_fft_buf[0][pos + PART_LEN] += fft[1]; for (j = 0; j < PART_LEN; j += 4) { - __m128 wtBuf_re = _mm_loadu_ps(&aec->wfBuf[0][pos + j]); - __m128 wtBuf_im = _mm_loadu_ps(&aec->wfBuf[1][pos + j]); + __m128 wtBuf_re = _mm_loadu_ps(&h_fft_buf[0][pos + j]); + __m128 wtBuf_im = _mm_loadu_ps(&h_fft_buf[1][pos + j]); const __m128 fft0 = _mm_loadu_ps(&fft[2 * j + 0]); const __m128 fft4 = _mm_loadu_ps(&fft[2 * j + 4]); const __m128 fft_re = @@ -210,10 +222,10 @@ static void FilterAdaptationSSE2(AecCore* aec, _mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(3, 1, 3, 1)); wtBuf_re = _mm_add_ps(wtBuf_re, fft_re); wtBuf_im = _mm_add_ps(wtBuf_im, fft_im); - _mm_storeu_ps(&aec->wfBuf[0][pos + j], wtBuf_re); - _mm_storeu_ps(&aec->wfBuf[1][pos + j], wtBuf_im); + _mm_storeu_ps(&h_fft_buf[0][pos + j], wtBuf_re); + _mm_storeu_ps(&h_fft_buf[1][pos + j], wtBuf_im); } - aec->wfBuf[1][pos] = wt1; + h_fft_buf[1][pos] = wt1; } } } @@ -427,7 +439,8 @@ __inline static void _mm_add_ps_4x1(__m128 sum, float *dst) { sum = _mm_add_ps(sum, _mm_shuffle_ps(sum, sum, _MM_SHUFFLE(1, 1, 1, 1))); _mm_store_ss(dst, sum); } -static int PartitionDelay(const AecCore* aec) { + +static int PartitionDelaySSE2(const AecCore* aec) { // Measures the energy in each filter partition and returns the partition with // highest energy. // TODO(bjornv): Spread computational cost by computing one partition per @@ -476,7 +489,8 @@ static int PartitionDelay(const AecCore* aec) { static void SmoothedPSD(AecCore* aec, float efw[2][PART_LEN1], float dfw[2][PART_LEN1], - float xfw[2][PART_LEN1]) { + float xfw[2][PART_LEN1], + int* extreme_filter_divergence) { // Power estimate smoothing coefficients. const float* ptrGCoh = aec->extended_filter_enabled ? WebRtcAec_kExtendedSmoothingCoefficients[aec->mult - 1] @@ -595,19 +609,16 @@ static void SmoothedPSD(AecCore* aec, seSum += aec->se[i]; } - // Divergent filter safeguard. + // Divergent filter safeguard update. aec->divergeState = (aec->divergeState ? 1.05f : 1.0f) * seSum > sdSum; - if (aec->divergeState) - memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1); - - // Reset if error is significantly larger than nearend (13 dB). - if (!aec->extended_filter_enabled && seSum > (19.95f * sdSum)) - memset(aec->wfBuf, 0, sizeof(aec->wfBuf)); + // Signal extreme filter divergence if the error is significantly larger + // than the nearend (13 dB). + *extreme_filter_divergence = (seSum > (19.95f * sdSum)); } // Window time domain data to be used by the fft. -__inline static void WindowData(float* x_windowed, const float* x) { +static void WindowDataSSE2(float* x_windowed, const float* x) { int i; for (i = 0; i < PART_LEN; i += 4) { const __m128 vec_Buf1 = _mm_loadu_ps(&x[i]); @@ -627,8 +638,8 @@ __inline static void WindowData(float* x_windowed, const float* x) { } // Puts fft output data into a complex valued array. -__inline static void StoreAsComplex(const float* data, - float data_complex[2][PART_LEN1]) { +static void StoreAsComplexSSE2(const float* data, + float data_complex[2][PART_LEN1]) { int i; for (i = 0; i < PART_LEN; i += 4) { const __m128 vec_fft0 = _mm_loadu_ps(&data[2 * i]); @@ -649,32 +660,15 @@ __inline static void StoreAsComplex(const float* data, static void SubbandCoherenceSSE2(AecCore* aec, float efw[2][PART_LEN1], + float dfw[2][PART_LEN1], float xfw[2][PART_LEN1], float* fft, float* cohde, - float* cohxd) { - float dfw[2][PART_LEN1]; + float* cohxd, + int* extreme_filter_divergence) { int i; - if (aec->delayEstCtr == 0) - aec->delayIdx = PartitionDelay(aec); - - // Use delayed far. - memcpy(xfw, - aec->xfwBuf + aec->delayIdx * PART_LEN1, - sizeof(xfw[0][0]) * 2 * PART_LEN1); - - // Windowed near fft - WindowData(fft, aec->dBuf); - aec_rdft_forward_128(fft); - StoreAsComplex(fft, dfw); - - // Windowed error fft - WindowData(fft, aec->eBuf); - aec_rdft_forward_128(fft); - StoreAsComplex(fft, efw); - - SmoothedPSD(aec, efw, dfw, xfw); + SmoothedPSD(aec, efw, dfw, xfw, extreme_filter_divergence); { const __m128 vec_1eminus10 = _mm_set1_ps(1e-10f); @@ -728,4 +722,7 @@ void WebRtcAec_InitAec_SSE2(void) { WebRtcAec_FilterAdaptation = FilterAdaptationSSE2; WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2; WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2; + WebRtcAec_StoreAsComplex = StoreAsComplexSSE2; + WebRtcAec_PartitionDelay = PartitionDelaySSE2; + WebRtcAec_WindowData = WindowDataSSE2; } diff --git a/webrtc/modules/audio_processing/aec/echo_cancellation.c b/webrtc/modules/audio_processing/aec/echo_cancellation.c index 0f5cd31ddb..aab1718b24 100644 --- a/webrtc/modules/audio_processing/aec/echo_cancellation.c +++ b/webrtc/modules/audio_processing/aec/echo_cancellation.c @@ -11,7 +11,7 @@ /* * Contains the API functions for the AEC. */ -#include "webrtc/modules/audio_processing/aec/include/echo_cancellation.h" +#include "webrtc/modules/audio_processing/aec/echo_cancellation.h" #include <math.h> #ifdef WEBRTC_AEC_DEBUG_DUMP @@ -146,7 +146,6 @@ void* WebRtcAec_Create() { } aecpc->initFlag = 0; - aecpc->lastError = 0; #ifdef WEBRTC_AEC_DEBUG_DUMP { @@ -192,26 +191,22 @@ int32_t WebRtcAec_Init(void* aecInst, int32_t sampFreq, int32_t scSampFreq) { sampFreq != 16000 && sampFreq != 32000 && sampFreq != 48000) { - aecpc->lastError = AEC_BAD_PARAMETER_ERROR; - return -1; + return AEC_BAD_PARAMETER_ERROR; } aecpc->sampFreq = sampFreq; if (scSampFreq < 1 || scSampFreq > 96000) { - aecpc->lastError = AEC_BAD_PARAMETER_ERROR; - return -1; + return AEC_BAD_PARAMETER_ERROR; } aecpc->scSampFreq = scSampFreq; // Initialize echo canceller core if (WebRtcAec_InitAec(aecpc->aec, aecpc->sampFreq) == -1) { - aecpc->lastError = AEC_UNSPECIFIED_ERROR; - return -1; + return AEC_UNSPECIFIED_ERROR; } if (WebRtcAec_InitResampler(aecpc->resampler, aecpc->scSampFreq) == -1) { - aecpc->lastError = AEC_UNSPECIFIED_ERROR; - return -1; + return AEC_UNSPECIFIED_ERROR; } WebRtc_InitBuffer(aecpc->far_pre_buf); @@ -261,13 +256,32 @@ int32_t WebRtcAec_Init(void* aecInst, int32_t sampFreq, int32_t scSampFreq) { aecConfig.delay_logging = kAecFalse; if (WebRtcAec_set_config(aecpc, aecConfig) == -1) { - aecpc->lastError = AEC_UNSPECIFIED_ERROR; - return -1; + return AEC_UNSPECIFIED_ERROR; } return 0; } +// Returns any error that is caused when buffering the +// far-end signal. +int32_t WebRtcAec_GetBufferFarendError(void* aecInst, + const float* farend, + size_t nrOfSamples) { + Aec* aecpc = aecInst; + + if (!farend) + return AEC_NULL_POINTER_ERROR; + + if (aecpc->initFlag != initCheck) + return AEC_UNINITIALIZED_ERROR; + + // number of samples == 160 for SWB input + if (nrOfSamples != 80 && nrOfSamples != 160) + return AEC_BAD_PARAMETER_ERROR; + + return 0; +} + // only buffer L band for farend int32_t WebRtcAec_BufferFarend(void* aecInst, const float* farend, @@ -277,21 +291,13 @@ int32_t WebRtcAec_BufferFarend(void* aecInst, float new_farend[MAX_RESAMP_LEN]; const float* farend_ptr = farend; - if (farend == NULL) { - aecpc->lastError = AEC_NULL_POINTER_ERROR; - return -1; - } + // Get any error caused by buffering the farend signal. + int32_t error_code = WebRtcAec_GetBufferFarendError(aecInst, farend, + nrOfSamples); - if (aecpc->initFlag != initCheck) { - aecpc->lastError = AEC_UNINITIALIZED_ERROR; - return -1; - } + if (error_code != 0) + return error_code; - // number of samples == 160 for SWB input - if (nrOfSamples != 80 && nrOfSamples != 160) { - aecpc->lastError = AEC_BAD_PARAMETER_ERROR; - return -1; - } if (aecpc->skewMode == kAecTrue && aecpc->resample == kAecTrue) { // Resample and get a new number of samples @@ -311,7 +317,8 @@ int32_t WebRtcAec_BufferFarend(void* aecInst, // Write the time-domain data to |far_pre_buf|. WebRtc_WriteBuffer(aecpc->far_pre_buf, farend_ptr, newNrOfSamples); - // Transform to frequency domain if we have enough data. + // TODO(minyue): reduce to |PART_LEN| samples for each buffering, when + // WebRtcAec_BufferFarendPartition() is changed to take |PART_LEN| samples. while (WebRtc_available_read(aecpc->far_pre_buf) >= PART_LEN2) { // We have enough data to pass to the FFT, hence read PART_LEN2 samples. { @@ -319,10 +326,6 @@ int32_t WebRtcAec_BufferFarend(void* aecInst, float tmp[PART_LEN2]; WebRtc_ReadBuffer(aecpc->far_pre_buf, (void**)&ptmp, tmp, PART_LEN2); WebRtcAec_BufferFarendPartition(aecpc->aec, ptmp); -#ifdef WEBRTC_AEC_DEBUG_DUMP - WebRtc_WriteBuffer( - WebRtcAec_far_time_buf(aecpc->aec), &ptmp[PART_LEN], 1); -#endif } // Rewind |far_pre_buf| PART_LEN samples for overlap before continuing. @@ -343,29 +346,24 @@ int32_t WebRtcAec_Process(void* aecInst, int32_t retVal = 0; if (out == NULL) { - aecpc->lastError = AEC_NULL_POINTER_ERROR; - return -1; + return AEC_NULL_POINTER_ERROR; } if (aecpc->initFlag != initCheck) { - aecpc->lastError = AEC_UNINITIALIZED_ERROR; - return -1; + return AEC_UNINITIALIZED_ERROR; } // number of samples == 160 for SWB input if (nrOfSamples != 80 && nrOfSamples != 160) { - aecpc->lastError = AEC_BAD_PARAMETER_ERROR; - return -1; + return AEC_BAD_PARAMETER_ERROR; } if (msInSndCardBuf < 0) { msInSndCardBuf = 0; - aecpc->lastError = AEC_BAD_PARAMETER_WARNING; - retVal = -1; + retVal = AEC_BAD_PARAMETER_WARNING; } else if (msInSndCardBuf > kMaxTrustedDelayMs) { // The clamping is now done in ProcessExtended/Normal(). - aecpc->lastError = AEC_BAD_PARAMETER_WARNING; - retVal = -1; + retVal = AEC_BAD_PARAMETER_WARNING; } // This returns the value of aec->extended_filter_enabled. @@ -378,15 +376,13 @@ int32_t WebRtcAec_Process(void* aecInst, msInSndCardBuf, skew); } else { - if (ProcessNormal(aecpc, - nearend, - num_bands, - out, - nrOfSamples, - msInSndCardBuf, - skew) != 0) { - retVal = -1; - } + retVal = ProcessNormal(aecpc, + nearend, + num_bands, + out, + nrOfSamples, + msInSndCardBuf, + skew); } #ifdef WEBRTC_AEC_DEBUG_DUMP @@ -405,31 +401,26 @@ int32_t WebRtcAec_Process(void* aecInst, int WebRtcAec_set_config(void* handle, AecConfig config) { Aec* self = (Aec*)handle; if (self->initFlag != initCheck) { - self->lastError = AEC_UNINITIALIZED_ERROR; - return -1; + return AEC_UNINITIALIZED_ERROR; } if (config.skewMode != kAecFalse && config.skewMode != kAecTrue) { - self->lastError = AEC_BAD_PARAMETER_ERROR; - return -1; + return AEC_BAD_PARAMETER_ERROR; } self->skewMode = config.skewMode; if (config.nlpMode != kAecNlpConservative && config.nlpMode != kAecNlpModerate && config.nlpMode != kAecNlpAggressive) { - self->lastError = AEC_BAD_PARAMETER_ERROR; - return -1; + return AEC_BAD_PARAMETER_ERROR; } if (config.metricsMode != kAecFalse && config.metricsMode != kAecTrue) { - self->lastError = AEC_BAD_PARAMETER_ERROR; - return -1; + return AEC_BAD_PARAMETER_ERROR; } if (config.delay_logging != kAecFalse && config.delay_logging != kAecTrue) { - self->lastError = AEC_BAD_PARAMETER_ERROR; - return -1; + return AEC_BAD_PARAMETER_ERROR; } WebRtcAec_SetConfigCore( @@ -440,12 +431,10 @@ int WebRtcAec_set_config(void* handle, AecConfig config) { int WebRtcAec_get_echo_status(void* handle, int* status) { Aec* self = (Aec*)handle; if (status == NULL) { - self->lastError = AEC_NULL_POINTER_ERROR; - return -1; + return AEC_NULL_POINTER_ERROR; } if (self->initFlag != initCheck) { - self->lastError = AEC_UNINITIALIZED_ERROR; - return -1; + return AEC_UNINITIALIZED_ERROR; } *status = WebRtcAec_echo_state(self->aec); @@ -466,12 +455,10 @@ int WebRtcAec_GetMetrics(void* handle, AecMetrics* metrics) { return -1; } if (metrics == NULL) { - self->lastError = AEC_NULL_POINTER_ERROR; - return -1; + return AEC_NULL_POINTER_ERROR; } if (self->initFlag != initCheck) { - self->lastError = AEC_UNINITIALIZED_ERROR; - return -1; + return AEC_UNINITIALIZED_ERROR; } WebRtcAec_GetEchoStats(self->aec, &erl, &erle, &a_nlp); @@ -556,32 +543,24 @@ int WebRtcAec_GetDelayMetrics(void* handle, float* fraction_poor_delays) { Aec* self = handle; if (median == NULL) { - self->lastError = AEC_NULL_POINTER_ERROR; - return -1; + return AEC_NULL_POINTER_ERROR; } if (std == NULL) { - self->lastError = AEC_NULL_POINTER_ERROR; - return -1; + return AEC_NULL_POINTER_ERROR; } if (self->initFlag != initCheck) { - self->lastError = AEC_UNINITIALIZED_ERROR; - return -1; + return AEC_UNINITIALIZED_ERROR; } if (WebRtcAec_GetDelayMetricsCore(self->aec, median, std, fraction_poor_delays) == -1) { // Logging disabled. - self->lastError = AEC_UNSUPPORTED_FUNCTION_ERROR; - return -1; + return AEC_UNSUPPORTED_FUNCTION_ERROR; } return 0; } -int32_t WebRtcAec_get_error_code(void* aecInst) { - Aec* aecpc = aecInst; - return aecpc->lastError; -} AecCore* WebRtcAec_aec_core(void* handle) { if (!handle) { @@ -617,7 +596,7 @@ static int ProcessNormal(Aec* aecpc, retVal = WebRtcAec_GetSkew(aecpc->resampler, skew, &aecpc->skew); if (retVal == -1) { aecpc->skew = 0; - aecpc->lastError = AEC_BAD_PARAMETER_WARNING; + retVal = AEC_BAD_PARAMETER_WARNING; } aecpc->skew /= aecpc->sampFactor * nrOfSamples; diff --git a/webrtc/modules/audio_processing/aec/include/echo_cancellation.h b/webrtc/modules/audio_processing/aec/echo_cancellation.h index a340cf84d0..de84b2e6d1 100644 --- a/webrtc/modules/audio_processing/aec/include/echo_cancellation.h +++ b/webrtc/modules/audio_processing/aec/echo_cancellation.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_INCLUDE_ECHO_CANCELLATION_H_ -#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_INCLUDE_ECHO_CANCELLATION_H_ +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_ECHO_CANCELLATION_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_ECHO_CANCELLATION_H_ #include <stddef.h> @@ -109,13 +109,32 @@ int32_t WebRtcAec_Init(void* aecInst, int32_t sampFreq, int32_t scSampFreq); * Outputs Description * ------------------------------------------------------------------- * int32_t return 0: OK - * -1: error + * 12000-12050: error code */ int32_t WebRtcAec_BufferFarend(void* aecInst, const float* farend, size_t nrOfSamples); /* + * Reports any errors that would arise if buffering a farend buffer + * + * Inputs Description + * ------------------------------------------------------------------- + * void* aecInst Pointer to the AEC instance + * const float* farend In buffer containing one frame of + * farend signal for L band + * int16_t nrOfSamples Number of samples in farend buffer + * + * Outputs Description + * ------------------------------------------------------------------- + * int32_t return 0: OK + * 12000-12050: error code + */ +int32_t WebRtcAec_GetBufferFarendError(void* aecInst, + const float* farend, + size_t nrOfSamples); + +/* * Runs the echo canceller on an 80 or 160 sample blocks of data. * * Inputs Description @@ -136,7 +155,7 @@ int32_t WebRtcAec_BufferFarend(void* aecInst, * float* const* out Out buffer, one frame of processed nearend * for each band * int32_t return 0: OK - * -1: error + * 12000-12050: error code */ int32_t WebRtcAec_Process(void* aecInst, const float* const* nearend, @@ -157,8 +176,8 @@ int32_t WebRtcAec_Process(void* aecInst, * * Outputs Description * ------------------------------------------------------------------- - * int return 0: OK - * -1: error + * int return 0: OK + * 12000-12050: error code */ int WebRtcAec_set_config(void* handle, AecConfig config); @@ -173,8 +192,8 @@ int WebRtcAec_set_config(void* handle, AecConfig config); * ------------------------------------------------------------------- * int* status 0: Almost certainly nearend single-talk * 1: Might not be neared single-talk - * int return 0: OK - * -1: error + * int return 0: OK + * 12000-12050: error code */ int WebRtcAec_get_echo_status(void* handle, int* status); @@ -189,8 +208,8 @@ int WebRtcAec_get_echo_status(void* handle, int* status); * ------------------------------------------------------------------- * AecMetrics* metrics Struct which will be filled out with the * current echo metrics. - * int return 0: OK - * -1: error + * int return 0: OK + * 12000-12050: error code */ int WebRtcAec_GetMetrics(void* handle, AecMetrics* metrics); @@ -208,27 +227,14 @@ int WebRtcAec_GetMetrics(void* handle, AecMetrics* metrics); * float* fraction_poor_delays Fraction of the delay estimates that may * cause the AEC to perform poorly. * - * int return 0: OK - * -1: error + * int return 0: OK + * 12000-12050: error code */ int WebRtcAec_GetDelayMetrics(void* handle, int* median, int* std, float* fraction_poor_delays); -/* - * Gets the last error code. - * - * Inputs Description - * ------------------------------------------------------------------- - * void* aecInst Pointer to the AEC instance - * - * Outputs Description - * ------------------------------------------------------------------- - * int32_t return 11000-11100: error code - */ -int32_t WebRtcAec_get_error_code(void* aecInst); - // Returns a pointer to the low level AEC handle. // // Input: @@ -242,4 +248,4 @@ struct AecCore* WebRtcAec_aec_core(void* handle); #ifdef __cplusplus } #endif -#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_INCLUDE_ECHO_CANCELLATION_H_ +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_ECHO_CANCELLATION_H_ diff --git a/webrtc/modules/audio_processing/aec/echo_cancellation_internal.h b/webrtc/modules/audio_processing/aec/echo_cancellation_internal.h index 95a6cf3324..e87219f33d 100644 --- a/webrtc/modules/audio_processing/aec/echo_cancellation_internal.h +++ b/webrtc/modules/audio_processing/aec/echo_cancellation_internal.h @@ -57,8 +57,6 @@ typedef struct { RingBuffer* far_pre_buf; // Time domain far-end pre-buffer. - int lastError; - int farend_started; AecCore* aec; diff --git a/webrtc/modules/audio_processing/aec/echo_cancellation_unittest.cc b/webrtc/modules/audio_processing/aec/echo_cancellation_unittest.cc index 315ac3e9f9..42db082ff9 100644 --- a/webrtc/modules/audio_processing/aec/echo_cancellation_unittest.cc +++ b/webrtc/modules/audio_processing/aec/echo_cancellation_unittest.cc @@ -10,7 +10,7 @@ // TODO(bjornv): Make this a comprehensive test. -#include "webrtc/modules/audio_processing/aec/include/echo_cancellation.h" +#include "webrtc/modules/audio_processing/aec/echo_cancellation.h" #include <stdlib.h> #include <time.h> diff --git a/webrtc/modules/audio_processing/aec/system_delay_unittest.cc b/webrtc/modules/audio_processing/aec/system_delay_unittest.cc index 07e3cf8add..567118d828 100644 --- a/webrtc/modules/audio_processing/aec/system_delay_unittest.cc +++ b/webrtc/modules/audio_processing/aec/system_delay_unittest.cc @@ -13,8 +13,7 @@ extern "C" { #include "webrtc/modules/audio_processing/aec/aec_core.h" } #include "webrtc/modules/audio_processing/aec/echo_cancellation_internal.h" -#include "webrtc/modules/audio_processing/aec/include/echo_cancellation.h" -#include "webrtc/test/testsupport/gtest_disable.h" +#include "webrtc/modules/audio_processing/aec/echo_cancellation.h" #include "webrtc/typedefs.h" namespace { |