diff options
author | minyue <minyue@webrtc.org> | 2015-12-18 15:31:14 -0800 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2015-12-18 23:31:19 +0000 |
commit | 92594a30ce02aed75f8a2a9f21e5b8c5c4e5f199 (patch) | |
tree | 1ea821ec482c80f290c1afe5666b9792fae0f7b5 /webrtc/modules/audio_processing | |
parent | 4ff818e61467f716340a43d30a970bcb5ec31ebf (diff) | |
download | webrtc-92594a30ce02aed75f8a2a9f21e5b8c5c4e5f199.tar.gz |
Moving FFT on farend signal to where it is used in AEC (bit exact).
Currently, the FFT is performed when AEC buffers the farend signal. This has some drawbacks:
1. memory inefficiency: two ring buffers are needed;
2. computation inefficiency: if ringbuffer gets wrapped around, some FFT computation will be wasted;
3. accessibility: the main AEC function loses access to the time-domain signal.
Therefore, this CL buffers the time-domain data, which is buffered anyway if a debugging macro is defined, and calculates the FFTs where they are actually used.
BUG=
Review URL: https://codereview.webrtc.org/1512573003
Cr-Commit-Position: refs/heads/master@{#11091}
Diffstat (limited to 'webrtc/modules/audio_processing')
-rw-r--r-- | webrtc/modules/audio_processing/aec/aec_core.c | 110 | ||||
-rw-r--r-- | webrtc/modules/audio_processing/aec/aec_core_internal.h | 5 | ||||
-rw-r--r-- | webrtc/modules/audio_processing/aec/echo_cancellation.c | 7 |
3 files changed, 49 insertions, 73 deletions
diff --git a/webrtc/modules/audio_processing/aec/aec_core.c b/webrtc/modules/audio_processing/aec/aec_core.c index 468532644b..901e0fde0b 100644 --- a/webrtc/modules/audio_processing/aec/aec_core.c +++ b/webrtc/modules/audio_processing/aec/aec_core.c @@ -846,13 +846,6 @@ static void Fft(float time_data[PART_LEN2], } } -static int MoveFarReadPtrWithoutSystemDelayUpdate(AecCore* self, int elements) { - WebRtc_MoveReadPtr(self->far_buf_windowed, elements); -#ifdef WEBRTC_AEC_DEBUG_DUMP - WebRtc_MoveReadPtr(self->far_time_buf, elements); -#endif - return WebRtc_MoveReadPtr(self->far_buf, elements); -} static int SignalBasedDelayCorrection(AecCore* self) { int delay_correction = 0; @@ -893,7 +886,7 @@ static int SignalBasedDelayCorrection(AecCore* self) { const int upper_bound = self->num_partitions * 3 / 4; const int do_correction = delay <= lower_bound || delay > upper_bound; if (do_correction == 1) { - int available_read = (int)WebRtc_available_read(self->far_buf); + int available_read = (int)WebRtc_available_read(self->far_time_buf); // With |shift_offset| we gradually rely on the delay estimates. For // positive delays we reduce the correction by |shift_offset| to lower the // risk of pushing the AEC into a non causal state. For negative delays @@ -1005,6 +998,7 @@ static void EchoSubtraction( static void EchoSuppression(AecCore* aec, + float farend[PART_LEN2], float* echo_subtractor_output, float* output, float* const* outputH) { @@ -1052,13 +1046,13 @@ static void EchoSuppression(AecCore* aec, aec_rdft_forward_128(fft); StoreAsComplex(fft, efw); - // We should always have at least one element stored in |far_buf|. - assert(WebRtc_available_read(aec->far_buf_windowed) > 0); // NLP - WebRtc_ReadBuffer(aec->far_buf_windowed, (void**)&xfw_ptr, &xfw[0][0], 1); - // TODO(bjornv): Investigate if we can reuse |far_buf_windowed| instead of - // |xfwBuf|. + // Convert far-end partition to the frequency domain with windowing. 
+ WindowData(fft, farend); + Fft(fft, xfw); + xfw_ptr = &xfw[0][0]; + // Buffer far. memcpy(aec->xfwBuf, xfw_ptr, sizeof(float) * 2 * PART_LEN1); @@ -1267,8 +1261,10 @@ static void ProcessBlock(AecCore* aec) { const float gInitNoise[2] = {0.999f, 0.001f}; float nearend[PART_LEN]; - float echo_subtractor_output[PART_LEN]; float* nearend_ptr = NULL; + float farend[PART_LEN2]; + float* farend_ptr = NULL; + float echo_subtractor_output[PART_LEN]; float output[PART_LEN]; float outputH[NUM_HIGH_BANDS_MAX][PART_LEN]; float* outputH_ptr[NUM_HIGH_BANDS_MAX]; @@ -1289,21 +1285,24 @@ static void ProcessBlock(AecCore* aec) { WebRtc_ReadBuffer(aec->nearFrBuf, (void**)&nearend_ptr, nearend, PART_LEN); memcpy(aec->dBuf + PART_LEN, nearend_ptr, sizeof(nearend)); - // ---------- Ooura fft ---------- + // We should always have at least one element stored in |far_buf|. + assert(WebRtc_available_read(aec->far_time_buf) > 0); + WebRtc_ReadBuffer(aec->far_time_buf, (void**)&farend_ptr, farend, 1); #ifdef WEBRTC_AEC_DEBUG_DUMP { - float farend[PART_LEN]; - float* farend_ptr = NULL; - WebRtc_ReadBuffer(aec->far_time_buf, (void**)&farend_ptr, farend, 1); - RTC_AEC_DEBUG_WAV_WRITE(aec->farFile, farend_ptr, PART_LEN); + // TODO(minyue): |farend_ptr| starts from buffered samples. This will be + // modified when |aec->far_time_buf| is revised. + RTC_AEC_DEBUG_WAV_WRITE(aec->farFile, &farend_ptr[PART_LEN], PART_LEN); + RTC_AEC_DEBUG_WAV_WRITE(aec->nearFile, nearend_ptr, PART_LEN); } #endif - // We should always have at least one element stored in |far_buf|. - assert(WebRtc_available_read(aec->far_buf) > 0); - WebRtc_ReadBuffer(aec->far_buf, (void**)&xf_ptr, &xf[0][0], 1); + // Convert far-end signal to the frequency domain. 
+ memcpy(fft, farend_ptr, sizeof(float) * PART_LEN2); + Fft(fft, xf); + xf_ptr = &xf[0][0]; // Near fft memcpy(fft, aec->dBuf, sizeof(float) * PART_LEN2); @@ -1403,7 +1402,7 @@ static void ProcessBlock(AecCore* aec) { RTC_AEC_DEBUG_WAV_WRITE(aec->outLinearFile, echo_subtractor_output, PART_LEN); // Perform echo suppression. - EchoSuppression(aec, echo_subtractor_output, output, outputH_ptr); + EchoSuppression(aec, farend_ptr, echo_subtractor_output, output, outputH_ptr); if (aec->metricsMode == 1) { // Update power levels and echo metrics @@ -1457,26 +1456,20 @@ AecCore* WebRtcAec_CreateAec() { } // Create far-end buffers. - aec->far_buf = - WebRtc_CreateBuffer(kBufSizePartitions, sizeof(float) * 2 * PART_LEN1); - if (!aec->far_buf) { - WebRtcAec_FreeAec(aec); - return NULL; - } - aec->far_buf_windowed = - WebRtc_CreateBuffer(kBufSizePartitions, sizeof(float) * 2 * PART_LEN1); - if (!aec->far_buf_windowed) { - WebRtcAec_FreeAec(aec); - return NULL; - } -#ifdef WEBRTC_AEC_DEBUG_DUMP - aec->instance_index = webrtc_aec_instance_count; + // For bit exactness with legacy code, each element in |far_time_buf| is + // supposed to contain |PART_LEN2| samples with an overlap of |PART_LEN| + // samples from the last frame. + // TODO(minyue): reduce |far_time_buf| to non-overlapped |PART_LEN| samples. 
aec->far_time_buf = - WebRtc_CreateBuffer(kBufSizePartitions, sizeof(float) * PART_LEN); + WebRtc_CreateBuffer(kBufSizePartitions, sizeof(float) * PART_LEN2); if (!aec->far_time_buf) { WebRtcAec_FreeAec(aec); return NULL; } + +#ifdef WEBRTC_AEC_DEBUG_DUMP + aec->instance_index = webrtc_aec_instance_count; + aec->farFile = aec->nearFile = aec->outFile = aec->outLinearFile = NULL; aec->debug_dump_count = 0; #endif @@ -1554,11 +1547,8 @@ void WebRtcAec_FreeAec(AecCore* aec) { WebRtc_FreeBuffer(aec->outFrBufH[i]); } - WebRtc_FreeBuffer(aec->far_buf); - WebRtc_FreeBuffer(aec->far_buf_windowed); -#ifdef WEBRTC_AEC_DEBUG_DUMP WebRtc_FreeBuffer(aec->far_time_buf); -#endif + RTC_AEC_DEBUG_WAV_CLOSE(aec->farFile); RTC_AEC_DEBUG_WAV_CLOSE(aec->nearFile); RTC_AEC_DEBUG_WAV_CLOSE(aec->outFile); @@ -1594,10 +1584,9 @@ int WebRtcAec_InitAec(AecCore* aec, int sampFreq) { } // Initialize far-end buffers. - WebRtc_InitBuffer(aec->far_buf); - WebRtc_InitBuffer(aec->far_buf_windowed); -#ifdef WEBRTC_AEC_DEBUG_DUMP WebRtc_InitBuffer(aec->far_time_buf); + +#ifdef WEBRTC_AEC_DEBUG_DUMP { int process_rate = sampFreq > 16000 ? 16000 : sampFreq; RTC_AEC_DEBUG_WAV_REOPEN("aec_far", aec->instance_index, @@ -1741,27 +1730,22 @@ int WebRtcAec_InitAec(AecCore* aec, int sampFreq) { return 0; } -void WebRtcAec_BufferFarendPartition(AecCore* aec, const float* farend) { - float fft[PART_LEN2]; - float xf[2][PART_LEN1]; +// For bit exactness with a legacy code, |farend| is supposed to contain +// |PART_LEN2| samples with an overlap of |PART_LEN| samples from the last +// frame. +// TODO(minyue): reduce |farend| to non-overlapped |PART_LEN| samples. +void WebRtcAec_BufferFarendPartition(AecCore* aec, const float* farend) { // Check if the buffer is full, and in that case flush the oldest data. 
- if (WebRtc_available_write(aec->far_buf) < 1) { + if (WebRtc_available_write(aec->far_time_buf) < 1) { WebRtcAec_MoveFarReadPtr(aec, 1); } - // Convert far-end partition to the frequency domain without windowing. - memcpy(fft, farend, sizeof(float) * PART_LEN2); - Fft(fft, xf); - WebRtc_WriteBuffer(aec->far_buf, &xf[0][0], 1); - // Convert far-end partition to the frequency domain with windowing. - WindowData(fft, farend); - Fft(fft, xf); - WebRtc_WriteBuffer(aec->far_buf_windowed, &xf[0][0], 1); + WebRtc_WriteBuffer(aec->far_time_buf, farend, 1); } int WebRtcAec_MoveFarReadPtr(AecCore* aec, int elements) { - int elements_moved = MoveFarReadPtrWithoutSystemDelayUpdate(aec, elements); + int elements_moved = WebRtc_MoveReadPtr(aec->far_time_buf, elements); aec->system_delay -= elements_moved * PART_LEN; return elements_moved; } @@ -1835,14 +1819,14 @@ void WebRtcAec_ProcessFrames(AecCore* aec, // rounding, like -16. int move_elements = (aec->knownDelay - knownDelay - 32) / PART_LEN; int moved_elements = - MoveFarReadPtrWithoutSystemDelayUpdate(aec, move_elements); + WebRtc_MoveReadPtr(aec->far_time_buf, move_elements); aec->knownDelay -= moved_elements * PART_LEN; } else { // 2 b) Apply signal based delay correction. 
int move_elements = SignalBasedDelayCorrection(aec); int moved_elements = - MoveFarReadPtrWithoutSystemDelayUpdate(aec, move_elements); - int far_near_buffer_diff = WebRtc_available_read(aec->far_buf) - + WebRtc_MoveReadPtr(aec->far_time_buf, move_elements); + int far_near_buffer_diff = WebRtc_available_read(aec->far_time_buf) - WebRtc_available_read(aec->nearFrBuf) / PART_LEN; WebRtc_SoftResetDelayEstimator(aec->delay_estimator, moved_elements); WebRtc_SoftResetDelayEstimatorFarend(aec->delay_estimator_farend, @@ -1921,10 +1905,6 @@ void WebRtcAec_GetEchoStats(AecCore* self, *a_nlp = self->aNlp; } -#ifdef WEBRTC_AEC_DEBUG_DUMP -void* WebRtcAec_far_time_buf(AecCore* self) { return self->far_time_buf; } -#endif - void WebRtcAec_SetConfigCore(AecCore* self, int nlp_mode, int metrics_mode, diff --git a/webrtc/modules/audio_processing/aec/aec_core_internal.h b/webrtc/modules/audio_processing/aec/aec_core_internal.h index 10630165ba..3809c82567 100644 --- a/webrtc/modules/audio_processing/aec/aec_core_internal.h +++ b/webrtc/modules/audio_processing/aec/aec_core_internal.h @@ -95,8 +95,8 @@ struct AecCore { int xfBufBlockPos; - RingBuffer* far_buf; - RingBuffer* far_buf_windowed; + RingBuffer* far_time_buf; + int system_delay; // Current system delay buffered in AEC. int mult; // sampling frequency multiple @@ -165,7 +165,6 @@ struct AecCore { // each time. int debug_dump_count; - RingBuffer* far_time_buf; rtc_WavWriter* farFile; rtc_WavWriter* nearFile; rtc_WavWriter* outFile; diff --git a/webrtc/modules/audio_processing/aec/echo_cancellation.c b/webrtc/modules/audio_processing/aec/echo_cancellation.c index cf38c67b4d..aab1718b24 100644 --- a/webrtc/modules/audio_processing/aec/echo_cancellation.c +++ b/webrtc/modules/audio_processing/aec/echo_cancellation.c @@ -317,7 +317,8 @@ int32_t WebRtcAec_BufferFarend(void* aecInst, // Write the time-domain data to |far_pre_buf|. 
WebRtc_WriteBuffer(aecpc->far_pre_buf, farend_ptr, newNrOfSamples); - // Transform to frequency domain if we have enough data. + // TODO(minyue): reduce to |PART_LEN| samples for each buffering, when + // WebRtcAec_BufferFarendPartition() is changed to take |PART_LEN| samples. while (WebRtc_available_read(aecpc->far_pre_buf) >= PART_LEN2) { // We have enough data to pass to the FFT, hence read PART_LEN2 samples. { @@ -325,10 +326,6 @@ int32_t WebRtcAec_BufferFarend(void* aecInst, float tmp[PART_LEN2]; WebRtc_ReadBuffer(aecpc->far_pre_buf, (void**)&ptmp, tmp, PART_LEN2); WebRtcAec_BufferFarendPartition(aecpc->aec, ptmp); -#ifdef WEBRTC_AEC_DEBUG_DUMP - WebRtc_WriteBuffer( - WebRtcAec_far_time_buf(aecpc->aec), &ptmp[PART_LEN], 1); -#endif } // Rewind |far_pre_buf| PART_LEN samples for overlap before continuing. |