diff options
Diffstat (limited to 'webrtc/modules/audio_processing')
96 files changed, 6227 insertions, 2658 deletions
diff --git a/webrtc/modules/audio_processing/BUILD.gn b/webrtc/modules/audio_processing/BUILD.gn index 2d0c602ef0..9d91911bc2 100644 --- a/webrtc/modules/audio_processing/BUILD.gn +++ b/webrtc/modules/audio_processing/BUILD.gn @@ -30,12 +30,12 @@ source_set("audio_processing") { "aec/aec_resampler.c", "aec/aec_resampler.h", "aec/echo_cancellation.c", + "aec/echo_cancellation.h", "aec/echo_cancellation_internal.h", - "aec/include/echo_cancellation.h", "aecm/aecm_core.c", "aecm/aecm_core.h", "aecm/echo_control_mobile.c", - "aecm/include/echo_control_mobile.h", + "aecm/echo_control_mobile.h", "agc/agc.cc", "agc/agc.h", "agc/agc_manager_direct.cc", @@ -161,8 +161,8 @@ source_set("audio_processing") { if (rtc_prefer_fixed_point) { defines += [ "WEBRTC_NS_FIXED" ] sources += [ - "ns/include/noise_suppression_x.h", "ns/noise_suppression_x.c", + "ns/noise_suppression_x.h", "ns/nsx_core.c", "ns/nsx_core.h", "ns/nsx_defines.h", @@ -176,8 +176,8 @@ source_set("audio_processing") { defines += [ "WEBRTC_NS_FLOAT" ] sources += [ "ns/defines.h", - "ns/include/noise_suppression.h", "ns/noise_suppression.c", + "ns/noise_suppression.h", "ns/ns_core.c", "ns/ns_core.h", "ns/windows_private.h", diff --git a/webrtc/modules/audio_processing/OWNERS b/webrtc/modules/audio_processing/OWNERS index 7b760682b0..d14f7f8614 100644 --- a/webrtc/modules/audio_processing/OWNERS +++ b/webrtc/modules/audio_processing/OWNERS @@ -1,5 +1,4 @@ aluebs@webrtc.org -andrew@webrtc.org henrik.lundin@webrtc.org peah@webrtc.org diff --git a/webrtc/modules/audio_processing/aec/aec_core.c b/webrtc/modules/audio_processing/aec/aec_core.c index f8eed32372..901e0fde0b 100644 --- a/webrtc/modules/audio_processing/aec/aec_core.c +++ b/webrtc/modules/audio_processing/aec/aec_core.c @@ -44,7 +44,6 @@ static const int countLen = 50; static const int kDelayMetricsAggregationWindow = 1250; // 5 seconds at 16 kHz. 
// Quantities to control H band scaling for SWB input -static const int flagHbandCn = 1; // flag for adding comfort noise in H band static const float cnScaleHband = (float)0.4; // scale for comfort noise in H band // Initial bin for averaging nlp gain in low band @@ -135,6 +134,9 @@ WebRtcAecFilterAdaptation WebRtcAec_FilterAdaptation; WebRtcAecOverdriveAndSuppress WebRtcAec_OverdriveAndSuppress; WebRtcAecComfortNoise WebRtcAec_ComfortNoise; WebRtcAecSubBandCoherence WebRtcAec_SubbandCoherence; +WebRtcAecStoreAsComplex WebRtcAec_StoreAsComplex; +WebRtcAecPartitionDelay WebRtcAec_PartitionDelay; +WebRtcAecWindowData WebRtcAec_WindowData; __inline static float MulRe(float aRe, float aIm, float bRe, float bIm) { return aRe * bRe - aIm * bIm; @@ -151,40 +153,49 @@ static int CmpFloat(const void* a, const void* b) { return (*da > *db) - (*da < *db); } -static void FilterFar(AecCore* aec, float yf[2][PART_LEN1]) { +static void FilterFar( + int num_partitions, + int x_fft_buf_block_pos, + float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], + float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1], + float y_fft[2][PART_LEN1]) { int i; - for (i = 0; i < aec->num_partitions; i++) { + for (i = 0; i < num_partitions; i++) { int j; - int xPos = (i + aec->xfBufBlockPos) * PART_LEN1; + int xPos = (i + x_fft_buf_block_pos) * PART_LEN1; int pos = i * PART_LEN1; // Check for wrap - if (i + aec->xfBufBlockPos >= aec->num_partitions) { - xPos -= aec->num_partitions * (PART_LEN1); + if (i + x_fft_buf_block_pos >= num_partitions) { + xPos -= num_partitions * (PART_LEN1); } for (j = 0; j < PART_LEN1; j++) { - yf[0][j] += MulRe(aec->xfBuf[0][xPos + j], - aec->xfBuf[1][xPos + j], - aec->wfBuf[0][pos + j], - aec->wfBuf[1][pos + j]); - yf[1][j] += MulIm(aec->xfBuf[0][xPos + j], - aec->xfBuf[1][xPos + j], - aec->wfBuf[0][pos + j], - aec->wfBuf[1][pos + j]); + y_fft[0][j] += MulRe(x_fft_buf[0][xPos + j], + x_fft_buf[1][xPos + j], + h_fft_buf[0][pos + j], + h_fft_buf[1][pos + j]); + 
y_fft[1][j] += MulIm(x_fft_buf[0][xPos + j], + x_fft_buf[1][xPos + j], + h_fft_buf[0][pos + j], + h_fft_buf[1][pos + j]); } } } -static void ScaleErrorSignal(AecCore* aec, float ef[2][PART_LEN1]) { - const float mu = aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu; - const float error_threshold = aec->extended_filter_enabled +static void ScaleErrorSignal(int extended_filter_enabled, + float normal_mu, + float normal_error_threshold, + float x_pow[PART_LEN1], + float ef[2][PART_LEN1]) { + const float mu = extended_filter_enabled ? kExtendedMu : normal_mu; + const float error_threshold = extended_filter_enabled ? kExtendedErrorThreshold - : aec->normal_error_threshold; + : normal_error_threshold; int i; float abs_ef; for (i = 0; i < (PART_LEN1); i++) { - ef[0][i] /= (aec->xPow[i] + 1e-10f); - ef[1][i] /= (aec->xPow[i] + 1e-10f); + ef[0][i] /= (x_pow[i] + 1e-10f); + ef[1][i] /= (x_pow[i] + 1e-10f); abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]); if (abs_ef > error_threshold) { @@ -199,59 +210,40 @@ static void ScaleErrorSignal(AecCore* aec, float ef[2][PART_LEN1]) { } } -// Time-unconstrined filter adaptation. -// TODO(andrew): consider for a low-complexity mode. 
-// static void FilterAdaptationUnconstrained(AecCore* aec, float *fft, -// float ef[2][PART_LEN1]) { -// int i, j; -// for (i = 0; i < aec->num_partitions; i++) { -// int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1); -// int pos; -// // Check for wrap -// if (i + aec->xfBufBlockPos >= aec->num_partitions) { -// xPos -= aec->num_partitions * PART_LEN1; -// } -// -// pos = i * PART_LEN1; -// -// for (j = 0; j < PART_LEN1; j++) { -// aec->wfBuf[0][pos + j] += MulRe(aec->xfBuf[0][xPos + j], -// -aec->xfBuf[1][xPos + j], -// ef[0][j], ef[1][j]); -// aec->wfBuf[1][pos + j] += MulIm(aec->xfBuf[0][xPos + j], -// -aec->xfBuf[1][xPos + j], -// ef[0][j], ef[1][j]); -// } -// } -//} - -static void FilterAdaptation(AecCore* aec, float* fft, float ef[2][PART_LEN1]) { + +static void FilterAdaptation( + int num_partitions, + int x_fft_buf_block_pos, + float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], + float e_fft[2][PART_LEN1], + float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) { int i, j; - for (i = 0; i < aec->num_partitions; i++) { - int xPos = (i + aec->xfBufBlockPos) * (PART_LEN1); + float fft[PART_LEN2]; + for (i = 0; i < num_partitions; i++) { + int xPos = (i + x_fft_buf_block_pos) * (PART_LEN1); int pos; // Check for wrap - if (i + aec->xfBufBlockPos >= aec->num_partitions) { - xPos -= aec->num_partitions * PART_LEN1; + if (i + x_fft_buf_block_pos >= num_partitions) { + xPos -= num_partitions * PART_LEN1; } pos = i * PART_LEN1; for (j = 0; j < PART_LEN; j++) { - fft[2 * j] = MulRe(aec->xfBuf[0][xPos + j], - -aec->xfBuf[1][xPos + j], - ef[0][j], - ef[1][j]); - fft[2 * j + 1] = MulIm(aec->xfBuf[0][xPos + j], - -aec->xfBuf[1][xPos + j], - ef[0][j], - ef[1][j]); + fft[2 * j] = MulRe(x_fft_buf[0][xPos + j], + -x_fft_buf[1][xPos + j], + e_fft[0][j], + e_fft[1][j]); + fft[2 * j + 1] = MulIm(x_fft_buf[0][xPos + j], + -x_fft_buf[1][xPos + j], + e_fft[0][j], + e_fft[1][j]); } - fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN], - -aec->xfBuf[1][xPos + PART_LEN], - 
ef[0][PART_LEN], - ef[1][PART_LEN]); + fft[1] = MulRe(x_fft_buf[0][xPos + PART_LEN], + -x_fft_buf[1][xPos + PART_LEN], + e_fft[0][PART_LEN], + e_fft[1][PART_LEN]); aec_rdft_inverse_128(fft); memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN); @@ -265,12 +257,12 @@ static void FilterAdaptation(AecCore* aec, float* fft, float ef[2][PART_LEN1]) { } aec_rdft_forward_128(fft); - aec->wfBuf[0][pos] += fft[0]; - aec->wfBuf[0][pos + PART_LEN] += fft[1]; + h_fft_buf[0][pos] += fft[0]; + h_fft_buf[0][pos + PART_LEN] += fft[1]; for (j = 1; j < PART_LEN; j++) { - aec->wfBuf[0][pos + j] += fft[2 * j]; - aec->wfBuf[1][pos + j] += fft[2 * j + 1]; + h_fft_buf[0][pos + j] += fft[2 * j]; + h_fft_buf[1][pos + j] += fft[2 * j + 1]; } } } @@ -334,12 +326,13 @@ const float WebRtcAec_kMinFarendPSD = 15; // - sde : cross-PSD of near-end and residual echo // - sxd : cross-PSD of near-end and far-end // -// In addition to updating the PSDs, also the filter diverge state is determined -// upon actions are taken. +// In addition to updating the PSDs, also the filter diverge state is +// determined. static void SmoothedPSD(AecCore* aec, float efw[2][PART_LEN1], float dfw[2][PART_LEN1], - float xfw[2][PART_LEN1]) { + float xfw[2][PART_LEN1], + int* extreme_filter_divergence) { // Power estimate smoothing coefficients. const float* ptrGCoh = aec->extended_filter_enabled ? WebRtcAec_kExtendedSmoothingCoefficients[aec->mult - 1] @@ -380,15 +373,12 @@ static void SmoothedPSD(AecCore* aec, seSum += aec->se[i]; } - // Divergent filter safeguard. + // Divergent filter safeguard update. aec->divergeState = (aec->divergeState ? 1.05f : 1.0f) * seSum > sdSum; - if (aec->divergeState) - memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1); - - // Reset if error is significantly larger than nearend (13 dB). 
- if (!aec->extended_filter_enabled && seSum > (19.95f * sdSum)) - memset(aec->wfBuf, 0, sizeof(aec->wfBuf)); + // Signal extreme filter divergence if the error is significantly larger + // than the nearend (13 dB). + *extreme_filter_divergence = (seSum > (19.95f * sdSum)); } // Window time domain data to be used by the fft. @@ -417,32 +407,15 @@ __inline static void StoreAsComplex(const float* data, static void SubbandCoherence(AecCore* aec, float efw[2][PART_LEN1], + float dfw[2][PART_LEN1], float xfw[2][PART_LEN1], float* fft, float* cohde, - float* cohxd) { - float dfw[2][PART_LEN1]; + float* cohxd, + int* extreme_filter_divergence) { int i; - if (aec->delayEstCtr == 0) - aec->delayIdx = PartitionDelay(aec); - - // Use delayed far. - memcpy(xfw, - aec->xfwBuf + aec->delayIdx * PART_LEN1, - sizeof(xfw[0][0]) * 2 * PART_LEN1); - - // Windowed near fft - WindowData(fft, aec->dBuf); - aec_rdft_forward_128(fft); - StoreAsComplex(fft, dfw); - - // Windowed error fft - WindowData(fft, aec->eBuf); - aec_rdft_forward_128(fft); - StoreAsComplex(fft, efw); - - SmoothedPSD(aec, efw, dfw, xfw); + SmoothedPSD(aec, efw, dfw, xfw, extreme_filter_divergence); // Subband coherence for (i = 0; i < PART_LEN1; i++) { @@ -458,23 +431,23 @@ static void SubbandCoherence(AecCore* aec, static void GetHighbandGain(const float* lambda, float* nlpGainHband) { int i; - nlpGainHband[0] = (float)0.0; + *nlpGainHband = (float)0.0; for (i = freqAvgIc; i < PART_LEN1 - 1; i++) { - nlpGainHband[0] += lambda[i]; + *nlpGainHband += lambda[i]; } - nlpGainHband[0] /= (float)(PART_LEN1 - 1 - freqAvgIc); + *nlpGainHband /= (float)(PART_LEN1 - 1 - freqAvgIc); } static void ComfortNoise(AecCore* aec, float efw[2][PART_LEN1], - complex_t* comfortNoiseHband, + float comfortNoiseHband[2][PART_LEN1], const float* noisePow, const float* lambda) { int i, num; float rand[PART_LEN]; float noise, noiseAvg, tmp, tmpAvg; int16_t randW16[PART_LEN]; - complex_t u[PART_LEN1]; + float u[2][PART_LEN1]; const float pi2 = 
6.28318530717959f; @@ -486,22 +459,22 @@ static void ComfortNoise(AecCore* aec, // Reject LF noise u[0][0] = 0; - u[0][1] = 0; + u[1][0] = 0; for (i = 1; i < PART_LEN1; i++) { tmp = pi2 * rand[i - 1]; noise = sqrtf(noisePow[i]); - u[i][0] = noise * cosf(tmp); - u[i][1] = -noise * sinf(tmp); + u[0][i] = noise * cosf(tmp); + u[1][i] = -noise * sinf(tmp); } - u[PART_LEN][1] = 0; + u[1][PART_LEN] = 0; for (i = 0; i < PART_LEN1; i++) { // This is the proper weighting to match the background noise power tmp = sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0)); // tmp = 1 - lambda[i]; - efw[0][i] += tmp * u[i][0]; - efw[1][i] += tmp * u[i][1]; + efw[0][i] += tmp * u[0][i]; + efw[1][i] += tmp * u[1][i]; } // For H band comfort noise @@ -509,7 +482,7 @@ static void ComfortNoise(AecCore* aec, noiseAvg = 0.0; tmpAvg = 0.0; num = 0; - if (aec->num_bands > 1 && flagHbandCn == 1) { + if (aec->num_bands > 1) { // average noise scale // average over second half of freq spectrum (i.e., 4->8khz) @@ -534,21 +507,24 @@ static void ComfortNoise(AecCore* aec, // TODO: we should probably have a new random vector here. 
// Reject LF noise u[0][0] = 0; - u[0][1] = 0; + u[1][0] = 0; for (i = 1; i < PART_LEN1; i++) { tmp = pi2 * rand[i - 1]; // Use average noise for H band - u[i][0] = noiseAvg * (float)cos(tmp); - u[i][1] = -noiseAvg * (float)sin(tmp); + u[0][i] = noiseAvg * (float)cos(tmp); + u[1][i] = -noiseAvg * (float)sin(tmp); } - u[PART_LEN][1] = 0; + u[1][PART_LEN] = 0; for (i = 0; i < PART_LEN1; i++) { // Use average NLP weight for H band - comfortNoiseHband[i][0] = tmpAvg * u[i][0]; - comfortNoiseHband[i][1] = tmpAvg * u[i][1]; + comfortNoiseHband[0][i] = tmpAvg * u[0][i]; + comfortNoiseHband[1][i] = tmpAvg * u[1][i]; } + } else { + memset(comfortNoiseHband, 0, + 2 * PART_LEN1 * sizeof(comfortNoiseHband[0][0])); } } @@ -837,21 +813,29 @@ static void UpdateDelayMetrics(AecCore* self) { return; } -static void TimeToFrequency(float time_data[PART_LEN2], - float freq_data[2][PART_LEN1], - int window) { - int i = 0; - - // TODO(bjornv): Should we have a different function/wrapper for windowed FFT? - if (window) { - for (i = 0; i < PART_LEN; i++) { - time_data[i] *= WebRtcAec_sqrtHanning[i]; - time_data[PART_LEN + i] *= WebRtcAec_sqrtHanning[PART_LEN - i]; - } +static void ScaledInverseFft(float freq_data[2][PART_LEN1], + float time_data[PART_LEN2], + float scale, + int conjugate) { + int i; + const float normalization = scale / ((float)PART_LEN2); + const float sign = (conjugate ? -1 : 1); + time_data[0] = freq_data[0][0] * normalization; + time_data[1] = freq_data[0][PART_LEN] * normalization; + for (i = 1; i < PART_LEN; i++) { + time_data[2 * i] = freq_data[0][i] * normalization; + time_data[2 * i + 1] = sign * freq_data[1][i] * normalization; } + aec_rdft_inverse_128(time_data); +} + +static void Fft(float time_data[PART_LEN2], + float freq_data[2][PART_LEN1]) { + int i; aec_rdft_forward_128(time_data); - // Reorder. + + // Reorder fft output data. 
freq_data[1][0] = 0; freq_data[1][PART_LEN] = 0; freq_data[0][0] = time_data[0]; @@ -862,13 +846,6 @@ static void TimeToFrequency(float time_data[PART_LEN2], } } -static int MoveFarReadPtrWithoutSystemDelayUpdate(AecCore* self, int elements) { - WebRtc_MoveReadPtr(self->far_buf_windowed, elements); -#ifdef WEBRTC_AEC_DEBUG_DUMP - WebRtc_MoveReadPtr(self->far_time_buf, elements); -#endif - return WebRtc_MoveReadPtr(self->far_buf, elements); -} static int SignalBasedDelayCorrection(AecCore* self) { int delay_correction = 0; @@ -909,7 +886,7 @@ static int SignalBasedDelayCorrection(AecCore* self) { const int upper_bound = self->num_partitions * 3 / 4; const int do_correction = delay <= lower_bound || delay > upper_bound; if (do_correction == 1) { - int available_read = (int)WebRtc_available_read(self->far_buf); + int available_read = (int)WebRtc_available_read(self->far_time_buf); // With |shift_offset| we gradually rely on the delay estimates. For // positive delays we reduce the correction by |shift_offset| to lower the // risk of pushing the AEC into a non causal state. 
For negative delays @@ -942,13 +919,94 @@ static int SignalBasedDelayCorrection(AecCore* self) { return delay_correction; } -static void NonLinearProcessing(AecCore* aec, - float* output, - float* const* outputH) { - float efw[2][PART_LEN1], xfw[2][PART_LEN1]; - complex_t comfortNoiseHband[PART_LEN1]; +static void EchoSubtraction( + AecCore* aec, + int num_partitions, + int x_fft_buf_block_pos, + int metrics_mode, + int extended_filter_enabled, + float normal_mu, + float normal_error_threshold, + float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], + float* const y, + float x_pow[PART_LEN1], + float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1], + PowerLevel* linout_level, + float echo_subtractor_output[PART_LEN]) { + float s_fft[2][PART_LEN1]; + float e_extended[PART_LEN2]; + float s_extended[PART_LEN2]; + float *s; + float e[PART_LEN]; + float e_fft[2][PART_LEN1]; + int i; + memset(s_fft, 0, sizeof(s_fft)); + + // Conditionally reset the echo subtraction filter if the filter has diverged + // significantly. + if (!aec->extended_filter_enabled && + aec->extreme_filter_divergence) { + memset(aec->wfBuf, 0, sizeof(aec->wfBuf)); + aec->extreme_filter_divergence = 0; + } + + // Produce echo estimate s_fft. + WebRtcAec_FilterFar(num_partitions, + x_fft_buf_block_pos, + x_fft_buf, + h_fft_buf, + s_fft); + + // Compute the time-domain echo estimate s. + ScaledInverseFft(s_fft, s_extended, 2.0f, 0); + s = &s_extended[PART_LEN]; + + // Compute the time-domain echo prediction error. + for (i = 0; i < PART_LEN; ++i) { + e[i] = y[i] - s[i]; + } + + // Compute the frequency domain echo prediction error. + memset(e_extended, 0, sizeof(float) * PART_LEN); + memcpy(e_extended + PART_LEN, e, sizeof(float) * PART_LEN); + Fft(e_extended, e_fft); + + RTC_AEC_DEBUG_RAW_WRITE(aec->e_fft_file, + &e_fft[0][0], + sizeof(e_fft[0][0]) * PART_LEN1 * 2); + + if (metrics_mode == 1) { + // Note that the first PART_LEN samples in fft (before transformation) are + // zero. 
Hence, the scaling by two in UpdateLevel() should not be + // performed. That scaling is taken care of in UpdateMetrics() instead. + UpdateLevel(linout_level, e_fft); + } + + // Scale error signal inversely with far power. + WebRtcAec_ScaleErrorSignal(extended_filter_enabled, + normal_mu, + normal_error_threshold, + x_pow, + e_fft); + WebRtcAec_FilterAdaptation(num_partitions, + x_fft_buf_block_pos, + x_fft_buf, + e_fft, + h_fft_buf); + memcpy(echo_subtractor_output, e, sizeof(float) * PART_LEN); +} + + +static void EchoSuppression(AecCore* aec, + float farend[PART_LEN2], + float* echo_subtractor_output, + float* output, + float* const* outputH) { + float efw[2][PART_LEN1]; + float xfw[2][PART_LEN1]; + float dfw[2][PART_LEN1]; + float comfortNoiseHband[2][PART_LEN1]; float fft[PART_LEN2]; - float scale, dtmp; float nlpGainHband; int i; size_t j; @@ -972,27 +1030,51 @@ static void NonLinearProcessing(AecCore* aec, float* xfw_ptr = NULL; - aec->delayEstCtr++; - if (aec->delayEstCtr == delayEstInterval) { - aec->delayEstCtr = 0; - } + // Update eBuf with echo subtractor output. + memcpy(aec->eBuf + PART_LEN, + echo_subtractor_output, + sizeof(float) * PART_LEN); - // initialize comfort noise for H band - memset(comfortNoiseHband, 0, sizeof(comfortNoiseHband)); - nlpGainHband = (float)0.0; - dtmp = (float)0.0; + // Analysis filter banks for the echo suppressor. + // Windowed near-end ffts. + WindowData(fft, aec->dBuf); + aec_rdft_forward_128(fft); + StoreAsComplex(fft, dfw); + + // Windowed echo suppressor output ffts. + WindowData(fft, aec->eBuf); + aec_rdft_forward_128(fft); + StoreAsComplex(fft, efw); - // We should always have at least one element stored in |far_buf|. - assert(WebRtc_available_read(aec->far_buf_windowed) > 0); // NLP - WebRtc_ReadBuffer(aec->far_buf_windowed, (void**)&xfw_ptr, &xfw[0][0], 1); - // TODO(bjornv): Investigate if we can reuse |far_buf_windowed| instead of - // |xfwBuf|. 
+ // Convert far-end partition to the frequency domain with windowing. + WindowData(fft, farend); + Fft(fft, xfw); + xfw_ptr = &xfw[0][0]; + // Buffer far. memcpy(aec->xfwBuf, xfw_ptr, sizeof(float) * 2 * PART_LEN1); - WebRtcAec_SubbandCoherence(aec, efw, xfw, fft, cohde, cohxd); + aec->delayEstCtr++; + if (aec->delayEstCtr == delayEstInterval) { + aec->delayEstCtr = 0; + aec->delayIdx = WebRtcAec_PartitionDelay(aec); + } + + // Use delayed far. + memcpy(xfw, + aec->xfwBuf + aec->delayIdx * PART_LEN1, + sizeof(xfw[0][0]) * 2 * PART_LEN1); + + WebRtcAec_SubbandCoherence(aec, efw, dfw, xfw, fft, cohde, cohxd, + &aec->extreme_filter_divergence); + + // Select the microphone signal as output if the filter is deemed to have + // diverged. + if (aec->divergeState) { + memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1); + } hNlXdAvg = 0; for (i = minPrefBand; i < prefBandSize + minPrefBand; i++) { @@ -1098,67 +1180,51 @@ static void NonLinearProcessing(AecCore* aec, // scaling only in UpdateMetrics(). UpdateLevel(&aec->nlpoutlevel, efw); } + // Inverse error fft. - fft[0] = efw[0][0]; - fft[1] = efw[0][PART_LEN]; - for (i = 1; i < PART_LEN; i++) { - fft[2 * i] = efw[0][i]; - // Sign change required by Ooura fft. - fft[2 * i + 1] = -efw[1][i]; - } - aec_rdft_inverse_128(fft); + ScaledInverseFft(efw, fft, 2.0f, 1); // Overlap and add to obtain output. - scale = 2.0f / PART_LEN2; for (i = 0; i < PART_LEN; i++) { - fft[i] *= scale; // fft scaling - fft[i] = fft[i] * WebRtcAec_sqrtHanning[i] + aec->outBuf[i]; - - fft[PART_LEN + i] *= scale; // fft scaling - aec->outBuf[i] = fft[PART_LEN + i] * WebRtcAec_sqrtHanning[PART_LEN - i]; + output[i] = (fft[i] * WebRtcAec_sqrtHanning[i] + + aec->outBuf[i] * WebRtcAec_sqrtHanning[PART_LEN - i]); // Saturate output to keep it in the allowed range. 
output[i] = WEBRTC_SPL_SAT( - WEBRTC_SPL_WORD16_MAX, fft[i], WEBRTC_SPL_WORD16_MIN); + WEBRTC_SPL_WORD16_MAX, output[i], WEBRTC_SPL_WORD16_MIN); } + memcpy(aec->outBuf, &fft[PART_LEN], PART_LEN * sizeof(aec->outBuf[0])); // For H band if (aec->num_bands > 1) { - // H band gain // average nlp over low band: average over second half of freq spectrum // (4->8khz) GetHighbandGain(hNl, &nlpGainHband); // Inverse comfort_noise - if (flagHbandCn == 1) { - fft[0] = comfortNoiseHband[0][0]; - fft[1] = comfortNoiseHband[PART_LEN][0]; - for (i = 1; i < PART_LEN; i++) { - fft[2 * i] = comfortNoiseHband[i][0]; - fft[2 * i + 1] = comfortNoiseHband[i][1]; - } - aec_rdft_inverse_128(fft); - scale = 2.0f / PART_LEN2; - } + ScaledInverseFft(comfortNoiseHband, fft, 2.0f, 0); // compute gain factor for (j = 0; j < aec->num_bands - 1; ++j) { for (i = 0; i < PART_LEN; i++) { - dtmp = aec->dBufH[j][i]; - dtmp = dtmp * nlpGainHband; // for variable gain + outputH[j][i] = aec->dBufH[j][i] * nlpGainHband; + } + } - // add some comfort noise where Hband is attenuated - if (flagHbandCn == 1 && j == 0) { - fft[i] *= scale; // fft scaling - dtmp += cnScaleHband * fft[i]; - } + // Add some comfort noise where Hband is attenuated. + for (i = 0; i < PART_LEN; i++) { + outputH[0][i] += cnScaleHband * fft[i]; + } - // Saturate output to keep it in the allowed range. + // Saturate output to keep it in the allowed range. + for (j = 0; j < aec->num_bands - 1; ++j) { + for (i = 0; i < PART_LEN; i++) { outputH[j][i] = WEBRTC_SPL_SAT( - WEBRTC_SPL_WORD16_MAX, dtmp, WEBRTC_SPL_WORD16_MIN); + WEBRTC_SPL_WORD16_MAX, outputH[j][i], WEBRTC_SPL_WORD16_MIN); } } + } // Copy the current block to the old position. 
@@ -1177,11 +1243,9 @@ static void NonLinearProcessing(AecCore* aec, static void ProcessBlock(AecCore* aec) { size_t i; - float y[PART_LEN], e[PART_LEN]; - float scale; float fft[PART_LEN2]; - float xf[2][PART_LEN1], yf[2][PART_LEN1], ef[2][PART_LEN1]; + float xf[2][PART_LEN1]; float df[2][PART_LEN1]; float far_spectrum = 0.0f; float near_spectrum = 0.0f; @@ -1198,15 +1262,18 @@ static void ProcessBlock(AecCore* aec) { float nearend[PART_LEN]; float* nearend_ptr = NULL; + float farend[PART_LEN2]; + float* farend_ptr = NULL; + float echo_subtractor_output[PART_LEN]; float output[PART_LEN]; float outputH[NUM_HIGH_BANDS_MAX][PART_LEN]; float* outputH_ptr[NUM_HIGH_BANDS_MAX]; + float* xf_ptr = NULL; + for (i = 0; i < NUM_HIGH_BANDS_MAX; ++i) { outputH_ptr[i] = outputH[i]; } - float* xf_ptr = NULL; - // Concatenate old and new nearend blocks. for (i = 0; i < aec->num_bands - 1; ++i) { WebRtc_ReadBuffer(aec->nearFrBufH[i], @@ -1218,25 +1285,28 @@ static void ProcessBlock(AecCore* aec) { WebRtc_ReadBuffer(aec->nearFrBuf, (void**)&nearend_ptr, nearend, PART_LEN); memcpy(aec->dBuf + PART_LEN, nearend_ptr, sizeof(nearend)); - // ---------- Ooura fft ---------- + // We should always have at least one element stored in |far_buf|. + assert(WebRtc_available_read(aec->far_time_buf) > 0); + WebRtc_ReadBuffer(aec->far_time_buf, (void**)&farend_ptr, farend, 1); #ifdef WEBRTC_AEC_DEBUG_DUMP { - float farend[PART_LEN]; - float* farend_ptr = NULL; - WebRtc_ReadBuffer(aec->far_time_buf, (void**)&farend_ptr, farend, 1); - RTC_AEC_DEBUG_WAV_WRITE(aec->farFile, farend_ptr, PART_LEN); + // TODO(minyue): |farend_ptr| starts from buffered samples. This will be + // modified when |aec->far_time_buf| is revised. + RTC_AEC_DEBUG_WAV_WRITE(aec->farFile, &farend_ptr[PART_LEN], PART_LEN); + RTC_AEC_DEBUG_WAV_WRITE(aec->nearFile, nearend_ptr, PART_LEN); } #endif - // We should always have at least one element stored in |far_buf|. 
- assert(WebRtc_available_read(aec->far_buf) > 0); - WebRtc_ReadBuffer(aec->far_buf, (void**)&xf_ptr, &xf[0][0], 1); + // Convert far-end signal to the frequency domain. + memcpy(fft, farend_ptr, sizeof(float) * PART_LEN2); + Fft(fft, xf); + xf_ptr = &xf[0][0]; // Near fft memcpy(fft, aec->dBuf, sizeof(float) * PART_LEN2); - TimeToFrequency(fft, df, 0); + Fft(fft, df); // Power smoothing for (i = 0; i < PART_LEN1; i++) { @@ -1314,60 +1384,25 @@ static void ProcessBlock(AecCore* aec) { &xf_ptr[PART_LEN1], sizeof(float) * PART_LEN1); - memset(yf, 0, sizeof(yf)); - - // Filter far - WebRtcAec_FilterFar(aec, yf); - - // Inverse fft to obtain echo estimate and error. - fft[0] = yf[0][0]; - fft[1] = yf[0][PART_LEN]; - for (i = 1; i < PART_LEN; i++) { - fft[2 * i] = yf[0][i]; - fft[2 * i + 1] = yf[1][i]; - } - aec_rdft_inverse_128(fft); - - scale = 2.0f / PART_LEN2; - for (i = 0; i < PART_LEN; i++) { - y[i] = fft[PART_LEN + i] * scale; // fft scaling - } - - for (i = 0; i < PART_LEN; i++) { - e[i] = nearend_ptr[i] - y[i]; - } - - // Error fft - memcpy(aec->eBuf + PART_LEN, e, sizeof(float) * PART_LEN); - memset(fft, 0, sizeof(float) * PART_LEN); - memcpy(fft + PART_LEN, e, sizeof(float) * PART_LEN); - // TODO(bjornv): Change to use TimeToFrequency(). - aec_rdft_forward_128(fft); - - ef[1][0] = 0; - ef[1][PART_LEN] = 0; - ef[0][0] = fft[0]; - ef[0][PART_LEN] = fft[1]; - for (i = 1; i < PART_LEN; i++) { - ef[0][i] = fft[2 * i]; - ef[1][i] = fft[2 * i + 1]; - } - - RTC_AEC_DEBUG_RAW_WRITE(aec->e_fft_file, - &ef[0][0], - sizeof(ef[0][0]) * PART_LEN1 * 2); - - if (aec->metricsMode == 1) { - // Note that the first PART_LEN samples in fft (before transformation) are - // zero. Hence, the scaling by two in UpdateLevel() should not be - // performed. That scaling is taken care of in UpdateMetrics() instead. - UpdateLevel(&aec->linoutlevel, ef); - } - - // Scale error signal inversely with far power. 
- WebRtcAec_ScaleErrorSignal(aec, ef); - WebRtcAec_FilterAdaptation(aec, fft, ef); - NonLinearProcessing(aec, output, outputH_ptr); + // Perform echo subtraction. + EchoSubtraction(aec, + aec->num_partitions, + aec->xfBufBlockPos, + aec->metricsMode, + aec->extended_filter_enabled, + aec->normal_mu, + aec->normal_error_threshold, + aec->xfBuf, + nearend_ptr, + aec->xPow, + aec->wfBuf, + &aec->linoutlevel, + echo_subtractor_output); + + RTC_AEC_DEBUG_WAV_WRITE(aec->outLinearFile, echo_subtractor_output, PART_LEN); + + // Perform echo suppression. + EchoSuppression(aec, farend_ptr, echo_subtractor_output, output, outputH_ptr); if (aec->metricsMode == 1) { // Update power levels and echo metrics @@ -1383,7 +1418,6 @@ static void ProcessBlock(AecCore* aec) { WebRtc_WriteBuffer(aec->outFrBufH[i], outputH[i], PART_LEN); } - RTC_AEC_DEBUG_WAV_WRITE(aec->outLinearFile, e, PART_LEN); RTC_AEC_DEBUG_WAV_WRITE(aec->outFile, output, PART_LEN); } @@ -1422,26 +1456,20 @@ AecCore* WebRtcAec_CreateAec() { } // Create far-end buffers. - aec->far_buf = - WebRtc_CreateBuffer(kBufSizePartitions, sizeof(float) * 2 * PART_LEN1); - if (!aec->far_buf) { - WebRtcAec_FreeAec(aec); - return NULL; - } - aec->far_buf_windowed = - WebRtc_CreateBuffer(kBufSizePartitions, sizeof(float) * 2 * PART_LEN1); - if (!aec->far_buf_windowed) { - WebRtcAec_FreeAec(aec); - return NULL; - } -#ifdef WEBRTC_AEC_DEBUG_DUMP - aec->instance_index = webrtc_aec_instance_count; + // For bit exactness with legacy code, each element in |far_time_buf| is + // supposed to contain |PART_LEN2| samples with an overlap of |PART_LEN| + // samples from the last frame. + // TODO(minyue): reduce |far_time_buf| to non-overlapped |PART_LEN| samples. 
aec->far_time_buf = - WebRtc_CreateBuffer(kBufSizePartitions, sizeof(float) * PART_LEN); + WebRtc_CreateBuffer(kBufSizePartitions, sizeof(float) * PART_LEN2); if (!aec->far_time_buf) { WebRtcAec_FreeAec(aec); return NULL; } + +#ifdef WEBRTC_AEC_DEBUG_DUMP + aec->instance_index = webrtc_aec_instance_count; + aec->farFile = aec->nearFile = aec->outFile = aec->outLinearFile = NULL; aec->debug_dump_count = 0; #endif @@ -1477,6 +1505,10 @@ AecCore* WebRtcAec_CreateAec() { WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppress; WebRtcAec_ComfortNoise = ComfortNoise; WebRtcAec_SubbandCoherence = SubbandCoherence; + WebRtcAec_StoreAsComplex = StoreAsComplex; + WebRtcAec_PartitionDelay = PartitionDelay; + WebRtcAec_WindowData = WindowData; + #if defined(WEBRTC_ARCH_X86_FAMILY) if (WebRtc_GetCPUInfo(kSSE2)) { @@ -1515,11 +1547,8 @@ void WebRtcAec_FreeAec(AecCore* aec) { WebRtc_FreeBuffer(aec->outFrBufH[i]); } - WebRtc_FreeBuffer(aec->far_buf); - WebRtc_FreeBuffer(aec->far_buf_windowed); -#ifdef WEBRTC_AEC_DEBUG_DUMP WebRtc_FreeBuffer(aec->far_time_buf); -#endif + RTC_AEC_DEBUG_WAV_CLOSE(aec->farFile); RTC_AEC_DEBUG_WAV_CLOSE(aec->nearFile); RTC_AEC_DEBUG_WAV_CLOSE(aec->outFile); @@ -1555,10 +1584,9 @@ int WebRtcAec_InitAec(AecCore* aec, int sampFreq) { } // Initialize far-end buffers. - WebRtc_InitBuffer(aec->far_buf); - WebRtc_InitBuffer(aec->far_buf_windowed); -#ifdef WEBRTC_AEC_DEBUG_DUMP WebRtc_InitBuffer(aec->far_time_buf); + +#ifdef WEBRTC_AEC_DEBUG_DUMP { int process_rate = sampFreq > 16000 ? 
16000 : sampFreq; RTC_AEC_DEBUG_WAV_REOPEN("aec_far", aec->instance_index, @@ -1693,6 +1721,8 @@ int WebRtcAec_InitAec(AecCore* aec, int sampFreq) { aec->seed = 777; aec->delayEstCtr = 0; + aec->extreme_filter_divergence = 0; + // Metrics disabled by default aec->metricsMode = 0; InitMetrics(aec); @@ -1700,27 +1730,22 @@ int WebRtcAec_InitAec(AecCore* aec, int sampFreq) { return 0; } -void WebRtcAec_BufferFarendPartition(AecCore* aec, const float* farend) { - float fft[PART_LEN2]; - float xf[2][PART_LEN1]; +// For bit exactness with a legacy code, |farend| is supposed to contain +// |PART_LEN2| samples with an overlap of |PART_LEN| samples from the last +// frame. +// TODO(minyue): reduce |farend| to non-overlapped |PART_LEN| samples. +void WebRtcAec_BufferFarendPartition(AecCore* aec, const float* farend) { // Check if the buffer is full, and in that case flush the oldest data. - if (WebRtc_available_write(aec->far_buf) < 1) { + if (WebRtc_available_write(aec->far_time_buf) < 1) { WebRtcAec_MoveFarReadPtr(aec, 1); } - // Convert far-end partition to the frequency domain without windowing. - memcpy(fft, farend, sizeof(float) * PART_LEN2); - TimeToFrequency(fft, xf, 0); - WebRtc_WriteBuffer(aec->far_buf, &xf[0][0], 1); - // Convert far-end partition to the frequency domain with windowing. - memcpy(fft, farend, sizeof(float) * PART_LEN2); - TimeToFrequency(fft, xf, 1); - WebRtc_WriteBuffer(aec->far_buf_windowed, &xf[0][0], 1); + WebRtc_WriteBuffer(aec->far_time_buf, farend, 1); } int WebRtcAec_MoveFarReadPtr(AecCore* aec, int elements) { - int elements_moved = MoveFarReadPtrWithoutSystemDelayUpdate(aec, elements); + int elements_moved = WebRtc_MoveReadPtr(aec->far_time_buf, elements); aec->system_delay -= elements_moved * PART_LEN; return elements_moved; } @@ -1794,14 +1819,14 @@ void WebRtcAec_ProcessFrames(AecCore* aec, // rounding, like -16. 
int move_elements = (aec->knownDelay - knownDelay - 32) / PART_LEN; int moved_elements = - MoveFarReadPtrWithoutSystemDelayUpdate(aec, move_elements); + WebRtc_MoveReadPtr(aec->far_time_buf, move_elements); aec->knownDelay -= moved_elements * PART_LEN; } else { // 2 b) Apply signal based delay correction. int move_elements = SignalBasedDelayCorrection(aec); int moved_elements = - MoveFarReadPtrWithoutSystemDelayUpdate(aec, move_elements); - int far_near_buffer_diff = WebRtc_available_read(aec->far_buf) - + WebRtc_MoveReadPtr(aec->far_time_buf, move_elements); + int far_near_buffer_diff = WebRtc_available_read(aec->far_time_buf) - WebRtc_available_read(aec->nearFrBuf) / PART_LEN; WebRtc_SoftResetDelayEstimator(aec->delay_estimator, moved_elements); WebRtc_SoftResetDelayEstimatorFarend(aec->delay_estimator_farend, @@ -1880,10 +1905,6 @@ void WebRtcAec_GetEchoStats(AecCore* self, *a_nlp = self->aNlp; } -#ifdef WEBRTC_AEC_DEBUG_DUMP -void* WebRtcAec_far_time_buf(AecCore* self) { return self->far_time_buf; } -#endif - void WebRtcAec_SetConfigCore(AecCore* self, int nlp_mode, int metrics_mode, diff --git a/webrtc/modules/audio_processing/aec/aec_core_internal.h b/webrtc/modules/audio_processing/aec/aec_core_internal.h index 2de028379b..3809c82567 100644 --- a/webrtc/modules/audio_processing/aec/aec_core_internal.h +++ b/webrtc/modules/audio_processing/aec/aec_core_internal.h @@ -95,8 +95,8 @@ struct AecCore { int xfBufBlockPos; - RingBuffer* far_buf; - RingBuffer* far_buf_windowed; + RingBuffer* far_time_buf; + int system_delay; // Current system delay buffered in AEC. int mult; // sampling frequency multiple @@ -152,6 +152,10 @@ struct AecCore { // Runtime selection of number of filter partitions. int num_partitions; + // Flag that extreme filter divergence has been detected by the Echo + // Suppressor. 
+ int extreme_filter_divergence; + #ifdef WEBRTC_AEC_DEBUG_DUMP // Sequence number of this AEC instance, so that different instances can // choose different dump file names. @@ -161,7 +165,6 @@ struct AecCore { // each time. int debug_dump_count; - RingBuffer* far_time_buf; rtc_WavWriter* farFile; rtc_WavWriter* nearFile; rtc_WavWriter* outFile; @@ -170,13 +173,25 @@ struct AecCore { #endif }; -typedef void (*WebRtcAecFilterFar)(AecCore* aec, float yf[2][PART_LEN1]); +typedef void (*WebRtcAecFilterFar)( + int num_partitions, + int x_fft_buf_block_pos, + float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], + float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1], + float y_fft[2][PART_LEN1]); extern WebRtcAecFilterFar WebRtcAec_FilterFar; -typedef void (*WebRtcAecScaleErrorSignal)(AecCore* aec, float ef[2][PART_LEN1]); -extern WebRtcAecScaleErrorSignal WebRtcAec_ScaleErrorSignal; -typedef void (*WebRtcAecFilterAdaptation)(AecCore* aec, - float* fft, +typedef void (*WebRtcAecScaleErrorSignal)(int extended_filter_enabled, + float normal_mu, + float normal_error_threshold, + float x_pow[PART_LEN1], float ef[2][PART_LEN1]); +extern WebRtcAecScaleErrorSignal WebRtcAec_ScaleErrorSignal; +typedef void (*WebRtcAecFilterAdaptation)( + int num_partitions, + int x_fft_buf_block_pos, + float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], + float e_fft[2][PART_LEN1], + float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]); extern WebRtcAecFilterAdaptation WebRtcAec_FilterAdaptation; typedef void (*WebRtcAecOverdriveAndSuppress)(AecCore* aec, float hNl[PART_LEN1], @@ -186,17 +201,29 @@ extern WebRtcAecOverdriveAndSuppress WebRtcAec_OverdriveAndSuppress; typedef void (*WebRtcAecComfortNoise)(AecCore* aec, float efw[2][PART_LEN1], - complex_t* comfortNoiseHband, + float comfortNoiseHband[2][PART_LEN1], const float* noisePow, const float* lambda); extern WebRtcAecComfortNoise WebRtcAec_ComfortNoise; typedef void (*WebRtcAecSubBandCoherence)(AecCore* aec, float 
efw[2][PART_LEN1], + float dfw[2][PART_LEN1], float xfw[2][PART_LEN1], float* fft, float* cohde, - float* cohxd); + float* cohxd, + int* extreme_filter_divergence); extern WebRtcAecSubBandCoherence WebRtcAec_SubbandCoherence; +typedef int (*WebRtcAecPartitionDelay)(const AecCore* aec); +extern WebRtcAecPartitionDelay WebRtcAec_PartitionDelay; + +typedef void (*WebRtcAecStoreAsComplex)(const float* data, + float data_complex[2][PART_LEN1]); +extern WebRtcAecStoreAsComplex WebRtcAec_StoreAsComplex; + +typedef void (*WebRtcAecWindowData)(float* x_windowed, const float* x); +extern WebRtcAecWindowData WebRtcAec_WindowData; + #endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_INTERNAL_H_ diff --git a/webrtc/modules/audio_processing/aec/aec_core_mips.c b/webrtc/modules/audio_processing/aec/aec_core_mips.c index bb33087aee..035a4b76af 100644 --- a/webrtc/modules/audio_processing/aec/aec_core_mips.c +++ b/webrtc/modules/audio_processing/aec/aec_core_mips.c @@ -20,13 +20,12 @@ #include "webrtc/modules/audio_processing/aec/aec_core_internal.h" #include "webrtc/modules/audio_processing/aec/aec_rdft.h" -static const int flagHbandCn = 1; // flag for adding comfort noise in H band extern const float WebRtcAec_weightCurve[65]; extern const float WebRtcAec_overDriveCurve[65]; void WebRtcAec_ComfortNoise_mips(AecCore* aec, float efw[2][PART_LEN1], - complex_t* comfortNoiseHband, + float comfortNoiseHband[2][PART_LEN1], const float* noisePow, const float* lambda) { int i, num; @@ -274,7 +273,7 @@ void WebRtcAec_ComfortNoise_mips(AecCore* aec, noiseAvg = 0.0; tmpAvg = 0.0; num = 0; - if ((aec->sampFreq == 32000 || aec->sampFreq == 48000) && flagHbandCn == 1) { + if (aec->num_bands > 1) { for (i = 0; i < PART_LEN; i++) { rand[i] = ((float)randW16[i]) / 32768; } @@ -314,27 +313,35 @@ void WebRtcAec_ComfortNoise_mips(AecCore* aec, for (i = 0; i < PART_LEN1; i++) { // Use average NLP weight for H band - comfortNoiseHband[i][0] = tmpAvg * u[i][0]; - comfortNoiseHband[i][1] = tmpAvg * 
u[i][1]; + comfortNoiseHband[0][i] = tmpAvg * u[i][0]; + comfortNoiseHband[1][i] = tmpAvg * u[i][1]; } + } else { + memset(comfortNoiseHband, 0, + 2 * PART_LEN1 * sizeof(comfortNoiseHband[0][0])); } } -void WebRtcAec_FilterFar_mips(AecCore* aec, float yf[2][PART_LEN1]) { +void WebRtcAec_FilterFar_mips( + int num_partitions, + int x_fft_buf_block_pos, + float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], + float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1], + float y_fft[2][PART_LEN1]) { int i; - for (i = 0; i < aec->num_partitions; i++) { - int xPos = (i + aec->xfBufBlockPos) * PART_LEN1; + for (i = 0; i < num_partitions; i++) { + int xPos = (i + x_fft_buf_block_pos) * PART_LEN1; int pos = i * PART_LEN1; // Check for wrap - if (i + aec->xfBufBlockPos >= aec->num_partitions) { - xPos -= aec->num_partitions * (PART_LEN1); + if (i + x_fft_buf_block_pos >= num_partitions) { + xPos -= num_partitions * (PART_LEN1); } - float* yf0 = yf[0]; - float* yf1 = yf[1]; - float* aRe = aec->xfBuf[0] + xPos; - float* aIm = aec->xfBuf[1] + xPos; - float* bRe = aec->wfBuf[0] + pos; - float* bIm = aec->wfBuf[1] + pos; + float* yf0 = y_fft[0]; + float* yf1 = y_fft[1]; + float* aRe = x_fft_buf[0] + xPos; + float* aIm = x_fft_buf[1] + xPos; + float* bRe = h_fft_buf[0] + pos; + float* bIm = h_fft_buf[1] + pos; float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13; int len = PART_LEN1 >> 1; @@ -432,23 +439,27 @@ void WebRtcAec_FilterFar_mips(AecCore* aec, float yf[2][PART_LEN1]) { } } -void WebRtcAec_FilterAdaptation_mips(AecCore* aec, - float* fft, - float ef[2][PART_LEN1]) { +void WebRtcAec_FilterAdaptation_mips( + int num_partitions, + int x_fft_buf_block_pos, + float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], + float e_fft[2][PART_LEN1], + float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) { + float fft[PART_LEN2]; int i; - for (i = 0; i < aec->num_partitions; i++) { - int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1); + for (i = 0; i < num_partitions; i++) 
{ + int xPos = (i + x_fft_buf_block_pos)*(PART_LEN1); int pos; // Check for wrap - if (i + aec->xfBufBlockPos >= aec->num_partitions) { - xPos -= aec->num_partitions * PART_LEN1; + if (i + x_fft_buf_block_pos >= num_partitions) { + xPos -= num_partitions * PART_LEN1; } pos = i * PART_LEN1; - float* aRe = aec->xfBuf[0] + xPos; - float* aIm = aec->xfBuf[1] + xPos; - float* bRe = ef[0]; - float* bIm = ef[1]; + float* aRe = x_fft_buf[0] + xPos; + float* aIm = x_fft_buf[1] + xPos; + float* bRe = e_fft[0]; + float* bIm = e_fft[1]; float* fft_tmp; float f0, f1, f2, f3, f4, f5, f6 ,f7, f8, f9, f10, f11, f12; @@ -573,8 +584,8 @@ void WebRtcAec_FilterAdaptation_mips(AecCore* aec, ); } aec_rdft_forward_128(fft); - aRe = aec->wfBuf[0] + pos; - aIm = aec->wfBuf[1] + pos; + aRe = h_fft_buf[0] + pos; + aIm = h_fft_buf[1] + pos; __asm __volatile ( ".set push \n\t" ".set noreorder \n\t" @@ -699,15 +710,18 @@ void WebRtcAec_OverdriveAndSuppress_mips(AecCore* aec, } } -void WebRtcAec_ScaleErrorSignal_mips(AecCore* aec, float ef[2][PART_LEN1]) { - const float mu = aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu; - const float error_threshold = aec->extended_filter_enabled +void WebRtcAec_ScaleErrorSignal_mips(int extended_filter_enabled, + float normal_mu, + float normal_error_threshold, + float x_pow[PART_LEN1], + float ef[2][PART_LEN1]) { + const float mu = extended_filter_enabled ? kExtendedMu : normal_mu; + const float error_threshold = extended_filter_enabled ? 
kExtendedErrorThreshold - : aec->normal_error_threshold; + : normal_error_threshold; int len = (PART_LEN1); float* ef0 = ef[0]; float* ef1 = ef[1]; - float* xPow = aec->xPow; float fac1 = 1e-10f; float err_th2 = error_threshold * error_threshold; float f0, f1, f2; @@ -719,7 +733,7 @@ void WebRtcAec_ScaleErrorSignal_mips(AecCore* aec, float ef[2][PART_LEN1]) { ".set push \n\t" ".set noreorder \n\t" "1: \n\t" - "lwc1 %[f0], 0(%[xPow]) \n\t" + "lwc1 %[f0], 0(%[x_pow]) \n\t" "lwc1 %[f1], 0(%[ef0]) \n\t" "lwc1 %[f2], 0(%[ef1]) \n\t" "add.s %[f0], %[f0], %[fac1] \n\t" @@ -747,7 +761,7 @@ void WebRtcAec_ScaleErrorSignal_mips(AecCore* aec, float ef[2][PART_LEN1]) { "swc1 %[f1], 0(%[ef0]) \n\t" "swc1 %[f2], 0(%[ef1]) \n\t" "addiu %[len], %[len], -1 \n\t" - "addiu %[xPow], %[xPow], 4 \n\t" + "addiu %[x_pow], %[x_pow], 4 \n\t" "addiu %[ef0], %[ef0], 4 \n\t" "bgtz %[len], 1b \n\t" " addiu %[ef1], %[ef1], 4 \n\t" @@ -756,7 +770,7 @@ void WebRtcAec_ScaleErrorSignal_mips(AecCore* aec, float ef[2][PART_LEN1]) { #if !defined(MIPS32_R2_LE) [f3] "=&f" (f3), #endif - [xPow] "+r" (xPow), [ef0] "+r" (ef0), [ef1] "+r" (ef1), + [x_pow] "+r" (x_pow), [ef0] "+r" (ef0), [ef1] "+r" (ef1), [len] "+r" (len) : [fac1] "f" (fac1), [err_th2] "f" (err_th2), [mu] "f" (mu), [err_th] "f" (error_threshold) @@ -771,4 +785,3 @@ void WebRtcAec_InitAec_mips(void) { WebRtcAec_ComfortNoise = WebRtcAec_ComfortNoise_mips; WebRtcAec_OverdriveAndSuppress = WebRtcAec_OverdriveAndSuppress_mips; } - diff --git a/webrtc/modules/audio_processing/aec/aec_core_neon.c b/webrtc/modules/audio_processing/aec/aec_core_neon.c index 9a677aaa67..7898ab2543 100644 --- a/webrtc/modules/audio_processing/aec/aec_core_neon.c +++ b/webrtc/modules/audio_processing/aec/aec_core_neon.c @@ -34,45 +34,49 @@ __inline static float MulIm(float aRe, float aIm, float bRe, float bIm) { return aRe * bIm + aIm * bRe; } -static void FilterFarNEON(AecCore* aec, float yf[2][PART_LEN1]) { +static void FilterFarNEON( + int num_partitions, + int 
x_fft_buf_block_pos, + float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], + float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1], + float y_fft[2][PART_LEN1]) { int i; - const int num_partitions = aec->num_partitions; for (i = 0; i < num_partitions; i++) { int j; - int xPos = (i + aec->xfBufBlockPos) * PART_LEN1; + int xPos = (i + x_fft_buf_block_pos) * PART_LEN1; int pos = i * PART_LEN1; // Check for wrap - if (i + aec->xfBufBlockPos >= num_partitions) { + if (i + x_fft_buf_block_pos >= num_partitions) { xPos -= num_partitions * PART_LEN1; } // vectorized code (four at once) for (j = 0; j + 3 < PART_LEN1; j += 4) { - const float32x4_t xfBuf_re = vld1q_f32(&aec->xfBuf[0][xPos + j]); - const float32x4_t xfBuf_im = vld1q_f32(&aec->xfBuf[1][xPos + j]); - const float32x4_t wfBuf_re = vld1q_f32(&aec->wfBuf[0][pos + j]); - const float32x4_t wfBuf_im = vld1q_f32(&aec->wfBuf[1][pos + j]); - const float32x4_t yf_re = vld1q_f32(&yf[0][j]); - const float32x4_t yf_im = vld1q_f32(&yf[1][j]); - const float32x4_t a = vmulq_f32(xfBuf_re, wfBuf_re); - const float32x4_t e = vmlsq_f32(a, xfBuf_im, wfBuf_im); - const float32x4_t c = vmulq_f32(xfBuf_re, wfBuf_im); - const float32x4_t f = vmlaq_f32(c, xfBuf_im, wfBuf_re); - const float32x4_t g = vaddq_f32(yf_re, e); - const float32x4_t h = vaddq_f32(yf_im, f); - vst1q_f32(&yf[0][j], g); - vst1q_f32(&yf[1][j], h); + const float32x4_t x_fft_buf_re = vld1q_f32(&x_fft_buf[0][xPos + j]); + const float32x4_t x_fft_buf_im = vld1q_f32(&x_fft_buf[1][xPos + j]); + const float32x4_t h_fft_buf_re = vld1q_f32(&h_fft_buf[0][pos + j]); + const float32x4_t h_fft_buf_im = vld1q_f32(&h_fft_buf[1][pos + j]); + const float32x4_t y_fft_re = vld1q_f32(&y_fft[0][j]); + const float32x4_t y_fft_im = vld1q_f32(&y_fft[1][j]); + const float32x4_t a = vmulq_f32(x_fft_buf_re, h_fft_buf_re); + const float32x4_t e = vmlsq_f32(a, x_fft_buf_im, h_fft_buf_im); + const float32x4_t c = vmulq_f32(x_fft_buf_re, h_fft_buf_im); + const float32x4_t f = vmlaq_f32(c, 
x_fft_buf_im, h_fft_buf_re); + const float32x4_t g = vaddq_f32(y_fft_re, e); + const float32x4_t h = vaddq_f32(y_fft_im, f); + vst1q_f32(&y_fft[0][j], g); + vst1q_f32(&y_fft[1][j], h); } // scalar code for the remaining items. for (; j < PART_LEN1; j++) { - yf[0][j] += MulRe(aec->xfBuf[0][xPos + j], - aec->xfBuf[1][xPos + j], - aec->wfBuf[0][pos + j], - aec->wfBuf[1][pos + j]); - yf[1][j] += MulIm(aec->xfBuf[0][xPos + j], - aec->xfBuf[1][xPos + j], - aec->wfBuf[0][pos + j], - aec->wfBuf[1][pos + j]); + y_fft[0][j] += MulRe(x_fft_buf[0][xPos + j], + x_fft_buf[1][xPos + j], + h_fft_buf[0][pos + j], + h_fft_buf[1][pos + j]); + y_fft[1][j] += MulIm(x_fft_buf[0][xPos + j], + x_fft_buf[1][xPos + j], + h_fft_buf[0][pos + j], + h_fft_buf[1][pos + j]); } } } @@ -122,20 +126,24 @@ static float32x4_t vsqrtq_f32(float32x4_t s) { } #endif // WEBRTC_ARCH_ARM64 -static void ScaleErrorSignalNEON(AecCore* aec, float ef[2][PART_LEN1]) { - const float mu = aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu; - const float error_threshold = aec->extended_filter_enabled ? - kExtendedErrorThreshold : aec->normal_error_threshold; +static void ScaleErrorSignalNEON(int extended_filter_enabled, + float normal_mu, + float normal_error_threshold, + float x_pow[PART_LEN1], + float ef[2][PART_LEN1]) { + const float mu = extended_filter_enabled ? kExtendedMu : normal_mu; + const float error_threshold = extended_filter_enabled ? 
+ kExtendedErrorThreshold : normal_error_threshold; const float32x4_t k1e_10f = vdupq_n_f32(1e-10f); const float32x4_t kMu = vmovq_n_f32(mu); const float32x4_t kThresh = vmovq_n_f32(error_threshold); int i; // vectorized code (four at once) for (i = 0; i + 3 < PART_LEN1; i += 4) { - const float32x4_t xPow = vld1q_f32(&aec->xPow[i]); + const float32x4_t x_pow_local = vld1q_f32(&x_pow[i]); const float32x4_t ef_re_base = vld1q_f32(&ef[0][i]); const float32x4_t ef_im_base = vld1q_f32(&ef[1][i]); - const float32x4_t xPowPlus = vaddq_f32(xPow, k1e_10f); + const float32x4_t xPowPlus = vaddq_f32(x_pow_local, k1e_10f); float32x4_t ef_re = vdivq_f32(ef_re_base, xPowPlus); float32x4_t ef_im = vdivq_f32(ef_im_base, xPowPlus); const float32x4_t ef_re2 = vmulq_f32(ef_re, ef_re); @@ -162,8 +170,8 @@ static void ScaleErrorSignalNEON(AecCore* aec, float ef[2][PART_LEN1]) { // scalar code for the remaining items. for (; i < PART_LEN1; i++) { float abs_ef; - ef[0][i] /= (aec->xPow[i] + 1e-10f); - ef[1][i] /= (aec->xPow[i] + 1e-10f); + ef[0][i] /= (x_pow[i] + 1e-10f); + ef[1][i] /= (x_pow[i] + 1e-10f); abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]); if (abs_ef > error_threshold) { @@ -178,34 +186,37 @@ static void ScaleErrorSignalNEON(AecCore* aec, float ef[2][PART_LEN1]) { } } -static void FilterAdaptationNEON(AecCore* aec, - float* fft, - float ef[2][PART_LEN1]) { +static void FilterAdaptationNEON( + int num_partitions, + int x_fft_buf_block_pos, + float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], + float e_fft[2][PART_LEN1], + float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) { + float fft[PART_LEN2]; int i; - const int num_partitions = aec->num_partitions; for (i = 0; i < num_partitions; i++) { - int xPos = (i + aec->xfBufBlockPos) * PART_LEN1; + int xPos = (i + x_fft_buf_block_pos) * PART_LEN1; int pos = i * PART_LEN1; int j; // Check for wrap - if (i + aec->xfBufBlockPos >= num_partitions) { + if (i + x_fft_buf_block_pos >= num_partitions) { xPos -= 
num_partitions * PART_LEN1; } // Process the whole array... for (j = 0; j < PART_LEN; j += 4) { - // Load xfBuf and ef. - const float32x4_t xfBuf_re = vld1q_f32(&aec->xfBuf[0][xPos + j]); - const float32x4_t xfBuf_im = vld1q_f32(&aec->xfBuf[1][xPos + j]); - const float32x4_t ef_re = vld1q_f32(&ef[0][j]); - const float32x4_t ef_im = vld1q_f32(&ef[1][j]); - // Calculate the product of conjugate(xfBuf) by ef. + // Load x_fft_buf and e_fft. + const float32x4_t x_fft_buf_re = vld1q_f32(&x_fft_buf[0][xPos + j]); + const float32x4_t x_fft_buf_im = vld1q_f32(&x_fft_buf[1][xPos + j]); + const float32x4_t e_fft_re = vld1q_f32(&e_fft[0][j]); + const float32x4_t e_fft_im = vld1q_f32(&e_fft[1][j]); + // Calculate the product of conjugate(x_fft_buf) by e_fft. // re(conjugate(a) * b) = aRe * bRe + aIm * bIm // im(conjugate(a) * b)= aRe * bIm - aIm * bRe - const float32x4_t a = vmulq_f32(xfBuf_re, ef_re); - const float32x4_t e = vmlaq_f32(a, xfBuf_im, ef_im); - const float32x4_t c = vmulq_f32(xfBuf_re, ef_im); - const float32x4_t f = vmlsq_f32(c, xfBuf_im, ef_re); + const float32x4_t a = vmulq_f32(x_fft_buf_re, e_fft_re); + const float32x4_t e = vmlaq_f32(a, x_fft_buf_im, e_fft_im); + const float32x4_t c = vmulq_f32(x_fft_buf_re, e_fft_im); + const float32x4_t f = vmlsq_f32(c, x_fft_buf_im, e_fft_re); // Interleave real and imaginary parts. const float32x4x2_t g_n_h = vzipq_f32(e, f); // Store @@ -213,10 +224,10 @@ static void FilterAdaptationNEON(AecCore* aec, vst1q_f32(&fft[2 * j + 4], g_n_h.val[1]); } // ... and fixup the first imaginary entry. 
- fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN], - -aec->xfBuf[1][xPos + PART_LEN], - ef[0][PART_LEN], - ef[1][PART_LEN]); + fft[1] = MulRe(x_fft_buf[0][xPos + PART_LEN], + -x_fft_buf[1][xPos + PART_LEN], + e_fft[0][PART_LEN], + e_fft[1][PART_LEN]); aec_rdft_inverse_128(fft); memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN); @@ -234,21 +245,21 @@ static void FilterAdaptationNEON(AecCore* aec, aec_rdft_forward_128(fft); { - const float wt1 = aec->wfBuf[1][pos]; - aec->wfBuf[0][pos + PART_LEN] += fft[1]; + const float wt1 = h_fft_buf[1][pos]; + h_fft_buf[0][pos + PART_LEN] += fft[1]; for (j = 0; j < PART_LEN; j += 4) { - float32x4_t wtBuf_re = vld1q_f32(&aec->wfBuf[0][pos + j]); - float32x4_t wtBuf_im = vld1q_f32(&aec->wfBuf[1][pos + j]); + float32x4_t wtBuf_re = vld1q_f32(&h_fft_buf[0][pos + j]); + float32x4_t wtBuf_im = vld1q_f32(&h_fft_buf[1][pos + j]); const float32x4_t fft0 = vld1q_f32(&fft[2 * j + 0]); const float32x4_t fft4 = vld1q_f32(&fft[2 * j + 4]); const float32x4x2_t fft_re_im = vuzpq_f32(fft0, fft4); wtBuf_re = vaddq_f32(wtBuf_re, fft_re_im.val[0]); wtBuf_im = vaddq_f32(wtBuf_im, fft_re_im.val[1]); - vst1q_f32(&aec->wfBuf[0][pos + j], wtBuf_re); - vst1q_f32(&aec->wfBuf[1][pos + j], wtBuf_im); + vst1q_f32(&h_fft_buf[0][pos + j], wtBuf_re); + vst1q_f32(&h_fft_buf[1][pos + j], wtBuf_im); } - aec->wfBuf[1][pos] = wt1; + h_fft_buf[1][pos] = wt1; } } } @@ -442,7 +453,7 @@ static void OverdriveAndSuppressNEON(AecCore* aec, } } -static int PartitionDelay(const AecCore* aec) { +static int PartitionDelayNEON(const AecCore* aec) { // Measures the energy in each filter partition and returns the partition with // highest energy. 
// TODO(bjornv): Spread computational cost by computing one partition per @@ -499,7 +510,8 @@ static int PartitionDelay(const AecCore* aec) { static void SmoothedPSD(AecCore* aec, float efw[2][PART_LEN1], float dfw[2][PART_LEN1], - float xfw[2][PART_LEN1]) { + float xfw[2][PART_LEN1], + int* extreme_filter_divergence) { // Power estimate smoothing coefficients. const float* ptrGCoh = aec->extended_filter_enabled ? WebRtcAec_kExtendedSmoothingCoefficients[aec->mult - 1] @@ -615,19 +627,16 @@ static void SmoothedPSD(AecCore* aec, seSum += aec->se[i]; } - // Divergent filter safeguard. + // Divergent filter safeguard update. aec->divergeState = (aec->divergeState ? 1.05f : 1.0f) * seSum > sdSum; - if (aec->divergeState) - memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1); - - // Reset if error is significantly larger than nearend (13 dB). - if (!aec->extended_filter_enabled && seSum > (19.95f * sdSum)) - memset(aec->wfBuf, 0, sizeof(aec->wfBuf)); + // Signal extreme filter divergence if the error is significantly larger + // than the nearend (13 dB). + *extreme_filter_divergence = (seSum > (19.95f * sdSum)); } // Window time domain data to be used by the fft. -__inline static void WindowData(float* x_windowed, const float* x) { +static void WindowDataNEON(float* x_windowed, const float* x) { int i; for (i = 0; i < PART_LEN; i += 4) { const float32x4_t vec_Buf1 = vld1q_f32(&x[i]); @@ -648,8 +657,8 @@ __inline static void WindowData(float* x_windowed, const float* x) { } // Puts fft output data into a complex valued array. 
-__inline static void StoreAsComplex(const float* data, - float data_complex[2][PART_LEN1]) { +static void StoreAsComplexNEON(const float* data, + float data_complex[2][PART_LEN1]) { int i; for (i = 0; i < PART_LEN; i += 4) { const float32x4x2_t vec_data = vld2q_f32(&data[2 * i]); @@ -665,32 +674,15 @@ __inline static void StoreAsComplex(const float* data, static void SubbandCoherenceNEON(AecCore* aec, float efw[2][PART_LEN1], + float dfw[2][PART_LEN1], float xfw[2][PART_LEN1], float* fft, float* cohde, - float* cohxd) { - float dfw[2][PART_LEN1]; + float* cohxd, + int* extreme_filter_divergence) { int i; - if (aec->delayEstCtr == 0) - aec->delayIdx = PartitionDelay(aec); - - // Use delayed far. - memcpy(xfw, - aec->xfwBuf + aec->delayIdx * PART_LEN1, - sizeof(xfw[0][0]) * 2 * PART_LEN1); - - // Windowed near fft - WindowData(fft, aec->dBuf); - aec_rdft_forward_128(fft); - StoreAsComplex(fft, dfw); - - // Windowed error fft - WindowData(fft, aec->eBuf); - aec_rdft_forward_128(fft); - StoreAsComplex(fft, efw); - - SmoothedPSD(aec, efw, dfw, xfw); + SmoothedPSD(aec, efw, dfw, xfw, extreme_filter_divergence); { const float32x4_t vec_1eminus10 = vdupq_n_f32(1e-10f); @@ -732,5 +724,7 @@ void WebRtcAec_InitAec_neon(void) { WebRtcAec_FilterAdaptation = FilterAdaptationNEON; WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON; WebRtcAec_SubbandCoherence = SubbandCoherenceNEON; + WebRtcAec_StoreAsComplex = StoreAsComplexNEON; + WebRtcAec_PartitionDelay = PartitionDelayNEON; + WebRtcAec_WindowData = WindowDataNEON; } - diff --git a/webrtc/modules/audio_processing/aec/aec_core_sse2.c b/webrtc/modules/audio_processing/aec/aec_core_sse2.c index b1bffcbb9f..f897a4c0c7 100644 --- a/webrtc/modules/audio_processing/aec/aec_core_sse2.c +++ b/webrtc/modules/audio_processing/aec/aec_core_sse2.c @@ -29,67 +29,76 @@ __inline static float MulIm(float aRe, float aIm, float bRe, float bIm) { return aRe * bIm + aIm * bRe; } -static void FilterFarSSE2(AecCore* aec, float 
yf[2][PART_LEN1]) { +static void FilterFarSSE2( + int num_partitions, + int x_fft_buf_block_pos, + float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], + float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1], + float y_fft[2][PART_LEN1]) { + int i; - const int num_partitions = aec->num_partitions; for (i = 0; i < num_partitions; i++) { int j; - int xPos = (i + aec->xfBufBlockPos) * PART_LEN1; + int xPos = (i + x_fft_buf_block_pos) * PART_LEN1; int pos = i * PART_LEN1; // Check for wrap - if (i + aec->xfBufBlockPos >= num_partitions) { + if (i + x_fft_buf_block_pos >= num_partitions) { xPos -= num_partitions * (PART_LEN1); } // vectorized code (four at once) for (j = 0; j + 3 < PART_LEN1; j += 4) { - const __m128 xfBuf_re = _mm_loadu_ps(&aec->xfBuf[0][xPos + j]); - const __m128 xfBuf_im = _mm_loadu_ps(&aec->xfBuf[1][xPos + j]); - const __m128 wfBuf_re = _mm_loadu_ps(&aec->wfBuf[0][pos + j]); - const __m128 wfBuf_im = _mm_loadu_ps(&aec->wfBuf[1][pos + j]); - const __m128 yf_re = _mm_loadu_ps(&yf[0][j]); - const __m128 yf_im = _mm_loadu_ps(&yf[1][j]); - const __m128 a = _mm_mul_ps(xfBuf_re, wfBuf_re); - const __m128 b = _mm_mul_ps(xfBuf_im, wfBuf_im); - const __m128 c = _mm_mul_ps(xfBuf_re, wfBuf_im); - const __m128 d = _mm_mul_ps(xfBuf_im, wfBuf_re); + const __m128 x_fft_buf_re = _mm_loadu_ps(&x_fft_buf[0][xPos + j]); + const __m128 x_fft_buf_im = _mm_loadu_ps(&x_fft_buf[1][xPos + j]); + const __m128 h_fft_buf_re = _mm_loadu_ps(&h_fft_buf[0][pos + j]); + const __m128 h_fft_buf_im = _mm_loadu_ps(&h_fft_buf[1][pos + j]); + const __m128 y_fft_re = _mm_loadu_ps(&y_fft[0][j]); + const __m128 y_fft_im = _mm_loadu_ps(&y_fft[1][j]); + const __m128 a = _mm_mul_ps(x_fft_buf_re, h_fft_buf_re); + const __m128 b = _mm_mul_ps(x_fft_buf_im, h_fft_buf_im); + const __m128 c = _mm_mul_ps(x_fft_buf_re, h_fft_buf_im); + const __m128 d = _mm_mul_ps(x_fft_buf_im, h_fft_buf_re); const __m128 e = _mm_sub_ps(a, b); const __m128 f = _mm_add_ps(c, d); - const __m128 g = _mm_add_ps(yf_re, 
e); - const __m128 h = _mm_add_ps(yf_im, f); - _mm_storeu_ps(&yf[0][j], g); - _mm_storeu_ps(&yf[1][j], h); + const __m128 g = _mm_add_ps(y_fft_re, e); + const __m128 h = _mm_add_ps(y_fft_im, f); + _mm_storeu_ps(&y_fft[0][j], g); + _mm_storeu_ps(&y_fft[1][j], h); } // scalar code for the remaining items. for (; j < PART_LEN1; j++) { - yf[0][j] += MulRe(aec->xfBuf[0][xPos + j], - aec->xfBuf[1][xPos + j], - aec->wfBuf[0][pos + j], - aec->wfBuf[1][pos + j]); - yf[1][j] += MulIm(aec->xfBuf[0][xPos + j], - aec->xfBuf[1][xPos + j], - aec->wfBuf[0][pos + j], - aec->wfBuf[1][pos + j]); + y_fft[0][j] += MulRe(x_fft_buf[0][xPos + j], + x_fft_buf[1][xPos + j], + h_fft_buf[0][pos + j], + h_fft_buf[1][pos + j]); + y_fft[1][j] += MulIm(x_fft_buf[0][xPos + j], + x_fft_buf[1][xPos + j], + h_fft_buf[0][pos + j], + h_fft_buf[1][pos + j]); } } } -static void ScaleErrorSignalSSE2(AecCore* aec, float ef[2][PART_LEN1]) { +static void ScaleErrorSignalSSE2(int extended_filter_enabled, + float normal_mu, + float normal_error_threshold, + float x_pow[PART_LEN1], + float ef[2][PART_LEN1]) { const __m128 k1e_10f = _mm_set1_ps(1e-10f); - const __m128 kMu = aec->extended_filter_enabled ? _mm_set1_ps(kExtendedMu) - : _mm_set1_ps(aec->normal_mu); - const __m128 kThresh = aec->extended_filter_enabled + const __m128 kMu = extended_filter_enabled ? _mm_set1_ps(kExtendedMu) + : _mm_set1_ps(normal_mu); + const __m128 kThresh = extended_filter_enabled ? 
_mm_set1_ps(kExtendedErrorThreshold) - : _mm_set1_ps(aec->normal_error_threshold); + : _mm_set1_ps(normal_error_threshold); int i; // vectorized code (four at once) for (i = 0; i + 3 < PART_LEN1; i += 4) { - const __m128 xPow = _mm_loadu_ps(&aec->xPow[i]); + const __m128 x_pow_local = _mm_loadu_ps(&x_pow[i]); const __m128 ef_re_base = _mm_loadu_ps(&ef[0][i]); const __m128 ef_im_base = _mm_loadu_ps(&ef[1][i]); - const __m128 xPowPlus = _mm_add_ps(xPow, k1e_10f); + const __m128 xPowPlus = _mm_add_ps(x_pow_local, k1e_10f); __m128 ef_re = _mm_div_ps(ef_re_base, xPowPlus); __m128 ef_im = _mm_div_ps(ef_im_base, xPowPlus); const __m128 ef_re2 = _mm_mul_ps(ef_re, ef_re); @@ -116,14 +125,14 @@ static void ScaleErrorSignalSSE2(AecCore* aec, float ef[2][PART_LEN1]) { // scalar code for the remaining items. { const float mu = - aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu; - const float error_threshold = aec->extended_filter_enabled + extended_filter_enabled ? kExtendedMu : normal_mu; + const float error_threshold = extended_filter_enabled ? 
kExtendedErrorThreshold - : aec->normal_error_threshold; + : normal_error_threshold; for (; i < (PART_LEN1); i++) { float abs_ef; - ef[0][i] /= (aec->xPow[i] + 1e-10f); - ef[1][i] /= (aec->xPow[i] + 1e-10f); + ef[0][i] /= (x_pow[i] + 1e-10f); + ef[1][i] /= (x_pow[i] + 1e-10f); abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]); if (abs_ef > error_threshold) { @@ -139,33 +148,36 @@ static void ScaleErrorSignalSSE2(AecCore* aec, float ef[2][PART_LEN1]) { } } -static void FilterAdaptationSSE2(AecCore* aec, - float* fft, - float ef[2][PART_LEN1]) { +static void FilterAdaptationSSE2( + int num_partitions, + int x_fft_buf_block_pos, + float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], + float e_fft[2][PART_LEN1], + float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) { + float fft[PART_LEN2]; int i, j; - const int num_partitions = aec->num_partitions; for (i = 0; i < num_partitions; i++) { - int xPos = (i + aec->xfBufBlockPos) * (PART_LEN1); + int xPos = (i + x_fft_buf_block_pos) * (PART_LEN1); int pos = i * PART_LEN1; // Check for wrap - if (i + aec->xfBufBlockPos >= num_partitions) { + if (i + x_fft_buf_block_pos >= num_partitions) { xPos -= num_partitions * PART_LEN1; } // Process the whole array... for (j = 0; j < PART_LEN; j += 4) { - // Load xfBuf and ef. - const __m128 xfBuf_re = _mm_loadu_ps(&aec->xfBuf[0][xPos + j]); - const __m128 xfBuf_im = _mm_loadu_ps(&aec->xfBuf[1][xPos + j]); - const __m128 ef_re = _mm_loadu_ps(&ef[0][j]); - const __m128 ef_im = _mm_loadu_ps(&ef[1][j]); - // Calculate the product of conjugate(xfBuf) by ef. + // Load x_fft_buf and e_fft. + const __m128 x_fft_buf_re = _mm_loadu_ps(&x_fft_buf[0][xPos + j]); + const __m128 x_fft_buf_im = _mm_loadu_ps(&x_fft_buf[1][xPos + j]); + const __m128 e_fft_re = _mm_loadu_ps(&e_fft[0][j]); + const __m128 e_fft_im = _mm_loadu_ps(&e_fft[1][j]); + // Calculate the product of conjugate(x_fft_buf) by e_fft. 
// re(conjugate(a) * b) = aRe * bRe + aIm * bIm // im(conjugate(a) * b)= aRe * bIm - aIm * bRe - const __m128 a = _mm_mul_ps(xfBuf_re, ef_re); - const __m128 b = _mm_mul_ps(xfBuf_im, ef_im); - const __m128 c = _mm_mul_ps(xfBuf_re, ef_im); - const __m128 d = _mm_mul_ps(xfBuf_im, ef_re); + const __m128 a = _mm_mul_ps(x_fft_buf_re, e_fft_re); + const __m128 b = _mm_mul_ps(x_fft_buf_im, e_fft_im); + const __m128 c = _mm_mul_ps(x_fft_buf_re, e_fft_im); + const __m128 d = _mm_mul_ps(x_fft_buf_im, e_fft_re); const __m128 e = _mm_add_ps(a, b); const __m128 f = _mm_sub_ps(c, d); // Interleave real and imaginary parts. @@ -176,10 +188,10 @@ static void FilterAdaptationSSE2(AecCore* aec, _mm_storeu_ps(&fft[2 * j + 4], h); } // ... and fixup the first imaginary entry. - fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN], - -aec->xfBuf[1][xPos + PART_LEN], - ef[0][PART_LEN], - ef[1][PART_LEN]); + fft[1] = MulRe(x_fft_buf[0][xPos + PART_LEN], + -x_fft_buf[1][xPos + PART_LEN], + e_fft[0][PART_LEN], + e_fft[1][PART_LEN]); aec_rdft_inverse_128(fft); memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN); @@ -197,11 +209,11 @@ static void FilterAdaptationSSE2(AecCore* aec, aec_rdft_forward_128(fft); { - float wt1 = aec->wfBuf[1][pos]; - aec->wfBuf[0][pos + PART_LEN] += fft[1]; + float wt1 = h_fft_buf[1][pos]; + h_fft_buf[0][pos + PART_LEN] += fft[1]; for (j = 0; j < PART_LEN; j += 4) { - __m128 wtBuf_re = _mm_loadu_ps(&aec->wfBuf[0][pos + j]); - __m128 wtBuf_im = _mm_loadu_ps(&aec->wfBuf[1][pos + j]); + __m128 wtBuf_re = _mm_loadu_ps(&h_fft_buf[0][pos + j]); + __m128 wtBuf_im = _mm_loadu_ps(&h_fft_buf[1][pos + j]); const __m128 fft0 = _mm_loadu_ps(&fft[2 * j + 0]); const __m128 fft4 = _mm_loadu_ps(&fft[2 * j + 4]); const __m128 fft_re = @@ -210,10 +222,10 @@ static void FilterAdaptationSSE2(AecCore* aec, _mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(3, 1, 3, 1)); wtBuf_re = _mm_add_ps(wtBuf_re, fft_re); wtBuf_im = _mm_add_ps(wtBuf_im, fft_im); - _mm_storeu_ps(&aec->wfBuf[0][pos + j], wtBuf_re); - 
_mm_storeu_ps(&aec->wfBuf[1][pos + j], wtBuf_im); + _mm_storeu_ps(&h_fft_buf[0][pos + j], wtBuf_re); + _mm_storeu_ps(&h_fft_buf[1][pos + j], wtBuf_im); } - aec->wfBuf[1][pos] = wt1; + h_fft_buf[1][pos] = wt1; } } } @@ -427,7 +439,8 @@ __inline static void _mm_add_ps_4x1(__m128 sum, float *dst) { sum = _mm_add_ps(sum, _mm_shuffle_ps(sum, sum, _MM_SHUFFLE(1, 1, 1, 1))); _mm_store_ss(dst, sum); } -static int PartitionDelay(const AecCore* aec) { + +static int PartitionDelaySSE2(const AecCore* aec) { // Measures the energy in each filter partition and returns the partition with // highest energy. // TODO(bjornv): Spread computational cost by computing one partition per @@ -476,7 +489,8 @@ static int PartitionDelay(const AecCore* aec) { static void SmoothedPSD(AecCore* aec, float efw[2][PART_LEN1], float dfw[2][PART_LEN1], - float xfw[2][PART_LEN1]) { + float xfw[2][PART_LEN1], + int* extreme_filter_divergence) { // Power estimate smoothing coefficients. const float* ptrGCoh = aec->extended_filter_enabled ? WebRtcAec_kExtendedSmoothingCoefficients[aec->mult - 1] @@ -595,19 +609,16 @@ static void SmoothedPSD(AecCore* aec, seSum += aec->se[i]; } - // Divergent filter safeguard. + // Divergent filter safeguard update. aec->divergeState = (aec->divergeState ? 1.05f : 1.0f) * seSum > sdSum; - if (aec->divergeState) - memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1); - - // Reset if error is significantly larger than nearend (13 dB). - if (!aec->extended_filter_enabled && seSum > (19.95f * sdSum)) - memset(aec->wfBuf, 0, sizeof(aec->wfBuf)); + // Signal extreme filter divergence if the error is significantly larger + // than the nearend (13 dB). + *extreme_filter_divergence = (seSum > (19.95f * sdSum)); } // Window time domain data to be used by the fft. 
-__inline static void WindowData(float* x_windowed, const float* x) { +static void WindowDataSSE2(float* x_windowed, const float* x) { int i; for (i = 0; i < PART_LEN; i += 4) { const __m128 vec_Buf1 = _mm_loadu_ps(&x[i]); @@ -627,8 +638,8 @@ __inline static void WindowData(float* x_windowed, const float* x) { } // Puts fft output data into a complex valued array. -__inline static void StoreAsComplex(const float* data, - float data_complex[2][PART_LEN1]) { +static void StoreAsComplexSSE2(const float* data, + float data_complex[2][PART_LEN1]) { int i; for (i = 0; i < PART_LEN; i += 4) { const __m128 vec_fft0 = _mm_loadu_ps(&data[2 * i]); @@ -649,32 +660,15 @@ __inline static void StoreAsComplex(const float* data, static void SubbandCoherenceSSE2(AecCore* aec, float efw[2][PART_LEN1], + float dfw[2][PART_LEN1], float xfw[2][PART_LEN1], float* fft, float* cohde, - float* cohxd) { - float dfw[2][PART_LEN1]; + float* cohxd, + int* extreme_filter_divergence) { int i; - if (aec->delayEstCtr == 0) - aec->delayIdx = PartitionDelay(aec); - - // Use delayed far. 
- memcpy(xfw, - aec->xfwBuf + aec->delayIdx * PART_LEN1, - sizeof(xfw[0][0]) * 2 * PART_LEN1); - - // Windowed near fft - WindowData(fft, aec->dBuf); - aec_rdft_forward_128(fft); - StoreAsComplex(fft, dfw); - - // Windowed error fft - WindowData(fft, aec->eBuf); - aec_rdft_forward_128(fft); - StoreAsComplex(fft, efw); - - SmoothedPSD(aec, efw, dfw, xfw); + SmoothedPSD(aec, efw, dfw, xfw, extreme_filter_divergence); { const __m128 vec_1eminus10 = _mm_set1_ps(1e-10f); @@ -728,4 +722,7 @@ void WebRtcAec_InitAec_SSE2(void) { WebRtcAec_FilterAdaptation = FilterAdaptationSSE2; WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2; WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2; + WebRtcAec_StoreAsComplex = StoreAsComplexSSE2; + WebRtcAec_PartitionDelay = PartitionDelaySSE2; + WebRtcAec_WindowData = WindowDataSSE2; } diff --git a/webrtc/modules/audio_processing/aec/echo_cancellation.c b/webrtc/modules/audio_processing/aec/echo_cancellation.c index 0f5cd31ddb..aab1718b24 100644 --- a/webrtc/modules/audio_processing/aec/echo_cancellation.c +++ b/webrtc/modules/audio_processing/aec/echo_cancellation.c @@ -11,7 +11,7 @@ /* * Contains the API functions for the AEC. 
*/ -#include "webrtc/modules/audio_processing/aec/include/echo_cancellation.h" +#include "webrtc/modules/audio_processing/aec/echo_cancellation.h" #include <math.h> #ifdef WEBRTC_AEC_DEBUG_DUMP @@ -146,7 +146,6 @@ void* WebRtcAec_Create() { } aecpc->initFlag = 0; - aecpc->lastError = 0; #ifdef WEBRTC_AEC_DEBUG_DUMP { @@ -192,26 +191,22 @@ int32_t WebRtcAec_Init(void* aecInst, int32_t sampFreq, int32_t scSampFreq) { sampFreq != 16000 && sampFreq != 32000 && sampFreq != 48000) { - aecpc->lastError = AEC_BAD_PARAMETER_ERROR; - return -1; + return AEC_BAD_PARAMETER_ERROR; } aecpc->sampFreq = sampFreq; if (scSampFreq < 1 || scSampFreq > 96000) { - aecpc->lastError = AEC_BAD_PARAMETER_ERROR; - return -1; + return AEC_BAD_PARAMETER_ERROR; } aecpc->scSampFreq = scSampFreq; // Initialize echo canceller core if (WebRtcAec_InitAec(aecpc->aec, aecpc->sampFreq) == -1) { - aecpc->lastError = AEC_UNSPECIFIED_ERROR; - return -1; + return AEC_UNSPECIFIED_ERROR; } if (WebRtcAec_InitResampler(aecpc->resampler, aecpc->scSampFreq) == -1) { - aecpc->lastError = AEC_UNSPECIFIED_ERROR; - return -1; + return AEC_UNSPECIFIED_ERROR; } WebRtc_InitBuffer(aecpc->far_pre_buf); @@ -261,13 +256,32 @@ int32_t WebRtcAec_Init(void* aecInst, int32_t sampFreq, int32_t scSampFreq) { aecConfig.delay_logging = kAecFalse; if (WebRtcAec_set_config(aecpc, aecConfig) == -1) { - aecpc->lastError = AEC_UNSPECIFIED_ERROR; - return -1; + return AEC_UNSPECIFIED_ERROR; } return 0; } +// Returns any error that is caused when buffering the +// far-end signal. 
+int32_t WebRtcAec_GetBufferFarendError(void* aecInst, + const float* farend, + size_t nrOfSamples) { + Aec* aecpc = aecInst; + + if (!farend) + return AEC_NULL_POINTER_ERROR; + + if (aecpc->initFlag != initCheck) + return AEC_UNINITIALIZED_ERROR; + + // number of samples == 160 for SWB input + if (nrOfSamples != 80 && nrOfSamples != 160) + return AEC_BAD_PARAMETER_ERROR; + + return 0; +} + // only buffer L band for farend int32_t WebRtcAec_BufferFarend(void* aecInst, const float* farend, @@ -277,21 +291,13 @@ int32_t WebRtcAec_BufferFarend(void* aecInst, float new_farend[MAX_RESAMP_LEN]; const float* farend_ptr = farend; - if (farend == NULL) { - aecpc->lastError = AEC_NULL_POINTER_ERROR; - return -1; - } + // Get any error caused by buffering the farend signal. + int32_t error_code = WebRtcAec_GetBufferFarendError(aecInst, farend, + nrOfSamples); - if (aecpc->initFlag != initCheck) { - aecpc->lastError = AEC_UNINITIALIZED_ERROR; - return -1; - } + if (error_code != 0) + return error_code; - // number of samples == 160 for SWB input - if (nrOfSamples != 80 && nrOfSamples != 160) { - aecpc->lastError = AEC_BAD_PARAMETER_ERROR; - return -1; - } if (aecpc->skewMode == kAecTrue && aecpc->resample == kAecTrue) { // Resample and get a new number of samples @@ -311,7 +317,8 @@ int32_t WebRtcAec_BufferFarend(void* aecInst, // Write the time-domain data to |far_pre_buf|. WebRtc_WriteBuffer(aecpc->far_pre_buf, farend_ptr, newNrOfSamples); - // Transform to frequency domain if we have enough data. + // TODO(minyue): reduce to |PART_LEN| samples for each buffering, when + // WebRtcAec_BufferFarendPartition() is changed to take |PART_LEN| samples. while (WebRtc_available_read(aecpc->far_pre_buf) >= PART_LEN2) { // We have enough data to pass to the FFT, hence read PART_LEN2 samples. 
{ @@ -319,10 +326,6 @@ int32_t WebRtcAec_BufferFarend(void* aecInst, float tmp[PART_LEN2]; WebRtc_ReadBuffer(aecpc->far_pre_buf, (void**)&ptmp, tmp, PART_LEN2); WebRtcAec_BufferFarendPartition(aecpc->aec, ptmp); -#ifdef WEBRTC_AEC_DEBUG_DUMP - WebRtc_WriteBuffer( - WebRtcAec_far_time_buf(aecpc->aec), &ptmp[PART_LEN], 1); -#endif } // Rewind |far_pre_buf| PART_LEN samples for overlap before continuing. @@ -343,29 +346,24 @@ int32_t WebRtcAec_Process(void* aecInst, int32_t retVal = 0; if (out == NULL) { - aecpc->lastError = AEC_NULL_POINTER_ERROR; - return -1; + return AEC_NULL_POINTER_ERROR; } if (aecpc->initFlag != initCheck) { - aecpc->lastError = AEC_UNINITIALIZED_ERROR; - return -1; + return AEC_UNINITIALIZED_ERROR; } // number of samples == 160 for SWB input if (nrOfSamples != 80 && nrOfSamples != 160) { - aecpc->lastError = AEC_BAD_PARAMETER_ERROR; - return -1; + return AEC_BAD_PARAMETER_ERROR; } if (msInSndCardBuf < 0) { msInSndCardBuf = 0; - aecpc->lastError = AEC_BAD_PARAMETER_WARNING; - retVal = -1; + retVal = AEC_BAD_PARAMETER_WARNING; } else if (msInSndCardBuf > kMaxTrustedDelayMs) { // The clamping is now done in ProcessExtended/Normal(). - aecpc->lastError = AEC_BAD_PARAMETER_WARNING; - retVal = -1; + retVal = AEC_BAD_PARAMETER_WARNING; } // This returns the value of aec->extended_filter_enabled. 
@@ -378,15 +376,13 @@ int32_t WebRtcAec_Process(void* aecInst, msInSndCardBuf, skew); } else { - if (ProcessNormal(aecpc, - nearend, - num_bands, - out, - nrOfSamples, - msInSndCardBuf, - skew) != 0) { - retVal = -1; - } + retVal = ProcessNormal(aecpc, + nearend, + num_bands, + out, + nrOfSamples, + msInSndCardBuf, + skew); } #ifdef WEBRTC_AEC_DEBUG_DUMP @@ -405,31 +401,26 @@ int32_t WebRtcAec_Process(void* aecInst, int WebRtcAec_set_config(void* handle, AecConfig config) { Aec* self = (Aec*)handle; if (self->initFlag != initCheck) { - self->lastError = AEC_UNINITIALIZED_ERROR; - return -1; + return AEC_UNINITIALIZED_ERROR; } if (config.skewMode != kAecFalse && config.skewMode != kAecTrue) { - self->lastError = AEC_BAD_PARAMETER_ERROR; - return -1; + return AEC_BAD_PARAMETER_ERROR; } self->skewMode = config.skewMode; if (config.nlpMode != kAecNlpConservative && config.nlpMode != kAecNlpModerate && config.nlpMode != kAecNlpAggressive) { - self->lastError = AEC_BAD_PARAMETER_ERROR; - return -1; + return AEC_BAD_PARAMETER_ERROR; } if (config.metricsMode != kAecFalse && config.metricsMode != kAecTrue) { - self->lastError = AEC_BAD_PARAMETER_ERROR; - return -1; + return AEC_BAD_PARAMETER_ERROR; } if (config.delay_logging != kAecFalse && config.delay_logging != kAecTrue) { - self->lastError = AEC_BAD_PARAMETER_ERROR; - return -1; + return AEC_BAD_PARAMETER_ERROR; } WebRtcAec_SetConfigCore( @@ -440,12 +431,10 @@ int WebRtcAec_set_config(void* handle, AecConfig config) { int WebRtcAec_get_echo_status(void* handle, int* status) { Aec* self = (Aec*)handle; if (status == NULL) { - self->lastError = AEC_NULL_POINTER_ERROR; - return -1; + return AEC_NULL_POINTER_ERROR; } if (self->initFlag != initCheck) { - self->lastError = AEC_UNINITIALIZED_ERROR; - return -1; + return AEC_UNINITIALIZED_ERROR; } *status = WebRtcAec_echo_state(self->aec); @@ -466,12 +455,10 @@ int WebRtcAec_GetMetrics(void* handle, AecMetrics* metrics) { return -1; } if (metrics == NULL) { - self->lastError = 
AEC_NULL_POINTER_ERROR; - return -1; + return AEC_NULL_POINTER_ERROR; } if (self->initFlag != initCheck) { - self->lastError = AEC_UNINITIALIZED_ERROR; - return -1; + return AEC_UNINITIALIZED_ERROR; } WebRtcAec_GetEchoStats(self->aec, &erl, &erle, &a_nlp); @@ -556,32 +543,24 @@ int WebRtcAec_GetDelayMetrics(void* handle, float* fraction_poor_delays) { Aec* self = handle; if (median == NULL) { - self->lastError = AEC_NULL_POINTER_ERROR; - return -1; + return AEC_NULL_POINTER_ERROR; } if (std == NULL) { - self->lastError = AEC_NULL_POINTER_ERROR; - return -1; + return AEC_NULL_POINTER_ERROR; } if (self->initFlag != initCheck) { - self->lastError = AEC_UNINITIALIZED_ERROR; - return -1; + return AEC_UNINITIALIZED_ERROR; } if (WebRtcAec_GetDelayMetricsCore(self->aec, median, std, fraction_poor_delays) == -1) { // Logging disabled. - self->lastError = AEC_UNSUPPORTED_FUNCTION_ERROR; - return -1; + return AEC_UNSUPPORTED_FUNCTION_ERROR; } return 0; } -int32_t WebRtcAec_get_error_code(void* aecInst) { - Aec* aecpc = aecInst; - return aecpc->lastError; -} AecCore* WebRtcAec_aec_core(void* handle) { if (!handle) { @@ -617,7 +596,7 @@ static int ProcessNormal(Aec* aecpc, retVal = WebRtcAec_GetSkew(aecpc->resampler, skew, &aecpc->skew); if (retVal == -1) { aecpc->skew = 0; - aecpc->lastError = AEC_BAD_PARAMETER_WARNING; + retVal = AEC_BAD_PARAMETER_WARNING; } aecpc->skew /= aecpc->sampFactor * nrOfSamples; diff --git a/webrtc/modules/audio_processing/aec/include/echo_cancellation.h b/webrtc/modules/audio_processing/aec/echo_cancellation.h index a340cf84d0..de84b2e6d1 100644 --- a/webrtc/modules/audio_processing/aec/include/echo_cancellation.h +++ b/webrtc/modules/audio_processing/aec/echo_cancellation.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_INCLUDE_ECHO_CANCELLATION_H_ -#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_INCLUDE_ECHO_CANCELLATION_H_ +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_ECHO_CANCELLATION_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_ECHO_CANCELLATION_H_ #include <stddef.h> @@ -109,13 +109,32 @@ int32_t WebRtcAec_Init(void* aecInst, int32_t sampFreq, int32_t scSampFreq); * Outputs Description * ------------------------------------------------------------------- * int32_t return 0: OK - * -1: error + * 12000-12050: error code */ int32_t WebRtcAec_BufferFarend(void* aecInst, const float* farend, size_t nrOfSamples); /* + * Reports any errors that would arise if buffering a farend buffer + * + * Inputs Description + * ------------------------------------------------------------------- + * void* aecInst Pointer to the AEC instance + * const float* farend In buffer containing one frame of + * farend signal for L band + * int16_t nrOfSamples Number of samples in farend buffer + * + * Outputs Description + * ------------------------------------------------------------------- + * int32_t return 0: OK + * 12000-12050: error code + */ +int32_t WebRtcAec_GetBufferFarendError(void* aecInst, + const float* farend, + size_t nrOfSamples); + +/* * Runs the echo canceller on an 80 or 160 sample blocks of data. 
* * Inputs Description @@ -136,7 +155,7 @@ int32_t WebRtcAec_BufferFarend(void* aecInst, * float* const* out Out buffer, one frame of processed nearend * for each band * int32_t return 0: OK - * -1: error + * 12000-12050: error code */ int32_t WebRtcAec_Process(void* aecInst, const float* const* nearend, @@ -157,8 +176,8 @@ int32_t WebRtcAec_Process(void* aecInst, * * Outputs Description * ------------------------------------------------------------------- - * int return 0: OK - * -1: error + * int return 0: OK + * 12000-12050: error code */ int WebRtcAec_set_config(void* handle, AecConfig config); @@ -173,8 +192,8 @@ int WebRtcAec_set_config(void* handle, AecConfig config); * ------------------------------------------------------------------- * int* status 0: Almost certainly nearend single-talk * 1: Might not be neared single-talk - * int return 0: OK - * -1: error + * int return 0: OK + * 12000-12050: error code */ int WebRtcAec_get_echo_status(void* handle, int* status); @@ -189,8 +208,8 @@ int WebRtcAec_get_echo_status(void* handle, int* status); * ------------------------------------------------------------------- * AecMetrics* metrics Struct which will be filled out with the * current echo metrics. - * int return 0: OK - * -1: error + * int return 0: OK + * 12000-12050: error code */ int WebRtcAec_GetMetrics(void* handle, AecMetrics* metrics); @@ -208,27 +227,14 @@ int WebRtcAec_GetMetrics(void* handle, AecMetrics* metrics); * float* fraction_poor_delays Fraction of the delay estimates that may * cause the AEC to perform poorly. * - * int return 0: OK - * -1: error + * int return 0: OK + * 12000-12050: error code */ int WebRtcAec_GetDelayMetrics(void* handle, int* median, int* std, float* fraction_poor_delays); -/* - * Gets the last error code. 
- * - * Inputs Description - * ------------------------------------------------------------------- - * void* aecInst Pointer to the AEC instance - * - * Outputs Description - * ------------------------------------------------------------------- - * int32_t return 11000-11100: error code - */ -int32_t WebRtcAec_get_error_code(void* aecInst); - // Returns a pointer to the low level AEC handle. // // Input: @@ -242,4 +248,4 @@ struct AecCore* WebRtcAec_aec_core(void* handle); #ifdef __cplusplus } #endif -#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_INCLUDE_ECHO_CANCELLATION_H_ +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_ECHO_CANCELLATION_H_ diff --git a/webrtc/modules/audio_processing/aec/echo_cancellation_internal.h b/webrtc/modules/audio_processing/aec/echo_cancellation_internal.h index 95a6cf3324..e87219f33d 100644 --- a/webrtc/modules/audio_processing/aec/echo_cancellation_internal.h +++ b/webrtc/modules/audio_processing/aec/echo_cancellation_internal.h @@ -57,8 +57,6 @@ typedef struct { RingBuffer* far_pre_buf; // Time domain far-end pre-buffer. - int lastError; - int farend_started; AecCore* aec; diff --git a/webrtc/modules/audio_processing/aec/echo_cancellation_unittest.cc b/webrtc/modules/audio_processing/aec/echo_cancellation_unittest.cc index 315ac3e9f9..42db082ff9 100644 --- a/webrtc/modules/audio_processing/aec/echo_cancellation_unittest.cc +++ b/webrtc/modules/audio_processing/aec/echo_cancellation_unittest.cc @@ -10,7 +10,7 @@ // TODO(bjornv): Make this a comprehensive test. 
-#include "webrtc/modules/audio_processing/aec/include/echo_cancellation.h" +#include "webrtc/modules/audio_processing/aec/echo_cancellation.h" #include <stdlib.h> #include <time.h> diff --git a/webrtc/modules/audio_processing/aec/system_delay_unittest.cc b/webrtc/modules/audio_processing/aec/system_delay_unittest.cc index 07e3cf8add..567118d828 100644 --- a/webrtc/modules/audio_processing/aec/system_delay_unittest.cc +++ b/webrtc/modules/audio_processing/aec/system_delay_unittest.cc @@ -13,8 +13,7 @@ extern "C" { #include "webrtc/modules/audio_processing/aec/aec_core.h" } #include "webrtc/modules/audio_processing/aec/echo_cancellation_internal.h" -#include "webrtc/modules/audio_processing/aec/include/echo_cancellation.h" -#include "webrtc/test/testsupport/gtest_disable.h" +#include "webrtc/modules/audio_processing/aec/echo_cancellation.h" #include "webrtc/typedefs.h" namespace { diff --git a/webrtc/modules/audio_processing/aecm/aecm_core.c b/webrtc/modules/audio_processing/aecm/aecm_core.c index f0d85d5328..6bf1cf7f3e 100644 --- a/webrtc/modules/audio_processing/aecm/aecm_core.c +++ b/webrtc/modules/audio_processing/aecm/aecm_core.c @@ -16,7 +16,7 @@ #include "webrtc/common_audio/ring_buffer.h" #include "webrtc/common_audio/signal_processing/include/real_fft.h" -#include "webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h" +#include "webrtc/modules/audio_processing/aecm/echo_control_mobile.h" #include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h" #include "webrtc/system_wrappers/include/compile_assert_c.h" #include "webrtc/system_wrappers/include/cpu_features_wrapper.h" diff --git a/webrtc/modules/audio_processing/aecm/aecm_core_c.c b/webrtc/modules/audio_processing/aecm/aecm_core_c.c index df95e8bedf..3a8fafa4ec 100644 --- a/webrtc/modules/audio_processing/aecm/aecm_core_c.c +++ b/webrtc/modules/audio_processing/aecm/aecm_core_c.c @@ -16,7 +16,7 @@ #include "webrtc/common_audio/ring_buffer.h" #include 
"webrtc/common_audio/signal_processing/include/real_fft.h" -#include "webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h" +#include "webrtc/modules/audio_processing/aecm/echo_control_mobile.h" #include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h" #include "webrtc/system_wrappers/include/compile_assert_c.h" #include "webrtc/system_wrappers/include/cpu_features_wrapper.h" diff --git a/webrtc/modules/audio_processing/aecm/aecm_core_mips.c b/webrtc/modules/audio_processing/aecm/aecm_core_mips.c index 3c2343a892..3ca9982ebf 100644 --- a/webrtc/modules/audio_processing/aecm/aecm_core_mips.c +++ b/webrtc/modules/audio_processing/aecm/aecm_core_mips.c @@ -12,7 +12,7 @@ #include <assert.h> -#include "webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h" +#include "webrtc/modules/audio_processing/aecm/echo_control_mobile.h" #include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h" static const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END = { diff --git a/webrtc/modules/audio_processing/aecm/echo_control_mobile.c b/webrtc/modules/audio_processing/aecm/echo_control_mobile.c index 83781e97fe..91e6f0e80c 100644 --- a/webrtc/modules/audio_processing/aecm/echo_control_mobile.c +++ b/webrtc/modules/audio_processing/aecm/echo_control_mobile.c @@ -8,7 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#include "webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h" +#include "webrtc/modules/audio_processing/aecm/echo_control_mobile.h" #ifdef AEC_DEBUG #include <stdio.h> @@ -68,8 +68,6 @@ typedef struct // Structures RingBuffer *farendBuf; - int lastError; - AecmCore* aecmCore; } AecMobile; @@ -100,7 +98,6 @@ void* WebRtcAecm_Create() { } aecm->initFlag = 0; - aecm->lastError = 0; #ifdef AEC_DEBUG aecm->aecmCore->farFile = fopen("aecFar.pcm","wb"); @@ -151,16 +148,14 @@ int32_t WebRtcAecm_Init(void *aecmInst, int32_t sampFreq) if (sampFreq != 8000 && sampFreq != 16000) { - aecm->lastError = AECM_BAD_PARAMETER_ERROR; - return -1; + return AECM_BAD_PARAMETER_ERROR; } aecm->sampFreq = sampFreq; // Initialize AECM core if (WebRtcAecm_InitCore(aecm->aecmCore, aecm->sampFreq) == -1) { - aecm->lastError = AECM_UNSPECIFIED_ERROR; - return -1; + return AECM_UNSPECIFIED_ERROR; } // Initialize farend buffer @@ -191,51 +186,53 @@ int32_t WebRtcAecm_Init(void *aecmInst, int32_t sampFreq) if (WebRtcAecm_set_config(aecm, aecConfig) == -1) { - aecm->lastError = AECM_UNSPECIFIED_ERROR; - return -1; + return AECM_UNSPECIFIED_ERROR; } return 0; } -int32_t WebRtcAecm_BufferFarend(void *aecmInst, const int16_t *farend, - size_t nrOfSamples) -{ +// Returns any error that is caused when buffering the +// farend signal. 
+int32_t WebRtcAecm_GetBufferFarendError(void *aecmInst, const int16_t *farend, + size_t nrOfSamples) { AecMobile* aecm = aecmInst; - int32_t retVal = 0; - if (aecm == NULL) - { - return -1; - } + if (aecm == NULL) + return -1; - if (farend == NULL) - { - aecm->lastError = AECM_NULL_POINTER_ERROR; - return -1; - } + if (farend == NULL) + return AECM_NULL_POINTER_ERROR; - if (aecm->initFlag != kInitCheck) - { - aecm->lastError = AECM_UNINITIALIZED_ERROR; - return -1; - } + if (aecm->initFlag != kInitCheck) + return AECM_UNINITIALIZED_ERROR; - if (nrOfSamples != 80 && nrOfSamples != 160) - { - aecm->lastError = AECM_BAD_PARAMETER_ERROR; - return -1; - } + if (nrOfSamples != 80 && nrOfSamples != 160) + return AECM_BAD_PARAMETER_ERROR; - // TODO: Is this really a good idea? - if (!aecm->ECstartup) - { - WebRtcAecm_DelayComp(aecm); - } + return 0; +} - WebRtc_WriteBuffer(aecm->farendBuf, farend, nrOfSamples); - return retVal; +int32_t WebRtcAecm_BufferFarend(void *aecmInst, const int16_t *farend, + size_t nrOfSamples) { + AecMobile* aecm = aecmInst; + + const int32_t err = + WebRtcAecm_GetBufferFarendError(aecmInst, farend, nrOfSamples); + + if (err != 0) + return err; + + // TODO(unknown): Is this really a good idea? 
+ if (!aecm->ECstartup) + { + WebRtcAecm_DelayComp(aecm); + } + + WebRtc_WriteBuffer(aecm->farendBuf, farend, nrOfSamples); + + return 0; } int32_t WebRtcAecm_Process(void *aecmInst, const int16_t *nearendNoisy, @@ -259,38 +256,32 @@ int32_t WebRtcAecm_Process(void *aecmInst, const int16_t *nearendNoisy, if (nearendNoisy == NULL) { - aecm->lastError = AECM_NULL_POINTER_ERROR; - return -1; + return AECM_NULL_POINTER_ERROR; } if (out == NULL) { - aecm->lastError = AECM_NULL_POINTER_ERROR; - return -1; + return AECM_NULL_POINTER_ERROR; } if (aecm->initFlag != kInitCheck) { - aecm->lastError = AECM_UNINITIALIZED_ERROR; - return -1; + return AECM_UNINITIALIZED_ERROR; } if (nrOfSamples != 80 && nrOfSamples != 160) { - aecm->lastError = AECM_BAD_PARAMETER_ERROR; - return -1; + return AECM_BAD_PARAMETER_ERROR; } if (msInSndCardBuf < 0) { msInSndCardBuf = 0; - aecm->lastError = AECM_BAD_PARAMETER_WARNING; - retVal = -1; + retVal = AECM_BAD_PARAMETER_WARNING; } else if (msInSndCardBuf > 500) { msInSndCardBuf = 500; - aecm->lastError = AECM_BAD_PARAMETER_WARNING; - retVal = -1; + retVal = AECM_BAD_PARAMETER_WARNING; } msInSndCardBuf += 10; aecm->msInSndCardBuf = msInSndCardBuf; @@ -453,21 +444,18 @@ int32_t WebRtcAecm_set_config(void *aecmInst, AecmConfig config) if (aecm->initFlag != kInitCheck) { - aecm->lastError = AECM_UNINITIALIZED_ERROR; - return -1; + return AECM_UNINITIALIZED_ERROR; } if (config.cngMode != AecmFalse && config.cngMode != AecmTrue) { - aecm->lastError = AECM_BAD_PARAMETER_ERROR; - return -1; + return AECM_BAD_PARAMETER_ERROR; } aecm->aecmCore->cngMode = config.cngMode; if (config.echoMode < 0 || config.echoMode > 4) { - aecm->lastError = AECM_BAD_PARAMETER_ERROR; - return -1; + return AECM_BAD_PARAMETER_ERROR; } aecm->echoMode = config.echoMode; @@ -524,33 +512,6 @@ int32_t WebRtcAecm_set_config(void *aecmInst, AecmConfig config) return 0; } -int32_t WebRtcAecm_get_config(void *aecmInst, AecmConfig *config) -{ - AecMobile* aecm = aecmInst; - - if (aecm 
== NULL) - { - return -1; - } - - if (config == NULL) - { - aecm->lastError = AECM_NULL_POINTER_ERROR; - return -1; - } - - if (aecm->initFlag != kInitCheck) - { - aecm->lastError = AECM_UNINITIALIZED_ERROR; - return -1; - } - - config->cngMode = aecm->aecmCore->cngMode; - config->echoMode = aecm->echoMode; - - return 0; -} - int32_t WebRtcAecm_InitEchoPath(void* aecmInst, const void* echo_path, size_t size_bytes) @@ -562,19 +523,16 @@ int32_t WebRtcAecm_InitEchoPath(void* aecmInst, return -1; } if (echo_path == NULL) { - aecm->lastError = AECM_NULL_POINTER_ERROR; - return -1; + return AECM_NULL_POINTER_ERROR; } if (size_bytes != WebRtcAecm_echo_path_size_bytes()) { // Input channel size does not match the size of AECM - aecm->lastError = AECM_BAD_PARAMETER_ERROR; - return -1; + return AECM_BAD_PARAMETER_ERROR; } if (aecm->initFlag != kInitCheck) { - aecm->lastError = AECM_UNINITIALIZED_ERROR; - return -1; + return AECM_UNINITIALIZED_ERROR; } WebRtcAecm_InitEchoPathCore(aecm->aecmCore, echo_path_ptr); @@ -593,19 +551,16 @@ int32_t WebRtcAecm_GetEchoPath(void* aecmInst, return -1; } if (echo_path == NULL) { - aecm->lastError = AECM_NULL_POINTER_ERROR; - return -1; + return AECM_NULL_POINTER_ERROR; } if (size_bytes != WebRtcAecm_echo_path_size_bytes()) { // Input channel size does not match the size of AECM - aecm->lastError = AECM_BAD_PARAMETER_ERROR; - return -1; + return AECM_BAD_PARAMETER_ERROR; } if (aecm->initFlag != kInitCheck) { - aecm->lastError = AECM_UNINITIALIZED_ERROR; - return -1; + return AECM_UNINITIALIZED_ERROR; } memcpy(echo_path_ptr, aecm->aecmCore->channelStored, size_bytes); @@ -617,17 +572,6 @@ size_t WebRtcAecm_echo_path_size_bytes() return (PART_LEN1 * sizeof(int16_t)); } -int32_t WebRtcAecm_get_error_code(void *aecmInst) -{ - AecMobile* aecm = aecmInst; - - if (aecm == NULL) - { - return -1; - } - - return aecm->lastError; -} static int WebRtcAecm_EstBufDelay(AecMobile* aecm, short msInSndCardBuf) { short delayNew, nSampSndCard; diff --git 
a/webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h b/webrtc/modules/audio_processing/aecm/echo_control_mobile.h index 7ae15c2a3d..b45ff59907 100644 --- a/webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h +++ b/webrtc/modules/audio_processing/aecm/echo_control_mobile.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AECM_INCLUDE_ECHO_CONTROL_MOBILE_H_ -#define WEBRTC_MODULES_AUDIO_PROCESSING_AECM_INCLUDE_ECHO_CONTROL_MOBILE_H_ +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AECM_ECHO_CONTROL_MOBILE_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_AECM_ECHO_CONTROL_MOBILE_H_ #include <stdlib.h> @@ -66,7 +66,7 @@ void WebRtcAecm_Free(void* aecmInst); * Outputs Description * ------------------------------------------------------------------- * int32_t return 0: OK - * -1: error + * 1200-12004,12100: error/warning */ int32_t WebRtcAecm_Init(void* aecmInst, int32_t sampFreq); @@ -83,13 +83,32 @@ int32_t WebRtcAecm_Init(void* aecmInst, int32_t sampFreq); * Outputs Description * ------------------------------------------------------------------- * int32_t return 0: OK - * -1: error + * 1200-12004,12100: error/warning */ int32_t WebRtcAecm_BufferFarend(void* aecmInst, const int16_t* farend, size_t nrOfSamples); /* + * Reports any errors that would arise when buffering a farend buffer. 
+ * + * Inputs Description + * ------------------------------------------------------------------- + * void* aecmInst Pointer to the AECM instance + * int16_t* farend In buffer containing one frame of + * farend signal + * int16_t nrOfSamples Number of samples in farend buffer + * + * Outputs Description + * ------------------------------------------------------------------- + * int32_t return 0: OK + * 1200-12004,12100: error/warning + */ +int32_t WebRtcAecm_GetBufferFarendError(void* aecmInst, + const int16_t* farend, + size_t nrOfSamples); + +/* * Runs the AECM on an 80 or 160 sample blocks of data. * * Inputs Description @@ -112,7 +131,7 @@ int32_t WebRtcAecm_BufferFarend(void* aecmInst, * ------------------------------------------------------------------- * int16_t* out Out buffer, one frame of processed nearend * int32_t return 0: OK - * -1: error + * 1200-12004,12100: error/warning */ int32_t WebRtcAecm_Process(void* aecmInst, const int16_t* nearendNoisy, @@ -133,27 +152,11 @@ int32_t WebRtcAecm_Process(void* aecmInst, * Outputs Description * ------------------------------------------------------------------- * int32_t return 0: OK - * -1: error + * 1200-12004,12100: error/warning */ int32_t WebRtcAecm_set_config(void* aecmInst, AecmConfig config); /* - * This function enables the user to set certain parameters on-the-fly - * - * Inputs Description - * ------------------------------------------------------------------- - * void* aecmInst Pointer to the AECM instance - * - * Outputs Description - * ------------------------------------------------------------------- - * AecmConfig* config Pointer to the config instance that - * all properties will be written to - * int32_t return 0: OK - * -1: error - */ -int32_t WebRtcAecm_get_config(void *aecmInst, AecmConfig *config); - -/* * This function enables the user to set the echo path on-the-fly. 
* * Inputs Description @@ -165,7 +168,7 @@ int32_t WebRtcAecm_get_config(void *aecmInst, AecmConfig *config); * Outputs Description * ------------------------------------------------------------------- * int32_t return 0: OK - * -1: error + * 1200-12004,12100: error/warning */ int32_t WebRtcAecm_InitEchoPath(void* aecmInst, const void* echo_path, @@ -184,7 +187,7 @@ int32_t WebRtcAecm_InitEchoPath(void* aecmInst, * Outputs Description * ------------------------------------------------------------------- * int32_t return 0: OK - * -1: error + * 1200-12004,12100: error/warning */ int32_t WebRtcAecm_GetEchoPath(void* aecmInst, void* echo_path, @@ -199,20 +202,8 @@ int32_t WebRtcAecm_GetEchoPath(void* aecmInst, */ size_t WebRtcAecm_echo_path_size_bytes(); -/* - * Gets the last error code. - * - * Inputs Description - * ------------------------------------------------------------------- - * void* aecmInst Pointer to the AECM instance - * - * Outputs Description - * ------------------------------------------------------------------- - * int32_t return 11000-11100: error code - */ -int32_t WebRtcAecm_get_error_code(void *aecmInst); #ifdef __cplusplus } #endif -#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AECM_INCLUDE_ECHO_CONTROL_MOBILE_H_ +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AECM_ECHO_CONTROL_MOBILE_H_ diff --git a/webrtc/modules/audio_processing/agc/agc.cc b/webrtc/modules/audio_processing/agc/agc.cc index 706b963aa1..fc78f07ebb 100644 --- a/webrtc/modules/audio_processing/agc/agc.cc +++ b/webrtc/modules/audio_processing/agc/agc.cc @@ -19,7 +19,7 @@ #include "webrtc/base/checks.h" #include "webrtc/modules/audio_processing/agc/histogram.h" #include "webrtc/modules/audio_processing/agc/utility.h" -#include "webrtc/modules/interface/module_common_types.h" +#include "webrtc/modules/include/module_common_types.h" namespace webrtc { namespace { diff --git a/webrtc/modules/audio_processing/agc/agc_manager_direct.cc b/webrtc/modules/audio_processing/agc/agc_manager_direct.cc 
index 867022dcbf..e56984a1b1 100644 --- a/webrtc/modules/audio_processing/agc/agc_manager_direct.cc +++ b/webrtc/modules/audio_processing/agc/agc_manager_direct.cc @@ -19,7 +19,7 @@ #include "webrtc/modules/audio_processing/agc/gain_map_internal.h" #include "webrtc/modules/audio_processing/gain_control_impl.h" -#include "webrtc/modules/interface/module_common_types.h" +#include "webrtc/modules/include/module_common_types.h" #include "webrtc/system_wrappers/include/logging.h" namespace webrtc { @@ -168,19 +168,19 @@ int AgcManagerDirect::Initialize() { // example, what happens when we change devices. if (gctrl_->set_mode(GainControl::kFixedDigital) != 0) { - LOG_FERR1(LS_ERROR, set_mode, GainControl::kFixedDigital); + LOG(LS_ERROR) << "set_mode(GainControl::kFixedDigital) failed."; return -1; } if (gctrl_->set_target_level_dbfs(2) != 0) { - LOG_FERR1(LS_ERROR, set_target_level_dbfs, 2); + LOG(LS_ERROR) << "set_target_level_dbfs(2) failed."; return -1; } if (gctrl_->set_compression_gain_db(kDefaultCompressionGain) != 0) { - LOG_FERR1(LS_ERROR, set_compression_gain_db, kDefaultCompressionGain); + LOG(LS_ERROR) << "set_compression_gain_db(kDefaultCompressionGain) failed."; return -1; } if (gctrl_->enable_limiter(true) != 0) { - LOG_FERR1(LS_ERROR, enable_limiter, true); + LOG(LS_ERROR) << "enable_limiter(true) failed."; return -1; } return 0; @@ -244,7 +244,7 @@ void AgcManagerDirect::Process(const int16_t* audio, } if (agc_->Process(audio, length, sample_rate_hz) != 0) { - LOG_FERR0(LS_ERROR, Agc::Process); + LOG(LS_ERROR) << "Agc::Process failed"; assert(false); } @@ -434,7 +434,8 @@ void AgcManagerDirect::UpdateCompressor() { compression_ = new_compression; compression_accumulator_ = new_compression; if (gctrl_->set_compression_gain_db(compression_) != 0) { - LOG_FERR1(LS_ERROR, set_compression_gain_db, compression_); + LOG(LS_ERROR) << "set_compression_gain_db(" << compression_ + << ") failed."; } } } diff --git a/webrtc/modules/audio_processing/agc/agc_unittest.cc 
b/webrtc/modules/audio_processing/agc/agc_unittest.cc index 66a8a2b1b3..25b99d8773 100644 --- a/webrtc/modules/audio_processing/agc/agc_unittest.cc +++ b/webrtc/modules/audio_processing/agc/agc_unittest.cc @@ -13,7 +13,7 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" -#include "webrtc/modules/interface/module_common_types.h" +#include "webrtc/modules/include/module_common_types.h" #include "webrtc/test/testsupport/fileutils.h" #include "webrtc/tools/agc/test_utils.h" diff --git a/webrtc/modules/audio_processing/agc/histogram.cc b/webrtc/modules/audio_processing/agc/histogram.cc index 1d3035fe12..5c66727a9f 100644 --- a/webrtc/modules/audio_processing/agc/histogram.cc +++ b/webrtc/modules/audio_processing/agc/histogram.cc @@ -13,7 +13,7 @@ #include <cmath> #include <cstring> -#include "webrtc/modules/interface/module_common_types.h" +#include "webrtc/modules/include/module_common_types.h" namespace webrtc { diff --git a/webrtc/modules/audio_processing/agc/legacy/analog_agc.c b/webrtc/modules/audio_processing/agc/legacy/analog_agc.c index be644d9701..3a1dc9d5ce 100644 --- a/webrtc/modules/audio_processing/agc/legacy/analog_agc.c +++ b/webrtc/modules/audio_processing/agc/legacy/analog_agc.c @@ -250,34 +250,35 @@ int WebRtcAgc_AddMic(void *state, int16_t* const* in_mic, size_t num_bands, return 0; } -int WebRtcAgc_AddFarend(void *state, const int16_t *in_far, size_t samples) -{ +int WebRtcAgc_AddFarend(void *state, const int16_t *in_far, size_t samples) { + LegacyAgc* stt = (LegacyAgc*)state; + + int err = WebRtcAgc_GetAddFarendError(state, samples); + + if (err != 0) + return err; + + return WebRtcAgc_AddFarendToDigital(&stt->digitalAgc, in_far, samples); +} + +int WebRtcAgc_GetAddFarendError(void *state, size_t samples) { LegacyAgc* stt; stt = (LegacyAgc*)state; - if (stt == NULL) - { - return -1; - } + if (stt == NULL) + return -1; - if (stt->fs == 8000) - { - if (samples != 80) - { - return -1; - } - } else if (stt->fs == 16000 || stt->fs == 32000 || stt->fs 
== 48000) - { - if (samples != 160) - { - return -1; - } - } else - { - return -1; - } + if (stt->fs == 8000) { + if (samples != 80) + return -1; + } else if (stt->fs == 16000 || stt->fs == 32000 || stt->fs == 48000) { + if (samples != 160) + return -1; + } else { + return -1; + } - return WebRtcAgc_AddFarendToDigital(&stt->digitalAgc, in_far, samples); + return 0; } int WebRtcAgc_VirtualMic(void *agcInst, int16_t* const* in_near, diff --git a/webrtc/modules/audio_processing/agc/legacy/gain_control.h b/webrtc/modules/audio_processing/agc/legacy/gain_control.h index 08c1988f01..db942fe5ec 100644 --- a/webrtc/modules/audio_processing/agc/legacy/gain_control.h +++ b/webrtc/modules/audio_processing/agc/legacy/gain_control.h @@ -50,6 +50,20 @@ extern "C" #endif /* + * This function analyses the number of samples passed to + * farend and produces any error code that could arise. + * + * Input: + * - agcInst : AGC instance. + * - samples : Number of samples in input vector. + * + * Return value: + * : 0 - Normal operation. + * : -1 - Error. + */ +int WebRtcAgc_GetAddFarendError(void* state, size_t samples); + +/* * This function processes a 10 ms frame of far-end speech to determine * if there is active speech. 
The length of the input speech vector must be * given in samples (80 when FS=8000, and 160 when FS=16000, FS=32000 or diff --git a/webrtc/modules/audio_processing/agc/mock_agc.h b/webrtc/modules/audio_processing/agc/mock_agc.h index 13dbd2edd5..e362200d86 100644 --- a/webrtc/modules/audio_processing/agc/mock_agc.h +++ b/webrtc/modules/audio_processing/agc/mock_agc.h @@ -14,7 +14,7 @@ #include "webrtc/modules/audio_processing/agc/agc.h" #include "gmock/gmock.h" -#include "webrtc/modules/interface/module_common_types.h" +#include "webrtc/modules/include/module_common_types.h" namespace webrtc { diff --git a/webrtc/modules/audio_processing/audio_buffer.cc b/webrtc/modules/audio_processing/audio_buffer.cc index 81790a159b..ff64267e8c 100644 --- a/webrtc/modules/audio_processing/audio_buffer.cc +++ b/webrtc/modules/audio_processing/audio_buffer.cc @@ -26,7 +26,7 @@ const size_t kSamplesPer48kHzChannel = 480; int KeyboardChannelIndex(const StreamConfig& stream_config) { if (!stream_config.has_keyboard()) { assert(false); - return -1; + return 0; } return stream_config.num_channels(); @@ -44,9 +44,9 @@ size_t NumBandsFromSamplesPerChannel(size_t num_frames) { } // namespace AudioBuffer::AudioBuffer(size_t input_num_frames, - int num_input_channels, + size_t num_input_channels, size_t process_num_frames, - int num_process_channels, + size_t num_process_channels, size_t output_num_frames) : input_num_frames_(input_num_frames), num_input_channels_(num_input_channels), @@ -74,7 +74,7 @@ AudioBuffer::AudioBuffer(size_t input_num_frames, num_proc_channels_)); if (input_num_frames_ != proc_num_frames_) { - for (int i = 0; i < num_proc_channels_; ++i) { + for (size_t i = 0; i < num_proc_channels_; ++i) { input_resamplers_.push_back( new PushSincResampler(input_num_frames_, proc_num_frames_)); @@ -82,7 +82,7 @@ AudioBuffer::AudioBuffer(size_t input_num_frames, } if (output_num_frames_ != proc_num_frames_) { - for (int i = 0; i < num_proc_channels_; ++i) { + for (size_t i = 0; i < 
num_proc_channels_; ++i) { output_resamplers_.push_back( new PushSincResampler(proc_num_frames_, output_num_frames_)); @@ -130,7 +130,7 @@ void AudioBuffer::CopyFrom(const float* const* data, // Resample. if (input_num_frames_ != proc_num_frames_) { - for (int i = 0; i < num_proc_channels_; ++i) { + for (size_t i = 0; i < num_proc_channels_; ++i) { input_resamplers_[i]->Resample(data_ptr[i], input_num_frames_, process_buffer_->channels()[i], @@ -140,7 +140,7 @@ void AudioBuffer::CopyFrom(const float* const* data, } // Convert to the S16 range. - for (int i = 0; i < num_proc_channels_; ++i) { + for (size_t i = 0; i < num_proc_channels_; ++i) { FloatToFloatS16(data_ptr[i], proc_num_frames_, data_->fbuf()->channels()[i]); @@ -150,7 +150,7 @@ void AudioBuffer::CopyFrom(const float* const* data, void AudioBuffer::CopyTo(const StreamConfig& stream_config, float* const* data) { assert(stream_config.num_frames() == output_num_frames_); - assert(stream_config.num_channels() == num_channels_); + assert(stream_config.num_channels() == num_channels_ || num_channels_ == 1); // Convert to the float range. float* const* data_ptr = data; @@ -158,7 +158,7 @@ void AudioBuffer::CopyTo(const StreamConfig& stream_config, // Convert to an intermediate buffer for subsequent resampling. data_ptr = process_buffer_->channels(); } - for (int i = 0; i < num_channels_; ++i) { + for (size_t i = 0; i < num_channels_; ++i) { FloatS16ToFloat(data_->fbuf()->channels()[i], proc_num_frames_, data_ptr[i]); @@ -166,13 +166,18 @@ void AudioBuffer::CopyTo(const StreamConfig& stream_config, // Resample. if (output_num_frames_ != proc_num_frames_) { - for (int i = 0; i < num_channels_; ++i) { + for (size_t i = 0; i < num_channels_; ++i) { output_resamplers_[i]->Resample(data_ptr[i], proc_num_frames_, data[i], output_num_frames_); } } + + // Upmix. 
+ for (size_t i = num_channels_; i < stream_config.num_channels(); ++i) { + memcpy(data[i], data[0], output_num_frames_ * sizeof(**data)); + } } void AudioBuffer::InitForNewData() { @@ -192,13 +197,13 @@ int16_t* const* AudioBuffer::channels() { return data_->ibuf()->channels(); } -const int16_t* const* AudioBuffer::split_bands_const(int channel) const { +const int16_t* const* AudioBuffer::split_bands_const(size_t channel) const { return split_data_.get() ? split_data_->ibuf_const()->bands(channel) : data_->ibuf_const()->bands(channel); } -int16_t* const* AudioBuffer::split_bands(int channel) { +int16_t* const* AudioBuffer::split_bands(size_t channel) { mixed_low_pass_valid_ = false; return split_data_.get() ? split_data_->ibuf()->bands(channel) : @@ -249,13 +254,13 @@ float* const* AudioBuffer::channels_f() { return data_->fbuf()->channels(); } -const float* const* AudioBuffer::split_bands_const_f(int channel) const { +const float* const* AudioBuffer::split_bands_const_f(size_t channel) const { return split_data_.get() ? split_data_->fbuf_const()->bands(channel) : data_->fbuf_const()->bands(channel); } -float* const* AudioBuffer::split_bands_f(int channel) { +float* const* AudioBuffer::split_bands_f(size_t channel) { mixed_low_pass_valid_ = false; return split_data_.get() ? split_data_->fbuf()->bands(channel) : @@ -336,11 +341,11 @@ AudioFrame::VADActivity AudioBuffer::activity() const { return activity_; } -int AudioBuffer::num_channels() const { +size_t AudioBuffer::num_channels() const { return num_channels_; } -void AudioBuffer::set_num_channels(int num_channels) { +void AudioBuffer::set_num_channels(size_t num_channels) { num_channels_ = num_channels; } @@ -393,7 +398,7 @@ void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) { // Resample. 
if (input_num_frames_ != proc_num_frames_) { - for (int i = 0; i < num_proc_channels_; ++i) { + for (size_t i = 0; i < num_proc_channels_; ++i) { input_resamplers_[i]->Resample(input_buffer_->fbuf_const()->channels()[i], input_num_frames_, data_->fbuf()->channels()[i], @@ -418,7 +423,7 @@ void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) { output_buffer_.reset( new IFChannelBuffer(output_num_frames_, num_channels_)); } - for (int i = 0; i < num_channels_; ++i) { + for (size_t i = 0; i < num_channels_; ++i) { output_resamplers_[i]->Resample( data_->fbuf()->channels()[i], proc_num_frames_, output_buffer_->fbuf()->channels()[i], output_num_frames_); @@ -443,7 +448,7 @@ void AudioBuffer::CopyLowPassToReference() { new ChannelBuffer<int16_t>(num_split_frames_, num_proc_channels_)); } - for (int i = 0; i < num_proc_channels_; i++) { + for (size_t i = 0; i < num_proc_channels_; i++) { memcpy(low_pass_reference_channels_->channels()[i], split_bands_const(i)[kBand0To8kHz], low_pass_reference_channels_->num_frames_per_band() * diff --git a/webrtc/modules/audio_processing/audio_buffer.h b/webrtc/modules/audio_processing/audio_buffer.h index 864633f267..ff12ca2d95 100644 --- a/webrtc/modules/audio_processing/audio_buffer.h +++ b/webrtc/modules/audio_processing/audio_buffer.h @@ -15,7 +15,7 @@ #include "webrtc/common_audio/channel_buffer.h" #include "webrtc/modules/audio_processing/include/audio_processing.h" #include "webrtc/modules/audio_processing/splitting_filter.h" -#include "webrtc/modules/interface/module_common_types.h" +#include "webrtc/modules/include/module_common_types.h" #include "webrtc/system_wrappers/include/scoped_vector.h" #include "webrtc/typedefs.h" @@ -34,14 +34,14 @@ class AudioBuffer { public: // TODO(ajm): Switch to take ChannelLayouts. 
AudioBuffer(size_t input_num_frames, - int num_input_channels, + size_t num_input_channels, size_t process_num_frames, - int num_process_channels, + size_t num_process_channels, size_t output_num_frames); virtual ~AudioBuffer(); - int num_channels() const; - void set_num_channels(int num_channels); + size_t num_channels() const; + void set_num_channels(size_t num_channels); size_t num_frames() const; size_t num_frames_per_band() const; size_t num_keyboard_frames() const; @@ -65,10 +65,10 @@ class AudioBuffer { // 0 <= channel < |num_proc_channels_| // 0 <= band < |num_bands_| // 0 <= sample < |num_split_frames_| - int16_t* const* split_bands(int channel); - const int16_t* const* split_bands_const(int channel) const; - float* const* split_bands_f(int channel); - const float* const* split_bands_const_f(int channel) const; + int16_t* const* split_bands(size_t channel); + const int16_t* const* split_bands_const(size_t channel) const; + float* const* split_bands_f(size_t channel); + const float* const* split_bands_const_f(size_t channel) const; // Returns a pointer array to the channels for a specific band. // Usage: @@ -128,16 +128,16 @@ class AudioBuffer { // The audio is passed into DeinterleaveFrom() or CopyFrom() with input // format (samples per channel and number of channels). const size_t input_num_frames_; - const int num_input_channels_; + const size_t num_input_channels_; // The audio is stored by DeinterleaveFrom() or CopyFrom() with processing // format. const size_t proc_num_frames_; - const int num_proc_channels_; + const size_t num_proc_channels_; // The audio is returned by InterleaveTo() and CopyTo() with output samples // per channels and the current number of channels. This last one can be // changed at any time using set_num_channels(). 
const size_t output_num_frames_; - int num_channels_; + size_t num_channels_; size_t num_bands_; size_t num_split_frames_; diff --git a/webrtc/modules/audio_processing/audio_processing.gypi b/webrtc/modules/audio_processing/audio_processing.gypi index 8f1fbdf0be..7ddd4f5a15 100644 --- a/webrtc/modules/audio_processing/audio_processing.gypi +++ b/webrtc/modules/audio_processing/audio_processing.gypi @@ -41,11 +41,11 @@ 'aec/aec_resampler.h', 'aec/echo_cancellation.c', 'aec/echo_cancellation_internal.h', - 'aec/include/echo_cancellation.h', + 'aec/echo_cancellation.h', 'aecm/aecm_core.c', 'aecm/aecm_core.h', 'aecm/echo_control_mobile.c', - 'aecm/include/echo_control_mobile.h', + 'aecm/echo_control_mobile.h', 'agc/agc.cc', 'agc/agc.h', 'agc/agc_manager_direct.cc', @@ -162,7 +162,7 @@ ['prefer_fixed_point==1', { 'defines': ['WEBRTC_NS_FIXED'], 'sources': [ - 'ns/include/noise_suppression_x.h', + 'ns/noise_suppression_x.h', 'ns/noise_suppression_x.c', 'ns/nsx_core.c', 'ns/nsx_core.h', @@ -183,7 +183,7 @@ 'defines': ['WEBRTC_NS_FLOAT'], 'sources': [ 'ns/defines.h', - 'ns/include/noise_suppression.h', + 'ns/noise_suppression.h', 'ns/noise_suppression.c', 'ns/ns_core.c', 'ns/ns_core.h', diff --git a/webrtc/modules/audio_processing/audio_processing_impl.cc b/webrtc/modules/audio_processing/audio_processing_impl.cc index c6574151d0..744309c774 100644 --- a/webrtc/modules/audio_processing/audio_processing_impl.cc +++ b/webrtc/modules/audio_processing/audio_processing_impl.cc @@ -15,6 +15,7 @@ #include "webrtc/base/checks.h" #include "webrtc/base/platform_file.h" +#include "webrtc/base/trace_event.h" #include "webrtc/common_audio/audio_converter.h" #include "webrtc/common_audio/channel_buffer.h" #include "webrtc/common_audio/include/audio_util.h" @@ -36,8 +37,7 @@ extern "C" { #include "webrtc/modules/audio_processing/processing_component.h" #include "webrtc/modules/audio_processing/transient/transient_suppressor.h" #include 
"webrtc/modules/audio_processing/voice_detection_impl.h" -#include "webrtc/modules/interface/module_common_types.h" -#include "webrtc/system_wrappers/include/critical_section_wrapper.h" +#include "webrtc/modules/include/module_common_types.h" #include "webrtc/system_wrappers/include/file_wrapper.h" #include "webrtc/system_wrappers/include/logging.h" #include "webrtc/system_wrappers/include/metrics.h" @@ -75,7 +75,6 @@ static bool LayoutHasKeyboard(AudioProcessing::ChannelLayout layout) { assert(false); return false; } - } // namespace // Throughout webrtc, it's assumed that success is represented by zero. @@ -147,6 +146,35 @@ class GainControlForNewAgc : public GainControl, public VolumeCallbacks { int volume_; }; +struct AudioProcessingImpl::ApmPublicSubmodules { + ApmPublicSubmodules() + : echo_cancellation(nullptr), + echo_control_mobile(nullptr), + gain_control(nullptr) {} + // Accessed externally of APM without any lock acquired. + EchoCancellationImpl* echo_cancellation; + EchoControlMobileImpl* echo_control_mobile; + GainControlImpl* gain_control; + rtc::scoped_ptr<HighPassFilterImpl> high_pass_filter; + rtc::scoped_ptr<LevelEstimatorImpl> level_estimator; + rtc::scoped_ptr<NoiseSuppressionImpl> noise_suppression; + rtc::scoped_ptr<VoiceDetectionImpl> voice_detection; + rtc::scoped_ptr<GainControlForNewAgc> gain_control_for_new_agc; + + // Accessed internally from both render and capture. 
+ rtc::scoped_ptr<TransientSuppressor> transient_suppressor; + rtc::scoped_ptr<IntelligibilityEnhancer> intelligibility_enhancer; +}; + +struct AudioProcessingImpl::ApmPrivateSubmodules { + explicit ApmPrivateSubmodules(Beamformer<float>* beamformer) + : beamformer(beamformer) {} + // Accessed internally from capture or during initialization + std::list<ProcessingComponent*> component_list; + rtc::scoped_ptr<Beamformer<float>> beamformer; + rtc::scoped_ptr<AgcManagerDirect> agc_manager; +}; + const int AudioProcessing::kNativeSampleRatesHz[] = { AudioProcessing::kSampleRate8kHz, AudioProcessing::kSampleRate16kHz, @@ -172,7 +200,7 @@ AudioProcessing* AudioProcessing::Create(const Config& config, AudioProcessingImpl* apm = new AudioProcessingImpl(config, beamformer); if (apm->Initialize() != kNoError) { delete apm; - apm = NULL; + apm = nullptr; } return apm; @@ -183,102 +211,82 @@ AudioProcessingImpl::AudioProcessingImpl(const Config& config) AudioProcessingImpl::AudioProcessingImpl(const Config& config, Beamformer<float>* beamformer) - : echo_cancellation_(NULL), - echo_control_mobile_(NULL), - gain_control_(NULL), - high_pass_filter_(NULL), - level_estimator_(NULL), - noise_suppression_(NULL), - voice_detection_(NULL), - crit_(CriticalSectionWrapper::CreateCriticalSection()), -#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP - debug_file_(FileWrapper::Create()), - event_msg_(new audioproc::Event()), -#endif - api_format_({{{kSampleRate16kHz, 1, false}, - {kSampleRate16kHz, 1, false}, - {kSampleRate16kHz, 1, false}, - {kSampleRate16kHz, 1, false}}}), - fwd_proc_format_(kSampleRate16kHz), - rev_proc_format_(kSampleRate16kHz, 1), - split_rate_(kSampleRate16kHz), - stream_delay_ms_(0), - delay_offset_ms_(0), - was_stream_delay_set_(false), - last_stream_delay_ms_(0), - last_aec_system_delay_ms_(0), - stream_delay_jumps_(-1), - aec_system_delay_jumps_(-1), - output_will_be_muted_(false), - key_pressed_(false), + : public_submodules_(new ApmPublicSubmodules()), + 
private_submodules_(new ApmPrivateSubmodules(beamformer)), + constants_(config.Get<ExperimentalAgc>().startup_min_volume, #if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS) - use_new_agc_(false), + false, #else - use_new_agc_(config.Get<ExperimentalAgc>().enabled), + config.Get<ExperimentalAgc>().enabled, #endif - agc_startup_min_volume_(config.Get<ExperimentalAgc>().startup_min_volume), + config.Get<Intelligibility>().enabled), + #if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS) - transient_suppressor_enabled_(false), + capture_(false, #else - transient_suppressor_enabled_(config.Get<ExperimentalNs>().enabled), + capture_(config.Get<ExperimentalNs>().enabled, #endif - beamformer_enabled_(config.Get<Beamforming>().enabled), - beamformer_(beamformer), - array_geometry_(config.Get<Beamforming>().array_geometry), - target_direction_(config.Get<Beamforming>().target_direction), - intelligibility_enabled_(config.Get<Intelligibility>().enabled) { - echo_cancellation_ = new EchoCancellationImpl(this, crit_); - component_list_.push_back(echo_cancellation_); - - echo_control_mobile_ = new EchoControlMobileImpl(this, crit_); - component_list_.push_back(echo_control_mobile_); - - gain_control_ = new GainControlImpl(this, crit_); - component_list_.push_back(gain_control_); - - high_pass_filter_ = new HighPassFilterImpl(this, crit_); - component_list_.push_back(high_pass_filter_); - - level_estimator_ = new LevelEstimatorImpl(this, crit_); - component_list_.push_back(level_estimator_); - - noise_suppression_ = new NoiseSuppressionImpl(this, crit_); - component_list_.push_back(noise_suppression_); - - voice_detection_ = new VoiceDetectionImpl(this, crit_); - component_list_.push_back(voice_detection_); - - gain_control_for_new_agc_.reset(new GainControlForNewAgc(gain_control_)); + config.Get<Beamforming>().array_geometry, + config.Get<Beamforming>().target_direction), + capture_nonlocked_(config.Get<Beamforming>().enabled) +{ + { + rtc::CritScope cs_render(&crit_render_); + 
rtc::CritScope cs_capture(&crit_capture_); + + public_submodules_->echo_cancellation = + new EchoCancellationImpl(this, &crit_render_, &crit_capture_); + public_submodules_->echo_control_mobile = + new EchoControlMobileImpl(this, &crit_render_, &crit_capture_); + public_submodules_->gain_control = + new GainControlImpl(this, &crit_capture_, &crit_capture_); + public_submodules_->high_pass_filter.reset( + new HighPassFilterImpl(&crit_capture_)); + public_submodules_->level_estimator.reset( + new LevelEstimatorImpl(&crit_capture_)); + public_submodules_->noise_suppression.reset( + new NoiseSuppressionImpl(&crit_capture_)); + public_submodules_->voice_detection.reset( + new VoiceDetectionImpl(&crit_capture_)); + public_submodules_->gain_control_for_new_agc.reset( + new GainControlForNewAgc(public_submodules_->gain_control)); + + private_submodules_->component_list.push_back( + public_submodules_->echo_cancellation); + private_submodules_->component_list.push_back( + public_submodules_->echo_control_mobile); + private_submodules_->component_list.push_back( + public_submodules_->gain_control); + } SetExtraOptions(config); } AudioProcessingImpl::~AudioProcessingImpl() { - { - CriticalSectionScoped crit_scoped(crit_); - // Depends on gain_control_ and gain_control_for_new_agc_. - agc_manager_.reset(); - // Depends on gain_control_. - gain_control_for_new_agc_.reset(); - while (!component_list_.empty()) { - ProcessingComponent* component = component_list_.front(); - component->Destroy(); - delete component; - component_list_.pop_front(); - } + // Depends on gain_control_ and + // public_submodules_->gain_control_for_new_agc. + private_submodules_->agc_manager.reset(); + // Depends on gain_control_. 
+ public_submodules_->gain_control_for_new_agc.reset(); + while (!private_submodules_->component_list.empty()) { + ProcessingComponent* component = + private_submodules_->component_list.front(); + component->Destroy(); + delete component; + private_submodules_->component_list.pop_front(); + } #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP - if (debug_file_->Open()) { - debug_file_->CloseFile(); - } -#endif + if (debug_dump_.debug_file->Open()) { + debug_dump_.debug_file->CloseFile(); } - delete crit_; - crit_ = NULL; +#endif } int AudioProcessingImpl::Initialize() { - CriticalSectionScoped crit_scoped(crit_); + // Run in a single-threaded manner during initialization. + rtc::CritScope cs_render(&crit_render_); + rtc::CritScope cs_capture(&crit_capture_); return InitializeLocked(); } @@ -306,44 +314,73 @@ int AudioProcessingImpl::Initialize(int input_sample_rate_hz, } int AudioProcessingImpl::Initialize(const ProcessingConfig& processing_config) { - CriticalSectionScoped crit_scoped(crit_); + // Run in a single-threaded manner during initialization. + rtc::CritScope cs_render(&crit_render_); + rtc::CritScope cs_capture(&crit_capture_); + return InitializeLocked(processing_config); +} + +int AudioProcessingImpl::MaybeInitializeRender( + const ProcessingConfig& processing_config) { + return MaybeInitialize(processing_config); +} + +int AudioProcessingImpl::MaybeInitializeCapture( + const ProcessingConfig& processing_config) { + return MaybeInitialize(processing_config); +} + +// Calls InitializeLocked() if any of the audio parameters have changed from +// their current values (needs to be called while holding the crit_render_lock). +int AudioProcessingImpl::MaybeInitialize( + const ProcessingConfig& processing_config) { + // Called from both threads. Thread check is therefore not possible. 
+ if (processing_config == formats_.api_format) { + return kNoError; + } + + rtc::CritScope cs_capture(&crit_capture_); return InitializeLocked(processing_config); } int AudioProcessingImpl::InitializeLocked() { const int fwd_audio_buffer_channels = - beamformer_enabled_ ? api_format_.input_stream().num_channels() - : api_format_.output_stream().num_channels(); + capture_nonlocked_.beamformer_enabled + ? formats_.api_format.input_stream().num_channels() + : formats_.api_format.output_stream().num_channels(); const int rev_audio_buffer_out_num_frames = - api_format_.reverse_output_stream().num_frames() == 0 - ? rev_proc_format_.num_frames() - : api_format_.reverse_output_stream().num_frames(); - if (api_format_.reverse_input_stream().num_channels() > 0) { - render_audio_.reset(new AudioBuffer( - api_format_.reverse_input_stream().num_frames(), - api_format_.reverse_input_stream().num_channels(), - rev_proc_format_.num_frames(), rev_proc_format_.num_channels(), + formats_.api_format.reverse_output_stream().num_frames() == 0 + ? 
formats_.rev_proc_format.num_frames() + : formats_.api_format.reverse_output_stream().num_frames(); + if (formats_.api_format.reverse_input_stream().num_channels() > 0) { + render_.render_audio.reset(new AudioBuffer( + formats_.api_format.reverse_input_stream().num_frames(), + formats_.api_format.reverse_input_stream().num_channels(), + formats_.rev_proc_format.num_frames(), + formats_.rev_proc_format.num_channels(), rev_audio_buffer_out_num_frames)); if (rev_conversion_needed()) { - render_converter_ = AudioConverter::Create( - api_format_.reverse_input_stream().num_channels(), - api_format_.reverse_input_stream().num_frames(), - api_format_.reverse_output_stream().num_channels(), - api_format_.reverse_output_stream().num_frames()); + render_.render_converter = AudioConverter::Create( + formats_.api_format.reverse_input_stream().num_channels(), + formats_.api_format.reverse_input_stream().num_frames(), + formats_.api_format.reverse_output_stream().num_channels(), + formats_.api_format.reverse_output_stream().num_frames()); } else { - render_converter_.reset(nullptr); + render_.render_converter.reset(nullptr); } } else { - render_audio_.reset(nullptr); - render_converter_.reset(nullptr); + render_.render_audio.reset(nullptr); + render_.render_converter.reset(nullptr); } - capture_audio_.reset(new AudioBuffer( - api_format_.input_stream().num_frames(), - api_format_.input_stream().num_channels(), fwd_proc_format_.num_frames(), - fwd_audio_buffer_channels, api_format_.output_stream().num_frames())); + capture_.capture_audio.reset( + new AudioBuffer(formats_.api_format.input_stream().num_frames(), + formats_.api_format.input_stream().num_channels(), + capture_nonlocked_.fwd_proc_format.num_frames(), + fwd_audio_buffer_channels, + formats_.api_format.output_stream().num_frames())); // Initialize all components. 
- for (auto item : component_list_) { + for (auto item : private_submodules_->component_list) { int err = item->Initialize(); if (err != kNoError) { return err; @@ -351,15 +388,16 @@ int AudioProcessingImpl::InitializeLocked() { } InitializeExperimentalAgc(); - InitializeTransient(); - InitializeBeamformer(); - InitializeIntelligibility(); + InitializeHighPassFilter(); + InitializeNoiseSuppression(); + InitializeLevelEstimator(); + InitializeVoiceDetection(); #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP - if (debug_file_->Open()) { + if (debug_dump_.debug_file->Open()) { int err = WriteInitMessage(); if (err != kNoError) { return err; @@ -372,16 +410,13 @@ int AudioProcessingImpl::InitializeLocked() { int AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) { for (const auto& stream : config.streams) { - if (stream.num_channels() < 0) { - return kBadNumberChannelsError; - } if (stream.num_channels() > 0 && stream.sample_rate_hz() <= 0) { return kBadSampleRateError; } } - const int num_in_channels = config.input_stream().num_channels(); - const int num_out_channels = config.output_stream().num_channels(); + const size_t num_in_channels = config.input_stream().num_channels(); + const size_t num_out_channels = config.output_stream().num_channels(); // Need at least one input channel. // Need either one output channel or as many outputs as there are inputs. @@ -390,18 +425,17 @@ int AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) { return kBadNumberChannelsError; } - if (beamformer_enabled_ && - (static_cast<size_t>(num_in_channels) != array_geometry_.size() || - num_out_channels > 1)) { + if (capture_nonlocked_.beamformer_enabled && + num_in_channels != capture_.array_geometry.size()) { return kBadNumberChannelsError; } - api_format_ = config; + formats_.api_format = config; // We process at the closest native rate >= min(input rate, output rate)... 
const int min_proc_rate = - std::min(api_format_.input_stream().sample_rate_hz(), - api_format_.output_stream().sample_rate_hz()); + std::min(formats_.api_format.input_stream().sample_rate_hz(), + formats_.api_format.output_stream().sample_rate_hz()); int fwd_proc_rate; for (size_t i = 0; i < kNumNativeSampleRates; ++i) { fwd_proc_rate = kNativeSampleRatesHz[i]; @@ -410,20 +444,20 @@ int AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) { } } // ...with one exception. - if (echo_control_mobile_->is_enabled() && + if (public_submodules_->echo_control_mobile->is_enabled() && min_proc_rate > kMaxAECMSampleRateHz) { fwd_proc_rate = kMaxAECMSampleRateHz; } - fwd_proc_format_ = StreamConfig(fwd_proc_rate); + capture_nonlocked_.fwd_proc_format = StreamConfig(fwd_proc_rate); // We normally process the reverse stream at 16 kHz. Unless... int rev_proc_rate = kSampleRate16kHz; - if (fwd_proc_format_.sample_rate_hz() == kSampleRate8kHz) { + if (capture_nonlocked_.fwd_proc_format.sample_rate_hz() == kSampleRate8kHz) { // ...the forward stream is at 8 kHz. rev_proc_rate = kSampleRate8kHz; } else { - if (api_format_.reverse_input_stream().sample_rate_hz() == + if (formats_.api_format.reverse_input_stream().sample_rate_hz() == kSampleRate32kHz) { // ...or the input is at 32 kHz, in which case we use the splitting // filter rather than the resampler. @@ -433,66 +467,89 @@ int AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) { // Always downmix the reverse stream to mono for analysis. This has been // demonstrated to work well for AEC in most practical scenarios. 
- rev_proc_format_ = StreamConfig(rev_proc_rate, 1); + formats_.rev_proc_format = StreamConfig(rev_proc_rate, 1); - if (fwd_proc_format_.sample_rate_hz() == kSampleRate32kHz || - fwd_proc_format_.sample_rate_hz() == kSampleRate48kHz) { - split_rate_ = kSampleRate16kHz; + if (capture_nonlocked_.fwd_proc_format.sample_rate_hz() == kSampleRate32kHz || + capture_nonlocked_.fwd_proc_format.sample_rate_hz() == kSampleRate48kHz) { + capture_nonlocked_.split_rate = kSampleRate16kHz; } else { - split_rate_ = fwd_proc_format_.sample_rate_hz(); + capture_nonlocked_.split_rate = + capture_nonlocked_.fwd_proc_format.sample_rate_hz(); } return InitializeLocked(); } -// Calls InitializeLocked() if any of the audio parameters have changed from -// their current values. -int AudioProcessingImpl::MaybeInitializeLocked( - const ProcessingConfig& processing_config) { - if (processing_config == api_format_) { - return kNoError; - } - return InitializeLocked(processing_config); -} - void AudioProcessingImpl::SetExtraOptions(const Config& config) { - CriticalSectionScoped crit_scoped(crit_); - for (auto item : component_list_) { + // Run in a single-threaded manner when setting the extra options. 
+ rtc::CritScope cs_render(&crit_render_); + rtc::CritScope cs_capture(&crit_capture_); + for (auto item : private_submodules_->component_list) { item->SetExtraOptions(config); } - if (transient_suppressor_enabled_ != config.Get<ExperimentalNs>().enabled) { - transient_suppressor_enabled_ = config.Get<ExperimentalNs>().enabled; + if (capture_.transient_suppressor_enabled != + config.Get<ExperimentalNs>().enabled) { + capture_.transient_suppressor_enabled = + config.Get<ExperimentalNs>().enabled; InitializeTransient(); } + +#ifdef WEBRTC_ANDROID_PLATFORM_BUILD + if (capture_nonlocked_.beamformer_enabled != + config.Get<Beamforming>().enabled) { + capture_nonlocked_.beamformer_enabled = config.Get<Beamforming>().enabled; + if (config.Get<Beamforming>().array_geometry.size() > 1) { + capture_.array_geometry = config.Get<Beamforming>().array_geometry; + } + capture_.target_direction = config.Get<Beamforming>().target_direction; + InitializeBeamformer(); + } +#endif // WEBRTC_ANDROID_PLATFORM_BUILD } +int AudioProcessingImpl::input_sample_rate_hz() const { + // Accessed from outside APM, hence a lock is needed. + rtc::CritScope cs(&crit_capture_); + return formats_.api_format.input_stream().sample_rate_hz(); +} int AudioProcessingImpl::proc_sample_rate_hz() const { - return fwd_proc_format_.sample_rate_hz(); + // Used as callback from submodules, hence locking is not allowed. + return capture_nonlocked_.fwd_proc_format.sample_rate_hz(); } int AudioProcessingImpl::proc_split_sample_rate_hz() const { - return split_rate_; + // Used as callback from submodules, hence locking is not allowed. + return capture_nonlocked_.split_rate; +} + +size_t AudioProcessingImpl::num_reverse_channels() const { + // Used as callback from submodules, hence locking is not allowed. 
+ return formats_.rev_proc_format.num_channels(); } -int AudioProcessingImpl::num_reverse_channels() const { - return rev_proc_format_.num_channels(); +size_t AudioProcessingImpl::num_input_channels() const { + // Used as callback from submodules, hence locking is not allowed. + return formats_.api_format.input_stream().num_channels(); } -int AudioProcessingImpl::num_input_channels() const { - return api_format_.input_stream().num_channels(); +size_t AudioProcessingImpl::num_proc_channels() const { + // Used as callback from submodules, hence locking is not allowed. + return capture_nonlocked_.beamformer_enabled ? 1 : num_output_channels(); } -int AudioProcessingImpl::num_output_channels() const { - return api_format_.output_stream().num_channels(); +size_t AudioProcessingImpl::num_output_channels() const { + // Used as callback from submodules, hence locking is not allowed. + return formats_.api_format.output_stream().num_channels(); } void AudioProcessingImpl::set_output_will_be_muted(bool muted) { - CriticalSectionScoped lock(crit_); - output_will_be_muted_ = muted; - if (agc_manager_.get()) { - agc_manager_->SetCaptureMuted(output_will_be_muted_); + rtc::CritScope cs(&crit_capture_); + capture_.output_will_be_muted = muted; + if (private_submodules_->agc_manager.get()) { + private_submodules_->agc_manager->SetCaptureMuted( + capture_.output_will_be_muted); } } @@ -504,13 +561,21 @@ int AudioProcessingImpl::ProcessStream(const float* const* src, int output_sample_rate_hz, ChannelLayout output_layout, float* const* dest) { - CriticalSectionScoped crit_scoped(crit_); - StreamConfig input_stream = api_format_.input_stream(); + TRACE_EVENT0("webrtc", "AudioProcessing::ProcessStream_ChannelLayout"); + StreamConfig input_stream; + StreamConfig output_stream; + { + // Access the formats_.api_format.input_stream beneath the capture lock. 
+ // The lock must be released as it is later required in the call + // to ProcessStream(,,,); + rtc::CritScope cs(&crit_capture_); + input_stream = formats_.api_format.input_stream(); + output_stream = formats_.api_format.output_stream(); + } + input_stream.set_sample_rate_hz(input_sample_rate_hz); input_stream.set_num_channels(ChannelsFromLayout(input_layout)); input_stream.set_has_keyboard(LayoutHasKeyboard(input_layout)); - - StreamConfig output_stream = api_format_.output_stream(); output_stream.set_sample_rate_hz(output_sample_rate_hz); output_stream.set_num_channels(ChannelsFromLayout(output_layout)); output_stream.set_has_keyboard(LayoutHasKeyboard(output_layout)); @@ -525,44 +590,64 @@ int AudioProcessingImpl::ProcessStream(const float* const* src, const StreamConfig& input_config, const StreamConfig& output_config, float* const* dest) { - CriticalSectionScoped crit_scoped(crit_); - if (!src || !dest) { - return kNullPointerError; + TRACE_EVENT0("webrtc", "AudioProcessing::ProcessStream_StreamConfig"); + ProcessingConfig processing_config; + { + // Acquire the capture lock in order to safely call the function + // that retrieves the render side data. This function accesses apm + // getters that need the capture lock held when being called. + rtc::CritScope cs_capture(&crit_capture_); + public_submodules_->echo_cancellation->ReadQueuedRenderData(); + public_submodules_->echo_control_mobile->ReadQueuedRenderData(); + public_submodules_->gain_control->ReadQueuedRenderData(); + + if (!src || !dest) { + return kNullPointerError; + } + + processing_config = formats_.api_format; } - ProcessingConfig processing_config = api_format_; processing_config.input_stream() = input_config; processing_config.output_stream() = output_config; - RETURN_ON_ERR(MaybeInitializeLocked(processing_config)); + { + // Do conditional reinitialization. 
+ rtc::CritScope cs_render(&crit_render_); + RETURN_ON_ERR(MaybeInitializeCapture(processing_config)); + } + rtc::CritScope cs_capture(&crit_capture_); assert(processing_config.input_stream().num_frames() == - api_format_.input_stream().num_frames()); + formats_.api_format.input_stream().num_frames()); #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP - if (debug_file_->Open()) { + if (debug_dump_.debug_file->Open()) { RETURN_ON_ERR(WriteConfigMessage(false)); - event_msg_->set_type(audioproc::Event::STREAM); - audioproc::Stream* msg = event_msg_->mutable_stream(); + debug_dump_.capture.event_msg->set_type(audioproc::Event::STREAM); + audioproc::Stream* msg = debug_dump_.capture.event_msg->mutable_stream(); const size_t channel_size = - sizeof(float) * api_format_.input_stream().num_frames(); - for (int i = 0; i < api_format_.input_stream().num_channels(); ++i) + sizeof(float) * formats_.api_format.input_stream().num_frames(); + for (size_t i = 0; i < formats_.api_format.input_stream().num_channels(); + ++i) msg->add_input_channel(src[i], channel_size); } #endif - capture_audio_->CopyFrom(src, api_format_.input_stream()); + capture_.capture_audio->CopyFrom(src, formats_.api_format.input_stream()); RETURN_ON_ERR(ProcessStreamLocked()); - capture_audio_->CopyTo(api_format_.output_stream(), dest); + capture_.capture_audio->CopyTo(formats_.api_format.output_stream(), dest); #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP - if (debug_file_->Open()) { - audioproc::Stream* msg = event_msg_->mutable_stream(); + if (debug_dump_.debug_file->Open()) { + audioproc::Stream* msg = debug_dump_.capture.event_msg->mutable_stream(); const size_t channel_size = - sizeof(float) * api_format_.output_stream().num_frames(); - for (int i = 0; i < api_format_.output_stream().num_channels(); ++i) + sizeof(float) * formats_.api_format.output_stream().num_frames(); + for (size_t i = 0; i < formats_.api_format.output_stream().num_channels(); + ++i) msg->add_output_channel(dest[i], channel_size); - 
RETURN_ON_ERR(WriteMessageToDebugFile()); + RETURN_ON_ERR(WriteMessageToDebugFile(debug_dump_.debug_file.get(), + &crit_debug_, &debug_dump_.capture)); } #endif @@ -570,7 +655,20 @@ int AudioProcessingImpl::ProcessStream(const float* const* src, } int AudioProcessingImpl::ProcessStream(AudioFrame* frame) { - CriticalSectionScoped crit_scoped(crit_); + TRACE_EVENT0("webrtc", "AudioProcessing::ProcessStream_AudioFrame"); + { + // Acquire the capture lock in order to safely call the function + // that retrieves the render side data. This function accesses apm + // getters that need the capture lock held when being called. + // The lock needs to be released as + // public_submodules_->echo_control_mobile->is_enabled() acquires this lock + // as well. + rtc::CritScope cs_capture(&crit_capture_); + public_submodules_->echo_cancellation->ReadQueuedRenderData(); + public_submodules_->echo_control_mobile->ReadQueuedRenderData(); + public_submodules_->gain_control->ReadQueuedRenderData(); + } + if (!frame) { return kNullPointerError; } @@ -581,46 +679,62 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) { frame->sample_rate_hz_ != kSampleRate48kHz) { return kBadSampleRateError; } - if (echo_control_mobile_->is_enabled() && + + if (public_submodules_->echo_control_mobile->is_enabled() && frame->sample_rate_hz_ > kMaxAECMSampleRateHz) { LOG(LS_ERROR) << "AECM only supports 16 or 8 kHz sample rates"; return kUnsupportedComponentError; } - // TODO(ajm): The input and output rates and channels are currently - // constrained to be identical in the int16 interface. - ProcessingConfig processing_config = api_format_; + ProcessingConfig processing_config; + { + // Acquire lock for the access of api_format. + // The lock is released immediately due to the conditional + // reinitialization. + rtc::CritScope cs_capture(&crit_capture_); + // TODO(ajm): The input and output rates and channels are currently + // constrained to be identical in the int16 interface.
+ processing_config = formats_.api_format; + } processing_config.input_stream().set_sample_rate_hz(frame->sample_rate_hz_); processing_config.input_stream().set_num_channels(frame->num_channels_); processing_config.output_stream().set_sample_rate_hz(frame->sample_rate_hz_); processing_config.output_stream().set_num_channels(frame->num_channels_); - RETURN_ON_ERR(MaybeInitializeLocked(processing_config)); - if (frame->samples_per_channel_ != api_format_.input_stream().num_frames()) { + { + // Do conditional reinitialization. + rtc::CritScope cs_render(&crit_render_); + RETURN_ON_ERR(MaybeInitializeCapture(processing_config)); + } + rtc::CritScope cs_capture(&crit_capture_); + if (frame->samples_per_channel_ != + formats_.api_format.input_stream().num_frames()) { return kBadDataLengthError; } #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP - if (debug_file_->Open()) { - event_msg_->set_type(audioproc::Event::STREAM); - audioproc::Stream* msg = event_msg_->mutable_stream(); + if (debug_dump_.debug_file->Open()) { + debug_dump_.capture.event_msg->set_type(audioproc::Event::STREAM); + audioproc::Stream* msg = debug_dump_.capture.event_msg->mutable_stream(); const size_t data_size = sizeof(int16_t) * frame->samples_per_channel_ * frame->num_channels_; msg->set_input_data(frame->data_, data_size); } #endif - capture_audio_->DeinterleaveFrom(frame); + capture_.capture_audio->DeinterleaveFrom(frame); RETURN_ON_ERR(ProcessStreamLocked()); - capture_audio_->InterleaveTo(frame, output_copy_needed(is_data_processed())); + capture_.capture_audio->InterleaveTo(frame, + output_copy_needed(is_data_processed())); #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP - if (debug_file_->Open()) { - audioproc::Stream* msg = event_msg_->mutable_stream(); + if (debug_dump_.debug_file->Open()) { + audioproc::Stream* msg = debug_dump_.capture.event_msg->mutable_stream(); const size_t data_size = sizeof(int16_t) * frame->samples_per_channel_ * frame->num_channels_; msg->set_output_data(frame->data_, data_size); - 
RETURN_ON_ERR(WriteMessageToDebugFile()); + RETURN_ON_ERR(WriteMessageToDebugFile(debug_dump_.debug_file.get(), + &crit_debug_, &debug_dump_.capture)); } #endif @@ -629,22 +743,25 @@ int AudioProcessingImpl::ProcessStream(AudioFrame* frame) { int AudioProcessingImpl::ProcessStreamLocked() { #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP - if (debug_file_->Open()) { - audioproc::Stream* msg = event_msg_->mutable_stream(); - msg->set_delay(stream_delay_ms_); - msg->set_drift(echo_cancellation_->stream_drift_samples()); + if (debug_dump_.debug_file->Open()) { + audioproc::Stream* msg = debug_dump_.capture.event_msg->mutable_stream(); + msg->set_delay(capture_nonlocked_.stream_delay_ms); + msg->set_drift( + public_submodules_->echo_cancellation->stream_drift_samples()); msg->set_level(gain_control()->stream_analog_level()); - msg->set_keypress(key_pressed_); + msg->set_keypress(capture_.key_pressed); } #endif MaybeUpdateHistograms(); - AudioBuffer* ca = capture_audio_.get(); // For brevity. + AudioBuffer* ca = capture_.capture_audio.get(); // For brevity. 
- if (use_new_agc_ && gain_control_->is_enabled()) { - agc_manager_->AnalyzePreProcess(ca->channels()[0], ca->num_channels(), - fwd_proc_format_.num_frames()); + if (constants_.use_new_agc && + public_submodules_->gain_control->is_enabled()) { + private_submodules_->agc_manager->AnalyzePreProcess( + ca->channels()[0], ca->num_channels(), + capture_nonlocked_.fwd_proc_format.num_frames()); } bool data_processed = is_data_processed(); @@ -652,34 +769,41 @@ int AudioProcessingImpl::ProcessStreamLocked() { ca->SplitIntoFrequencyBands(); } - if (intelligibility_enabled_) { - intelligibility_enhancer_->AnalyzeCaptureAudio( - ca->split_channels_f(kBand0To8kHz), split_rate_, ca->num_channels()); + if (constants_.intelligibility_enabled) { + public_submodules_->intelligibility_enhancer->AnalyzeCaptureAudio( + ca->split_channels_f(kBand0To8kHz), capture_nonlocked_.split_rate, + ca->num_channels()); } - if (beamformer_enabled_) { - beamformer_->ProcessChunk(*ca->split_data_f(), ca->split_data_f()); + if (capture_nonlocked_.beamformer_enabled) { + private_submodules_->beamformer->ProcessChunk(*ca->split_data_f(), + ca->split_data_f()); ca->set_num_channels(1); } - RETURN_ON_ERR(high_pass_filter_->ProcessCaptureAudio(ca)); - RETURN_ON_ERR(gain_control_->AnalyzeCaptureAudio(ca)); - RETURN_ON_ERR(noise_suppression_->AnalyzeCaptureAudio(ca)); - RETURN_ON_ERR(echo_cancellation_->ProcessCaptureAudio(ca)); + public_submodules_->high_pass_filter->ProcessCaptureAudio(ca); + RETURN_ON_ERR(public_submodules_->gain_control->AnalyzeCaptureAudio(ca)); + public_submodules_->noise_suppression->AnalyzeCaptureAudio(ca); + RETURN_ON_ERR(public_submodules_->echo_cancellation->ProcessCaptureAudio(ca)); - if (echo_control_mobile_->is_enabled() && noise_suppression_->is_enabled()) { + if (public_submodules_->echo_control_mobile->is_enabled() && + public_submodules_->noise_suppression->is_enabled()) { ca->CopyLowPassToReference(); } - RETURN_ON_ERR(noise_suppression_->ProcessCaptureAudio(ca)); - 
RETURN_ON_ERR(echo_control_mobile_->ProcessCaptureAudio(ca)); - RETURN_ON_ERR(voice_detection_->ProcessCaptureAudio(ca)); + public_submodules_->noise_suppression->ProcessCaptureAudio(ca); + RETURN_ON_ERR( + public_submodules_->echo_control_mobile->ProcessCaptureAudio(ca)); + public_submodules_->voice_detection->ProcessCaptureAudio(ca); - if (use_new_agc_ && gain_control_->is_enabled() && - (!beamformer_enabled_ || beamformer_->is_target_present())) { - agc_manager_->Process(ca->split_bands_const(0)[kBand0To8kHz], - ca->num_frames_per_band(), split_rate_); + if (constants_.use_new_agc && + public_submodules_->gain_control->is_enabled() && + (!capture_nonlocked_.beamformer_enabled || + private_submodules_->beamformer->is_target_present())) { + private_submodules_->agc_manager->Process( + ca->split_bands_const(0)[kBand0To8kHz], ca->num_frames_per_band(), + capture_nonlocked_.split_rate); } - RETURN_ON_ERR(gain_control_->ProcessCaptureAudio(ca)); + RETURN_ON_ERR(public_submodules_->gain_control->ProcessCaptureAudio(ca)); if (synthesis_needed(data_processed)) { ca->MergeFrequencyBands(); @@ -687,21 +811,23 @@ int AudioProcessingImpl::ProcessStreamLocked() { // TODO(aluebs): Investigate if the transient suppression placement should be // before or after the AGC. - if (transient_suppressor_enabled_) { + if (capture_.transient_suppressor_enabled) { float voice_probability = - agc_manager_.get() ? agc_manager_->voice_probability() : 1.f; + private_submodules_->agc_manager.get() + ? private_submodules_->agc_manager->voice_probability() + : 1.f; - transient_suppressor_->Suppress( + public_submodules_->transient_suppressor->Suppress( ca->channels_f()[0], ca->num_frames(), ca->num_channels(), ca->split_bands_const_f(0)[kBand0To8kHz], ca->num_frames_per_band(), ca->keyboard_data(), ca->num_keyboard_frames(), voice_probability, - key_pressed_); + capture_.key_pressed); } // The level estimator operates on the recombined data. 
- RETURN_ON_ERR(level_estimator_->ProcessStream(ca)); + public_submodules_->level_estimator->ProcessStream(ca); - was_stream_delay_set_ = false; + capture_.was_stream_delay_set = false; return kNoError; } @@ -709,13 +835,15 @@ int AudioProcessingImpl::AnalyzeReverseStream(const float* const* data, size_t samples_per_channel, int rev_sample_rate_hz, ChannelLayout layout) { + TRACE_EVENT0("webrtc", "AudioProcessing::AnalyzeReverseStream_ChannelLayout"); + rtc::CritScope cs(&crit_render_); const StreamConfig reverse_config = { rev_sample_rate_hz, ChannelsFromLayout(layout), LayoutHasKeyboard(layout), }; if (samples_per_channel != reverse_config.num_frames()) { return kBadDataLengthError; } - return AnalyzeReverseStream(data, reverse_config, reverse_config); + return AnalyzeReverseStreamLocked(data, reverse_config, reverse_config); } int AudioProcessingImpl::ProcessReverseStream( @@ -723,13 +851,17 @@ int AudioProcessingImpl::ProcessReverseStream( const StreamConfig& reverse_input_config, const StreamConfig& reverse_output_config, float* const* dest) { - RETURN_ON_ERR( - AnalyzeReverseStream(src, reverse_input_config, reverse_output_config)); + TRACE_EVENT0("webrtc", "AudioProcessing::ProcessReverseStream_StreamConfig"); + rtc::CritScope cs(&crit_render_); + RETURN_ON_ERR(AnalyzeReverseStreamLocked(src, reverse_input_config, + reverse_output_config)); if (is_rev_processed()) { - render_audio_->CopyTo(api_format_.reverse_output_stream(), dest); - } else if (rev_conversion_needed()) { - render_converter_->Convert(src, reverse_input_config.num_samples(), dest, - reverse_output_config.num_samples()); + render_.render_audio->CopyTo(formats_.api_format.reverse_output_stream(), + dest); + } else if (render_check_rev_conversion_needed()) { + render_.render_converter->Convert(src, reverse_input_config.num_samples(), + dest, + reverse_output_config.num_samples()); } else { CopyAudioIfNeeded(src, reverse_input_config.num_frames(), reverse_input_config.num_channels(), dest); @@ 
-738,55 +870,61 @@ int AudioProcessingImpl::ProcessReverseStream( return kNoError; } -int AudioProcessingImpl::AnalyzeReverseStream( +int AudioProcessingImpl::AnalyzeReverseStreamLocked( const float* const* src, const StreamConfig& reverse_input_config, const StreamConfig& reverse_output_config) { - CriticalSectionScoped crit_scoped(crit_); - if (src == NULL) { + if (src == nullptr) { return kNullPointerError; } - if (reverse_input_config.num_channels() <= 0) { + if (reverse_input_config.num_channels() == 0) { return kBadNumberChannelsError; } - ProcessingConfig processing_config = api_format_; + ProcessingConfig processing_config = formats_.api_format; processing_config.reverse_input_stream() = reverse_input_config; processing_config.reverse_output_stream() = reverse_output_config; - RETURN_ON_ERR(MaybeInitializeLocked(processing_config)); + RETURN_ON_ERR(MaybeInitializeRender(processing_config)); assert(reverse_input_config.num_frames() == - api_format_.reverse_input_stream().num_frames()); + formats_.api_format.reverse_input_stream().num_frames()); #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP - if (debug_file_->Open()) { - event_msg_->set_type(audioproc::Event::REVERSE_STREAM); - audioproc::ReverseStream* msg = event_msg_->mutable_reverse_stream(); + if (debug_dump_.debug_file->Open()) { + debug_dump_.render.event_msg->set_type(audioproc::Event::REVERSE_STREAM); + audioproc::ReverseStream* msg = + debug_dump_.render.event_msg->mutable_reverse_stream(); const size_t channel_size = - sizeof(float) * api_format_.reverse_input_stream().num_frames(); - for (int i = 0; i < api_format_.reverse_input_stream().num_channels(); ++i) + sizeof(float) * formats_.api_format.reverse_input_stream().num_frames(); + for (size_t i = 0; + i < formats_.api_format.reverse_input_stream().num_channels(); ++i) msg->add_channel(src[i], channel_size); - RETURN_ON_ERR(WriteMessageToDebugFile()); + RETURN_ON_ERR(WriteMessageToDebugFile(debug_dump_.debug_file.get(), + &crit_debug_, 
&debug_dump_.render)); } #endif - render_audio_->CopyFrom(src, api_format_.reverse_input_stream()); + render_.render_audio->CopyFrom(src, + formats_.api_format.reverse_input_stream()); return ProcessReverseStreamLocked(); } int AudioProcessingImpl::ProcessReverseStream(AudioFrame* frame) { + TRACE_EVENT0("webrtc", "AudioProcessing::ProcessReverseStream_AudioFrame"); RETURN_ON_ERR(AnalyzeReverseStream(frame)); + rtc::CritScope cs(&crit_render_); if (is_rev_processed()) { - render_audio_->InterleaveTo(frame, true); + render_.render_audio->InterleaveTo(frame, true); } return kNoError; } int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) { - CriticalSectionScoped crit_scoped(crit_); - if (frame == NULL) { + TRACE_EVENT0("webrtc", "AudioProcessing::AnalyzeReverseStream_AudioFrame"); + rtc::CritScope cs(&crit_render_); + if (frame == nullptr) { return kNullPointerError; } // Must be a native rate. @@ -797,7 +935,8 @@ int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) { return kBadSampleRateError; } // This interface does not tolerate different forward and reverse rates. 
- if (frame->sample_rate_hz_ != api_format_.input_stream().sample_rate_hz()) { + if (frame->sample_rate_hz_ != + formats_.api_format.input_stream().sample_rate_hz()) { return kBadSampleRateError; } @@ -805,7 +944,7 @@ int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) { return kBadNumberChannelsError; } - ProcessingConfig processing_config = api_format_; + ProcessingConfig processing_config = formats_.api_format; processing_config.reverse_input_stream().set_sample_rate_hz( frame->sample_rate_hz_); processing_config.reverse_input_stream().set_num_channels( @@ -815,44 +954,52 @@ int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) { processing_config.reverse_output_stream().set_num_channels( frame->num_channels_); - RETURN_ON_ERR(MaybeInitializeLocked(processing_config)); + RETURN_ON_ERR(MaybeInitializeRender(processing_config)); if (frame->samples_per_channel_ != - api_format_.reverse_input_stream().num_frames()) { + formats_.api_format.reverse_input_stream().num_frames()) { return kBadDataLengthError; } #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP - if (debug_file_->Open()) { - event_msg_->set_type(audioproc::Event::REVERSE_STREAM); - audioproc::ReverseStream* msg = event_msg_->mutable_reverse_stream(); + if (debug_dump_.debug_file->Open()) { + debug_dump_.render.event_msg->set_type(audioproc::Event::REVERSE_STREAM); + audioproc::ReverseStream* msg = + debug_dump_.render.event_msg->mutable_reverse_stream(); const size_t data_size = sizeof(int16_t) * frame->samples_per_channel_ * frame->num_channels_; msg->set_data(frame->data_, data_size); - RETURN_ON_ERR(WriteMessageToDebugFile()); + RETURN_ON_ERR(WriteMessageToDebugFile(debug_dump_.debug_file.get(), + &crit_debug_, &debug_dump_.render)); } #endif - render_audio_->DeinterleaveFrom(frame); + render_.render_audio->DeinterleaveFrom(frame); return ProcessReverseStreamLocked(); } int AudioProcessingImpl::ProcessReverseStreamLocked() { - AudioBuffer* ra = render_audio_.get(); // For brevity. 
- if (rev_proc_format_.sample_rate_hz() == kSampleRate32kHz) { + AudioBuffer* ra = render_.render_audio.get(); // For brevity. + if (formats_.rev_proc_format.sample_rate_hz() == kSampleRate32kHz) { ra->SplitIntoFrequencyBands(); } - if (intelligibility_enabled_) { - intelligibility_enhancer_->ProcessRenderAudio( - ra->split_channels_f(kBand0To8kHz), split_rate_, ra->num_channels()); + if (constants_.intelligibility_enabled) { + // Currently run in single-threaded mode when the intelligibility + // enhancer is activated. + // TODO(peah): Fix to be properly multi-threaded. + rtc::CritScope cs(&crit_capture_); + public_submodules_->intelligibility_enhancer->ProcessRenderAudio( + ra->split_channels_f(kBand0To8kHz), capture_nonlocked_.split_rate, + ra->num_channels()); } - RETURN_ON_ERR(echo_cancellation_->ProcessRenderAudio(ra)); - RETURN_ON_ERR(echo_control_mobile_->ProcessRenderAudio(ra)); - if (!use_new_agc_) { - RETURN_ON_ERR(gain_control_->ProcessRenderAudio(ra)); + RETURN_ON_ERR(public_submodules_->echo_cancellation->ProcessRenderAudio(ra)); + RETURN_ON_ERR( + public_submodules_->echo_control_mobile->ProcessRenderAudio(ra)); + if (!constants_.use_new_agc) { + RETURN_ON_ERR(public_submodules_->gain_control->ProcessRenderAudio(ra)); } - if (rev_proc_format_.sample_rate_hz() == kSampleRate32kHz && + if (formats_.rev_proc_format.sample_rate_hz() == kSampleRate32kHz && is_rev_processed()) { ra->MergeFrequencyBands(); } @@ -861,9 +1008,10 @@ int AudioProcessingImpl::ProcessReverseStreamLocked() { } int AudioProcessingImpl::set_stream_delay_ms(int delay) { + rtc::CritScope cs(&crit_capture_); Error retval = kNoError; - was_stream_delay_set_ = true; - delay += delay_offset_ms_; + capture_.was_stream_delay_set = true; + delay += capture_.delay_offset_ms; if (delay < 0) { delay = 0; @@ -876,50 +1024,56 @@ int AudioProcessingImpl::set_stream_delay_ms(int delay) { retval = kBadStreamParameterWarning; } - stream_delay_ms_ = delay; + capture_nonlocked_.stream_delay_ms = delay; 
return retval; } int AudioProcessingImpl::stream_delay_ms() const { - return stream_delay_ms_; + // Used as callback from submodules, hence locking is not allowed. + return capture_nonlocked_.stream_delay_ms; } bool AudioProcessingImpl::was_stream_delay_set() const { - return was_stream_delay_set_; + // Used as callback from submodules, hence locking is not allowed. + return capture_.was_stream_delay_set; } void AudioProcessingImpl::set_stream_key_pressed(bool key_pressed) { - key_pressed_ = key_pressed; + rtc::CritScope cs(&crit_capture_); + capture_.key_pressed = key_pressed; } void AudioProcessingImpl::set_delay_offset_ms(int offset) { - CriticalSectionScoped crit_scoped(crit_); - delay_offset_ms_ = offset; + rtc::CritScope cs(&crit_capture_); + capture_.delay_offset_ms = offset; } int AudioProcessingImpl::delay_offset_ms() const { - return delay_offset_ms_; + rtc::CritScope cs(&crit_capture_); + return capture_.delay_offset_ms; } int AudioProcessingImpl::StartDebugRecording( const char filename[AudioProcessing::kMaxFilenameSize]) { - CriticalSectionScoped crit_scoped(crit_); + // Run in a single-threaded manner. + rtc::CritScope cs_render(&crit_render_); + rtc::CritScope cs_capture(&crit_capture_); static_assert(kMaxFilenameSize == FileWrapper::kMaxFileNameSize, ""); - if (filename == NULL) { + if (filename == nullptr) { return kNullPointerError; } #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP // Stop any ongoing recording. 
- if (debug_file_->Open()) { - if (debug_file_->CloseFile() == -1) { + if (debug_dump_.debug_file->Open()) { + if (debug_dump_.debug_file->CloseFile() == -1) { return kFileError; } } - if (debug_file_->OpenFile(filename, false) == -1) { - debug_file_->CloseFile(); + if (debug_dump_.debug_file->OpenFile(filename, false) == -1) { + debug_dump_.debug_file->CloseFile(); return kFileError; } @@ -932,21 +1086,23 @@ int AudioProcessingImpl::StartDebugRecording( } int AudioProcessingImpl::StartDebugRecording(FILE* handle) { - CriticalSectionScoped crit_scoped(crit_); + // Run in a single-threaded manner. + rtc::CritScope cs_render(&crit_render_); + rtc::CritScope cs_capture(&crit_capture_); - if (handle == NULL) { + if (handle == nullptr) { return kNullPointerError; } #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP // Stop any ongoing recording. - if (debug_file_->Open()) { - if (debug_file_->CloseFile() == -1) { + if (debug_dump_.debug_file->Open()) { + if (debug_dump_.debug_file->CloseFile() == -1) { return kFileError; } } - if (debug_file_->OpenFromFileHandle(handle, true, false) == -1) { + if (debug_dump_.debug_file->OpenFromFileHandle(handle, true, false) == -1) { return kFileError; } @@ -960,17 +1116,22 @@ int AudioProcessingImpl::StartDebugRecording(FILE* handle) { int AudioProcessingImpl::StartDebugRecordingForPlatformFile( rtc::PlatformFile handle) { + // Run in a single-threaded manner. + rtc::CritScope cs_render(&crit_render_); + rtc::CritScope cs_capture(&crit_capture_); FILE* stream = rtc::FdopenPlatformFileForWriting(handle); return StartDebugRecording(stream); } int AudioProcessingImpl::StopDebugRecording() { - CriticalSectionScoped crit_scoped(crit_); + // Run in a single-threaded manner. + rtc::CritScope cs_render(&crit_render_); + rtc::CritScope cs_capture(&crit_capture_); #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP // We just return if recording hasn't started. 
- if (debug_file_->Open()) { - if (debug_file_->CloseFile() == -1) { + if (debug_dump_.debug_file->Open()) { + if (debug_dump_.debug_file->CloseFile() == -1) { return kFileError; } } @@ -981,58 +1142,87 @@ int AudioProcessingImpl::StopDebugRecording() { } EchoCancellation* AudioProcessingImpl::echo_cancellation() const { - return echo_cancellation_; + // Adding a lock here has no effect as it allows any access to the submodule + // from the returned pointer. + return public_submodules_->echo_cancellation; } EchoControlMobile* AudioProcessingImpl::echo_control_mobile() const { - return echo_control_mobile_; + // Adding a lock here has no effect as it allows any access to the submodule + // from the returned pointer. + return public_submodules_->echo_control_mobile; } GainControl* AudioProcessingImpl::gain_control() const { - if (use_new_agc_) { - return gain_control_for_new_agc_.get(); + // Adding a lock here has no effect as it allows any access to the submodule + // from the returned pointer. + if (constants_.use_new_agc) { + return public_submodules_->gain_control_for_new_agc.get(); } - return gain_control_; + return public_submodules_->gain_control; } HighPassFilter* AudioProcessingImpl::high_pass_filter() const { - return high_pass_filter_; + // Adding a lock here has no effect as it allows any access to the submodule + // from the returned pointer. + return public_submodules_->high_pass_filter.get(); } LevelEstimator* AudioProcessingImpl::level_estimator() const { - return level_estimator_; + // Adding a lock here has no effect as it allows any access to the submodule + // from the returned pointer. + return public_submodules_->level_estimator.get(); } NoiseSuppression* AudioProcessingImpl::noise_suppression() const { - return noise_suppression_; + // Adding a lock here has no effect as it allows any access to the submodule + // from the returned pointer. 
+ return public_submodules_->noise_suppression.get(); } VoiceDetection* AudioProcessingImpl::voice_detection() const { - return voice_detection_; + // Adding a lock here has no effect as it allows any access to the submodule + // from the returned pointer. + return public_submodules_->voice_detection.get(); } bool AudioProcessingImpl::is_data_processed() const { - if (beamformer_enabled_) { + if (capture_nonlocked_.beamformer_enabled) { return true; } int enabled_count = 0; - for (auto item : component_list_) { + for (auto item : private_submodules_->component_list) { if (item->is_component_enabled()) { enabled_count++; } } + if (public_submodules_->high_pass_filter->is_enabled()) { + enabled_count++; + } + if (public_submodules_->noise_suppression->is_enabled()) { + enabled_count++; + } + if (public_submodules_->level_estimator->is_enabled()) { + enabled_count++; + } + if (public_submodules_->voice_detection->is_enabled()) { + enabled_count++; + } - // Data is unchanged if no components are enabled, or if only level_estimator_ - // or voice_detection_ is enabled. + // Data is unchanged if no components are enabled, or if only + // public_submodules_->level_estimator + // or public_submodules_->voice_detection is enabled. if (enabled_count == 0) { return false; } else if (enabled_count == 1) { - if (level_estimator_->is_enabled() || voice_detection_->is_enabled()) { + if (public_submodules_->level_estimator->is_enabled() || + public_submodules_->voice_detection->is_enabled()) { return false; } } else if (enabled_count == 2) { - if (level_estimator_->is_enabled() && voice_detection_->is_enabled()) { + if (public_submodules_->level_estimator->is_enabled() && + public_submodules_->voice_detection->is_enabled()) { return false; } } @@ -1041,149 +1231,194 @@ bool AudioProcessingImpl::is_data_processed() const { bool AudioProcessingImpl::output_copy_needed(bool is_data_processed) const { // Check if we've upmixed or downmixed the audio. 
- return ((api_format_.output_stream().num_channels() != - api_format_.input_stream().num_channels()) || - is_data_processed || transient_suppressor_enabled_); + return ((formats_.api_format.output_stream().num_channels() != + formats_.api_format.input_stream().num_channels()) || + is_data_processed || capture_.transient_suppressor_enabled); } bool AudioProcessingImpl::synthesis_needed(bool is_data_processed) const { return (is_data_processed && - (fwd_proc_format_.sample_rate_hz() == kSampleRate32kHz || - fwd_proc_format_.sample_rate_hz() == kSampleRate48kHz)); + (capture_nonlocked_.fwd_proc_format.sample_rate_hz() == + kSampleRate32kHz || + capture_nonlocked_.fwd_proc_format.sample_rate_hz() == + kSampleRate48kHz)); } bool AudioProcessingImpl::analysis_needed(bool is_data_processed) const { - if (!is_data_processed && !voice_detection_->is_enabled() && - !transient_suppressor_enabled_) { - // Only level_estimator_ is enabled. + if (!is_data_processed && + !public_submodules_->voice_detection->is_enabled() && + !capture_.transient_suppressor_enabled) { + // Only public_submodules_->level_estimator is enabled. return false; - } else if (fwd_proc_format_.sample_rate_hz() == kSampleRate32kHz || - fwd_proc_format_.sample_rate_hz() == kSampleRate48kHz) { - // Something besides level_estimator_ is enabled, and we have super-wb. + } else if (capture_nonlocked_.fwd_proc_format.sample_rate_hz() == + kSampleRate32kHz || + capture_nonlocked_.fwd_proc_format.sample_rate_hz() == + kSampleRate48kHz) { + // Something besides public_submodules_->level_estimator is enabled, and we + // have super-wb. 
return true; } return false; } bool AudioProcessingImpl::is_rev_processed() const { - return intelligibility_enabled_ && intelligibility_enhancer_->active(); + return constants_.intelligibility_enabled && + public_submodules_->intelligibility_enhancer->active(); +} + +bool AudioProcessingImpl::render_check_rev_conversion_needed() const { + return rev_conversion_needed(); } bool AudioProcessingImpl::rev_conversion_needed() const { - return (api_format_.reverse_input_stream() != - api_format_.reverse_output_stream()); + return (formats_.api_format.reverse_input_stream() != + formats_.api_format.reverse_output_stream()); } void AudioProcessingImpl::InitializeExperimentalAgc() { - if (use_new_agc_) { - if (!agc_manager_.get()) { - agc_manager_.reset(new AgcManagerDirect(gain_control_, - gain_control_for_new_agc_.get(), - agc_startup_min_volume_)); + if (constants_.use_new_agc) { + if (!private_submodules_->agc_manager.get()) { + private_submodules_->agc_manager.reset(new AgcManagerDirect( + public_submodules_->gain_control, + public_submodules_->gain_control_for_new_agc.get(), + constants_.agc_startup_min_volume)); } - agc_manager_->Initialize(); - agc_manager_->SetCaptureMuted(output_will_be_muted_); + private_submodules_->agc_manager->Initialize(); + private_submodules_->agc_manager->SetCaptureMuted( + capture_.output_will_be_muted); } } void AudioProcessingImpl::InitializeTransient() { - if (transient_suppressor_enabled_) { - if (!transient_suppressor_.get()) { - transient_suppressor_.reset(new TransientSuppressor()); + if (capture_.transient_suppressor_enabled) { + if (!public_submodules_->transient_suppressor.get()) { + public_submodules_->transient_suppressor.reset(new TransientSuppressor()); } - transient_suppressor_->Initialize( - fwd_proc_format_.sample_rate_hz(), split_rate_, - api_format_.output_stream().num_channels()); + public_submodules_->transient_suppressor->Initialize( + capture_nonlocked_.fwd_proc_format.sample_rate_hz(), + 
capture_nonlocked_.split_rate, + num_proc_channels()); } } void AudioProcessingImpl::InitializeBeamformer() { - if (beamformer_enabled_) { - if (!beamformer_) { - beamformer_.reset( - new NonlinearBeamformer(array_geometry_, target_direction_)); + if (capture_nonlocked_.beamformer_enabled) { + if (!private_submodules_->beamformer) { + private_submodules_->beamformer.reset(new NonlinearBeamformer( + capture_.array_geometry, capture_.target_direction)); } - beamformer_->Initialize(kChunkSizeMs, split_rate_); + private_submodules_->beamformer->Initialize(kChunkSizeMs, + capture_nonlocked_.split_rate); } } void AudioProcessingImpl::InitializeIntelligibility() { - if (intelligibility_enabled_) { + if (constants_.intelligibility_enabled) { IntelligibilityEnhancer::Config config; - config.sample_rate_hz = split_rate_; - config.num_capture_channels = capture_audio_->num_channels(); - config.num_render_channels = render_audio_->num_channels(); - intelligibility_enhancer_.reset(new IntelligibilityEnhancer(config)); + config.sample_rate_hz = capture_nonlocked_.split_rate; + config.num_capture_channels = capture_.capture_audio->num_channels(); + config.num_render_channels = render_.render_audio->num_channels(); + public_submodules_->intelligibility_enhancer.reset( + new IntelligibilityEnhancer(config)); } } +void AudioProcessingImpl::InitializeHighPassFilter() { + public_submodules_->high_pass_filter->Initialize(num_proc_channels(), + proc_sample_rate_hz()); +} + +void AudioProcessingImpl::InitializeNoiseSuppression() { + public_submodules_->noise_suppression->Initialize(num_proc_channels(), + proc_sample_rate_hz()); +} + +void AudioProcessingImpl::InitializeLevelEstimator() { + public_submodules_->level_estimator->Initialize(); +} + +void AudioProcessingImpl::InitializeVoiceDetection() { + public_submodules_->voice_detection->Initialize(proc_split_sample_rate_hz()); +} + void AudioProcessingImpl::MaybeUpdateHistograms() { static const int kMinDiffDelayMs = 60; if 
(echo_cancellation()->is_enabled()) { // Activate delay_jumps_ counters if we know echo_cancellation is runnning. // If a stream has echo we know that the echo_cancellation is in process. - if (stream_delay_jumps_ == -1 && echo_cancellation()->stream_has_echo()) { - stream_delay_jumps_ = 0; + if (capture_.stream_delay_jumps == -1 && + echo_cancellation()->stream_has_echo()) { + capture_.stream_delay_jumps = 0; } - if (aec_system_delay_jumps_ == -1 && + if (capture_.aec_system_delay_jumps == -1 && echo_cancellation()->stream_has_echo()) { - aec_system_delay_jumps_ = 0; + capture_.aec_system_delay_jumps = 0; } // Detect a jump in platform reported system delay and log the difference. - const int diff_stream_delay_ms = stream_delay_ms_ - last_stream_delay_ms_; - if (diff_stream_delay_ms > kMinDiffDelayMs && last_stream_delay_ms_ != 0) { - RTC_HISTOGRAM_COUNTS("WebRTC.Audio.PlatformReportedStreamDelayJump", - diff_stream_delay_ms, kMinDiffDelayMs, 1000, 100); - if (stream_delay_jumps_ == -1) { - stream_delay_jumps_ = 0; // Activate counter if needed. + const int diff_stream_delay_ms = + capture_nonlocked_.stream_delay_ms - capture_.last_stream_delay_ms; + if (diff_stream_delay_ms > kMinDiffDelayMs && + capture_.last_stream_delay_ms != 0) { + RTC_HISTOGRAM_COUNTS_SPARSE( + "WebRTC.Audio.PlatformReportedStreamDelayJump", diff_stream_delay_ms, + kMinDiffDelayMs, 1000, 100); + if (capture_.stream_delay_jumps == -1) { + capture_.stream_delay_jumps = 0; // Activate counter if needed. } - stream_delay_jumps_++; + capture_.stream_delay_jumps++; } - last_stream_delay_ms_ = stream_delay_ms_; + capture_.last_stream_delay_ms = capture_nonlocked_.stream_delay_ms; // Detect a jump in AEC system delay and log the difference. 
- const int frames_per_ms = rtc::CheckedDivExact(split_rate_, 1000); + const int frames_per_ms = + rtc::CheckedDivExact(capture_nonlocked_.split_rate, 1000); const int aec_system_delay_ms = WebRtcAec_system_delay(echo_cancellation()->aec_core()) / frames_per_ms; const int diff_aec_system_delay_ms = - aec_system_delay_ms - last_aec_system_delay_ms_; + aec_system_delay_ms - capture_.last_aec_system_delay_ms; if (diff_aec_system_delay_ms > kMinDiffDelayMs && - last_aec_system_delay_ms_ != 0) { - RTC_HISTOGRAM_COUNTS("WebRTC.Audio.AecSystemDelayJump", - diff_aec_system_delay_ms, kMinDiffDelayMs, 1000, - 100); - if (aec_system_delay_jumps_ == -1) { - aec_system_delay_jumps_ = 0; // Activate counter if needed. + capture_.last_aec_system_delay_ms != 0) { + RTC_HISTOGRAM_COUNTS_SPARSE("WebRTC.Audio.AecSystemDelayJump", + diff_aec_system_delay_ms, kMinDiffDelayMs, + 1000, 100); + if (capture_.aec_system_delay_jumps == -1) { + capture_.aec_system_delay_jumps = 0; // Activate counter if needed. } - aec_system_delay_jumps_++; + capture_.aec_system_delay_jumps++; } - last_aec_system_delay_ms_ = aec_system_delay_ms; + capture_.last_aec_system_delay_ms = aec_system_delay_ms; } } void AudioProcessingImpl::UpdateHistogramsOnCallEnd() { - CriticalSectionScoped crit_scoped(crit_); - if (stream_delay_jumps_ > -1) { - RTC_HISTOGRAM_ENUMERATION( + // Run in a single-threaded manner. 
+ rtc::CritScope cs_render(&crit_render_); + rtc::CritScope cs_capture(&crit_capture_); + + if (capture_.stream_delay_jumps > -1) { + RTC_HISTOGRAM_ENUMERATION_SPARSE( "WebRTC.Audio.NumOfPlatformReportedStreamDelayJumps", - stream_delay_jumps_, 51); + capture_.stream_delay_jumps, 51); } - stream_delay_jumps_ = -1; - last_stream_delay_ms_ = 0; + capture_.stream_delay_jumps = -1; + capture_.last_stream_delay_ms = 0; - if (aec_system_delay_jumps_ > -1) { - RTC_HISTOGRAM_ENUMERATION("WebRTC.Audio.NumOfAecSystemDelayJumps", - aec_system_delay_jumps_, 51); + if (capture_.aec_system_delay_jumps > -1) { + RTC_HISTOGRAM_ENUMERATION_SPARSE("WebRTC.Audio.NumOfAecSystemDelayJumps", + capture_.aec_system_delay_jumps, 51); } - aec_system_delay_jumps_ = -1; - last_aec_system_delay_ms_ = 0; + capture_.aec_system_delay_jumps = -1; + capture_.last_aec_system_delay_ms = 0; } #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP -int AudioProcessingImpl::WriteMessageToDebugFile() { - int32_t size = event_msg_->ByteSize(); +int AudioProcessingImpl::WriteMessageToDebugFile( + FileWrapper* debug_file, + rtc::CriticalSection* crit_debug, + ApmDebugDumpThreadState* debug_state) { + int32_t size = debug_state->event_msg->ByteSize(); if (size <= 0) { return kUnspecifiedError; } @@ -1192,82 +1427,100 @@ int AudioProcessingImpl::WriteMessageToDebugFile() { // pretty safe in assuming little-endian. #endif - if (!event_msg_->SerializeToString(&event_str_)) { + if (!debug_state->event_msg->SerializeToString(&debug_state->event_str)) { return kUnspecifiedError; } - // Write message preceded by its size. - if (!debug_file_->Write(&size, sizeof(int32_t))) { - return kFileError; - } - if (!debug_file_->Write(event_str_.data(), event_str_.length())) { - return kFileError; + { + // Ensure atomic writes of the message. + rtc::CritScope cs_capture(crit_debug); + // Write message preceded by its size. 
+ if (!debug_file->Write(&size, sizeof(int32_t))) { + return kFileError; + } + if (!debug_file->Write(debug_state->event_str.data(), + debug_state->event_str.length())) { + return kFileError; + } } - event_msg_->Clear(); + debug_state->event_msg->Clear(); return kNoError; } int AudioProcessingImpl::WriteInitMessage() { - event_msg_->set_type(audioproc::Event::INIT); - audioproc::Init* msg = event_msg_->mutable_init(); - msg->set_sample_rate(api_format_.input_stream().sample_rate_hz()); - msg->set_num_input_channels(api_format_.input_stream().num_channels()); - msg->set_num_output_channels(api_format_.output_stream().num_channels()); - msg->set_num_reverse_channels( - api_format_.reverse_input_stream().num_channels()); + debug_dump_.capture.event_msg->set_type(audioproc::Event::INIT); + audioproc::Init* msg = debug_dump_.capture.event_msg->mutable_init(); + msg->set_sample_rate(formats_.api_format.input_stream().sample_rate_hz()); + + msg->set_num_input_channels(static_cast<google::protobuf::int32>( + formats_.api_format.input_stream().num_channels())); + msg->set_num_output_channels(static_cast<google::protobuf::int32>( + formats_.api_format.output_stream().num_channels())); + msg->set_num_reverse_channels(static_cast<google::protobuf::int32>( + formats_.api_format.reverse_input_stream().num_channels())); msg->set_reverse_sample_rate( - api_format_.reverse_input_stream().sample_rate_hz()); - msg->set_output_sample_rate(api_format_.output_stream().sample_rate_hz()); - // TODO(ekmeyerson): Add reverse output fields to event_msg_. - - RETURN_ON_ERR(WriteMessageToDebugFile()); + formats_.api_format.reverse_input_stream().sample_rate_hz()); + msg->set_output_sample_rate( + formats_.api_format.output_stream().sample_rate_hz()); + // TODO(ekmeyerson): Add reverse output fields to + // debug_dump_.capture.event_msg. 
+ + RETURN_ON_ERR(WriteMessageToDebugFile(debug_dump_.debug_file.get(), + &crit_debug_, &debug_dump_.capture)); return kNoError; } int AudioProcessingImpl::WriteConfigMessage(bool forced) { audioproc::Config config; - config.set_aec_enabled(echo_cancellation_->is_enabled()); + config.set_aec_enabled(public_submodules_->echo_cancellation->is_enabled()); config.set_aec_delay_agnostic_enabled( - echo_cancellation_->is_delay_agnostic_enabled()); + public_submodules_->echo_cancellation->is_delay_agnostic_enabled()); config.set_aec_drift_compensation_enabled( - echo_cancellation_->is_drift_compensation_enabled()); + public_submodules_->echo_cancellation->is_drift_compensation_enabled()); config.set_aec_extended_filter_enabled( - echo_cancellation_->is_extended_filter_enabled()); - config.set_aec_suppression_level( - static_cast<int>(echo_cancellation_->suppression_level())); + public_submodules_->echo_cancellation->is_extended_filter_enabled()); + config.set_aec_suppression_level(static_cast<int>( + public_submodules_->echo_cancellation->suppression_level())); - config.set_aecm_enabled(echo_control_mobile_->is_enabled()); + config.set_aecm_enabled( + public_submodules_->echo_control_mobile->is_enabled()); config.set_aecm_comfort_noise_enabled( - echo_control_mobile_->is_comfort_noise_enabled()); - config.set_aecm_routing_mode( - static_cast<int>(echo_control_mobile_->routing_mode())); + public_submodules_->echo_control_mobile->is_comfort_noise_enabled()); + config.set_aecm_routing_mode(static_cast<int>( + public_submodules_->echo_control_mobile->routing_mode())); - config.set_agc_enabled(gain_control_->is_enabled()); - config.set_agc_mode(static_cast<int>(gain_control_->mode())); - config.set_agc_limiter_enabled(gain_control_->is_limiter_enabled()); - config.set_noise_robust_agc_enabled(use_new_agc_); + config.set_agc_enabled(public_submodules_->gain_control->is_enabled()); + config.set_agc_mode( + static_cast<int>(public_submodules_->gain_control->mode())); + 
config.set_agc_limiter_enabled( + public_submodules_->gain_control->is_limiter_enabled()); + config.set_noise_robust_agc_enabled(constants_.use_new_agc); - config.set_hpf_enabled(high_pass_filter_->is_enabled()); + config.set_hpf_enabled(public_submodules_->high_pass_filter->is_enabled()); - config.set_ns_enabled(noise_suppression_->is_enabled()); - config.set_ns_level(static_cast<int>(noise_suppression_->level())); + config.set_ns_enabled(public_submodules_->noise_suppression->is_enabled()); + config.set_ns_level( + static_cast<int>(public_submodules_->noise_suppression->level())); - config.set_transient_suppression_enabled(transient_suppressor_enabled_); + config.set_transient_suppression_enabled( + capture_.transient_suppressor_enabled); std::string serialized_config = config.SerializeAsString(); - if (!forced && last_serialized_config_ == serialized_config) { + if (!forced && + debug_dump_.capture.last_serialized_config == serialized_config) { return kNoError; } - last_serialized_config_ = serialized_config; + debug_dump_.capture.last_serialized_config = serialized_config; - event_msg_->set_type(audioproc::Event::CONFIG); - event_msg_->mutable_config()->CopyFrom(config); + debug_dump_.capture.event_msg->set_type(audioproc::Event::CONFIG); + debug_dump_.capture.event_msg->mutable_config()->CopyFrom(config); - RETURN_ON_ERR(WriteMessageToDebugFile()); + RETURN_ON_ERR(WriteMessageToDebugFile(debug_dump_.debug_file.get(), + &crit_debug_, &debug_dump_.capture)); return kNoError; } #endif // WEBRTC_AUDIOPROC_DEBUG_DUMP diff --git a/webrtc/modules/audio_processing/audio_processing_impl.h b/webrtc/modules/audio_processing/audio_processing_impl.h index 542886ee10..b310896903 100644 --- a/webrtc/modules/audio_processing/audio_processing_impl.h +++ b/webrtc/modules/audio_processing/audio_processing_impl.h @@ -15,50 +15,38 @@ #include <string> #include <vector> +#include "webrtc/base/criticalsection.h" #include "webrtc/base/scoped_ptr.h" #include 
"webrtc/base/thread_annotations.h" +#include "webrtc/modules/audio_processing/audio_buffer.h" #include "webrtc/modules/audio_processing/include/audio_processing.h" +#include "webrtc/system_wrappers/include/file_wrapper.h" + +#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP +// Files generated at build-time by the protobuf compiler. +#ifdef WEBRTC_ANDROID_PLATFORM_BUILD +#include "external/webrtc/webrtc/modules/audio_processing/debug.pb.h" +#else +#include "webrtc/audio_processing/debug.pb.h" +#endif +#endif // WEBRTC_AUDIOPROC_DEBUG_DUMP namespace webrtc { class AgcManagerDirect; -class AudioBuffer; class AudioConverter; template<typename T> class Beamformer; -class CriticalSectionWrapper; -class EchoCancellationImpl; -class EchoControlMobileImpl; -class FileWrapper; -class GainControlImpl; -class GainControlForNewAgc; -class HighPassFilterImpl; -class LevelEstimatorImpl; -class NoiseSuppressionImpl; -class ProcessingComponent; -class TransientSuppressor; -class VoiceDetectionImpl; -class IntelligibilityEnhancer; - -#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP -namespace audioproc { - -class Event; - -} // namespace audioproc -#endif - class AudioProcessingImpl : public AudioProcessing { public: + // Methods forcing APM to run in a single-threaded manner. + // Acquires both the render and capture locks. explicit AudioProcessingImpl(const Config& config); - // AudioProcessingImpl takes ownership of beamformer. AudioProcessingImpl(const Config& config, Beamformer<float>* beamformer); virtual ~AudioProcessingImpl(); - - // AudioProcessing methods. 
int Initialize() override; int Initialize(int input_sample_rate_hz, int output_sample_rate_hz, @@ -68,12 +56,14 @@ class AudioProcessingImpl : public AudioProcessing { ChannelLayout reverse_layout) override; int Initialize(const ProcessingConfig& processing_config) override; void SetExtraOptions(const Config& config) override; - int proc_sample_rate_hz() const override; - int proc_split_sample_rate_hz() const override; - int num_input_channels() const override; - int num_output_channels() const override; - int num_reverse_channels() const override; - void set_output_will_be_muted(bool muted) override; + void UpdateHistogramsOnCallEnd() override; + int StartDebugRecording(const char filename[kMaxFilenameSize]) override; + int StartDebugRecording(FILE* handle) override; + int StartDebugRecordingForPlatformFile(rtc::PlatformFile handle) override; + int StopDebugRecording() override; + + // Capture-side exclusive methods possibly running APM in a + // multi-threaded manner. Acquire the capture lock. int ProcessStream(AudioFrame* frame) override; int ProcessStream(const float* const* src, size_t samples_per_channel, @@ -86,6 +76,15 @@ class AudioProcessingImpl : public AudioProcessing { const StreamConfig& input_config, const StreamConfig& output_config, float* const* dest) override; + void set_output_will_be_muted(bool muted) override; + int set_stream_delay_ms(int delay) override; + void set_delay_offset_ms(int offset) override; + int delay_offset_ms() const override; + void set_stream_key_pressed(bool key_pressed) override; + int input_sample_rate_hz() const override; + + // Render-side exclusive methods possibly running APM in a + // multi-threaded manner. Acquire the render lock. 
int AnalyzeReverseStream(AudioFrame* frame) override; int ProcessReverseStream(AudioFrame* frame) override; int AnalyzeReverseStream(const float* const* data, @@ -96,17 +95,25 @@ class AudioProcessingImpl : public AudioProcessing { const StreamConfig& reverse_input_config, const StreamConfig& reverse_output_config, float* const* dest) override; - int set_stream_delay_ms(int delay) override; + + // Methods only accessed from APM submodules or + // from AudioProcessing tests in a single-threaded manner. + // Hence there is no need for locks in these. + int proc_sample_rate_hz() const override; + int proc_split_sample_rate_hz() const override; + size_t num_input_channels() const override; + size_t num_proc_channels() const override; + size_t num_output_channels() const override; + size_t num_reverse_channels() const override; int stream_delay_ms() const override; - bool was_stream_delay_set() const override; - void set_delay_offset_ms(int offset) override; - int delay_offset_ms() const override; - void set_stream_key_pressed(bool key_pressed) override; - int StartDebugRecording(const char filename[kMaxFilenameSize]) override; - int StartDebugRecording(FILE* handle) override; - int StartDebugRecordingForPlatformFile(rtc::PlatformFile handle) override; - int StopDebugRecording() override; - void UpdateHistogramsOnCallEnd() override; + bool was_stream_delay_set() const override + EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); + + // Methods returning pointers to APM submodules. + // No locks are aquired in those, as those locks + // would offer no protection (the submodules are + // created only once in a single-treaded manner + // during APM creation). EchoCancellation* echo_cancellation() const override; EchoControlMobile* echo_control_mobile() const override; GainControl* gain_control() const override; @@ -117,101 +124,216 @@ class AudioProcessingImpl : public AudioProcessing { protected: // Overridden in a mock. 
- virtual int InitializeLocked() EXCLUSIVE_LOCKS_REQUIRED(crit_); + virtual int InitializeLocked() + EXCLUSIVE_LOCKS_REQUIRED(crit_render_, crit_capture_); private: + struct ApmPublicSubmodules; + struct ApmPrivateSubmodules; + +#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP + // State for the debug dump. + struct ApmDebugDumpThreadState { + ApmDebugDumpThreadState() : event_msg(new audioproc::Event()) {} + rtc::scoped_ptr<audioproc::Event> event_msg; // Protobuf message. + std::string event_str; // Memory for protobuf serialization. + + // Serialized string of last saved APM configuration. + std::string last_serialized_config; + }; + + struct ApmDebugDumpState { + ApmDebugDumpState() : debug_file(FileWrapper::Create()) {} + rtc::scoped_ptr<FileWrapper> debug_file; + ApmDebugDumpThreadState render; + ApmDebugDumpThreadState capture; + }; +#endif + + // Method for modifying the formats struct that are called from both + // the render and capture threads. The check for whether modifications + // are needed is done while holding the render lock only, thereby avoiding + // that the capture thread blocks the render thread. + // The struct is modified in a single-threaded manner by holding both the + // render and capture locks. + int MaybeInitialize(const ProcessingConfig& config) + EXCLUSIVE_LOCKS_REQUIRED(crit_render_); + + int MaybeInitializeRender(const ProcessingConfig& processing_config) + EXCLUSIVE_LOCKS_REQUIRED(crit_render_); + + int MaybeInitializeCapture(const ProcessingConfig& processing_config) + EXCLUSIVE_LOCKS_REQUIRED(crit_render_); + + // Method for checking for the need of conversion. Accesses the formats + // structs in a read manner but the requirement for the render lock to be held + // was added as it currently anyway is always called in that manner. 
+ bool rev_conversion_needed() const EXCLUSIVE_LOCKS_REQUIRED(crit_render_); + bool render_check_rev_conversion_needed() const + EXCLUSIVE_LOCKS_REQUIRED(crit_render_); + + // Methods requiring APM running in a single-threaded manner. + // Are called with both the render and capture locks already + // acquired. + void InitializeExperimentalAgc() + EXCLUSIVE_LOCKS_REQUIRED(crit_render_, crit_capture_); + void InitializeTransient() + EXCLUSIVE_LOCKS_REQUIRED(crit_render_, crit_capture_); + void InitializeBeamformer() + EXCLUSIVE_LOCKS_REQUIRED(crit_render_, crit_capture_); + void InitializeIntelligibility() + EXCLUSIVE_LOCKS_REQUIRED(crit_render_, crit_capture_); + void InitializeHighPassFilter() + EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); + void InitializeNoiseSuppression() + EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); + void InitializeLevelEstimator() + EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); + void InitializeVoiceDetection() + EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); int InitializeLocked(const ProcessingConfig& config) - EXCLUSIVE_LOCKS_REQUIRED(crit_); - int MaybeInitializeLocked(const ProcessingConfig& config) - EXCLUSIVE_LOCKS_REQUIRED(crit_); + EXCLUSIVE_LOCKS_REQUIRED(crit_render_, crit_capture_); + + // Capture-side exclusive methods possibly running APM in a multi-threaded + // manner that are called with the render lock already acquired. 
+ int ProcessStreamLocked() EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); + bool output_copy_needed(bool is_data_processed) const + EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); + bool is_data_processed() const EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); + bool synthesis_needed(bool is_data_processed) const + EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); + bool analysis_needed(bool is_data_processed) const + EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); + void MaybeUpdateHistograms() EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); + + // Render-side exclusive methods possibly running APM in a multi-threaded + // manner that are called with the render lock already acquired. // TODO(ekm): Remove once all clients updated to new interface. - int AnalyzeReverseStream(const float* const* src, - const StreamConfig& input_config, - const StreamConfig& output_config); - int ProcessStreamLocked() EXCLUSIVE_LOCKS_REQUIRED(crit_); - int ProcessReverseStreamLocked() EXCLUSIVE_LOCKS_REQUIRED(crit_); - - bool is_data_processed() const; - bool output_copy_needed(bool is_data_processed) const; - bool synthesis_needed(bool is_data_processed) const; - bool analysis_needed(bool is_data_processed) const; - bool is_rev_processed() const; - bool rev_conversion_needed() const; - void InitializeExperimentalAgc() EXCLUSIVE_LOCKS_REQUIRED(crit_); - void InitializeTransient() EXCLUSIVE_LOCKS_REQUIRED(crit_); - void InitializeBeamformer() EXCLUSIVE_LOCKS_REQUIRED(crit_); - void InitializeIntelligibility() EXCLUSIVE_LOCKS_REQUIRED(crit_); - void MaybeUpdateHistograms() EXCLUSIVE_LOCKS_REQUIRED(crit_); - - EchoCancellationImpl* echo_cancellation_; - EchoControlMobileImpl* echo_control_mobile_; - GainControlImpl* gain_control_; - HighPassFilterImpl* high_pass_filter_; - LevelEstimatorImpl* level_estimator_; - NoiseSuppressionImpl* noise_suppression_; - VoiceDetectionImpl* voice_detection_; - rtc::scoped_ptr<GainControlForNewAgc> gain_control_for_new_agc_; - - std::list<ProcessingComponent*> component_list_; - 
CriticalSectionWrapper* crit_; - rtc::scoped_ptr<AudioBuffer> render_audio_; - rtc::scoped_ptr<AudioBuffer> capture_audio_; - rtc::scoped_ptr<AudioConverter> render_converter_; + int AnalyzeReverseStreamLocked(const float* const* src, + const StreamConfig& input_config, + const StreamConfig& output_config) + EXCLUSIVE_LOCKS_REQUIRED(crit_render_); + bool is_rev_processed() const EXCLUSIVE_LOCKS_REQUIRED(crit_render_); + int ProcessReverseStreamLocked() EXCLUSIVE_LOCKS_REQUIRED(crit_render_); + +// Debug dump methods that are internal and called without locks. +// TODO(peah): Make thread safe. #ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP // TODO(andrew): make this more graceful. Ideally we would split this stuff // out into a separate class with an "enabled" and "disabled" implementation. - int WriteMessageToDebugFile(); - int WriteInitMessage(); + static int WriteMessageToDebugFile(FileWrapper* debug_file, + rtc::CriticalSection* crit_debug, + ApmDebugDumpThreadState* debug_state); + int WriteInitMessage() EXCLUSIVE_LOCKS_REQUIRED(crit_render_, crit_capture_); // Writes Config message. If not |forced|, only writes the current config if // it is different from the last saved one; if |forced|, writes the config // regardless of the last saved. - int WriteConfigMessage(bool forced); + int WriteConfigMessage(bool forced) EXCLUSIVE_LOCKS_REQUIRED(crit_capture_) + EXCLUSIVE_LOCKS_REQUIRED(crit_capture_); - rtc::scoped_ptr<FileWrapper> debug_file_; - rtc::scoped_ptr<audioproc::Event> event_msg_; // Protobuf message. - std::string event_str_; // Memory for protobuf serialization. + // Critical section. + mutable rtc::CriticalSection crit_debug_; - // Serialized string of last saved APM configuration. - std::string last_serialized_config_; + // Debug dump state. + ApmDebugDumpState debug_dump_; #endif - // Format of processing streams at input/output call sites. 
- ProcessingConfig api_format_; - - // Only the rate and samples fields of fwd_proc_format_ are used because the - // forward processing number of channels is mutable and is tracked by the - // capture_audio_. - StreamConfig fwd_proc_format_; - StreamConfig rev_proc_format_; - int split_rate_; - - int stream_delay_ms_; - int delay_offset_ms_; - bool was_stream_delay_set_; - int last_stream_delay_ms_; - int last_aec_system_delay_ms_; - int stream_delay_jumps_; - int aec_system_delay_jumps_; - - bool output_will_be_muted_ GUARDED_BY(crit_); - - bool key_pressed_; - - // Only set through the constructor's Config parameter. - const bool use_new_agc_; - rtc::scoped_ptr<AgcManagerDirect> agc_manager_ GUARDED_BY(crit_); - int agc_startup_min_volume_; - - bool transient_suppressor_enabled_; - rtc::scoped_ptr<TransientSuppressor> transient_suppressor_; - const bool beamformer_enabled_; - rtc::scoped_ptr<Beamformer<float>> beamformer_; - const std::vector<Point> array_geometry_; - const SphericalPointf target_direction_; - - bool intelligibility_enabled_; - rtc::scoped_ptr<IntelligibilityEnhancer> intelligibility_enhancer_; + // Critical sections. + mutable rtc::CriticalSection crit_render_ ACQUIRED_BEFORE(crit_capture_); + mutable rtc::CriticalSection crit_capture_; + + // Structs containing the pointers to the submodules. + rtc::scoped_ptr<ApmPublicSubmodules> public_submodules_; + rtc::scoped_ptr<ApmPrivateSubmodules> private_submodules_ + GUARDED_BY(crit_capture_); + + // State that is written to while holding both the render and capture locks + // but can be read without any lock being held. + // As this is only accessed internally of APM, and all internal methods in APM + // either are holding the render or capture locks, this construct is safe as + // it is not possible to read the variables while writing them. + struct ApmFormatState { + ApmFormatState() + : // Format of processing streams at input/output call sites. 
+ api_format({{{kSampleRate16kHz, 1, false}, + {kSampleRate16kHz, 1, false}, + {kSampleRate16kHz, 1, false}, + {kSampleRate16kHz, 1, false}}}), + rev_proc_format(kSampleRate16kHz, 1) {} + ProcessingConfig api_format; + StreamConfig rev_proc_format; + } formats_; + + // APM constants. + const struct ApmConstants { + ApmConstants(int agc_startup_min_volume, + bool use_new_agc, + bool intelligibility_enabled) + : // Format of processing streams at input/output call sites. + agc_startup_min_volume(agc_startup_min_volume), + use_new_agc(use_new_agc), + intelligibility_enabled(intelligibility_enabled) {} + int agc_startup_min_volume; + bool use_new_agc; + bool intelligibility_enabled; + } constants_; + + struct ApmCaptureState { + ApmCaptureState(bool transient_suppressor_enabled, + const std::vector<Point>& array_geometry, + SphericalPointf target_direction) + : aec_system_delay_jumps(-1), + delay_offset_ms(0), + was_stream_delay_set(false), + last_stream_delay_ms(0), + last_aec_system_delay_ms(0), + stream_delay_jumps(-1), + output_will_be_muted(false), + key_pressed(false), + transient_suppressor_enabled(transient_suppressor_enabled), + array_geometry(array_geometry), + target_direction(target_direction), + fwd_proc_format(kSampleRate16kHz), + split_rate(kSampleRate16kHz) {} + int aec_system_delay_jumps; + int delay_offset_ms; + bool was_stream_delay_set; + int last_stream_delay_ms; + int last_aec_system_delay_ms; + int stream_delay_jumps; + bool output_will_be_muted; + bool key_pressed; + bool transient_suppressor_enabled; + std::vector<Point> array_geometry; + SphericalPointf target_direction; + rtc::scoped_ptr<AudioBuffer> capture_audio; + // Only the rate and samples fields of fwd_proc_format_ are used because the + // forward processing number of channels is mutable and is tracked by the + // capture_audio_. 
+ StreamConfig fwd_proc_format; + int split_rate; + } capture_ GUARDED_BY(crit_capture_); + + struct ApmCaptureNonLockedState { + ApmCaptureNonLockedState(bool beamformer_enabled) + : fwd_proc_format(kSampleRate16kHz), + split_rate(kSampleRate16kHz), + stream_delay_ms(0), + beamformer_enabled(beamformer_enabled) {} + // Only the rate and samples fields of fwd_proc_format_ are used because the + // forward processing number of channels is mutable and is tracked by the + // capture_audio_. + StreamConfig fwd_proc_format; + int split_rate; + int stream_delay_ms; + bool beamformer_enabled; + } capture_nonlocked_; + + struct ApmRenderState { + rtc::scoped_ptr<AudioConverter> render_converter; + rtc::scoped_ptr<AudioBuffer> render_audio; + } render_ GUARDED_BY(crit_render_); }; } // namespace webrtc diff --git a/webrtc/modules/audio_processing/audio_processing_impl_locking_unittest.cc b/webrtc/modules/audio_processing/audio_processing_impl_locking_unittest.cc new file mode 100644 index 0000000000..e1e6a310a5 --- /dev/null +++ b/webrtc/modules/audio_processing/audio_processing_impl_locking_unittest.cc @@ -0,0 +1,1133 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/modules/audio_processing/audio_processing_impl.h" + +#include <algorithm> +#include <vector> + +#include "testing/gtest/include/gtest/gtest.h" +#include "webrtc/base/array_view.h" +#include "webrtc/base/criticalsection.h" +#include "webrtc/base/event.h" +#include "webrtc/base/platform_thread.h" +#include "webrtc/base/random.h" +#include "webrtc/config.h" +#include "webrtc/modules/audio_processing/test/test_utils.h" +#include "webrtc/modules/include/module_common_types.h" +#include "webrtc/system_wrappers/include/sleep.h" + +namespace webrtc { + +namespace { + +class AudioProcessingImplLockTest; + +// Type of the render thread APM API call to use in the test. +enum class RenderApiImpl { + ProcessReverseStreamImpl1, + ProcessReverseStreamImpl2, + AnalyzeReverseStreamImpl1, + AnalyzeReverseStreamImpl2 +}; + +// Type of the capture thread APM API call to use in the test. +enum class CaptureApiImpl { + ProcessStreamImpl1, + ProcessStreamImpl2, + ProcessStreamImpl3 +}; + +// The runtime parameter setting scheme to use in the test. +enum class RuntimeParameterSettingScheme { + SparseStreamMetadataChangeScheme, + ExtremeStreamMetadataChangeScheme, + FixedMonoStreamMetadataScheme, + FixedStereoStreamMetadataScheme +}; + +// Variant of echo canceller settings to use in the test. +enum class AecType { + BasicWebRtcAecSettings, + AecTurnedOff, + BasicWebRtcAecSettingsWithExtentedFilter, + BasicWebRtcAecSettingsWithDelayAgnosticAec, + BasicWebRtcAecSettingsWithAecMobile +}; + +// Thread-safe random number generator wrapper. 
+class RandomGenerator { + public: + RandomGenerator() : rand_gen_(42U) {} + + int RandInt(int min, int max) { + rtc::CritScope cs(&crit_); + return rand_gen_.Rand(min, max); + } + + int RandInt(int max) { + rtc::CritScope cs(&crit_); + return rand_gen_.Rand(max); + } + + float RandFloat() { + rtc::CritScope cs(&crit_); + return rand_gen_.Rand<float>(); + } + + private: + rtc::CriticalSection crit_; + Random rand_gen_ GUARDED_BY(crit_); +}; + +// Variables related to the audio data and formats. +struct AudioFrameData { + explicit AudioFrameData(int max_frame_size) { + // Set up the two-dimensional arrays needed for the APM API calls. + input_framechannels.resize(2 * max_frame_size); + input_frame.resize(2); + input_frame[0] = &input_framechannels[0]; + input_frame[1] = &input_framechannels[max_frame_size]; + + output_frame_channels.resize(2 * max_frame_size); + output_frame.resize(2); + output_frame[0] = &output_frame_channels[0]; + output_frame[1] = &output_frame_channels[max_frame_size]; + } + + AudioFrame frame; + std::vector<float*> output_frame; + std::vector<float> output_frame_channels; + AudioProcessing::ChannelLayout output_channel_layout = + AudioProcessing::ChannelLayout::kMono; + int input_sample_rate_hz = 16000; + int input_number_of_channels = -1; + std::vector<float*> input_frame; + std::vector<float> input_framechannels; + AudioProcessing::ChannelLayout input_channel_layout = + AudioProcessing::ChannelLayout::kMono; + int output_sample_rate_hz = 16000; + int output_number_of_channels = -1; + StreamConfig input_stream_config; + StreamConfig output_stream_config; + int input_samples_per_channel = -1; + int output_samples_per_channel = -1; +}; + +// The configuration for the test. +struct TestConfig { + // Test case generator for the test configurations to use in the brief tests. 
+ static std::vector<TestConfig> GenerateBriefTestConfigs() { + std::vector<TestConfig> test_configs; + AecType aec_types[] = {AecType::BasicWebRtcAecSettingsWithDelayAgnosticAec, + AecType::BasicWebRtcAecSettingsWithAecMobile}; + for (auto aec_type : aec_types) { + TestConfig test_config; + test_config.aec_type = aec_type; + + test_config.min_number_of_calls = 300; + + // Perform tests only with the extreme runtime parameter setting scheme. + test_config.runtime_parameter_setting_scheme = + RuntimeParameterSettingScheme::ExtremeStreamMetadataChangeScheme; + + // Only test 16 kHz for this test suite. + test_config.initial_sample_rate_hz = 16000; + + // Create test config for the second processing API function set. + test_config.render_api_function = + RenderApiImpl::ProcessReverseStreamImpl2; + test_config.capture_api_function = CaptureApiImpl::ProcessStreamImpl2; + + // Create test config for the first processing API function set. + test_configs.push_back(test_config); + test_config.render_api_function = + RenderApiImpl::AnalyzeReverseStreamImpl2; + test_config.capture_api_function = CaptureApiImpl::ProcessStreamImpl3; + test_configs.push_back(test_config); + } + + // Return the created test configurations. + return test_configs; + } + + // Test case generator for the test configurations to use in the extensive + // tests. + static std::vector<TestConfig> GenerateExtensiveTestConfigs() { + // Lambda functions for the test config generation. 
+ auto add_processing_apis = [](TestConfig test_config) { + struct AllowedApiCallCombinations { + RenderApiImpl render_api; + CaptureApiImpl capture_api; + }; + + const AllowedApiCallCombinations api_calls[] = { + {RenderApiImpl::ProcessReverseStreamImpl1, + CaptureApiImpl::ProcessStreamImpl1}, + {RenderApiImpl::AnalyzeReverseStreamImpl1, + CaptureApiImpl::ProcessStreamImpl1}, + {RenderApiImpl::ProcessReverseStreamImpl2, + CaptureApiImpl::ProcessStreamImpl2}, + {RenderApiImpl::ProcessReverseStreamImpl2, + CaptureApiImpl::ProcessStreamImpl3}, + {RenderApiImpl::AnalyzeReverseStreamImpl2, + CaptureApiImpl::ProcessStreamImpl2}, + {RenderApiImpl::AnalyzeReverseStreamImpl2, + CaptureApiImpl::ProcessStreamImpl3}}; + std::vector<TestConfig> out; + for (auto api_call : api_calls) { + test_config.render_api_function = api_call.render_api; + test_config.capture_api_function = api_call.capture_api; + out.push_back(test_config); + } + return out; + }; + + auto add_aec_settings = [](const std::vector<TestConfig>& in) { + std::vector<TestConfig> out; + AecType aec_types[] = { + AecType::BasicWebRtcAecSettings, AecType::AecTurnedOff, + AecType::BasicWebRtcAecSettingsWithExtentedFilter, + AecType::BasicWebRtcAecSettingsWithDelayAgnosticAec, + AecType::BasicWebRtcAecSettingsWithAecMobile}; + for (auto test_config : in) { + for (auto aec_type : aec_types) { + test_config.aec_type = aec_type; + out.push_back(test_config); + } + } + return out; + }; + + auto add_settings_scheme = [](const std::vector<TestConfig>& in) { + std::vector<TestConfig> out; + RuntimeParameterSettingScheme schemes[] = { + RuntimeParameterSettingScheme::SparseStreamMetadataChangeScheme, + RuntimeParameterSettingScheme::ExtremeStreamMetadataChangeScheme, + RuntimeParameterSettingScheme::FixedMonoStreamMetadataScheme, + RuntimeParameterSettingScheme::FixedStereoStreamMetadataScheme}; + + for (auto test_config : in) { + for (auto scheme : schemes) { + test_config.runtime_parameter_setting_scheme = scheme; + 
out.push_back(test_config); + } + } + return out; + }; + + auto add_sample_rates = [](const std::vector<TestConfig>& in) { + const int sample_rates[] = {8000, 16000, 32000, 48000}; + + std::vector<TestConfig> out; + for (auto test_config : in) { + auto available_rates = + (test_config.aec_type == + AecType::BasicWebRtcAecSettingsWithAecMobile + ? rtc::ArrayView<const int>(sample_rates, 2) + : rtc::ArrayView<const int>(sample_rates)); + + for (auto rate : available_rates) { + test_config.initial_sample_rate_hz = rate; + out.push_back(test_config); + } + } + return out; + }; + + // Generate test configurations of the relevant combinations of the + // parameters to + // test. + TestConfig test_config; + test_config.min_number_of_calls = 10000; + return add_sample_rates(add_settings_scheme( + add_aec_settings(add_processing_apis(test_config)))); + } + + RenderApiImpl render_api_function = RenderApiImpl::ProcessReverseStreamImpl2; + CaptureApiImpl capture_api_function = CaptureApiImpl::ProcessStreamImpl2; + RuntimeParameterSettingScheme runtime_parameter_setting_scheme = + RuntimeParameterSettingScheme::ExtremeStreamMetadataChangeScheme; + int initial_sample_rate_hz = 16000; + AecType aec_type = AecType::BasicWebRtcAecSettingsWithDelayAgnosticAec; + int min_number_of_calls = 300; +}; + +// Handler for the frame counters. 
+class FrameCounters { + public: + void IncreaseRenderCounter() { + rtc::CritScope cs(&crit_); + render_count++; + } + + void IncreaseCaptureCounter() { + rtc::CritScope cs(&crit_); + capture_count++; + } + + int GetCaptureCounter() const { + rtc::CritScope cs(&crit_); + return capture_count; + } + + int GetRenderCounter() const { + rtc::CritScope cs(&crit_); + return render_count; + } + + int CaptureMinusRenderCounters() const { + rtc::CritScope cs(&crit_); + return capture_count - render_count; + } + + int RenderMinusCaptureCounters() const { + return -CaptureMinusRenderCounters(); + } + + bool BothCountersExceedeThreshold(int threshold) { + rtc::CritScope cs(&crit_); + return (render_count > threshold && capture_count > threshold); + } + + private: + mutable rtc::CriticalSection crit_; + int render_count GUARDED_BY(crit_) = 0; + int capture_count GUARDED_BY(crit_) = 0; +}; + +// Class for handling the capture side processing. +class CaptureProcessor { + public: + CaptureProcessor(int max_frame_size, + RandomGenerator* rand_gen, + rtc::Event* render_call_event, + rtc::Event* capture_call_event, + FrameCounters* shared_counters_state, + AudioProcessingImplLockTest* test_framework, + TestConfig* test_config, + AudioProcessing* apm); + bool Process(); + + private: + static const int kMaxCallDifference = 10; + static const float kCaptureInputFloatLevel; + static const int kCaptureInputFixLevel = 1024; + + void PrepareFrame(); + void CallApmCaptureSide(); + void ApplyRuntimeSettingScheme(); + + RandomGenerator* const rand_gen_ = nullptr; + rtc::Event* const render_call_event_ = nullptr; + rtc::Event* const capture_call_event_ = nullptr; + FrameCounters* const frame_counters_ = nullptr; + AudioProcessingImplLockTest* const test_ = nullptr; + const TestConfig* const test_config_ = nullptr; + AudioProcessing* const apm_ = nullptr; + AudioFrameData frame_data_; +}; + +// Class for handling the stats processing. 
+class StatsProcessor { + public: + StatsProcessor(RandomGenerator* rand_gen, + TestConfig* test_config, + AudioProcessing* apm); + bool Process(); + + private: + RandomGenerator* rand_gen_ = nullptr; + TestConfig* test_config_ = nullptr; + AudioProcessing* apm_ = nullptr; +}; + +// Class for handling the render side processing. +class RenderProcessor { + public: + RenderProcessor(int max_frame_size, + RandomGenerator* rand_gen, + rtc::Event* render_call_event, + rtc::Event* capture_call_event, + FrameCounters* shared_counters_state, + AudioProcessingImplLockTest* test_framework, + TestConfig* test_config, + AudioProcessing* apm); + bool Process(); + + private: + static const int kMaxCallDifference = 10; + static const int kRenderInputFixLevel = 16384; + static const float kRenderInputFloatLevel; + + void PrepareFrame(); + void CallApmRenderSide(); + void ApplyRuntimeSettingScheme(); + + RandomGenerator* const rand_gen_ = nullptr; + rtc::Event* const render_call_event_ = nullptr; + rtc::Event* const capture_call_event_ = nullptr; + FrameCounters* const frame_counters_ = nullptr; + AudioProcessingImplLockTest* const test_ = nullptr; + const TestConfig* const test_config_ = nullptr; + AudioProcessing* const apm_ = nullptr; + AudioFrameData frame_data_; + bool first_render_call_ = true; +}; + +class AudioProcessingImplLockTest + : public ::testing::TestWithParam<TestConfig> { + public: + AudioProcessingImplLockTest(); + bool RunTest(); + bool MaybeEndTest(); + + private: + static const int kTestTimeOutLimit = 10 * 60 * 1000; + static const int kMaxFrameSize = 480; + + // ::testing::TestWithParam<> implementation + void SetUp() override; + void TearDown() override; + + // Thread callback for the render thread + static bool RenderProcessorThreadFunc(void* context) { + return reinterpret_cast<AudioProcessingImplLockTest*>(context) + ->render_thread_state_.Process(); + } + + // Thread callback for the capture thread + static bool CaptureProcessorThreadFunc(void* context) 
{ + return reinterpret_cast<AudioProcessingImplLockTest*>(context) + ->capture_thread_state_.Process(); + } + + // Thread callback for the stats thread + static bool StatsProcessorThreadFunc(void* context) { + return reinterpret_cast<AudioProcessingImplLockTest*>(context) + ->stats_thread_state_.Process(); + } + + // Tests whether all the required render and capture side calls have been + // done. + bool TestDone() { + return frame_counters_.BothCountersExceedeThreshold( + test_config_.min_number_of_calls); + } + + // Start the threads used in the test. + void StartThreads() { + render_thread_.Start(); + render_thread_.SetPriority(rtc::kRealtimePriority); + capture_thread_.Start(); + capture_thread_.SetPriority(rtc::kRealtimePriority); + stats_thread_.Start(); + stats_thread_.SetPriority(rtc::kNormalPriority); + } + + // Event handlers for the test. + rtc::Event test_complete_; + rtc::Event render_call_event_; + rtc::Event capture_call_event_; + + // Thread related variables. + rtc::PlatformThread render_thread_; + rtc::PlatformThread capture_thread_; + rtc::PlatformThread stats_thread_; + mutable RandomGenerator rand_gen_; + + rtc::scoped_ptr<AudioProcessing> apm_; + TestConfig test_config_; + FrameCounters frame_counters_; + RenderProcessor render_thread_state_; + CaptureProcessor capture_thread_state_; + StatsProcessor stats_thread_state_; +}; + +// Sleeps a random time between 0 and max_sleep milliseconds. +void SleepRandomMs(int max_sleep, RandomGenerator* rand_gen) { + int sleeptime = rand_gen->RandInt(0, max_sleep); + SleepMs(sleeptime); +} + +// Populates a float audio frame with random data. +void PopulateAudioFrame(float** frame, + float amplitude, + size_t num_channels, + size_t samples_per_channel, + RandomGenerator* rand_gen) { + for (size_t ch = 0; ch < num_channels; ch++) { + for (size_t k = 0; k < samples_per_channel; k++) { + // Store random 16 bit quantized float number between +-amplitude. 
+ frame[ch][k] = amplitude * (2 * rand_gen->RandFloat() - 1); + } + } +} + +// Populates an audioframe frame of AudioFrame type with random data. +void PopulateAudioFrame(AudioFrame* frame, + int16_t amplitude, + RandomGenerator* rand_gen) { + ASSERT_GT(amplitude, 0); + ASSERT_LE(amplitude, 32767); + for (size_t ch = 0; ch < frame->num_channels_; ch++) { + for (size_t k = 0; k < frame->samples_per_channel_; k++) { + // Store random 16 bit number between -(amplitude+1) and + // amplitude. + frame->data_[k * ch] = + rand_gen->RandInt(2 * amplitude + 1) - amplitude - 1; + } + } +} + +AudioProcessingImplLockTest::AudioProcessingImplLockTest() + : test_complete_(false, false), + render_call_event_(false, false), + capture_call_event_(false, false), + render_thread_(RenderProcessorThreadFunc, this, "render"), + capture_thread_(CaptureProcessorThreadFunc, this, "capture"), + stats_thread_(StatsProcessorThreadFunc, this, "stats"), + apm_(AudioProcessingImpl::Create()), + render_thread_state_(kMaxFrameSize, + &rand_gen_, + &render_call_event_, + &capture_call_event_, + &frame_counters_, + this, + &test_config_, + apm_.get()), + capture_thread_state_(kMaxFrameSize, + &rand_gen_, + &render_call_event_, + &capture_call_event_, + &frame_counters_, + this, + &test_config_, + apm_.get()), + stats_thread_state_(&rand_gen_, &test_config_, apm_.get()) {} + +// Run the test with a timeout. +bool AudioProcessingImplLockTest::RunTest() { + StartThreads(); + return test_complete_.Wait(kTestTimeOutLimit); +} + +bool AudioProcessingImplLockTest::MaybeEndTest() { + if (HasFatalFailure() || TestDone()) { + test_complete_.Set(); + return true; + } + return false; +} + +// Setup of test and APM. 
+void AudioProcessingImplLockTest::SetUp() { + test_config_ = static_cast<TestConfig>(GetParam()); + + ASSERT_EQ(apm_->kNoError, apm_->level_estimator()->Enable(true)); + ASSERT_EQ(apm_->kNoError, apm_->gain_control()->Enable(true)); + + ASSERT_EQ(apm_->kNoError, + apm_->gain_control()->set_mode(GainControl::kAdaptiveDigital)); + ASSERT_EQ(apm_->kNoError, apm_->gain_control()->Enable(true)); + + ASSERT_EQ(apm_->kNoError, apm_->noise_suppression()->Enable(true)); + ASSERT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(true)); + + Config config; + if (test_config_.aec_type == AecType::AecTurnedOff) { + ASSERT_EQ(apm_->kNoError, apm_->echo_control_mobile()->Enable(false)); + ASSERT_EQ(apm_->kNoError, apm_->echo_cancellation()->Enable(false)); + } else if (test_config_.aec_type == + AecType::BasicWebRtcAecSettingsWithAecMobile) { + ASSERT_EQ(apm_->kNoError, apm_->echo_control_mobile()->Enable(true)); + ASSERT_EQ(apm_->kNoError, apm_->echo_cancellation()->Enable(false)); + } else { + ASSERT_EQ(apm_->kNoError, apm_->echo_control_mobile()->Enable(false)); + ASSERT_EQ(apm_->kNoError, apm_->echo_cancellation()->Enable(true)); + ASSERT_EQ(apm_->kNoError, apm_->echo_cancellation()->enable_metrics(true)); + ASSERT_EQ(apm_->kNoError, + apm_->echo_cancellation()->enable_delay_logging(true)); + + config.Set<ExtendedFilter>( + new ExtendedFilter(test_config_.aec_type == + AecType::BasicWebRtcAecSettingsWithExtentedFilter)); + + config.Set<DelayAgnostic>( + new DelayAgnostic(test_config_.aec_type == + AecType::BasicWebRtcAecSettingsWithDelayAgnosticAec)); + + apm_->SetExtraOptions(config); + } +} + +void AudioProcessingImplLockTest::TearDown() { + render_call_event_.Set(); + capture_call_event_.Set(); + render_thread_.Stop(); + capture_thread_.Stop(); + stats_thread_.Stop(); +} + +StatsProcessor::StatsProcessor(RandomGenerator* rand_gen, + TestConfig* test_config, + AudioProcessing* apm) + : rand_gen_(rand_gen), test_config_(test_config), apm_(apm) {} + +// Implements the 
callback functionality for the statistics +// collection thread. +bool StatsProcessor::Process() { + SleepRandomMs(100, rand_gen_); + + EXPECT_EQ(apm_->echo_cancellation()->is_enabled(), + ((test_config_->aec_type != AecType::AecTurnedOff) && + (test_config_->aec_type != + AecType::BasicWebRtcAecSettingsWithAecMobile))); + apm_->echo_cancellation()->stream_drift_samples(); + EXPECT_EQ(apm_->echo_control_mobile()->is_enabled(), + (test_config_->aec_type != AecType::AecTurnedOff) && + (test_config_->aec_type == + AecType::BasicWebRtcAecSettingsWithAecMobile)); + EXPECT_TRUE(apm_->gain_control()->is_enabled()); + apm_->gain_control()->stream_analog_level(); + EXPECT_TRUE(apm_->noise_suppression()->is_enabled()); + + // The below return values are not testable. + apm_->noise_suppression()->speech_probability(); + apm_->voice_detection()->is_enabled(); + + return true; +} + +const float CaptureProcessor::kCaptureInputFloatLevel = 0.03125f; + +CaptureProcessor::CaptureProcessor(int max_frame_size, + RandomGenerator* rand_gen, + rtc::Event* render_call_event, + rtc::Event* capture_call_event, + FrameCounters* shared_counters_state, + AudioProcessingImplLockTest* test_framework, + TestConfig* test_config, + AudioProcessing* apm) + : rand_gen_(rand_gen), + render_call_event_(render_call_event), + capture_call_event_(capture_call_event), + frame_counters_(shared_counters_state), + test_(test_framework), + test_config_(test_config), + apm_(apm), + frame_data_(max_frame_size) {} + +// Implements the callback functionality for the capture thread. +bool CaptureProcessor::Process() { + // Sleep a random time to simulate thread jitter. + SleepRandomMs(3, rand_gen_); + + // Check whether the test is done. + if (test_->MaybeEndTest()) { + return false; + } + + // Ensure that the number of render and capture calls do not + // differ too much. 
+ if (frame_counters_->CaptureMinusRenderCounters() > kMaxCallDifference) { + render_call_event_->Wait(rtc::Event::kForever); + } + + // Apply any specified capture side APM non-processing runtime calls. + ApplyRuntimeSettingScheme(); + + // Apply the capture side processing call. + CallApmCaptureSide(); + + // Increase the number of capture-side calls. + frame_counters_->IncreaseCaptureCounter(); + + // Flag to the render thread that another capture API call has occurred + // by triggering this threads call event. + capture_call_event_->Set(); + + return true; +} + +// Prepares a frame with relevant audio data and metadata. +void CaptureProcessor::PrepareFrame() { + // Restrict to a common fixed sample rate if the AudioFrame + // interface is used. + if (test_config_->capture_api_function == + CaptureApiImpl::ProcessStreamImpl1) { + frame_data_.input_sample_rate_hz = test_config_->initial_sample_rate_hz; + frame_data_.output_sample_rate_hz = test_config_->initial_sample_rate_hz; + } + + // Prepare the audioframe data and metadata. + frame_data_.input_samples_per_channel = + frame_data_.input_sample_rate_hz * AudioProcessing::kChunkSizeMs / 1000; + frame_data_.frame.sample_rate_hz_ = frame_data_.input_sample_rate_hz; + frame_data_.frame.num_channels_ = frame_data_.input_number_of_channels; + frame_data_.frame.samples_per_channel_ = + frame_data_.input_samples_per_channel; + PopulateAudioFrame(&frame_data_.frame, kCaptureInputFixLevel, rand_gen_); + + // Prepare the float audio input data and metadata. 
+ frame_data_.input_stream_config.set_sample_rate_hz( + frame_data_.input_sample_rate_hz); + frame_data_.input_stream_config.set_num_channels( + frame_data_.input_number_of_channels); + frame_data_.input_stream_config.set_has_keyboard(false); + PopulateAudioFrame(&frame_data_.input_frame[0], kCaptureInputFloatLevel, + frame_data_.input_number_of_channels, + frame_data_.input_samples_per_channel, rand_gen_); + frame_data_.input_channel_layout = + (frame_data_.input_number_of_channels == 1 + ? AudioProcessing::ChannelLayout::kMono + : AudioProcessing::ChannelLayout::kStereo); + + // Prepare the float audio output data and metadata. + frame_data_.output_samples_per_channel = + frame_data_.output_sample_rate_hz * AudioProcessing::kChunkSizeMs / 1000; + frame_data_.output_stream_config.set_sample_rate_hz( + frame_data_.output_sample_rate_hz); + frame_data_.output_stream_config.set_num_channels( + frame_data_.output_number_of_channels); + frame_data_.output_stream_config.set_has_keyboard(false); + frame_data_.output_channel_layout = + (frame_data_.output_number_of_channels == 1 + ? AudioProcessing::ChannelLayout::kMono + : AudioProcessing::ChannelLayout::kStereo); +} + +// Applies the capture side processing API call. +void CaptureProcessor::CallApmCaptureSide() { + // Prepare a proper capture side processing API call input. + PrepareFrame(); + + // Set the stream delay + apm_->set_stream_delay_ms(30); + + // Call the specified capture side API processing method. 
+ int result = AudioProcessing::kNoError; + switch (test_config_->capture_api_function) { + case CaptureApiImpl::ProcessStreamImpl1: + result = apm_->ProcessStream(&frame_data_.frame); + break; + case CaptureApiImpl::ProcessStreamImpl2: + result = apm_->ProcessStream( + &frame_data_.input_frame[0], frame_data_.input_samples_per_channel, + frame_data_.input_sample_rate_hz, frame_data_.input_channel_layout, + frame_data_.output_sample_rate_hz, frame_data_.output_channel_layout, + &frame_data_.output_frame[0]); + break; + case CaptureApiImpl::ProcessStreamImpl3: + result = apm_->ProcessStream( + &frame_data_.input_frame[0], frame_data_.input_stream_config, + frame_data_.output_stream_config, &frame_data_.output_frame[0]); + break; + default: + FAIL(); + } + + // Check the return code for error. + ASSERT_EQ(AudioProcessing::kNoError, result); +} + +// Applies any runtime capture APM API calls and audio stream characteristics +// specified by the scheme for the test. +void CaptureProcessor::ApplyRuntimeSettingScheme() { + const int capture_count_local = frame_counters_->GetCaptureCounter(); + + // Update the number of channels and sample rates for the input and output. + // Note that the counts frequencies for when to set parameters + // are set using prime numbers in order to ensure that the + // permutation scheme in the parameter setting changes. 
+ switch (test_config_->runtime_parameter_setting_scheme) { + case RuntimeParameterSettingScheme::SparseStreamMetadataChangeScheme: + if (capture_count_local == 0) + frame_data_.input_sample_rate_hz = 16000; + else if (capture_count_local % 11 == 0) + frame_data_.input_sample_rate_hz = 32000; + else if (capture_count_local % 73 == 0) + frame_data_.input_sample_rate_hz = 48000; + else if (capture_count_local % 89 == 0) + frame_data_.input_sample_rate_hz = 16000; + else if (capture_count_local % 97 == 0) + frame_data_.input_sample_rate_hz = 8000; + + if (capture_count_local == 0) + frame_data_.input_number_of_channels = 1; + else if (capture_count_local % 4 == 0) + frame_data_.input_number_of_channels = + (frame_data_.input_number_of_channels == 1 ? 2 : 1); + + if (capture_count_local == 0) + frame_data_.output_sample_rate_hz = 16000; + else if (capture_count_local % 5 == 0) + frame_data_.output_sample_rate_hz = 32000; + else if (capture_count_local % 47 == 0) + frame_data_.output_sample_rate_hz = 48000; + else if (capture_count_local % 53 == 0) + frame_data_.output_sample_rate_hz = 16000; + else if (capture_count_local % 71 == 0) + frame_data_.output_sample_rate_hz = 8000; + + if (capture_count_local == 0) + frame_data_.output_number_of_channels = 1; + else if (capture_count_local % 8 == 0) + frame_data_.output_number_of_channels = + (frame_data_.output_number_of_channels == 1 ? 2 : 1); + break; + case RuntimeParameterSettingScheme::ExtremeStreamMetadataChangeScheme: + if (capture_count_local % 2 == 0) { + frame_data_.input_number_of_channels = 1; + frame_data_.input_sample_rate_hz = 16000; + frame_data_.output_number_of_channels = 1; + frame_data_.output_sample_rate_hz = 16000; + } else { + frame_data_.input_number_of_channels = + (frame_data_.input_number_of_channels == 1 ? 
2 : 1); + if (frame_data_.input_sample_rate_hz == 8000) + frame_data_.input_sample_rate_hz = 16000; + else if (frame_data_.input_sample_rate_hz == 16000) + frame_data_.input_sample_rate_hz = 32000; + else if (frame_data_.input_sample_rate_hz == 32000) + frame_data_.input_sample_rate_hz = 48000; + else if (frame_data_.input_sample_rate_hz == 48000) + frame_data_.input_sample_rate_hz = 8000; + + frame_data_.output_number_of_channels = + (frame_data_.output_number_of_channels == 1 ? 2 : 1); + if (frame_data_.output_sample_rate_hz == 8000) + frame_data_.output_sample_rate_hz = 16000; + else if (frame_data_.output_sample_rate_hz == 16000) + frame_data_.output_sample_rate_hz = 32000; + else if (frame_data_.output_sample_rate_hz == 32000) + frame_data_.output_sample_rate_hz = 48000; + else if (frame_data_.output_sample_rate_hz == 48000) + frame_data_.output_sample_rate_hz = 8000; + } + break; + case RuntimeParameterSettingScheme::FixedMonoStreamMetadataScheme: + if (capture_count_local == 0) { + frame_data_.input_sample_rate_hz = 16000; + frame_data_.input_number_of_channels = 1; + frame_data_.output_sample_rate_hz = 16000; + frame_data_.output_number_of_channels = 1; + } + break; + case RuntimeParameterSettingScheme::FixedStereoStreamMetadataScheme: + if (capture_count_local == 0) { + frame_data_.input_sample_rate_hz = 16000; + frame_data_.input_number_of_channels = 2; + frame_data_.output_sample_rate_hz = 16000; + frame_data_.output_number_of_channels = 2; + } + break; + default: + FAIL(); + } + + // Call any specified runtime APM setter and + // getter calls. 
+ switch (test_config_->runtime_parameter_setting_scheme) { + case RuntimeParameterSettingScheme::SparseStreamMetadataChangeScheme: + case RuntimeParameterSettingScheme::FixedMonoStreamMetadataScheme: + break; + case RuntimeParameterSettingScheme::ExtremeStreamMetadataChangeScheme: + case RuntimeParameterSettingScheme::FixedStereoStreamMetadataScheme: + if (capture_count_local % 2 == 0) { + ASSERT_EQ(AudioProcessing::Error::kNoError, + apm_->set_stream_delay_ms(30)); + apm_->set_stream_key_pressed(true); + apm_->set_delay_offset_ms(15); + EXPECT_EQ(apm_->delay_offset_ms(), 15); + } else { + ASSERT_EQ(AudioProcessing::Error::kNoError, + apm_->set_stream_delay_ms(50)); + apm_->set_stream_key_pressed(false); + apm_->set_delay_offset_ms(20); + EXPECT_EQ(apm_->delay_offset_ms(), 20); + apm_->delay_offset_ms(); + } + break; + default: + FAIL(); + } + + // Restric the number of output channels not to exceed + // the number of input channels. + frame_data_.output_number_of_channels = + std::min(frame_data_.output_number_of_channels, + frame_data_.input_number_of_channels); +} + +const float RenderProcessor::kRenderInputFloatLevel = 0.5f; + +RenderProcessor::RenderProcessor(int max_frame_size, + RandomGenerator* rand_gen, + rtc::Event* render_call_event, + rtc::Event* capture_call_event, + FrameCounters* shared_counters_state, + AudioProcessingImplLockTest* test_framework, + TestConfig* test_config, + AudioProcessing* apm) + : rand_gen_(rand_gen), + render_call_event_(render_call_event), + capture_call_event_(capture_call_event), + frame_counters_(shared_counters_state), + test_(test_framework), + test_config_(test_config), + apm_(apm), + frame_data_(max_frame_size) {} + +// Implements the callback functionality for the render thread. +bool RenderProcessor::Process() { + // Conditional wait to ensure that a capture call has been done + // before the first render call is performed (implicitly + // required by the APM API). 
+ if (first_render_call_) { + capture_call_event_->Wait(rtc::Event::kForever); + first_render_call_ = false; + } + + // Sleep a random time to simulate thread jitter. + SleepRandomMs(3, rand_gen_); + + // Check whether the test is done. + if (test_->MaybeEndTest()) { + return false; + } + + // Ensure that the number of render and capture calls do not + // differ too much. + if (frame_counters_->RenderMinusCaptureCounters() > kMaxCallDifference) { + capture_call_event_->Wait(rtc::Event::kForever); + } + + // Apply any specified render side APM non-processing runtime calls. + ApplyRuntimeSettingScheme(); + + // Apply the render side processing call. + CallApmRenderSide(); + + // Increase the number of render-side calls. + frame_counters_->IncreaseRenderCounter(); + + // Flag to the capture thread that another render API call has occurred + // by triggering this threads call event. + render_call_event_->Set(); + return true; +} + +// Prepares the render side frame and the accompanying metadata +// with the appropriate information. +void RenderProcessor::PrepareFrame() { + // Restrict to a common fixed sample rate if the AudioFrame interface is + // used. 
+ if ((test_config_->render_api_function == + RenderApiImpl::AnalyzeReverseStreamImpl1) || + (test_config_->render_api_function == + RenderApiImpl::ProcessReverseStreamImpl1) || + (test_config_->aec_type != + AecType::BasicWebRtcAecSettingsWithAecMobile)) { + frame_data_.input_sample_rate_hz = test_config_->initial_sample_rate_hz; + frame_data_.output_sample_rate_hz = test_config_->initial_sample_rate_hz; + } + + // Prepare the audioframe data and metadata + frame_data_.input_samples_per_channel = + frame_data_.input_sample_rate_hz * AudioProcessing::kChunkSizeMs / 1000; + frame_data_.frame.sample_rate_hz_ = frame_data_.input_sample_rate_hz; + frame_data_.frame.num_channels_ = frame_data_.input_number_of_channels; + frame_data_.frame.samples_per_channel_ = + frame_data_.input_samples_per_channel; + PopulateAudioFrame(&frame_data_.frame, kRenderInputFixLevel, rand_gen_); + + // Prepare the float audio input data and metadata. + frame_data_.input_stream_config.set_sample_rate_hz( + frame_data_.input_sample_rate_hz); + frame_data_.input_stream_config.set_num_channels( + frame_data_.input_number_of_channels); + frame_data_.input_stream_config.set_has_keyboard(false); + PopulateAudioFrame(&frame_data_.input_frame[0], kRenderInputFloatLevel, + frame_data_.input_number_of_channels, + frame_data_.input_samples_per_channel, rand_gen_); + frame_data_.input_channel_layout = + (frame_data_.input_number_of_channels == 1 + ? AudioProcessing::ChannelLayout::kMono + : AudioProcessing::ChannelLayout::kStereo); + + // Prepare the float audio output data and metadata. 
+ frame_data_.output_samples_per_channel = + frame_data_.output_sample_rate_hz * AudioProcessing::kChunkSizeMs / 1000; + frame_data_.output_stream_config.set_sample_rate_hz( + frame_data_.output_sample_rate_hz); + frame_data_.output_stream_config.set_num_channels( + frame_data_.output_number_of_channels); + frame_data_.output_stream_config.set_has_keyboard(false); + frame_data_.output_channel_layout = + (frame_data_.output_number_of_channels == 1 + ? AudioProcessing::ChannelLayout::kMono + : AudioProcessing::ChannelLayout::kStereo); +} + +// Makes the render side processing API call. +void RenderProcessor::CallApmRenderSide() { + // Prepare a proper render side processing API call input. + PrepareFrame(); + + // Call the specified render side API processing method. + int result = AudioProcessing::kNoError; + switch (test_config_->render_api_function) { + case RenderApiImpl::ProcessReverseStreamImpl1: + result = apm_->ProcessReverseStream(&frame_data_.frame); + break; + case RenderApiImpl::ProcessReverseStreamImpl2: + result = apm_->ProcessReverseStream( + &frame_data_.input_frame[0], frame_data_.input_stream_config, + frame_data_.output_stream_config, &frame_data_.output_frame[0]); + break; + case RenderApiImpl::AnalyzeReverseStreamImpl1: + result = apm_->AnalyzeReverseStream(&frame_data_.frame); + break; + case RenderApiImpl::AnalyzeReverseStreamImpl2: + result = apm_->AnalyzeReverseStream( + &frame_data_.input_frame[0], frame_data_.input_samples_per_channel, + frame_data_.input_sample_rate_hz, frame_data_.input_channel_layout); + break; + default: + FAIL(); + } + + // Check the return code for error. + ASSERT_EQ(AudioProcessing::kNoError, result); +} + +// Applies any render capture side APM API calls and audio stream +// characteristics +// specified by the scheme for the test. 
+void RenderProcessor::ApplyRuntimeSettingScheme() { + const int render_count_local = frame_counters_->GetRenderCounter(); + + // Update the number of channels and sample rates for the input and output. + // Note that the counts frequencies for when to set parameters + // are set using prime numbers in order to ensure that the + // permutation scheme in the parameter setting changes. + switch (test_config_->runtime_parameter_setting_scheme) { + case RuntimeParameterSettingScheme::SparseStreamMetadataChangeScheme: + if (render_count_local == 0) + frame_data_.input_sample_rate_hz = 16000; + else if (render_count_local % 47 == 0) + frame_data_.input_sample_rate_hz = 32000; + else if (render_count_local % 71 == 0) + frame_data_.input_sample_rate_hz = 48000; + else if (render_count_local % 79 == 0) + frame_data_.input_sample_rate_hz = 16000; + else if (render_count_local % 83 == 0) + frame_data_.input_sample_rate_hz = 8000; + + if (render_count_local == 0) + frame_data_.input_number_of_channels = 1; + else if (render_count_local % 4 == 0) + frame_data_.input_number_of_channels = + (frame_data_.input_number_of_channels == 1 ? 2 : 1); + + if (render_count_local == 0) + frame_data_.output_sample_rate_hz = 16000; + else if (render_count_local % 17 == 0) + frame_data_.output_sample_rate_hz = 32000; + else if (render_count_local % 19 == 0) + frame_data_.output_sample_rate_hz = 48000; + else if (render_count_local % 29 == 0) + frame_data_.output_sample_rate_hz = 16000; + else if (render_count_local % 61 == 0) + frame_data_.output_sample_rate_hz = 8000; + + if (render_count_local == 0) + frame_data_.output_number_of_channels = 1; + else if (render_count_local % 8 == 0) + frame_data_.output_number_of_channels = + (frame_data_.output_number_of_channels == 1 ? 
2 : 1); + break; + case RuntimeParameterSettingScheme::ExtremeStreamMetadataChangeScheme: + if (render_count_local == 0) { + frame_data_.input_number_of_channels = 1; + frame_data_.input_sample_rate_hz = 16000; + frame_data_.output_number_of_channels = 1; + frame_data_.output_sample_rate_hz = 16000; + } else { + frame_data_.input_number_of_channels = + (frame_data_.input_number_of_channels == 1 ? 2 : 1); + if (frame_data_.input_sample_rate_hz == 8000) + frame_data_.input_sample_rate_hz = 16000; + else if (frame_data_.input_sample_rate_hz == 16000) + frame_data_.input_sample_rate_hz = 32000; + else if (frame_data_.input_sample_rate_hz == 32000) + frame_data_.input_sample_rate_hz = 48000; + else if (frame_data_.input_sample_rate_hz == 48000) + frame_data_.input_sample_rate_hz = 8000; + + frame_data_.output_number_of_channels = + (frame_data_.output_number_of_channels == 1 ? 2 : 1); + if (frame_data_.output_sample_rate_hz == 8000) + frame_data_.output_sample_rate_hz = 16000; + else if (frame_data_.output_sample_rate_hz == 16000) + frame_data_.output_sample_rate_hz = 32000; + else if (frame_data_.output_sample_rate_hz == 32000) + frame_data_.output_sample_rate_hz = 48000; + else if (frame_data_.output_sample_rate_hz == 48000) + frame_data_.output_sample_rate_hz = 8000; + } + break; + case RuntimeParameterSettingScheme::FixedMonoStreamMetadataScheme: + if (render_count_local == 0) { + frame_data_.input_sample_rate_hz = 16000; + frame_data_.input_number_of_channels = 1; + frame_data_.output_sample_rate_hz = 16000; + frame_data_.output_number_of_channels = 1; + } + break; + case RuntimeParameterSettingScheme::FixedStereoStreamMetadataScheme: + if (render_count_local == 0) { + frame_data_.input_sample_rate_hz = 16000; + frame_data_.input_number_of_channels = 2; + frame_data_.output_sample_rate_hz = 16000; + frame_data_.output_number_of_channels = 2; + } + break; + default: + FAIL(); + } + + // Restric the number of output channels not to exceed + // the number of input 
channels. + frame_data_.output_number_of_channels = + std::min(frame_data_.output_number_of_channels, + frame_data_.input_number_of_channels); +} + +} // anonymous namespace + +TEST_P(AudioProcessingImplLockTest, LockTest) { + // Run test and verify that it did not time out. + ASSERT_TRUE(RunTest()); +} + +// Instantiate tests from the extreme test configuration set. +INSTANTIATE_TEST_CASE_P( + DISABLED_AudioProcessingImplLockExtensive, + AudioProcessingImplLockTest, + ::testing::ValuesIn(TestConfig::GenerateExtensiveTestConfigs())); + +INSTANTIATE_TEST_CASE_P( + AudioProcessingImplLockBrief, + AudioProcessingImplLockTest, + ::testing::ValuesIn(TestConfig::GenerateBriefTestConfigs())); + +} // namespace webrtc diff --git a/webrtc/modules/audio_processing/audio_processing_impl_unittest.cc b/webrtc/modules/audio_processing/audio_processing_impl_unittest.cc index f4c36d0009..ed20daaa61 100644 --- a/webrtc/modules/audio_processing/audio_processing_impl_unittest.cc +++ b/webrtc/modules/audio_processing/audio_processing_impl_unittest.cc @@ -14,7 +14,7 @@ #include "testing/gtest/include/gtest/gtest.h" #include "webrtc/config.h" #include "webrtc/modules/audio_processing/test/test_utils.h" -#include "webrtc/modules/interface/module_common_types.h" +#include "webrtc/modules/include/module_common_types.h" using ::testing::Invoke; using ::testing::Return; diff --git a/webrtc/modules/audio_processing/audio_processing_performance_unittest.cc b/webrtc/modules/audio_processing/audio_processing_performance_unittest.cc new file mode 100644 index 0000000000..0c8c060ea3 --- /dev/null +++ b/webrtc/modules/audio_processing/audio_processing_performance_unittest.cc @@ -0,0 +1,724 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "webrtc/modules/audio_processing/audio_processing_impl.h" + +#include <math.h> + +#include <algorithm> +#include <vector> + +#include "testing/gtest/include/gtest/gtest.h" +#include "webrtc/base/array_view.h" +#include "webrtc/base/criticalsection.h" +#include "webrtc/base/platform_thread.h" +#include "webrtc/base/random.h" +#include "webrtc/base/safe_conversions.h" +#include "webrtc/config.h" +#include "webrtc/modules/audio_processing/test/test_utils.h" +#include "webrtc/modules/include/module_common_types.h" +#include "webrtc/system_wrappers/include/clock.h" +#include "webrtc/system_wrappers/include/event_wrapper.h" +#include "webrtc/system_wrappers/include/sleep.h" +#include "webrtc/test/testsupport/perf_test.h" + +namespace webrtc { + +namespace { + +static const bool kPrintAllDurations = false; + +class CallSimulator; + +// Type of the render thread APM API call to use in the test. +enum class ProcessorType { kRender, kCapture }; + +// Variant of APM processing settings to use in the test. +enum class SettingsType { + kDefaultApmDesktop, + kDefaultApmMobile, + kDefaultApmDesktopAndBeamformer, + kDefaultApmDesktopAndIntelligibilityEnhancer, + kAllSubmodulesTurnedOff, + kDefaultDesktopApmWithoutDelayAgnostic, + kDefaultDesktopApmWithoutExtendedFilter +}; + +// Variables related to the audio data and formats. +struct AudioFrameData { + explicit AudioFrameData(size_t max_frame_size) { + // Set up the two-dimensional arrays needed for the APM API calls. 
+ input_framechannels.resize(2 * max_frame_size); + input_frame.resize(2); + input_frame[0] = &input_framechannels[0]; + input_frame[1] = &input_framechannels[max_frame_size]; + + output_frame_channels.resize(2 * max_frame_size); + output_frame.resize(2); + output_frame[0] = &output_frame_channels[0]; + output_frame[1] = &output_frame_channels[max_frame_size]; + } + + std::vector<float> output_frame_channels; + std::vector<float*> output_frame; + std::vector<float> input_framechannels; + std::vector<float*> input_frame; + StreamConfig input_stream_config; + StreamConfig output_stream_config; +}; + +// The configuration for the test. +struct SimulationConfig { + SimulationConfig(int sample_rate_hz, SettingsType simulation_settings) + : sample_rate_hz(sample_rate_hz), + simulation_settings(simulation_settings) {} + + static std::vector<SimulationConfig> GenerateSimulationConfigs() { + std::vector<SimulationConfig> simulation_configs; +#ifndef WEBRTC_ANDROID + const SettingsType desktop_settings[] = { + SettingsType::kDefaultApmDesktop, SettingsType::kAllSubmodulesTurnedOff, + SettingsType::kDefaultDesktopApmWithoutDelayAgnostic, + SettingsType::kDefaultDesktopApmWithoutExtendedFilter}; + + const int desktop_sample_rates[] = {8000, 16000, 32000, 48000}; + + for (auto sample_rate : desktop_sample_rates) { + for (auto settings : desktop_settings) { + simulation_configs.push_back(SimulationConfig(sample_rate, settings)); + } + } + + const SettingsType intelligibility_enhancer_settings[] = { + SettingsType::kDefaultApmDesktopAndIntelligibilityEnhancer}; + + const int intelligibility_enhancer_sample_rates[] = {8000, 16000, 32000, + 48000}; + + for (auto sample_rate : intelligibility_enhancer_sample_rates) { + for (auto settings : intelligibility_enhancer_settings) { + simulation_configs.push_back(SimulationConfig(sample_rate, settings)); + } + } + + const SettingsType beamformer_settings[] = { + SettingsType::kDefaultApmDesktopAndBeamformer}; + + const int 
beamformer_sample_rates[] = {8000, 16000, 32000, 48000}; + + for (auto sample_rate : beamformer_sample_rates) { + for (auto settings : beamformer_settings) { + simulation_configs.push_back(SimulationConfig(sample_rate, settings)); + } + } +#endif + + const SettingsType mobile_settings[] = {SettingsType::kDefaultApmMobile}; + + const int mobile_sample_rates[] = {8000, 16000}; + + for (auto sample_rate : mobile_sample_rates) { + for (auto settings : mobile_settings) { + simulation_configs.push_back(SimulationConfig(sample_rate, settings)); + } + } + + return simulation_configs; + } + + std::string SettingsDescription() const { + std::string description; + switch (simulation_settings) { + case SettingsType::kDefaultApmMobile: + description = "DefaultApmMobile"; + break; + case SettingsType::kDefaultApmDesktop: + description = "DefaultApmDesktop"; + break; + case SettingsType::kDefaultApmDesktopAndBeamformer: + description = "DefaultApmDesktopAndBeamformer"; + break; + case SettingsType::kDefaultApmDesktopAndIntelligibilityEnhancer: + description = "DefaultApmDesktopAndIntelligibilityEnhancer"; + break; + case SettingsType::kAllSubmodulesTurnedOff: + description = "AllSubmodulesOff"; + break; + case SettingsType::kDefaultDesktopApmWithoutDelayAgnostic: + description = "DefaultDesktopApmWithoutDelayAgnostic"; + break; + case SettingsType::kDefaultDesktopApmWithoutExtendedFilter: + description = "DefaultDesktopApmWithoutExtendedFilter"; + break; + } + return description; + } + + int sample_rate_hz = 16000; + SettingsType simulation_settings = SettingsType::kDefaultApmDesktop; +}; + +// Handler for the frame counters. 
+class FrameCounters { + public: + void IncreaseRenderCounter() { + rtc::CritScope cs(&crit_); + render_count_++; + } + + void IncreaseCaptureCounter() { + rtc::CritScope cs(&crit_); + capture_count_++; + } + + int GetCaptureCounter() const { + rtc::CritScope cs(&crit_); + return capture_count_; + } + + int GetRenderCounter() const { + rtc::CritScope cs(&crit_); + return render_count_; + } + + int CaptureMinusRenderCounters() const { + rtc::CritScope cs(&crit_); + return capture_count_ - render_count_; + } + + int RenderMinusCaptureCounters() const { + return -CaptureMinusRenderCounters(); + } + + bool BothCountersExceedeThreshold(int threshold) const { + rtc::CritScope cs(&crit_); + return (render_count_ > threshold && capture_count_ > threshold); + } + + private: + mutable rtc::CriticalSection crit_; + int render_count_ GUARDED_BY(crit_) = 0; + int capture_count_ GUARDED_BY(crit_) = 0; +}; + +// Class that protects a flag using a lock. +class LockedFlag { + public: + bool get_flag() const { + rtc::CritScope cs(&crit_); + return flag_; + } + + void set_flag() { + rtc::CritScope cs(&crit_); + flag_ = true; + } + + private: + mutable rtc::CriticalSection crit_; + bool flag_ GUARDED_BY(crit_) = false; +}; + +// Parent class for the thread processors. 
+class TimedThreadApiProcessor { + public: + TimedThreadApiProcessor(ProcessorType processor_type, + Random* rand_gen, + FrameCounters* shared_counters_state, + LockedFlag* capture_call_checker, + CallSimulator* test_framework, + const SimulationConfig* simulation_config, + AudioProcessing* apm, + int num_durations_to_store, + float input_level, + int num_channels) + : rand_gen_(rand_gen), + frame_counters_(shared_counters_state), + capture_call_checker_(capture_call_checker), + test_(test_framework), + simulation_config_(simulation_config), + apm_(apm), + frame_data_(kMaxFrameSize), + clock_(webrtc::Clock::GetRealTimeClock()), + num_durations_to_store_(num_durations_to_store), + input_level_(input_level), + processor_type_(processor_type), + num_channels_(num_channels) { + api_call_durations_.reserve(num_durations_to_store_); + } + + // Implements the callback functionality for the threads. + bool Process(); + + // Method for printing out the simulation statistics. + void print_processor_statistics(std::string processor_name) const { + const std::string modifier = "_api_call_duration"; + + // Lambda function for creating a test printout string. 
+ auto create_mean_and_std_string = [](int64_t average, + int64_t standard_dev) { + std::string s = std::to_string(average); + s += ", "; + s += std::to_string(standard_dev); + return s; + }; + + const std::string sample_rate_name = + "_" + std::to_string(simulation_config_->sample_rate_hz) + "Hz"; + + webrtc::test::PrintResultMeanAndError( + "apm_timing", sample_rate_name, processor_name, + create_mean_and_std_string(GetDurationAverage(), + GetDurationStandardDeviation()), + "us", false); + + if (kPrintAllDurations) { + std::string value_string = ""; + for (int64_t duration : api_call_durations_) { + value_string += std::to_string(duration) + ","; + } + webrtc::test::PrintResultList("apm_call_durations", sample_rate_name, + processor_name, value_string, "us", false); + } + } + + void AddDuration(int64_t duration) { + if (api_call_durations_.size() < num_durations_to_store_) { + api_call_durations_.push_back(duration); + } + } + + private: + static const int kMaxCallDifference = 10; + static const int kMaxFrameSize = 480; + static const int kNumInitializationFrames = 5; + + int64_t GetDurationStandardDeviation() const { + double variance = 0; + const int64_t average_duration = GetDurationAverage(); + for (size_t k = kNumInitializationFrames; k < api_call_durations_.size(); + k++) { + int64_t tmp = api_call_durations_[k] - average_duration; + variance += static_cast<double>(tmp * tmp); + } + const int denominator = rtc::checked_cast<int>(api_call_durations_.size()) - + kNumInitializationFrames; + return (denominator > 0 + ? rtc::checked_cast<int64_t>(sqrt(variance / denominator)) + : -1); + } + + int64_t GetDurationAverage() const { + int64_t average_duration = 0; + for (size_t k = kNumInitializationFrames; k < api_call_durations_.size(); + k++) { + average_duration += api_call_durations_[k]; + } + const int denominator = rtc::checked_cast<int>(api_call_durations_.size()) - + kNumInitializationFrames; + return (denominator > 0 ? 
average_duration / denominator : -1); + } + + int ProcessCapture() { + // Set the stream delay. + apm_->set_stream_delay_ms(30); + + // Call and time the specified capture side API processing method. + const int64_t start_time = clock_->TimeInMicroseconds(); + const int result = apm_->ProcessStream( + &frame_data_.input_frame[0], frame_data_.input_stream_config, + frame_data_.output_stream_config, &frame_data_.output_frame[0]); + const int64_t end_time = clock_->TimeInMicroseconds(); + + frame_counters_->IncreaseCaptureCounter(); + + AddDuration(end_time - start_time); + + if (first_process_call_) { + // Flag that the capture side has been called at least once + // (needed to ensure that a capture call has been done + // before the first render call is performed (implicitly + // required by the APM API). + capture_call_checker_->set_flag(); + first_process_call_ = false; + } + return result; + } + + bool ReadyToProcessCapture() { + return (frame_counters_->CaptureMinusRenderCounters() <= + kMaxCallDifference); + } + + int ProcessRender() { + // Call and time the specified render side API processing method. + const int64_t start_time = clock_->TimeInMicroseconds(); + const int result = apm_->ProcessReverseStream( + &frame_data_.input_frame[0], frame_data_.input_stream_config, + frame_data_.output_stream_config, &frame_data_.output_frame[0]); + const int64_t end_time = clock_->TimeInMicroseconds(); + frame_counters_->IncreaseRenderCounter(); + + AddDuration(end_time - start_time); + + return result; + } + + bool ReadyToProcessRender() { + // Do not process until at least one capture call has been done. + // (implicitly required by the APM API). + if (first_process_call_ && !capture_call_checker_->get_flag()) { + return false; + } + + // Ensure that the number of render and capture calls do not differ too + // much. 
+ if (frame_counters_->RenderMinusCaptureCounters() > kMaxCallDifference) { + return false; + } + + first_process_call_ = false; + return true; + } + + void PrepareFrame() { + // Lambda function for populating a float multichannel audio frame + // with random data. + auto populate_audio_frame = [](float amplitude, size_t num_channels, + size_t samples_per_channel, Random* rand_gen, + float** frame) { + for (size_t ch = 0; ch < num_channels; ch++) { + for (size_t k = 0; k < samples_per_channel; k++) { + // Store random float number with a value between +-amplitude. + frame[ch][k] = amplitude * (2 * rand_gen->Rand<float>() - 1); + } + } + }; + + // Prepare the audio input data and metadata. + frame_data_.input_stream_config.set_sample_rate_hz( + simulation_config_->sample_rate_hz); + frame_data_.input_stream_config.set_num_channels(num_channels_); + frame_data_.input_stream_config.set_has_keyboard(false); + populate_audio_frame(input_level_, num_channels_, + (simulation_config_->sample_rate_hz * + AudioProcessing::kChunkSizeMs / 1000), + rand_gen_, &frame_data_.input_frame[0]); + + // Prepare the float audio output data and metadata. + frame_data_.output_stream_config.set_sample_rate_hz( + simulation_config_->sample_rate_hz); + frame_data_.output_stream_config.set_num_channels(1); + frame_data_.output_stream_config.set_has_keyboard(false); + } + + bool ReadyToProcess() { + switch (processor_type_) { + case ProcessorType::kRender: + return ReadyToProcessRender(); + break; + case ProcessorType::kCapture: + return ReadyToProcessCapture(); + break; + } + + // Should not be reached, but the return statement is needed for the code to + // build successfully on Android. 
+ RTC_NOTREACHED(); + return false; + } + + Random* rand_gen_ = nullptr; + FrameCounters* frame_counters_ = nullptr; + LockedFlag* capture_call_checker_ = nullptr; + CallSimulator* test_ = nullptr; + const SimulationConfig* const simulation_config_ = nullptr; + AudioProcessing* apm_ = nullptr; + AudioFrameData frame_data_; + webrtc::Clock* clock_; + const size_t num_durations_to_store_; + std::vector<int64_t> api_call_durations_; + const float input_level_; + bool first_process_call_ = true; + const ProcessorType processor_type_; + const int num_channels_ = 1; +}; + +// Class for managing the test simulation. +class CallSimulator : public ::testing::TestWithParam<SimulationConfig> { + public: + CallSimulator() + : test_complete_(EventWrapper::Create()), + render_thread_( + new rtc::PlatformThread(RenderProcessorThreadFunc, this, "render")), + capture_thread_(new rtc::PlatformThread(CaptureProcessorThreadFunc, + this, + "capture")), + rand_gen_(42U), + simulation_config_(static_cast<SimulationConfig>(GetParam())) {} + + // Run the call simulation with a timeout. + EventTypeWrapper Run() { + StartThreads(); + + EventTypeWrapper result = test_complete_->Wait(kTestTimeout); + + StopThreads(); + + render_thread_state_->print_processor_statistics( + simulation_config_.SettingsDescription() + "_render"); + capture_thread_state_->print_processor_statistics( + simulation_config_.SettingsDescription() + "_capture"); + + return result; + } + + // Tests whether all the required render and capture side calls have been + // done. + bool MaybeEndTest() { + if (frame_counters_.BothCountersExceedeThreshold(kMinNumFramesToProcess)) { + test_complete_->Set(); + return true; + } + return false; + } + + private: + static const float kCaptureInputFloatLevel; + static const float kRenderInputFloatLevel; + static const int kMinNumFramesToProcess = 150; + static const int32_t kTestTimeout = 3 * 10 * kMinNumFramesToProcess; + + // ::testing::TestWithParam<> implementation. 
+ void TearDown() override { StopThreads(); } + + // Stop all running threads. + void StopThreads() { + render_thread_->Stop(); + capture_thread_->Stop(); + } + + // Simulator and APM setup. + void SetUp() override { + // Lambda function for setting the default APM runtime settings for desktop. + auto set_default_desktop_apm_runtime_settings = [](AudioProcessing* apm) { + ASSERT_EQ(apm->kNoError, apm->level_estimator()->Enable(true)); + ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); + ASSERT_EQ(apm->kNoError, + apm->gain_control()->set_mode(GainControl::kAdaptiveDigital)); + ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); + ASSERT_EQ(apm->kNoError, apm->noise_suppression()->Enable(true)); + ASSERT_EQ(apm->kNoError, apm->voice_detection()->Enable(true)); + ASSERT_EQ(apm->kNoError, apm->echo_control_mobile()->Enable(false)); + ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(true)); + ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->enable_metrics(true)); + ASSERT_EQ(apm->kNoError, + apm->echo_cancellation()->enable_delay_logging(true)); + }; + + // Lambda function for setting the default APM runtime settings for mobile. + auto set_default_mobile_apm_runtime_settings = [](AudioProcessing* apm) { + ASSERT_EQ(apm->kNoError, apm->level_estimator()->Enable(true)); + ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); + ASSERT_EQ(apm->kNoError, + apm->gain_control()->set_mode(GainControl::kAdaptiveDigital)); + ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); + ASSERT_EQ(apm->kNoError, apm->noise_suppression()->Enable(true)); + ASSERT_EQ(apm->kNoError, apm->voice_detection()->Enable(true)); + ASSERT_EQ(apm->kNoError, apm->echo_control_mobile()->Enable(true)); + ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(false)); + }; + + // Lambda function for turning off all of the APM runtime settings + // submodules. 
+ auto turn_off_default_apm_runtime_settings = [](AudioProcessing* apm) { + ASSERT_EQ(apm->kNoError, apm->level_estimator()->Enable(false)); + ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(false)); + ASSERT_EQ(apm->kNoError, + apm->gain_control()->set_mode(GainControl::kAdaptiveDigital)); + ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(false)); + ASSERT_EQ(apm->kNoError, apm->noise_suppression()->Enable(false)); + ASSERT_EQ(apm->kNoError, apm->voice_detection()->Enable(false)); + ASSERT_EQ(apm->kNoError, apm->echo_control_mobile()->Enable(false)); + ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(false)); + ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->enable_metrics(false)); + ASSERT_EQ(apm->kNoError, + apm->echo_cancellation()->enable_delay_logging(false)); + }; + + // Lambda function for adding default desktop APM settings to a config. + auto add_default_desktop_config = [](Config* config) { + config->Set<ExtendedFilter>(new ExtendedFilter(true)); + config->Set<DelayAgnostic>(new DelayAgnostic(true)); + }; + + // Lambda function for adding beamformer settings to a config. 
+ auto add_beamformer_config = [](Config* config) { + const size_t num_mics = 2; + const std::vector<Point> array_geometry = + ParseArrayGeometry("0 0 0 0.05 0 0", num_mics); + RTC_CHECK_EQ(array_geometry.size(), num_mics); + + config->Set<Beamforming>( + new Beamforming(true, array_geometry, + SphericalPointf(DegreesToRadians(90), 0.f, 1.f))); + }; + + int num_capture_channels = 1; + switch (simulation_config_.simulation_settings) { + case SettingsType::kDefaultApmMobile: { + apm_.reset(AudioProcessingImpl::Create()); + ASSERT_TRUE(!!apm_); + set_default_mobile_apm_runtime_settings(apm_.get()); + break; + } + case SettingsType::kDefaultApmDesktop: { + Config config; + add_default_desktop_config(&config); + apm_.reset(AudioProcessingImpl::Create(config)); + ASSERT_TRUE(!!apm_); + set_default_desktop_apm_runtime_settings(apm_.get()); + apm_->SetExtraOptions(config); + break; + } + case SettingsType::kDefaultApmDesktopAndBeamformer: { + Config config; + add_beamformer_config(&config); + add_default_desktop_config(&config); + apm_.reset(AudioProcessingImpl::Create(config)); + ASSERT_TRUE(!!apm_); + set_default_desktop_apm_runtime_settings(apm_.get()); + apm_->SetExtraOptions(config); + num_capture_channels = 2; + break; + } + case SettingsType::kDefaultApmDesktopAndIntelligibilityEnhancer: { + Config config; + config.Set<Intelligibility>(new Intelligibility(true)); + add_default_desktop_config(&config); + apm_.reset(AudioProcessingImpl::Create(config)); + ASSERT_TRUE(!!apm_); + set_default_desktop_apm_runtime_settings(apm_.get()); + apm_->SetExtraOptions(config); + break; + } + case SettingsType::kAllSubmodulesTurnedOff: { + apm_.reset(AudioProcessingImpl::Create()); + ASSERT_TRUE(!!apm_); + turn_off_default_apm_runtime_settings(apm_.get()); + break; + } + case SettingsType::kDefaultDesktopApmWithoutDelayAgnostic: { + Config config; + config.Set<ExtendedFilter>(new ExtendedFilter(true)); + config.Set<DelayAgnostic>(new DelayAgnostic(false)); + 
apm_.reset(AudioProcessingImpl::Create(config)); + ASSERT_TRUE(!!apm_); + set_default_desktop_apm_runtime_settings(apm_.get()); + apm_->SetExtraOptions(config); + break; + } + case SettingsType::kDefaultDesktopApmWithoutExtendedFilter: { + Config config; + config.Set<ExtendedFilter>(new ExtendedFilter(false)); + config.Set<DelayAgnostic>(new DelayAgnostic(true)); + apm_.reset(AudioProcessingImpl::Create(config)); + ASSERT_TRUE(!!apm_); + set_default_desktop_apm_runtime_settings(apm_.get()); + apm_->SetExtraOptions(config); + break; + } + } + + render_thread_state_.reset(new TimedThreadApiProcessor( + ProcessorType::kRender, &rand_gen_, &frame_counters_, + &capture_call_checker_, this, &simulation_config_, apm_.get(), + kMinNumFramesToProcess, kRenderInputFloatLevel, 1)); + capture_thread_state_.reset(new TimedThreadApiProcessor( + ProcessorType::kCapture, &rand_gen_, &frame_counters_, + &capture_call_checker_, this, &simulation_config_, apm_.get(), + kMinNumFramesToProcess, kCaptureInputFloatLevel, num_capture_channels)); + } + + // Thread callback for the render thread. + static bool RenderProcessorThreadFunc(void* context) { + return reinterpret_cast<CallSimulator*>(context) + ->render_thread_state_->Process(); + } + + // Thread callback for the capture thread. + static bool CaptureProcessorThreadFunc(void* context) { + return reinterpret_cast<CallSimulator*>(context) + ->capture_thread_state_->Process(); + } + + // Start the threads used in the test. + void StartThreads() { + ASSERT_NO_FATAL_FAILURE(render_thread_->Start()); + render_thread_->SetPriority(rtc::kRealtimePriority); + ASSERT_NO_FATAL_FAILURE(capture_thread_->Start()); + capture_thread_->SetPriority(rtc::kRealtimePriority); + } + + // Event handler for the test. + const rtc::scoped_ptr<EventWrapper> test_complete_; + + // Thread related variables. 
+ rtc::scoped_ptr<rtc::PlatformThread> render_thread_; + rtc::scoped_ptr<rtc::PlatformThread> capture_thread_; + Random rand_gen_; + + rtc::scoped_ptr<AudioProcessing> apm_; + const SimulationConfig simulation_config_; + FrameCounters frame_counters_; + LockedFlag capture_call_checker_; + rtc::scoped_ptr<TimedThreadApiProcessor> render_thread_state_; + rtc::scoped_ptr<TimedThreadApiProcessor> capture_thread_state_; +}; + +// Implements the callback functionality for the threads. +bool TimedThreadApiProcessor::Process() { + PrepareFrame(); + + // Wait in a spinlock manner until it is ok to start processing. + // Note that SleepMs is not applicable since it only allows sleeping + // on a millisecond basis which is too long. + while (!ReadyToProcess()) { + } + + int result = AudioProcessing::kNoError; + switch (processor_type_) { + case ProcessorType::kRender: + result = ProcessRender(); + break; + case ProcessorType::kCapture: + result = ProcessCapture(); + break; + } + + EXPECT_EQ(result, AudioProcessing::kNoError); + + return !test_->MaybeEndTest(); +} + +const float CallSimulator::kRenderInputFloatLevel = 0.5f; +const float CallSimulator::kCaptureInputFloatLevel = 0.03125f; +} // anonymous namespace + +TEST_P(CallSimulator, ApiCallDurationTest) { + // Run test and verify that it did not time out. 
+ EXPECT_EQ(kEventSignaled, Run()); +} + +INSTANTIATE_TEST_CASE_P( + AudioProcessingPerformanceTest, + CallSimulator, + ::testing::ValuesIn(SimulationConfig::GenerateSimulationConfigs())); + +} // namespace webrtc diff --git a/webrtc/modules/audio_processing/audio_processing_tests.gypi b/webrtc/modules/audio_processing/audio_processing_tests.gypi index 0314c69b04..523602baba 100644 --- a/webrtc/modules/audio_processing/audio_processing_tests.gypi +++ b/webrtc/modules/audio_processing/audio_processing_tests.gypi @@ -128,7 +128,11 @@ '<(webrtc_root)/test/test.gyp:test_support', '<(DEPTH)/third_party/gflags/gflags.gyp:gflags', ], - 'sources': [ 'test/audioproc_float.cc', ], + 'sources': [ + 'test/audio_file_processor.cc', + 'test/audio_file_processor.h', + 'test/audioproc_float.cc', + ], }, { 'target_name': 'unpack_aecdump', diff --git a/webrtc/modules/audio_processing/beamformer/array_util.cc b/webrtc/modules/audio_processing/beamformer/array_util.cc index 8aaeee9f59..6b1c474269 100644 --- a/webrtc/modules/audio_processing/beamformer/array_util.cc +++ b/webrtc/modules/audio_processing/beamformer/array_util.cc @@ -56,7 +56,7 @@ bool ArePerpendicular(const Point& a, const Point& b) { return std::abs(DotProduct(a, b)) < kMaxDotProduct; } -rtc::Maybe<Point> GetDirectionIfLinear( +rtc::Optional<Point> GetDirectionIfLinear( const std::vector<Point>& array_geometry) { RTC_DCHECK_GT(array_geometry.size(), 1u); const Point first_pair_direction = @@ -65,13 +65,14 @@ rtc::Maybe<Point> GetDirectionIfLinear( const Point pair_direction = PairDirection(array_geometry[i - 1], array_geometry[i]); if (!AreParallel(first_pair_direction, pair_direction)) { - return rtc::Maybe<Point>(); + return rtc::Optional<Point>(); } } - return first_pair_direction; + return rtc::Optional<Point>(first_pair_direction); } -rtc::Maybe<Point> GetNormalIfPlanar(const std::vector<Point>& array_geometry) { +rtc::Optional<Point> GetNormalIfPlanar( + const std::vector<Point>& array_geometry) { 
RTC_DCHECK_GT(array_geometry.size(), 1u); const Point first_pair_direction = PairDirection(array_geometry[0], array_geometry[1]); @@ -85,30 +86,30 @@ rtc::Maybe<Point> GetNormalIfPlanar(const std::vector<Point>& array_geometry) { } } if (is_linear) { - return rtc::Maybe<Point>(); + return rtc::Optional<Point>(); } const Point normal_direction = CrossProduct(first_pair_direction, pair_direction); for (; i < array_geometry.size(); ++i) { pair_direction = PairDirection(array_geometry[i - 1], array_geometry[i]); if (!ArePerpendicular(normal_direction, pair_direction)) { - return rtc::Maybe<Point>(); + return rtc::Optional<Point>(); } } - return normal_direction; + return rtc::Optional<Point>(normal_direction); } -rtc::Maybe<Point> GetArrayNormalIfExists( +rtc::Optional<Point> GetArrayNormalIfExists( const std::vector<Point>& array_geometry) { - const rtc::Maybe<Point> direction = GetDirectionIfLinear(array_geometry); + const rtc::Optional<Point> direction = GetDirectionIfLinear(array_geometry); if (direction) { - return Point(direction->y(), -direction->x(), 0.f); + return rtc::Optional<Point>(Point(direction->y(), -direction->x(), 0.f)); } - const rtc::Maybe<Point> normal = GetNormalIfPlanar(array_geometry); + const rtc::Optional<Point> normal = GetNormalIfPlanar(array_geometry); if (normal && normal->z() < kMaxDotProduct) { return normal; } - return rtc::Maybe<Point>(); + return rtc::Optional<Point>(); } Point AzimuthToPoint(float azimuth) { diff --git a/webrtc/modules/audio_processing/beamformer/array_util.h b/webrtc/modules/audio_processing/beamformer/array_util.h index 7fff9735a1..f86ad5dee6 100644 --- a/webrtc/modules/audio_processing/beamformer/array_util.h +++ b/webrtc/modules/audio_processing/beamformer/array_util.h @@ -14,7 +14,7 @@ #include <cmath> #include <vector> -#include "webrtc/base/maybe.h" +#include "webrtc/base/optional.h" namespace webrtc { @@ -59,15 +59,16 @@ float GetMinimumSpacing(const std::vector<Point>& array_geometry); // If the given array 
geometry is linear it returns the direction without // normalizing. -rtc::Maybe<Point> GetDirectionIfLinear( +rtc::Optional<Point> GetDirectionIfLinear( const std::vector<Point>& array_geometry); // If the given array geometry is planar it returns the normal without // normalizing. -rtc::Maybe<Point> GetNormalIfPlanar(const std::vector<Point>& array_geometry); +rtc::Optional<Point> GetNormalIfPlanar( + const std::vector<Point>& array_geometry); // Returns the normal of an array if it has one and it is in the xy-plane. -rtc::Maybe<Point> GetArrayNormalIfExists( +rtc::Optional<Point> GetArrayNormalIfExists( const std::vector<Point>& array_geometry); // The resulting Point will be in the xy-plane. diff --git a/webrtc/modules/audio_processing/beamformer/complex_matrix.h b/webrtc/modules/audio_processing/beamformer/complex_matrix.h index bfa3563b89..707c51564b 100644 --- a/webrtc/modules/audio_processing/beamformer/complex_matrix.h +++ b/webrtc/modules/audio_processing/beamformer/complex_matrix.h @@ -27,10 +27,10 @@ class ComplexMatrix : public Matrix<complex<T> > { public: ComplexMatrix() : Matrix<complex<T> >() {} - ComplexMatrix(int num_rows, int num_columns) + ComplexMatrix(size_t num_rows, size_t num_columns) : Matrix<complex<T> >(num_rows, num_columns) {} - ComplexMatrix(const complex<T>* data, int num_rows, int num_columns) + ComplexMatrix(const complex<T>* data, size_t num_rows, size_t num_columns) : Matrix<complex<T> >(data, num_rows, num_columns) {} // Complex Matrix operations. 
@@ -51,7 +51,7 @@ class ComplexMatrix : public Matrix<complex<T> > { ComplexMatrix& ConjugateTranspose() { this->CopyDataToScratch(); - int num_rows = this->num_rows(); + size_t num_rows = this->num_rows(); this->SetNumRows(this->num_columns()); this->SetNumColumns(num_rows); this->Resize(); @@ -82,8 +82,8 @@ class ComplexMatrix : public Matrix<complex<T> > { private: ComplexMatrix& ConjugateTranspose(const complex<T>* const* src) { complex<T>* const* elements = this->elements(); - for (int i = 0; i < this->num_rows(); ++i) { - for (int j = 0; j < this->num_columns(); ++j) { + for (size_t i = 0; i < this->num_rows(); ++i) { + for (size_t j = 0; j < this->num_columns(); ++j) { elements[i][j] = conj(src[j][i]); } } diff --git a/webrtc/modules/audio_processing/beamformer/covariance_matrix_generator.cc b/webrtc/modules/audio_processing/beamformer/covariance_matrix_generator.cc index d0728325fc..78f4df5ca9 100644 --- a/webrtc/modules/audio_processing/beamformer/covariance_matrix_generator.cc +++ b/webrtc/modules/audio_processing/beamformer/covariance_matrix_generator.cc @@ -27,7 +27,7 @@ float BesselJ0(float x) { // Calculates the Euclidean norm for a row vector. 
float Norm(const ComplexMatrix<float>& x) { - RTC_CHECK_EQ(1, x.num_rows()); + RTC_CHECK_EQ(1u, x.num_rows()); const size_t length = x.num_columns(); const complex<float>* elems = x.elements()[0]; float result = 0.f; @@ -43,8 +43,8 @@ void CovarianceMatrixGenerator::UniformCovarianceMatrix( float wave_number, const std::vector<Point>& geometry, ComplexMatrix<float>* mat) { - RTC_CHECK_EQ(static_cast<int>(geometry.size()), mat->num_rows()); - RTC_CHECK_EQ(static_cast<int>(geometry.size()), mat->num_columns()); + RTC_CHECK_EQ(geometry.size(), mat->num_rows()); + RTC_CHECK_EQ(geometry.size(), mat->num_columns()); complex<float>* const* mat_els = mat->elements(); for (size_t i = 0; i < geometry.size(); ++i) { @@ -68,8 +68,8 @@ void CovarianceMatrixGenerator::AngledCovarianceMatrix( int sample_rate, const std::vector<Point>& geometry, ComplexMatrix<float>* mat) { - RTC_CHECK_EQ(static_cast<int>(geometry.size()), mat->num_rows()); - RTC_CHECK_EQ(static_cast<int>(geometry.size()), mat->num_columns()); + RTC_CHECK_EQ(geometry.size(), mat->num_rows()); + RTC_CHECK_EQ(geometry.size(), mat->num_columns()); ComplexMatrix<float> interf_cov_vector(1, geometry.size()); ComplexMatrix<float> interf_cov_vector_transposed(geometry.size(), 1); @@ -94,8 +94,8 @@ void CovarianceMatrixGenerator::PhaseAlignmentMasks( const std::vector<Point>& geometry, float angle, ComplexMatrix<float>* mat) { - RTC_CHECK_EQ(1, mat->num_rows()); - RTC_CHECK_EQ(static_cast<int>(geometry.size()), mat->num_columns()); + RTC_CHECK_EQ(1u, mat->num_rows()); + RTC_CHECK_EQ(geometry.size(), mat->num_columns()); float freq_in_hertz = (static_cast<float>(frequency_bin) / fft_size) * sample_rate; diff --git a/webrtc/modules/audio_processing/beamformer/matrix.h b/webrtc/modules/audio_processing/beamformer/matrix.h index 162aef1dac..51c1cece97 100644 --- a/webrtc/modules/audio_processing/beamformer/matrix.h +++ b/webrtc/modules/audio_processing/beamformer/matrix.h @@ -67,7 +67,7 @@ class Matrix { Matrix() : 
num_rows_(0), num_columns_(0) {} // Allocates space for the elements and initializes all values to zero. - Matrix(int num_rows, int num_columns) + Matrix(size_t num_rows, size_t num_columns) : num_rows_(num_rows), num_columns_(num_columns) { Resize(); scratch_data_.resize(num_rows_ * num_columns_); @@ -75,7 +75,7 @@ class Matrix { } // Copies |data| into the new Matrix. - Matrix(const T* data, int num_rows, int num_columns) + Matrix(const T* data, size_t num_rows, size_t num_columns) : num_rows_(0), num_columns_(0) { CopyFrom(data, num_rows, num_columns); scratch_data_.resize(num_rows_ * num_columns_); @@ -90,23 +90,23 @@ class Matrix { } // Copy |data| into the Matrix. The current data is lost. - void CopyFrom(const T* const data, int num_rows, int num_columns) { + void CopyFrom(const T* const data, size_t num_rows, size_t num_columns) { Resize(num_rows, num_columns); memcpy(&data_[0], data, num_rows_ * num_columns_ * sizeof(data_[0])); } Matrix& CopyFromColumn(const T* const* src, size_t column_index, - int num_rows) { + size_t num_rows) { Resize(1, num_rows); - for (int i = 0; i < num_columns_; ++i) { + for (size_t i = 0; i < num_columns_; ++i) { data_[i] = src[i][column_index]; } return *this; } - void Resize(int num_rows, int num_columns) { + void Resize(size_t num_rows, size_t num_columns) { if (num_rows != num_rows_ || num_columns != num_columns_) { num_rows_ = num_rows; num_columns_ = num_columns; @@ -115,8 +115,8 @@ class Matrix { } // Accessors and mutators. 
- int num_rows() const { return num_rows_; } - int num_columns() const { return num_columns_; } + size_t num_rows() const { return num_rows_; } + size_t num_columns() const { return num_columns_; } T* const* elements() { return &elements_[0]; } const T* const* elements() const { return &elements_[0]; } @@ -124,7 +124,7 @@ class Matrix { RTC_CHECK_EQ(num_rows_, num_columns_); T trace = 0; - for (int i = 0; i < num_rows_; ++i) { + for (size_t i = 0; i < num_rows_; ++i) { trace += elements_[i][i]; } return trace; @@ -282,8 +282,8 @@ class Matrix { std::ostringstream ss; ss << std::endl << "Matrix" << std::endl; - for (int i = 0; i < num_rows_; ++i) { - for (int j = 0; j < num_columns_; ++j) { + for (size_t i = 0; i < num_rows_; ++i) { + for (size_t j = 0; j < num_columns_; ++j) { ss << elements_[i][j] << " "; } ss << std::endl; @@ -294,8 +294,8 @@ class Matrix { } protected: - void SetNumRows(const int num_rows) { num_rows_ = num_rows; } - void SetNumColumns(const int num_columns) { num_columns_ = num_columns; } + void SetNumRows(const size_t num_rows) { num_rows_ = num_rows; } + void SetNumColumns(const size_t num_columns) { num_columns_ = num_columns; } T* data() { return &data_[0]; } const T* data() const { return &data_[0]; } const T* const* scratch_elements() const { return &scratch_elements_[0]; } @@ -307,7 +307,7 @@ class Matrix { data_.resize(size); elements_.resize(num_rows_); - for (int i = 0; i < num_rows_; ++i) { + for (size_t i = 0; i < num_rows_; ++i) { elements_[i] = &data_[i * num_columns_]; } } @@ -317,14 +317,14 @@ class Matrix { scratch_data_ = data_; scratch_elements_.resize(num_rows_); - for (int i = 0; i < num_rows_; ++i) { + for (size_t i = 0; i < num_rows_; ++i) { scratch_elements_[i] = &scratch_data_[i * num_columns_]; } } private: - int num_rows_; - int num_columns_; + size_t num_rows_; + size_t num_columns_; std::vector<T> data_; std::vector<T*> elements_; @@ -336,8 +336,8 @@ class Matrix { // Helpers for Transpose and Multiply operations 
that unify in-place and // out-of-place solutions. Matrix& Transpose(const T* const* src) { - for (int i = 0; i < num_rows_; ++i) { - for (int j = 0; j < num_columns_; ++j) { + for (size_t i = 0; i < num_rows_; ++i) { + for (size_t j = 0; j < num_columns_; ++j) { elements_[i][j] = src[j][i]; } } @@ -345,11 +345,13 @@ class Matrix { return *this; } - Matrix& Multiply(const T* const* lhs, int num_rows_rhs, const T* const* rhs) { - for (int row = 0; row < num_rows_; ++row) { - for (int col = 0; col < num_columns_; ++col) { + Matrix& Multiply(const T* const* lhs, + size_t num_rows_rhs, + const T* const* rhs) { + for (size_t row = 0; row < num_rows_; ++row) { + for (size_t col = 0; col < num_columns_; ++col) { T cur_element = 0; - for (int i = 0; i < num_rows_rhs; ++i) { + for (size_t i = 0; i < num_rows_rhs; ++i) { cur_element += lhs[row][i] * rhs[i][col]; } diff --git a/webrtc/modules/audio_processing/beamformer/matrix_test_helpers.h b/webrtc/modules/audio_processing/beamformer/matrix_test_helpers.h index 7c58670068..9891a8220c 100644 --- a/webrtc/modules/audio_processing/beamformer/matrix_test_helpers.h +++ b/webrtc/modules/audio_processing/beamformer/matrix_test_helpers.h @@ -34,8 +34,8 @@ class MatrixTestHelpers { const T* const* expected_elements = expected.elements(); const T* const* actual_elements = actual.elements(); - for (int i = 0; i < expected.num_rows(); ++i) { - for (int j = 0; j < expected.num_columns(); ++j) { + for (size_t i = 0; i < expected.num_rows(); ++i) { + for (size_t j = 0; j < expected.num_columns(); ++j) { EXPECT_EQ(expected_elements[i][j], actual_elements[i][j]); } } @@ -48,8 +48,8 @@ class MatrixTestHelpers { const float* const* expected_elements = expected.elements(); const float* const* actual_elements = actual.elements(); - for (int i = 0; i < expected.num_rows(); ++i) { - for (int j = 0; j < expected.num_columns(); ++j) { + for (size_t i = 0; i < expected.num_rows(); ++i) { + for (size_t j = 0; j < expected.num_columns(); ++j) { 
EXPECT_NEAR(expected_elements[i][j], actual_elements[i][j], kTolerance); } } @@ -63,8 +63,8 @@ class MatrixTestHelpers { const complex<float>* const* expected_elements = expected.elements(); const complex<float>* const* actual_elements = actual.elements(); - for (int i = 0; i < expected.num_rows(); ++i) { - for (int j = 0; j < expected.num_columns(); ++j) { + for (size_t i = 0; i < expected.num_rows(); ++i) { + for (size_t j = 0; j < expected.num_columns(); ++j) { EXPECT_NEAR(expected_elements[i][j].real(), actual_elements[i][j].real(), kTolerance); @@ -84,8 +84,8 @@ class MatrixTestHelpers { const complex<float>* const* expected_elements = expected.elements(); const complex<float>* const* actual_elements = actual.elements(); - for (int i = 0; i < expected.num_rows(); ++i) { - for (int j = 0; j < expected.num_columns(); ++j) { + for (size_t i = 0; i < expected.num_rows(); ++i) { + for (size_t j = 0; j < expected.num_columns(); ++j) { EXPECT_NEAR(expected_elements[i][j].real(), actual_elements[i][j].real(), tolerance); diff --git a/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc index 029fa089fc..6ea7234f6f 100644 --- a/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc +++ b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc @@ -79,7 +79,7 @@ const float kCompensationGain = 2.f; // The returned norm is clamped to be non-negative. 
float Norm(const ComplexMatrix<float>& mat, const ComplexMatrix<float>& norm_mat) { - RTC_CHECK_EQ(norm_mat.num_rows(), 1); + RTC_CHECK_EQ(1u, norm_mat.num_rows()); RTC_CHECK_EQ(norm_mat.num_columns(), mat.num_rows()); RTC_CHECK_EQ(norm_mat.num_columns(), mat.num_columns()); @@ -89,8 +89,8 @@ float Norm(const ComplexMatrix<float>& mat, const complex<float>* const* mat_els = mat.elements(); const complex<float>* const* norm_mat_els = norm_mat.elements(); - for (int i = 0; i < norm_mat.num_columns(); ++i) { - for (int j = 0; j < norm_mat.num_columns(); ++j) { + for (size_t i = 0; i < norm_mat.num_columns(); ++i) { + for (size_t j = 0; j < norm_mat.num_columns(); ++j) { first_product += conj(norm_mat_els[0][j]) * mat_els[j][i]; } second_product += first_product * norm_mat_els[0][i]; @@ -102,15 +102,15 @@ float Norm(const ComplexMatrix<float>& mat, // Does conjugate(|lhs|) * |rhs| for row vectors |lhs| and |rhs|. complex<float> ConjugateDotProduct(const ComplexMatrix<float>& lhs, const ComplexMatrix<float>& rhs) { - RTC_CHECK_EQ(lhs.num_rows(), 1); - RTC_CHECK_EQ(rhs.num_rows(), 1); + RTC_CHECK_EQ(1u, lhs.num_rows()); + RTC_CHECK_EQ(1u, rhs.num_rows()); RTC_CHECK_EQ(lhs.num_columns(), rhs.num_columns()); const complex<float>* const* lhs_elements = lhs.elements(); const complex<float>* const* rhs_elements = rhs.elements(); complex<float> result = complex<float>(0.f, 0.f); - for (int i = 0; i < lhs.num_columns(); ++i) { + for (size_t i = 0; i < lhs.num_columns(); ++i) { result += conj(lhs_elements[0][i]) * rhs_elements[0][i]; } @@ -126,8 +126,8 @@ size_t Round(float x) { float SumAbs(const ComplexMatrix<float>& mat) { float sum_abs = 0.f; const complex<float>* const* mat_els = mat.elements(); - for (int i = 0; i < mat.num_rows(); ++i) { - for (int j = 0; j < mat.num_columns(); ++j) { + for (size_t i = 0; i < mat.num_rows(); ++i) { + for (size_t j = 0; j < mat.num_columns(); ++j) { sum_abs += std::abs(mat_els[i][j]); } } @@ -138,8 +138,8 @@ float SumAbs(const 
ComplexMatrix<float>& mat) { float SumSquares(const ComplexMatrix<float>& mat) { float sum_squares = 0.f; const complex<float>* const* mat_els = mat.elements(); - for (int i = 0; i < mat.num_rows(); ++i) { - for (int j = 0; j < mat.num_columns(); ++j) { + for (size_t i = 0; i < mat.num_rows(); ++i) { + for (size_t j = 0; j < mat.num_columns(); ++j) { float abs_value = std::abs(mat_els[i][j]); sum_squares += abs_value * abs_value; } @@ -150,20 +150,20 @@ float SumSquares(const ComplexMatrix<float>& mat) { // Does |out| = |in|.' * conj(|in|) for row vector |in|. void TransposedConjugatedProduct(const ComplexMatrix<float>& in, ComplexMatrix<float>* out) { - RTC_CHECK_EQ(in.num_rows(), 1); + RTC_CHECK_EQ(1u, in.num_rows()); RTC_CHECK_EQ(out->num_rows(), in.num_columns()); RTC_CHECK_EQ(out->num_columns(), in.num_columns()); const complex<float>* in_elements = in.elements()[0]; complex<float>* const* out_elements = out->elements(); - for (int i = 0; i < out->num_rows(); ++i) { - for (int j = 0; j < out->num_columns(); ++j) { + for (size_t i = 0; i < out->num_rows(); ++i) { + for (size_t j = 0; j < out->num_columns(); ++j) { out_elements[i][j] = in_elements[i] * conj(in_elements[j]); } } } std::vector<Point> GetCenteredArray(std::vector<Point> array_geometry) { - for (int dim = 0; dim < 3; ++dim) { + for (size_t dim = 0; dim < 3; ++dim) { float center = 0.f; for (size_t i = 0; i < array_geometry.size(); ++i) { center += array_geometry[i].c[dim]; @@ -379,7 +379,7 @@ void NonlinearBeamformer::ProcessChunk(const ChannelBuffer<float>& input, (high_pass_postfilter_mask_ - old_high_pass_mask) / input.num_frames_per_band(); // Apply the smoothed high-pass mask to the first channel of each band. - // This can be done because the effct of the linear beamformer is negligible + // This can be done because the effect of the linear beamformer is negligible // compared to the post-filter. 
for (size_t i = 1; i < input.num_bands(); ++i) { float smoothed_mask = old_high_pass_mask; @@ -408,13 +408,13 @@ bool NonlinearBeamformer::IsInBeam(const SphericalPointf& spherical_point) { } void NonlinearBeamformer::ProcessAudioBlock(const complex_f* const* input, - int num_input_channels, + size_t num_input_channels, size_t num_freq_bins, - int num_output_channels, + size_t num_output_channels, complex_f* const* output) { - RTC_CHECK_EQ(num_freq_bins, kNumFreqBins); - RTC_CHECK_EQ(num_input_channels, num_input_channels_); - RTC_CHECK_EQ(num_output_channels, 1); + RTC_CHECK_EQ(kNumFreqBins, num_freq_bins); + RTC_CHECK_EQ(num_input_channels_, num_input_channels); + RTC_CHECK_EQ(1u, num_output_channels); // Calculating the post-filter masks. Note that we need two for each // frequency bin to account for the positive and negative interferer @@ -483,7 +483,7 @@ void NonlinearBeamformer::ApplyMasks(const complex_f* const* input, const complex_f* delay_sum_mask_els = normalized_delay_sum_masks_[f_ix].elements()[0]; - for (int c_ix = 0; c_ix < num_input_channels_; ++c_ix) { + for (size_t c_ix = 0; c_ix < num_input_channels_; ++c_ix) { output_channel[f_ix] += input[c_ix][f_ix] * delay_sum_mask_els[c_ix]; } diff --git a/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h index 565c1f349f..29c416ca91 100644 --- a/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h +++ b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h @@ -67,9 +67,9 @@ class NonlinearBeamformer // Process one frequency-domain block of audio. This is where the fun // happens. Implements LappedTransform::Callback. 
void ProcessAudioBlock(const complex<float>* const* input, - int num_input_channels, + size_t num_input_channels, size_t num_freq_bins, - int num_output_channels, + size_t num_output_channels, complex<float>* const* output) override; private: @@ -129,12 +129,12 @@ class NonlinearBeamformer float window_[kFftSize]; // Parameters exposed to the user. - const int num_input_channels_; + const size_t num_input_channels_; int sample_rate_hz_; const std::vector<Point> array_geometry_; // The normal direction of the array if it has one and it is in the xy-plane. - const rtc::Maybe<Point> array_normal_; + const rtc::Optional<Point> array_normal_; // Minimum spacing between microphone pairs. const float min_mic_spacing_; diff --git a/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer_test.cc b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer_test.cc index cc752485e9..d187552692 100644 --- a/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer_test.cc +++ b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer_test.cc @@ -12,6 +12,7 @@ #include "gflags/gflags.h" #include "webrtc/base/checks.h" +#include "webrtc/base/format_macros.h" #include "webrtc/common_audio/channel_buffer.h" #include "webrtc/common_audio/wav_file.h" #include "webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h" @@ -52,9 +53,9 @@ int main(int argc, char* argv[]) { NonlinearBeamformer bf(array_geometry); bf.Initialize(kChunkSizeMs, in_file.sample_rate()); - printf("Input file: %s\nChannels: %d, Sample rate: %d Hz\n\n", + printf("Input file: %s\nChannels: %" PRIuS ", Sample rate: %d Hz\n\n", FLAGS_i.c_str(), in_file.num_channels(), in_file.sample_rate()); - printf("Output file: %s\nChannels: %d, Sample rate: %d Hz\n\n", + printf("Output file: %s\nChannels: %" PRIuS ", Sample rate: %d Hz\n\n", FLAGS_o.c_str(), out_file.num_channels(), out_file.sample_rate()); ChannelBuffer<float> in_buf( diff --git a/webrtc/modules/audio_processing/common.h 
b/webrtc/modules/audio_processing/common.h index ed8a0544c3..d4ddb92b50 100644 --- a/webrtc/modules/audio_processing/common.h +++ b/webrtc/modules/audio_processing/common.h @@ -17,7 +17,7 @@ namespace webrtc { -static inline int ChannelsFromLayout(AudioProcessing::ChannelLayout layout) { +static inline size_t ChannelsFromLayout(AudioProcessing::ChannelLayout layout) { switch (layout) { case AudioProcessing::kMono: case AudioProcessing::kMonoAndKeyboard: @@ -27,7 +27,7 @@ static inline int ChannelsFromLayout(AudioProcessing::ChannelLayout layout) { return 2; } assert(false); - return -1; + return 0; } } // namespace webrtc diff --git a/webrtc/modules/audio_processing/echo_cancellation_impl.cc b/webrtc/modules/audio_processing/echo_cancellation_impl.cc index 56ee9e0fff..debc597c54 100644 --- a/webrtc/modules/audio_processing/echo_cancellation_impl.cc +++ b/webrtc/modules/audio_processing/echo_cancellation_impl.cc @@ -16,9 +16,8 @@ extern "C" { #include "webrtc/modules/audio_processing/aec/aec_core.h" } -#include "webrtc/modules/audio_processing/aec/include/echo_cancellation.h" +#include "webrtc/modules/audio_processing/aec/echo_cancellation.h" #include "webrtc/modules/audio_processing/audio_buffer.h" -#include "webrtc/system_wrappers/include/critical_section_wrapper.h" namespace webrtc { @@ -53,13 +52,22 @@ AudioProcessing::Error MapError(int err) { return AudioProcessing::kUnspecifiedError; } } + +// Maximum length that a frame of samples can have. +static const size_t kMaxAllowedValuesOfSamplesPerFrame = 160; +// Maximum number of frames to buffer in the render queue. +// TODO(peah): Decrease this once we properly handle hugely unbalanced +// reverse and forward call numbers. 
+static const size_t kMaxNumFramesToBuffer = 100; } // namespace EchoCancellationImpl::EchoCancellationImpl(const AudioProcessing* apm, - CriticalSectionWrapper* crit) + rtc::CriticalSection* crit_render, + rtc::CriticalSection* crit_capture) : ProcessingComponent(), apm_(apm), - crit_(crit), + crit_render_(crit_render), + crit_capture_(crit_capture), drift_compensation_enabled_(false), metrics_enabled_(false), suppression_level_(kModerateSuppression), @@ -68,87 +76,131 @@ EchoCancellationImpl::EchoCancellationImpl(const AudioProcessing* apm, stream_has_echo_(false), delay_logging_enabled_(false), extended_filter_enabled_(false), - delay_agnostic_enabled_(false) { + delay_agnostic_enabled_(false), + render_queue_element_max_size_(0) { + RTC_DCHECK(apm); + RTC_DCHECK(crit_render); + RTC_DCHECK(crit_capture); } EchoCancellationImpl::~EchoCancellationImpl() {} int EchoCancellationImpl::ProcessRenderAudio(const AudioBuffer* audio) { + rtc::CritScope cs_render(crit_render_); if (!is_component_enabled()) { - return apm_->kNoError; + return AudioProcessing::kNoError; } assert(audio->num_frames_per_band() <= 160); assert(audio->num_channels() == apm_->num_reverse_channels()); - int err = apm_->kNoError; + int err = AudioProcessing::kNoError; // The ordering convention must be followed to pass to the correct AEC. 
size_t handle_index = 0; - for (int i = 0; i < apm_->num_output_channels(); i++) { - for (int j = 0; j < audio->num_channels(); j++) { + render_queue_buffer_.clear(); + for (size_t i = 0; i < apm_->num_output_channels(); i++) { + for (size_t j = 0; j < audio->num_channels(); j++) { Handle* my_handle = static_cast<Handle*>(handle(handle_index)); - err = WebRtcAec_BufferFarend( - my_handle, - audio->split_bands_const_f(j)[kBand0To8kHz], + // Retrieve any error code produced by the buffering of the farend + // signal + err = WebRtcAec_GetBufferFarendError( + my_handle, audio->split_bands_const_f(j)[kBand0To8kHz], audio->num_frames_per_band()); - if (err != apm_->kNoError) { - return GetHandleError(my_handle); // TODO(ajm): warning possible? + if (err != AudioProcessing::kNoError) { + return MapError(err); // TODO(ajm): warning possible? } - handle_index++; + // Buffer the samples in the render queue. + render_queue_buffer_.insert(render_queue_buffer_.end(), + audio->split_bands_const_f(j)[kBand0To8kHz], + (audio->split_bands_const_f(j)[kBand0To8kHz] + + audio->num_frames_per_band())); } } - return apm_->kNoError; + // Insert the samples into the queue. + if (!render_signal_queue_->Insert(&render_queue_buffer_)) { + // The data queue is full and needs to be emptied. + ReadQueuedRenderData(); + + // Retry the insert (should always work). + RTC_DCHECK_EQ(render_signal_queue_->Insert(&render_queue_buffer_), true); + } + + return AudioProcessing::kNoError; +} + +// Read chunks of data that were received and queued on the render side from +// a queue. All the data chunks are buffered into the farend signal of the AEC. 
+void EchoCancellationImpl::ReadQueuedRenderData() { + rtc::CritScope cs_capture(crit_capture_); + if (!is_component_enabled()) { + return; + } + + while (render_signal_queue_->Remove(&capture_queue_buffer_)) { + size_t handle_index = 0; + size_t buffer_index = 0; + const size_t num_frames_per_band = + capture_queue_buffer_.size() / + (apm_->num_output_channels() * apm_->num_reverse_channels()); + for (size_t i = 0; i < apm_->num_output_channels(); i++) { + for (size_t j = 0; j < apm_->num_reverse_channels(); j++) { + Handle* my_handle = static_cast<Handle*>(handle(handle_index)); + WebRtcAec_BufferFarend(my_handle, &capture_queue_buffer_[buffer_index], + num_frames_per_band); + + buffer_index += num_frames_per_band; + handle_index++; + } + } + } } int EchoCancellationImpl::ProcessCaptureAudio(AudioBuffer* audio) { + rtc::CritScope cs_capture(crit_capture_); if (!is_component_enabled()) { - return apm_->kNoError; + return AudioProcessing::kNoError; } if (!apm_->was_stream_delay_set()) { - return apm_->kStreamParameterNotSetError; + return AudioProcessing::kStreamParameterNotSetError; } if (drift_compensation_enabled_ && !was_stream_drift_set_) { - return apm_->kStreamParameterNotSetError; + return AudioProcessing::kStreamParameterNotSetError; } assert(audio->num_frames_per_band() <= 160); - assert(audio->num_channels() == apm_->num_output_channels()); + assert(audio->num_channels() == apm_->num_proc_channels()); - int err = apm_->kNoError; + int err = AudioProcessing::kNoError; // The ordering convention must be followed to pass to the correct AEC. 
size_t handle_index = 0; stream_has_echo_ = false; - for (int i = 0; i < audio->num_channels(); i++) { - for (int j = 0; j < apm_->num_reverse_channels(); j++) { + for (size_t i = 0; i < audio->num_channels(); i++) { + for (size_t j = 0; j < apm_->num_reverse_channels(); j++) { Handle* my_handle = handle(handle_index); - err = WebRtcAec_Process( - my_handle, - audio->split_bands_const_f(i), - audio->num_bands(), - audio->split_bands_f(i), - audio->num_frames_per_band(), - apm_->stream_delay_ms(), - stream_drift_samples_); - - if (err != apm_->kNoError) { - err = GetHandleError(my_handle); + err = WebRtcAec_Process(my_handle, audio->split_bands_const_f(i), + audio->num_bands(), audio->split_bands_f(i), + audio->num_frames_per_band(), + apm_->stream_delay_ms(), stream_drift_samples_); + + if (err != AudioProcessing::kNoError) { + err = MapError(err); // TODO(ajm): Figure out how to return warnings properly. - if (err != apm_->kBadStreamParameterWarning) { + if (err != AudioProcessing::kBadStreamParameterWarning) { return err; } } int status = 0; err = WebRtcAec_get_echo_status(my_handle, &status); - if (err != apm_->kNoError) { - return GetHandleError(my_handle); + if (err != AudioProcessing::kNoError) { + return MapError(err); } if (status == 1) { @@ -160,77 +212,92 @@ int EchoCancellationImpl::ProcessCaptureAudio(AudioBuffer* audio) { } was_stream_drift_set_ = false; - return apm_->kNoError; + return AudioProcessing::kNoError; } int EchoCancellationImpl::Enable(bool enable) { - CriticalSectionScoped crit_scoped(crit_); + // Run in a single-threaded manner. + rtc::CritScope cs_render(crit_render_); + rtc::CritScope cs_capture(crit_capture_); // Ensure AEC and AECM are not both enabled. + // The is_enabled call is safe from a deadlock perspective + // as both locks are already held in the correct order. 
if (enable && apm_->echo_control_mobile()->is_enabled()) { - return apm_->kBadParameterError; + return AudioProcessing::kBadParameterError; } return EnableComponent(enable); } bool EchoCancellationImpl::is_enabled() const { + rtc::CritScope cs(crit_capture_); return is_component_enabled(); } int EchoCancellationImpl::set_suppression_level(SuppressionLevel level) { - CriticalSectionScoped crit_scoped(crit_); - if (MapSetting(level) == -1) { - return apm_->kBadParameterError; + { + if (MapSetting(level) == -1) { + return AudioProcessing::kBadParameterError; + } + rtc::CritScope cs(crit_capture_); + suppression_level_ = level; } - - suppression_level_ = level; return Configure(); } EchoCancellation::SuppressionLevel EchoCancellationImpl::suppression_level() const { + rtc::CritScope cs(crit_capture_); return suppression_level_; } int EchoCancellationImpl::enable_drift_compensation(bool enable) { - CriticalSectionScoped crit_scoped(crit_); - drift_compensation_enabled_ = enable; + { + rtc::CritScope cs(crit_capture_); + drift_compensation_enabled_ = enable; + } return Configure(); } bool EchoCancellationImpl::is_drift_compensation_enabled() const { + rtc::CritScope cs(crit_capture_); return drift_compensation_enabled_; } void EchoCancellationImpl::set_stream_drift_samples(int drift) { + rtc::CritScope cs(crit_capture_); was_stream_drift_set_ = true; stream_drift_samples_ = drift; } int EchoCancellationImpl::stream_drift_samples() const { + rtc::CritScope cs(crit_capture_); return stream_drift_samples_; } int EchoCancellationImpl::enable_metrics(bool enable) { - CriticalSectionScoped crit_scoped(crit_); - metrics_enabled_ = enable; + { + rtc::CritScope cs(crit_capture_); + metrics_enabled_ = enable; + } return Configure(); } bool EchoCancellationImpl::are_metrics_enabled() const { + rtc::CritScope cs(crit_capture_); return metrics_enabled_; } // TODO(ajm): we currently just use the metrics from the first AEC. 
Think more // about the best way to extend this to multi-channel. int EchoCancellationImpl::GetMetrics(Metrics* metrics) { - CriticalSectionScoped crit_scoped(crit_); + rtc::CritScope cs(crit_capture_); if (metrics == NULL) { - return apm_->kNullPointerError; + return AudioProcessing::kNullPointerError; } if (!is_component_enabled() || !metrics_enabled_) { - return apm_->kNotEnabledError; + return AudioProcessing::kNotEnabledError; } AecMetrics my_metrics; @@ -239,8 +306,8 @@ int EchoCancellationImpl::GetMetrics(Metrics* metrics) { Handle* my_handle = static_cast<Handle*>(handle(0)); int err = WebRtcAec_GetMetrics(my_handle, &my_metrics); - if (err != apm_->kNoError) { - return GetHandleError(my_handle); + if (err != AudioProcessing::kNoError) { + return MapError(err); } metrics->residual_echo_return_loss.instant = my_metrics.rerl.instant; @@ -263,62 +330,70 @@ int EchoCancellationImpl::GetMetrics(Metrics* metrics) { metrics->a_nlp.maximum = my_metrics.aNlp.max; metrics->a_nlp.minimum = my_metrics.aNlp.min; - return apm_->kNoError; + return AudioProcessing::kNoError; } bool EchoCancellationImpl::stream_has_echo() const { + rtc::CritScope cs(crit_capture_); return stream_has_echo_; } int EchoCancellationImpl::enable_delay_logging(bool enable) { - CriticalSectionScoped crit_scoped(crit_); - delay_logging_enabled_ = enable; + { + rtc::CritScope cs(crit_capture_); + delay_logging_enabled_ = enable; + } return Configure(); } bool EchoCancellationImpl::is_delay_logging_enabled() const { + rtc::CritScope cs(crit_capture_); return delay_logging_enabled_; } bool EchoCancellationImpl::is_delay_agnostic_enabled() const { + rtc::CritScope cs(crit_capture_); return delay_agnostic_enabled_; } bool EchoCancellationImpl::is_extended_filter_enabled() const { + rtc::CritScope cs(crit_capture_); return extended_filter_enabled_; } // TODO(bjornv): How should we handle the multi-channel case?
int EchoCancellationImpl::GetDelayMetrics(int* median, int* std) { + rtc::CritScope cs(crit_capture_); float fraction_poor_delays = 0; return GetDelayMetrics(median, std, &fraction_poor_delays); } int EchoCancellationImpl::GetDelayMetrics(int* median, int* std, float* fraction_poor_delays) { - CriticalSectionScoped crit_scoped(crit_); + rtc::CritScope cs(crit_capture_); if (median == NULL) { - return apm_->kNullPointerError; + return AudioProcessing::kNullPointerError; } if (std == NULL) { - return apm_->kNullPointerError; + return AudioProcessing::kNullPointerError; } if (!is_component_enabled() || !delay_logging_enabled_) { - return apm_->kNotEnabledError; + return AudioProcessing::kNotEnabledError; } Handle* my_handle = static_cast<Handle*>(handle(0)); - if (WebRtcAec_GetDelayMetrics(my_handle, median, std, fraction_poor_delays) != - apm_->kNoError) { - return GetHandleError(my_handle); + const int err = + WebRtcAec_GetDelayMetrics(my_handle, median, std, fraction_poor_delays); + if (err != AudioProcessing::kNoError) { + return MapError(err); } - return apm_->kNoError; + return AudioProcessing::kNoError; } struct AecCore* EchoCancellationImpl::aec_core() const { - CriticalSectionScoped crit_scoped(crit_); + rtc::CritScope cs(crit_capture_); if (!is_component_enabled()) { return NULL; } @@ -328,16 +403,51 @@ struct AecCore* EchoCancellationImpl::aec_core() const { int EchoCancellationImpl::Initialize() { int err = ProcessingComponent::Initialize(); - if (err != apm_->kNoError || !is_component_enabled()) { - return err; + { + rtc::CritScope cs(crit_capture_); + if (err != AudioProcessing::kNoError || !is_component_enabled()) { + return err; + } } - return apm_->kNoError; + AllocateRenderQueue(); + + return AudioProcessing::kNoError; +} + +void EchoCancellationImpl::AllocateRenderQueue() { + const size_t new_render_queue_element_max_size = std::max<size_t>( + static_cast<size_t>(1), + kMaxAllowedValuesOfSamplesPerFrame * num_handles_required()); + + rtc::CritScope 
cs_render(crit_render_); + rtc::CritScope cs_capture(crit_capture_); + + // Reallocate the queue if the queue item size is too small to fit the + // data to put in the queue. + if (render_queue_element_max_size_ < new_render_queue_element_max_size) { + render_queue_element_max_size_ = new_render_queue_element_max_size; + + std::vector<float> template_queue_element(render_queue_element_max_size_); + + render_signal_queue_.reset( + new SwapQueue<std::vector<float>, RenderQueueItemVerifier<float>>( + kMaxNumFramesToBuffer, template_queue_element, + RenderQueueItemVerifier<float>(render_queue_element_max_size_))); + + render_queue_buffer_.resize(render_queue_element_max_size_); + capture_queue_buffer_.resize(render_queue_element_max_size_); + } else { + render_signal_queue_->Clear(); + } } void EchoCancellationImpl::SetExtraOptions(const Config& config) { - extended_filter_enabled_ = config.Get<ExtendedFilter>().enabled; - delay_agnostic_enabled_ = config.Get<DelayAgnostic>().enabled; + { + rtc::CritScope cs(crit_capture_); + extended_filter_enabled_ = config.Get<ExtendedFilter>().enabled; + delay_agnostic_enabled_ = config.Get<DelayAgnostic>().enabled; + } Configure(); } @@ -351,23 +461,25 @@ void EchoCancellationImpl::DestroyHandle(void* handle) const { } int EchoCancellationImpl::InitializeHandle(void* handle) const { + // Not locked as it only relies on APM public API which is threadsafe. + assert(handle != NULL); // TODO(ajm): Drift compensation is disabled in practice. If restored, it // should be managed internally and not depend on the hardware sample rate. // For now, just hardcode a 48 kHz value. 
return WebRtcAec_Init(static_cast<Handle*>(handle), - apm_->proc_sample_rate_hz(), - 48000); + apm_->proc_sample_rate_hz(), 48000); } int EchoCancellationImpl::ConfigureHandle(void* handle) const { + rtc::CritScope cs_render(crit_render_); + rtc::CritScope cs_capture(crit_capture_); assert(handle != NULL); AecConfig config; config.metricsMode = metrics_enabled_; config.nlpMode = MapSetting(suppression_level_); config.skewMode = drift_compensation_enabled_; config.delay_logging = delay_logging_enabled_; - WebRtcAec_enable_extended_filter( WebRtcAec_aec_core(static_cast<Handle*>(handle)), extended_filter_enabled_ ? 1 : 0); @@ -377,13 +489,14 @@ int EchoCancellationImpl::ConfigureHandle(void* handle) const { return WebRtcAec_set_config(static_cast<Handle*>(handle), config); } -int EchoCancellationImpl::num_handles_required() const { - return apm_->num_output_channels() * - apm_->num_reverse_channels(); +size_t EchoCancellationImpl::num_handles_required() const { + // Not locked as it only relies on APM public API which is threadsafe. + return apm_->num_output_channels() * apm_->num_reverse_channels(); } int EchoCancellationImpl::GetHandleError(void* handle) const { + // Not locked as it does not rely on anything in the state. 
assert(handle != NULL); - return MapError(WebRtcAec_get_error_code(static_cast<Handle*>(handle))); + return AudioProcessing::kUnspecifiedError; } } // namespace webrtc diff --git a/webrtc/modules/audio_processing/echo_cancellation_impl.h b/webrtc/modules/audio_processing/echo_cancellation_impl.h index 070dcabc5d..a40a267e32 100644 --- a/webrtc/modules/audio_processing/echo_cancellation_impl.h +++ b/webrtc/modules/audio_processing/echo_cancellation_impl.h @@ -11,19 +11,22 @@ #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_ECHO_CANCELLATION_IMPL_H_ #define WEBRTC_MODULES_AUDIO_PROCESSING_ECHO_CANCELLATION_IMPL_H_ +#include "webrtc/base/criticalsection.h" +#include "webrtc/base/scoped_ptr.h" +#include "webrtc/common_audio/swap_queue.h" #include "webrtc/modules/audio_processing/include/audio_processing.h" #include "webrtc/modules/audio_processing/processing_component.h" namespace webrtc { class AudioBuffer; -class CriticalSectionWrapper; class EchoCancellationImpl : public EchoCancellation, public ProcessingComponent { public: EchoCancellationImpl(const AudioProcessing* apm, - CriticalSectionWrapper* crit); + rtc::CriticalSection* crit_render, + rtc::CriticalSection* crit_capture); virtual ~EchoCancellationImpl(); int ProcessRenderAudio(const AudioBuffer* audio); @@ -38,10 +41,13 @@ class EchoCancellationImpl : public EchoCancellation, // ProcessingComponent implementation. int Initialize() override; void SetExtraOptions(const Config& config) override; - bool is_delay_agnostic_enabled() const; bool is_extended_filter_enabled() const; + // Reads render side data that has been queued on the render call. + // Called holding the capture lock. + void ReadQueuedRenderData(); + private: // EchoCancellation implementation. 
int Enable(bool enable) override; @@ -58,6 +64,7 @@ class EchoCancellationImpl : public EchoCancellation, int GetDelayMetrics(int* median, int* std, float* fraction_poor_delays) override; + struct AecCore* aec_core() const override; // ProcessingComponent implementation. @@ -65,20 +72,35 @@ class EchoCancellationImpl : public EchoCancellation, int InitializeHandle(void* handle) const override; int ConfigureHandle(void* handle) const override; void DestroyHandle(void* handle) const override; - int num_handles_required() const override; + size_t num_handles_required() const override; int GetHandleError(void* handle) const override; + void AllocateRenderQueue(); + + // Not guarded as its public API is thread safe. const AudioProcessing* apm_; - CriticalSectionWrapper* crit_; - bool drift_compensation_enabled_; - bool metrics_enabled_; - SuppressionLevel suppression_level_; - int stream_drift_samples_; - bool was_stream_drift_set_; - bool stream_has_echo_; - bool delay_logging_enabled_; - bool extended_filter_enabled_; - bool delay_agnostic_enabled_; + + rtc::CriticalSection* const crit_render_ ACQUIRED_BEFORE(crit_capture_); + rtc::CriticalSection* const crit_capture_; + + bool drift_compensation_enabled_ GUARDED_BY(crit_capture_); + bool metrics_enabled_ GUARDED_BY(crit_capture_); + SuppressionLevel suppression_level_ GUARDED_BY(crit_capture_); + int stream_drift_samples_ GUARDED_BY(crit_capture_); + bool was_stream_drift_set_ GUARDED_BY(crit_capture_); + bool stream_has_echo_ GUARDED_BY(crit_capture_); + bool delay_logging_enabled_ GUARDED_BY(crit_capture_); + bool extended_filter_enabled_ GUARDED_BY(crit_capture_); + bool delay_agnostic_enabled_ GUARDED_BY(crit_capture_); + + size_t render_queue_element_max_size_ GUARDED_BY(crit_render_) + GUARDED_BY(crit_capture_); + std::vector<float> render_queue_buffer_ GUARDED_BY(crit_render_); + std::vector<float> capture_queue_buffer_ GUARDED_BY(crit_capture_); + + // Lock protection not needed. 
+ rtc::scoped_ptr<SwapQueue<std::vector<float>, RenderQueueItemVerifier<float>>> + render_signal_queue_; }; } // namespace webrtc diff --git a/webrtc/modules/audio_processing/echo_cancellation_impl_unittest.cc b/webrtc/modules/audio_processing/echo_cancellation_impl_unittest.cc index b2e11981fa..7f152bf942 100644 --- a/webrtc/modules/audio_processing/echo_cancellation_impl_unittest.cc +++ b/webrtc/modules/audio_processing/echo_cancellation_impl_unittest.cc @@ -14,7 +14,6 @@ extern "C" { #include "webrtc/modules/audio_processing/aec/aec_core.h" } #include "webrtc/modules/audio_processing/include/audio_processing.h" -#include "webrtc/test/testsupport/gtest_disable.h" namespace webrtc { diff --git a/webrtc/modules/audio_processing/echo_control_mobile_impl.cc b/webrtc/modules/audio_processing/echo_control_mobile_impl.cc index 954aac7d4a..f2df5f7984 100644 --- a/webrtc/modules/audio_processing/echo_control_mobile_impl.cc +++ b/webrtc/modules/audio_processing/echo_control_mobile_impl.cc @@ -13,9 +13,8 @@ #include <assert.h> #include <string.h> -#include "webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h" +#include "webrtc/modules/audio_processing/aecm/echo_control_mobile.h" #include "webrtc/modules/audio_processing/audio_buffer.h" -#include "webrtc/system_wrappers/include/critical_section_wrapper.h" #include "webrtc/system_wrappers/include/logging.h" namespace webrtc { @@ -56,6 +55,12 @@ AudioProcessing::Error MapError(int err) { return AudioProcessing::kUnspecifiedError; } } +// Maximum length that a frame of samples can have. +static const size_t kMaxAllowedValuesOfSamplesPerFrame = 160; +// Maximum number of frames to buffer in the render queue. +// TODO(peah): Decrease this once we properly handle hugely unbalanced +// reverse and forward call numbers. 
+static const size_t kMaxNumFramesToBuffer = 100; } // namespace size_t EchoControlMobile::echo_path_size_bytes() { @@ -63,13 +68,20 @@ size_t EchoControlMobile::echo_path_size_bytes() { } EchoControlMobileImpl::EchoControlMobileImpl(const AudioProcessing* apm, - CriticalSectionWrapper* crit) - : ProcessingComponent(), - apm_(apm), - crit_(crit), - routing_mode_(kSpeakerphone), - comfort_noise_enabled_(true), - external_echo_path_(NULL) {} + rtc::CriticalSection* crit_render, + rtc::CriticalSection* crit_capture) + : ProcessingComponent(), + apm_(apm), + crit_render_(crit_render), + crit_capture_(crit_capture), + routing_mode_(kSpeakerphone), + comfort_noise_enabled_(true), + external_echo_path_(NULL), + render_queue_element_max_size_(0) { + RTC_DCHECK(apm); + RTC_DCHECK(crit_render); + RTC_DCHECK(crit_capture); +} EchoControlMobileImpl::~EchoControlMobileImpl() { if (external_echo_path_ != NULL) { @@ -79,53 +91,98 @@ EchoControlMobileImpl::~EchoControlMobileImpl() { } int EchoControlMobileImpl::ProcessRenderAudio(const AudioBuffer* audio) { + rtc::CritScope cs_render(crit_render_); + if (!is_component_enabled()) { - return apm_->kNoError; + return AudioProcessing::kNoError; } assert(audio->num_frames_per_band() <= 160); assert(audio->num_channels() == apm_->num_reverse_channels()); - int err = apm_->kNoError; - + int err = AudioProcessing::kNoError; // The ordering convention must be followed to pass to the correct AECM. 
size_t handle_index = 0; - for (int i = 0; i < apm_->num_output_channels(); i++) { - for (int j = 0; j < audio->num_channels(); j++) { + render_queue_buffer_.clear(); + for (size_t i = 0; i < apm_->num_output_channels(); i++) { + for (size_t j = 0; j < audio->num_channels(); j++) { Handle* my_handle = static_cast<Handle*>(handle(handle_index)); - err = WebRtcAecm_BufferFarend( - my_handle, - audio->split_bands_const(j)[kBand0To8kHz], + err = WebRtcAecm_GetBufferFarendError( + my_handle, audio->split_bands_const(j)[kBand0To8kHz], audio->num_frames_per_band()); - if (err != apm_->kNoError) { - return GetHandleError(my_handle); // TODO(ajm): warning possible? - } + if (err != AudioProcessing::kNoError) + return MapError(err); // TODO(ajm): warning possible? + + // Buffer the samples in the render queue. + render_queue_buffer_.insert(render_queue_buffer_.end(), + audio->split_bands_const(j)[kBand0To8kHz], + (audio->split_bands_const(j)[kBand0To8kHz] + + audio->num_frames_per_band())); handle_index++; } } - return apm_->kNoError; + // Insert the samples into the queue. + if (!render_signal_queue_->Insert(&render_queue_buffer_)) { + // The data queue is full and needs to be emptied. + ReadQueuedRenderData(); + + // Retry the insert (should always work). + RTC_DCHECK_EQ(render_signal_queue_->Insert(&render_queue_buffer_), true); + } + + return AudioProcessing::kNoError; +} + +// Read chunks of data that were received and queued on the render side from +// a queue. All the data chunks are buffered into the farend signal of the AECM.
+void EchoControlMobileImpl::ReadQueuedRenderData() { + rtc::CritScope cs_capture(crit_capture_); + + if (!is_component_enabled()) { + return; + } + + while (render_signal_queue_->Remove(&capture_queue_buffer_)) { + size_t handle_index = 0; + size_t buffer_index = 0; + const size_t num_frames_per_band = + capture_queue_buffer_.size() / + (apm_->num_output_channels() * apm_->num_reverse_channels()); + for (size_t i = 0; i < apm_->num_output_channels(); i++) { + for (size_t j = 0; j < apm_->num_reverse_channels(); j++) { + Handle* my_handle = static_cast<Handle*>(handle(handle_index)); + WebRtcAecm_BufferFarend(my_handle, &capture_queue_buffer_[buffer_index], + num_frames_per_band); + + buffer_index += num_frames_per_band; + handle_index++; + } + } + } } int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio) { + rtc::CritScope cs_capture(crit_capture_); + if (!is_component_enabled()) { - return apm_->kNoError; + return AudioProcessing::kNoError; } if (!apm_->was_stream_delay_set()) { - return apm_->kStreamParameterNotSetError; + return AudioProcessing::kStreamParameterNotSetError; } assert(audio->num_frames_per_band() <= 160); assert(audio->num_channels() == apm_->num_output_channels()); - int err = apm_->kNoError; + int err = AudioProcessing::kNoError; // The ordering convention must be followed to pass to the correct AECM. size_t handle_index = 0; - for (int i = 0; i < audio->num_channels(); i++) { + for (size_t i = 0; i < audio->num_channels(); i++) { // TODO(ajm): improve how this works, possibly inside AECM. // This is kind of hacked up. 
const int16_t* noisy = audio->low_pass_reference(i); @@ -134,7 +191,7 @@ int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio) { noisy = clean; clean = NULL; } - for (int j = 0; j < apm_->num_reverse_channels(); j++) { + for (size_t j = 0; j < apm_->num_reverse_channels(); j++) { Handle* my_handle = static_cast<Handle*>(handle(handle_index)); err = WebRtcAecm_Process( my_handle, @@ -144,109 +201,158 @@ int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio) { audio->num_frames_per_band(), apm_->stream_delay_ms()); - if (err != apm_->kNoError) { - return GetHandleError(my_handle); // TODO(ajm): warning possible? - } + if (err != AudioProcessing::kNoError) + return MapError(err); handle_index++; } } - return apm_->kNoError; + return AudioProcessing::kNoError; } int EchoControlMobileImpl::Enable(bool enable) { - CriticalSectionScoped crit_scoped(crit_); // Ensure AEC and AECM are not both enabled. + rtc::CritScope cs_render(crit_render_); + rtc::CritScope cs_capture(crit_capture_); + // The is_enabled call is safe from a deadlock perspective + // as both locks are already held in the correct order.
if (enable && apm_->echo_cancellation()->is_enabled()) { - return apm_->kBadParameterError; + return AudioProcessing::kBadParameterError; } return EnableComponent(enable); } bool EchoControlMobileImpl::is_enabled() const { + rtc::CritScope cs(crit_capture_); return is_component_enabled(); } int EchoControlMobileImpl::set_routing_mode(RoutingMode mode) { - CriticalSectionScoped crit_scoped(crit_); if (MapSetting(mode) == -1) { - return apm_->kBadParameterError; + return AudioProcessing::kBadParameterError; } - routing_mode_ = mode; + { + rtc::CritScope cs(crit_capture_); + routing_mode_ = mode; + } return Configure(); } EchoControlMobile::RoutingMode EchoControlMobileImpl::routing_mode() const { + rtc::CritScope cs(crit_capture_); return routing_mode_; } int EchoControlMobileImpl::enable_comfort_noise(bool enable) { - CriticalSectionScoped crit_scoped(crit_); - comfort_noise_enabled_ = enable; + { + rtc::CritScope cs(crit_capture_); + comfort_noise_enabled_ = enable; + } return Configure(); } bool EchoControlMobileImpl::is_comfort_noise_enabled() const { + rtc::CritScope cs(crit_capture_); return comfort_noise_enabled_; } int EchoControlMobileImpl::SetEchoPath(const void* echo_path, size_t size_bytes) { - CriticalSectionScoped crit_scoped(crit_); - if (echo_path == NULL) { - return apm_->kNullPointerError; - } - if (size_bytes != echo_path_size_bytes()) { - // Size mismatch - return apm_->kBadParameterError; - } + { + rtc::CritScope cs_render(crit_render_); + rtc::CritScope cs_capture(crit_capture_); + if (echo_path == NULL) { + return AudioProcessing::kNullPointerError; + } + if (size_bytes != echo_path_size_bytes()) { + // Size mismatch + return AudioProcessing::kBadParameterError; + } - if (external_echo_path_ == NULL) { - external_echo_path_ = new unsigned char[size_bytes]; + if (external_echo_path_ == NULL) { + external_echo_path_ = new unsigned char[size_bytes]; + } + memcpy(external_echo_path_, echo_path, size_bytes); } - memcpy(external_echo_path_, 
echo_path, size_bytes); return Initialize(); } int EchoControlMobileImpl::GetEchoPath(void* echo_path, size_t size_bytes) const { - CriticalSectionScoped crit_scoped(crit_); + rtc::CritScope cs(crit_capture_); if (echo_path == NULL) { - return apm_->kNullPointerError; + return AudioProcessing::kNullPointerError; } if (size_bytes != echo_path_size_bytes()) { // Size mismatch - return apm_->kBadParameterError; + return AudioProcessing::kBadParameterError; } if (!is_component_enabled()) { - return apm_->kNotEnabledError; + return AudioProcessing::kNotEnabledError; } // Get the echo path from the first channel Handle* my_handle = static_cast<Handle*>(handle(0)); - if (WebRtcAecm_GetEchoPath(my_handle, echo_path, size_bytes) != 0) { - return GetHandleError(my_handle); - } + int32_t err = WebRtcAecm_GetEchoPath(my_handle, echo_path, size_bytes); + if (err != 0) + return MapError(err); - return apm_->kNoError; + return AudioProcessing::kNoError; } int EchoControlMobileImpl::Initialize() { - if (!is_component_enabled()) { - return apm_->kNoError; + { + rtc::CritScope cs_capture(crit_capture_); + if (!is_component_enabled()) { + return AudioProcessing::kNoError; + } } - if (apm_->proc_sample_rate_hz() > apm_->kSampleRate16kHz) { + if (apm_->proc_sample_rate_hz() > AudioProcessing::kSampleRate16kHz) { LOG(LS_ERROR) << "AECM only supports 16 kHz or lower sample rates"; - return apm_->kBadSampleRateError; + return AudioProcessing::kBadSampleRateError; } - return ProcessingComponent::Initialize(); + int err = ProcessingComponent::Initialize(); + if (err != AudioProcessing::kNoError) { + return err; + } + + AllocateRenderQueue(); + + return AudioProcessing::kNoError; +} + +void EchoControlMobileImpl::AllocateRenderQueue() { + const size_t new_render_queue_element_max_size = std::max<size_t>( + static_cast<size_t>(1), + kMaxAllowedValuesOfSamplesPerFrame * num_handles_required()); + + rtc::CritScope cs_render(crit_render_); + rtc::CritScope cs_capture(crit_capture_); + + // 
Reallocate the queue if the queue item size is too small to fit the + // data to put in the queue. + if (render_queue_element_max_size_ < new_render_queue_element_max_size) { + render_queue_element_max_size_ = new_render_queue_element_max_size; + + std::vector<int16_t> template_queue_element(render_queue_element_max_size_); + + render_signal_queue_.reset( + new SwapQueue<std::vector<int16_t>, RenderQueueItemVerifier<int16_t>>( + kMaxNumFramesToBuffer, template_queue_element, + RenderQueueItemVerifier<int16_t>(render_queue_element_max_size_))); + + render_queue_buffer_.resize(render_queue_element_max_size_); + capture_queue_buffer_.resize(render_queue_element_max_size_); + } else { + render_signal_queue_->Clear(); + } } void* EchoControlMobileImpl::CreateHandle() const { @@ -254,10 +360,14 @@ void* EchoControlMobileImpl::CreateHandle() const { } void EchoControlMobileImpl::DestroyHandle(void* handle) const { + // This method is only called in a non-concurrent manner during APM + // destruction. 
WebRtcAecm_Free(static_cast<Handle*>(handle)); } int EchoControlMobileImpl::InitializeHandle(void* handle) const { + rtc::CritScope cs_render(crit_render_); + rtc::CritScope cs_capture(crit_capture_); assert(handle != NULL); Handle* my_handle = static_cast<Handle*>(handle); if (WebRtcAecm_Init(my_handle, apm_->proc_sample_rate_hz()) != 0) { @@ -271,10 +381,12 @@ int EchoControlMobileImpl::InitializeHandle(void* handle) const { } } - return apm_->kNoError; + return AudioProcessing::kNoError; } int EchoControlMobileImpl::ConfigureHandle(void* handle) const { + rtc::CritScope cs_render(crit_render_); + rtc::CritScope cs_capture(crit_capture_); AecmConfig config; config.cngMode = comfort_noise_enabled_; config.echoMode = MapSetting(routing_mode_); @@ -282,13 +394,14 @@ int EchoControlMobileImpl::ConfigureHandle(void* handle) const { return WebRtcAecm_set_config(static_cast<Handle*>(handle), config); } -int EchoControlMobileImpl::num_handles_required() const { - return apm_->num_output_channels() * - apm_->num_reverse_channels(); +size_t EchoControlMobileImpl::num_handles_required() const { + // Not locked as it only relies on APM public API which is threadsafe. + return apm_->num_output_channels() * apm_->num_reverse_channels(); } int EchoControlMobileImpl::GetHandleError(void* handle) const { + // Not locked as it does not rely on anything in the state. 
assert(handle != NULL); - return MapError(WebRtcAecm_get_error_code(static_cast<Handle*>(handle))); + return AudioProcessing::kUnspecifiedError; } } // namespace webrtc diff --git a/webrtc/modules/audio_processing/echo_control_mobile_impl.h b/webrtc/modules/audio_processing/echo_control_mobile_impl.h index da7022545f..4d6529d3ac 100644 --- a/webrtc/modules/audio_processing/echo_control_mobile_impl.h +++ b/webrtc/modules/audio_processing/echo_control_mobile_impl.h @@ -11,19 +11,23 @@ #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_ECHO_CONTROL_MOBILE_IMPL_H_ #define WEBRTC_MODULES_AUDIO_PROCESSING_ECHO_CONTROL_MOBILE_IMPL_H_ +#include "webrtc/base/criticalsection.h" +#include "webrtc/base/scoped_ptr.h" +#include "webrtc/common_audio/swap_queue.h" #include "webrtc/modules/audio_processing/include/audio_processing.h" #include "webrtc/modules/audio_processing/processing_component.h" namespace webrtc { class AudioBuffer; -class CriticalSectionWrapper; class EchoControlMobileImpl : public EchoControlMobile, public ProcessingComponent { public: EchoControlMobileImpl(const AudioProcessing* apm, - CriticalSectionWrapper* crit); + rtc::CriticalSection* crit_render, + rtc::CriticalSection* crit_capture); + virtual ~EchoControlMobileImpl(); int ProcessRenderAudio(const AudioBuffer* audio); @@ -37,6 +41,9 @@ class EchoControlMobileImpl : public EchoControlMobile, // ProcessingComponent implementation. int Initialize() override; + // Reads render side data that has been queued on the render call. + void ReadQueuedRenderData(); + private: // EchoControlMobile implementation. int Enable(bool enable) override; @@ -46,18 +53,37 @@ class EchoControlMobileImpl : public EchoControlMobile, int GetEchoPath(void* echo_path, size_t size_bytes) const override; // ProcessingComponent implementation. + // Called holding both the render and capture locks. 
void* CreateHandle() const override; int InitializeHandle(void* handle) const override; int ConfigureHandle(void* handle) const override; void DestroyHandle(void* handle) const override; - int num_handles_required() const override; + size_t num_handles_required() const override; int GetHandleError(void* handle) const override; + void AllocateRenderQueue(); + + // Not guarded as its public API is thread safe. const AudioProcessing* apm_; - CriticalSectionWrapper* crit_; - RoutingMode routing_mode_; - bool comfort_noise_enabled_; - unsigned char* external_echo_path_; + + rtc::CriticalSection* const crit_render_ ACQUIRED_BEFORE(crit_capture_); + rtc::CriticalSection* const crit_capture_; + + RoutingMode routing_mode_ GUARDED_BY(crit_capture_); + bool comfort_noise_enabled_ GUARDED_BY(crit_capture_); + unsigned char* external_echo_path_ GUARDED_BY(crit_render_) + GUARDED_BY(crit_capture_); + + size_t render_queue_element_max_size_ GUARDED_BY(crit_render_) + GUARDED_BY(crit_capture_); + + std::vector<int16_t> render_queue_buffer_ GUARDED_BY(crit_render_); + std::vector<int16_t> capture_queue_buffer_ GUARDED_BY(crit_capture_); + + // Lock protection not needed. + rtc::scoped_ptr< + SwapQueue<std::vector<int16_t>, RenderQueueItemVerifier<int16_t>>> + render_signal_queue_; }; } // namespace webrtc diff --git a/webrtc/modules/audio_processing/gain_control_impl.cc b/webrtc/modules/audio_processing/gain_control_impl.cc index 3b1537e796..04a6c7ba29 100644 --- a/webrtc/modules/audio_processing/gain_control_impl.cc +++ b/webrtc/modules/audio_processing/gain_control_impl.cc @@ -14,7 +14,6 @@ #include "webrtc/modules/audio_processing/audio_buffer.h" #include "webrtc/modules/audio_processing/agc/legacy/gain_control.h" -#include "webrtc/system_wrappers/include/critical_section_wrapper.h" namespace webrtc { @@ -33,60 +32,113 @@ int16_t MapSetting(GainControl::Mode mode) { assert(false); return -1; } + +// Maximum length that a frame of samples can have. 
+static const size_t kMaxAllowedValuesOfSamplesPerFrame = 160; +// Maximum number of frames to buffer in the render queue. +// TODO(peah): Decrease this once we properly handle hugely unbalanced +// reverse and forward call numbers. +static const size_t kMaxNumFramesToBuffer = 100; + } // namespace GainControlImpl::GainControlImpl(const AudioProcessing* apm, - CriticalSectionWrapper* crit) - : ProcessingComponent(), - apm_(apm), - crit_(crit), - mode_(kAdaptiveAnalog), - minimum_capture_level_(0), - maximum_capture_level_(255), - limiter_enabled_(true), - target_level_dbfs_(3), - compression_gain_db_(9), - analog_capture_level_(0), - was_analog_level_set_(false), - stream_is_saturated_(false) {} + rtc::CriticalSection* crit_render, + rtc::CriticalSection* crit_capture) + : ProcessingComponent(), + apm_(apm), + crit_render_(crit_render), + crit_capture_(crit_capture), + mode_(kAdaptiveAnalog), + minimum_capture_level_(0), + maximum_capture_level_(255), + limiter_enabled_(true), + target_level_dbfs_(3), + compression_gain_db_(9), + analog_capture_level_(0), + was_analog_level_set_(false), + stream_is_saturated_(false), + render_queue_element_max_size_(0) { + RTC_DCHECK(apm); + RTC_DCHECK(crit_render); + RTC_DCHECK(crit_capture); +} GainControlImpl::~GainControlImpl() {} int GainControlImpl::ProcessRenderAudio(AudioBuffer* audio) { + rtc::CritScope cs(crit_render_); if (!is_component_enabled()) { - return apm_->kNoError; + return AudioProcessing::kNoError; } assert(audio->num_frames_per_band() <= 160); - for (int i = 0; i < num_handles(); i++) { + render_queue_buffer_.resize(0); + for (size_t i = 0; i < num_handles(); i++) { Handle* my_handle = static_cast<Handle*>(handle(i)); - int err = WebRtcAgc_AddFarend( - my_handle, - audio->mixed_low_pass_data(), - audio->num_frames_per_band()); + int err = + WebRtcAgc_GetAddFarendError(my_handle, audio->num_frames_per_band()); - if (err != apm_->kNoError) { + if (err != AudioProcessing::kNoError) return 
GetHandleError(my_handle); - } + + // Buffer the samples in the render queue. + render_queue_buffer_.insert( + render_queue_buffer_.end(), audio->mixed_low_pass_data(), + (audio->mixed_low_pass_data() + audio->num_frames_per_band())); + } + + // Insert the samples into the queue. + if (!render_signal_queue_->Insert(&render_queue_buffer_)) { + // The data queue is full and needs to be emptied. + ReadQueuedRenderData(); + + // Retry the insert (should always work). + RTC_DCHECK_EQ(render_signal_queue_->Insert(&render_queue_buffer_), true); + } + + return AudioProcessing::kNoError; +} + +// Read chunks of data that were received and queued on the render side from +// a queue. All the data chunks are buffered into the farend signal of the AGC. +void GainControlImpl::ReadQueuedRenderData() { + rtc::CritScope cs(crit_capture_); + + if (!is_component_enabled()) { + return; } - return apm_->kNoError; + while (render_signal_queue_->Remove(&capture_queue_buffer_)) { + size_t buffer_index = 0; + const size_t num_frames_per_band = + capture_queue_buffer_.size() / num_handles(); + for (size_t i = 0; i < num_handles(); i++) { + Handle* my_handle = static_cast<Handle*>(handle(i)); + WebRtcAgc_AddFarend(my_handle, &capture_queue_buffer_[buffer_index], + num_frames_per_band); + + buffer_index += num_frames_per_band; + } + } } int GainControlImpl::AnalyzeCaptureAudio(AudioBuffer* audio) { + rtc::CritScope cs(crit_capture_); + if (!is_component_enabled()) { - return apm_->kNoError; + return AudioProcessing::kNoError; } assert(audio->num_frames_per_band() <= 160); assert(audio->num_channels() == num_handles()); - int err = apm_->kNoError; + int err = AudioProcessing::kNoError; if (mode_ == kAdaptiveAnalog) { capture_levels_.assign(num_handles(), analog_capture_level_); - for (int i = 0; i < num_handles(); i++) { + for (size_t i = 0; i < num_handles(); i++) { Handle* my_handle = static_cast<Handle*>(handle(i)); err = WebRtcAgc_AddMic( my_handle, @@ -94,13 +146,13 @@ int 
GainControlImpl::AnalyzeCaptureAudio(AudioBuffer* audio) { audio->num_bands(), audio->num_frames_per_band()); - if (err != apm_->kNoError) { + if (err != AudioProcessing::kNoError) { return GetHandleError(my_handle); } } } else if (mode_ == kAdaptiveDigital) { - for (int i = 0; i < num_handles(); i++) { + for (size_t i = 0; i < num_handles(); i++) { Handle* my_handle = static_cast<Handle*>(handle(i)); int32_t capture_level_out = 0; @@ -114,34 +166,38 @@ int GainControlImpl::AnalyzeCaptureAudio(AudioBuffer* audio) { capture_levels_[i] = capture_level_out; - if (err != apm_->kNoError) { + if (err != AudioProcessing::kNoError) { return GetHandleError(my_handle); } } } - return apm_->kNoError; + return AudioProcessing::kNoError; } int GainControlImpl::ProcessCaptureAudio(AudioBuffer* audio) { + rtc::CritScope cs(crit_capture_); + if (!is_component_enabled()) { - return apm_->kNoError; + return AudioProcessing::kNoError; } if (mode_ == kAdaptiveAnalog && !was_analog_level_set_) { - return apm_->kStreamParameterNotSetError; + return AudioProcessing::kStreamParameterNotSetError; } assert(audio->num_frames_per_band() <= 160); assert(audio->num_channels() == num_handles()); stream_is_saturated_ = false; - for (int i = 0; i < num_handles(); i++) { + for (size_t i = 0; i < num_handles(); i++) { Handle* my_handle = static_cast<Handle*>(handle(i)); int32_t capture_level_out = 0; uint8_t saturation_warning = 0; + // The call to stream_has_echo() is ok from a deadlock perspective + // as the capture lock is already held.
int err = WebRtcAgc_Process( my_handle, audio->split_bands_const(i), @@ -153,7 +209,7 @@ int GainControlImpl::ProcessCaptureAudio(AudioBuffer* audio) { apm_->echo_cancellation()->stream_has_echo(), &saturation_warning); - if (err != apm_->kNoError) { + if (err != AudioProcessing::kNoError) { return GetHandleError(my_handle); } @@ -166,7 +222,7 @@ int GainControlImpl::ProcessCaptureAudio(AudioBuffer* audio) { if (mode_ == kAdaptiveAnalog) { // Take the analog level to be the average across the handles. analog_capture_level_ = 0; - for (int i = 0; i < num_handles(); i++) { + for (size_t i = 0; i < num_handles(); i++) { analog_capture_level_ += capture_levels_[i]; } @@ -174,22 +230,24 @@ int GainControlImpl::ProcessCaptureAudio(AudioBuffer* audio) { } was_analog_level_set_ = false; - return apm_->kNoError; + return AudioProcessing::kNoError; } // TODO(ajm): ensure this is called under kAdaptiveAnalog. int GainControlImpl::set_stream_analog_level(int level) { - CriticalSectionScoped crit_scoped(crit_); + rtc::CritScope cs(crit_capture_); + was_analog_level_set_ = true; if (level < minimum_capture_level_ || level > maximum_capture_level_) { - return apm_->kBadParameterError; + return AudioProcessing::kBadParameterError; } analog_capture_level_ = level; - return apm_->kNoError; + return AudioProcessing::kNoError; } int GainControlImpl::stream_analog_level() { + rtc::CritScope cs(crit_capture_); // TODO(ajm): enable this assertion? 
//assert(mode_ == kAdaptiveAnalog); @@ -197,18 +255,21 @@ int GainControlImpl::stream_analog_level() { } int GainControlImpl::Enable(bool enable) { - CriticalSectionScoped crit_scoped(crit_); + rtc::CritScope cs_render(crit_render_); + rtc::CritScope cs_capture(crit_capture_); return EnableComponent(enable); } bool GainControlImpl::is_enabled() const { + rtc::CritScope cs(crit_capture_); return is_component_enabled(); } int GainControlImpl::set_mode(Mode mode) { - CriticalSectionScoped crit_scoped(crit_); + rtc::CritScope cs_render(crit_render_); + rtc::CritScope cs_capture(crit_capture_); if (MapSetting(mode) == -1) { - return apm_->kBadParameterError; + return AudioProcessing::kBadParameterError; } mode_ = mode; @@ -216,22 +277,23 @@ int GainControlImpl::set_mode(Mode mode) { } GainControl::Mode GainControlImpl::mode() const { + rtc::CritScope cs(crit_capture_); return mode_; } int GainControlImpl::set_analog_level_limits(int minimum, int maximum) { - CriticalSectionScoped crit_scoped(crit_); + rtc::CritScope cs(crit_capture_); if (minimum < 0) { - return apm_->kBadParameterError; + return AudioProcessing::kBadParameterError; } if (maximum > 65535) { - return apm_->kBadParameterError; + return AudioProcessing::kBadParameterError; } if (maximum < minimum) { - return apm_->kBadParameterError; + return AudioProcessing::kBadParameterError; } minimum_capture_level_ = minimum; @@ -241,21 +303,24 @@ int GainControlImpl::set_analog_level_limits(int minimum, } int GainControlImpl::analog_level_minimum() const { + rtc::CritScope cs(crit_capture_); return minimum_capture_level_; } int GainControlImpl::analog_level_maximum() const { + rtc::CritScope cs(crit_capture_); return maximum_capture_level_; } bool GainControlImpl::stream_is_saturated() const { + rtc::CritScope cs(crit_capture_); return stream_is_saturated_; } int GainControlImpl::set_target_level_dbfs(int level) { - CriticalSectionScoped crit_scoped(crit_); + rtc::CritScope cs(crit_capture_); if (level > 31 || level 
< 0) { - return apm_->kBadParameterError; + return AudioProcessing::kBadParameterError; } target_level_dbfs_ = level; @@ -263,13 +328,14 @@ int GainControlImpl::set_target_level_dbfs(int level) { } int GainControlImpl::target_level_dbfs() const { + rtc::CritScope cs(crit_capture_); return target_level_dbfs_; } int GainControlImpl::set_compression_gain_db(int gain) { - CriticalSectionScoped crit_scoped(crit_); + rtc::CritScope cs(crit_capture_); if (gain < 0 || gain > 90) { - return apm_->kBadParameterError; + return AudioProcessing::kBadParameterError; } compression_gain_db_ = gain; @@ -277,27 +343,59 @@ int GainControlImpl::set_compression_gain_db(int gain) { } int GainControlImpl::compression_gain_db() const { + rtc::CritScope cs(crit_capture_); return compression_gain_db_; } int GainControlImpl::enable_limiter(bool enable) { - CriticalSectionScoped crit_scoped(crit_); + rtc::CritScope cs(crit_capture_); limiter_enabled_ = enable; return Configure(); } bool GainControlImpl::is_limiter_enabled() const { + rtc::CritScope cs(crit_capture_); return limiter_enabled_; } int GainControlImpl::Initialize() { int err = ProcessingComponent::Initialize(); - if (err != apm_->kNoError || !is_component_enabled()) { + if (err != AudioProcessing::kNoError || !is_component_enabled()) { return err; } - capture_levels_.assign(num_handles(), analog_capture_level_); - return apm_->kNoError; + AllocateRenderQueue(); + + rtc::CritScope cs_capture(crit_capture_); + const int n = num_handles(); + RTC_CHECK_GE(n, 0) << "Bad number of handles: " << n; + + capture_levels_.assign(n, analog_capture_level_); + return AudioProcessing::kNoError; +} + +void GainControlImpl::AllocateRenderQueue() { + const size_t new_render_queue_element_max_size = + std::max<size_t>(static_cast<size_t>(1), + kMaxAllowedValuesOfSamplesPerFrame * num_handles()); + + rtc::CritScope cs_render(crit_render_); + rtc::CritScope cs_capture(crit_capture_); + + if (render_queue_element_max_size_ < 
new_render_queue_element_max_size) { + render_queue_element_max_size_ = new_render_queue_element_max_size; + std::vector<int16_t> template_queue_element(render_queue_element_max_size_); + + render_signal_queue_.reset( + new SwapQueue<std::vector<int16_t>, RenderQueueItemVerifier<int16_t>>( + kMaxNumFramesToBuffer, template_queue_element, + RenderQueueItemVerifier<int16_t>(render_queue_element_max_size_))); + + render_queue_buffer_.resize(render_queue_element_max_size_); + capture_queue_buffer_.resize(render_queue_element_max_size_); + } else { + render_signal_queue_->Clear(); + } } void* GainControlImpl::CreateHandle() const { @@ -309,6 +407,9 @@ void GainControlImpl::DestroyHandle(void* handle) const { } int GainControlImpl::InitializeHandle(void* handle) const { + rtc::CritScope cs_render(crit_render_); + rtc::CritScope cs_capture(crit_capture_); + return WebRtcAgc_Init(static_cast<Handle*>(handle), minimum_capture_level_, maximum_capture_level_, @@ -317,6 +418,8 @@ int GainControlImpl::InitializeHandle(void* handle) const { } int GainControlImpl::ConfigureHandle(void* handle) const { + rtc::CritScope cs_render(crit_render_); + rtc::CritScope cs_capture(crit_capture_); WebRtcAgcConfig config; // TODO(ajm): Flip the sign here (since AGC expects a positive value) if we // change the interface. @@ -330,14 +433,15 @@ int GainControlImpl::ConfigureHandle(void* handle) const { return WebRtcAgc_set_config(static_cast<Handle*>(handle), config); } -int GainControlImpl::num_handles_required() const { - return apm_->num_output_channels(); +size_t GainControlImpl::num_handles_required() const { + // Not locked as it only relies on APM public API which is threadsafe. + return apm_->num_proc_channels(); } int GainControlImpl::GetHandleError(void* handle) const { // The AGC has no get_error() function. // (Despite listing errors in its interface...) 
assert(handle != NULL); - return apm_->kUnspecifiedError; + return AudioProcessing::kUnspecifiedError; } } // namespace webrtc diff --git a/webrtc/modules/audio_processing/gain_control_impl.h b/webrtc/modules/audio_processing/gain_control_impl.h index f24d200cf2..72789ba5e1 100644 --- a/webrtc/modules/audio_processing/gain_control_impl.h +++ b/webrtc/modules/audio_processing/gain_control_impl.h @@ -13,19 +13,23 @@ #include <vector> +#include "webrtc/base/criticalsection.h" +#include "webrtc/base/scoped_ptr.h" +#include "webrtc/base/thread_annotations.h" +#include "webrtc/common_audio/swap_queue.h" #include "webrtc/modules/audio_processing/include/audio_processing.h" #include "webrtc/modules/audio_processing/processing_component.h" namespace webrtc { class AudioBuffer; -class CriticalSectionWrapper; class GainControlImpl : public GainControl, public ProcessingComponent { public: GainControlImpl(const AudioProcessing* apm, - CriticalSectionWrapper* crit); + rtc::CriticalSection* crit_render, + rtc::CriticalSection* crit_capture); virtual ~GainControlImpl(); int ProcessRenderAudio(AudioBuffer* audio); @@ -41,6 +45,9 @@ class GainControlImpl : public GainControl, bool is_limiter_enabled() const override; Mode mode() const override; + // Reads render side data that has been queued on the render call. + void ReadQueuedRenderData(); + private: // GainControl implementation. int Enable(bool enable) override; @@ -61,21 +68,37 @@ class GainControlImpl : public GainControl, int InitializeHandle(void* handle) const override; int ConfigureHandle(void* handle) const override; void DestroyHandle(void* handle) const override; - int num_handles_required() const override; + size_t num_handles_required() const override; int GetHandleError(void* handle) const override; + void AllocateRenderQueue(); + + // Not guarded as its public API is thread safe. 
const AudioProcessing* apm_; - CriticalSectionWrapper* crit_; - Mode mode_; - int minimum_capture_level_; - int maximum_capture_level_; - bool limiter_enabled_; - int target_level_dbfs_; - int compression_gain_db_; - std::vector<int> capture_levels_; - int analog_capture_level_; - bool was_analog_level_set_; - bool stream_is_saturated_; + + rtc::CriticalSection* const crit_render_ ACQUIRED_BEFORE(crit_capture_); + rtc::CriticalSection* const crit_capture_; + + Mode mode_ GUARDED_BY(crit_capture_); + int minimum_capture_level_ GUARDED_BY(crit_capture_); + int maximum_capture_level_ GUARDED_BY(crit_capture_); + bool limiter_enabled_ GUARDED_BY(crit_capture_); + int target_level_dbfs_ GUARDED_BY(crit_capture_); + int compression_gain_db_ GUARDED_BY(crit_capture_); + std::vector<int> capture_levels_ GUARDED_BY(crit_capture_); + int analog_capture_level_ GUARDED_BY(crit_capture_); + bool was_analog_level_set_ GUARDED_BY(crit_capture_); + bool stream_is_saturated_ GUARDED_BY(crit_capture_); + + size_t render_queue_element_max_size_ GUARDED_BY(crit_render_) + GUARDED_BY(crit_capture_); + std::vector<int16_t> render_queue_buffer_ GUARDED_BY(crit_render_); + std::vector<int16_t> capture_queue_buffer_ GUARDED_BY(crit_capture_); + + // Lock protection not needed. 
+ rtc::scoped_ptr< + SwapQueue<std::vector<int16_t>, RenderQueueItemVerifier<int16_t>>> + render_signal_queue_; }; } // namespace webrtc diff --git a/webrtc/modules/audio_processing/high_pass_filter_impl.cc b/webrtc/modules/audio_processing/high_pass_filter_impl.cc index 29e482078e..375d58febb 100644 --- a/webrtc/modules/audio_processing/high_pass_filter_impl.cc +++ b/webrtc/modules/audio_processing/high_pass_filter_impl.cc @@ -10,159 +10,125 @@ #include "webrtc/modules/audio_processing/high_pass_filter_impl.h" -#include <assert.h> - #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" #include "webrtc/modules/audio_processing/audio_buffer.h" #include "webrtc/system_wrappers/include/critical_section_wrapper.h" -#include "webrtc/typedefs.h" - namespace webrtc { namespace { -const int16_t kFilterCoefficients8kHz[5] = - {3798, -7596, 3798, 7807, -3733}; - -const int16_t kFilterCoefficients[5] = - {4012, -8024, 4012, 8002, -3913}; - -struct FilterState { - int16_t y[4]; - int16_t x[2]; - const int16_t* ba; -}; - -int InitializeFilter(FilterState* hpf, int sample_rate_hz) { - assert(hpf != NULL); +const int16_t kFilterCoefficients8kHz[5] = {3798, -7596, 3798, 7807, -3733}; +const int16_t kFilterCoefficients[5] = {4012, -8024, 4012, 8002, -3913}; +} // namespace - if (sample_rate_hz == AudioProcessing::kSampleRate8kHz) { - hpf->ba = kFilterCoefficients8kHz; - } else { - hpf->ba = kFilterCoefficients; +class HighPassFilterImpl::BiquadFilter { + public: + explicit BiquadFilter(int sample_rate_hz) : + ba_(sample_rate_hz == AudioProcessing::kSampleRate8kHz ? 
+ kFilterCoefficients8kHz : kFilterCoefficients) + { + Reset(); } - WebRtcSpl_MemSetW16(hpf->x, 0, 2); - WebRtcSpl_MemSetW16(hpf->y, 0, 4); - - return AudioProcessing::kNoError; -} - -int Filter(FilterState* hpf, int16_t* data, size_t length) { - assert(hpf != NULL); - - int32_t tmp_int32 = 0; - int16_t* y = hpf->y; - int16_t* x = hpf->x; - const int16_t* ba = hpf->ba; - - for (size_t i = 0; i < length; i++) { - // y[i] = b[0] * x[i] + b[1] * x[i-1] + b[2] * x[i-2] - // + -a[1] * y[i-1] + -a[2] * y[i-2]; - - tmp_int32 = y[1] * ba[3]; // -a[1] * y[i-1] (low part) - tmp_int32 += y[3] * ba[4]; // -a[2] * y[i-2] (low part) - tmp_int32 = (tmp_int32 >> 15); - tmp_int32 += y[0] * ba[3]; // -a[1] * y[i-1] (high part) - tmp_int32 += y[2] * ba[4]; // -a[2] * y[i-2] (high part) - tmp_int32 = (tmp_int32 << 1); - - tmp_int32 += data[i] * ba[0]; // b[0]*x[0] - tmp_int32 += x[0] * ba[1]; // b[1]*x[i-1] - tmp_int32 += x[1] * ba[2]; // b[2]*x[i-2] - - // Update state (input part) - x[1] = x[0]; - x[0] = data[i]; - - // Update state (filtered part) - y[2] = y[0]; - y[3] = y[1]; - y[0] = static_cast<int16_t>(tmp_int32 >> 13); - y[1] = static_cast<int16_t>( - (tmp_int32 - (static_cast<int32_t>(y[0]) << 13)) << 2); - - // Rounding in Q12, i.e. add 2^11 - tmp_int32 += 2048; - - // Saturate (to 2^27) so that the HP filtered signal does not overflow - tmp_int32 = WEBRTC_SPL_SAT(static_cast<int32_t>(134217727), - tmp_int32, - static_cast<int32_t>(-134217728)); - - // Convert back to Q0 and use rounding. 
- data[i] = (int16_t)(tmp_int32 >> 12); + void Reset() { + std::memset(x_, 0, sizeof(x_)); + std::memset(y_, 0, sizeof(y_)); } - return AudioProcessing::kNoError; -} -} // namespace + void Process(int16_t* data, size_t length) { + const int16_t* const ba = ba_; + int16_t* x = x_; + int16_t* y = y_; + int32_t tmp_int32 = 0; + + for (size_t i = 0; i < length; i++) { + // y[i] = b[0] * x[i] + b[1] * x[i-1] + b[2] * x[i-2] + // + -a[1] * y[i-1] + -a[2] * y[i-2]; + + tmp_int32 = y[1] * ba[3]; // -a[1] * y[i-1] (low part) + tmp_int32 += y[3] * ba[4]; // -a[2] * y[i-2] (low part) + tmp_int32 = (tmp_int32 >> 15); + tmp_int32 += y[0] * ba[3]; // -a[1] * y[i-1] (high part) + tmp_int32 += y[2] * ba[4]; // -a[2] * y[i-2] (high part) + tmp_int32 = (tmp_int32 << 1); + + tmp_int32 += data[i] * ba[0]; // b[0] * x[0] + tmp_int32 += x[0] * ba[1]; // b[1] * x[i-1] + tmp_int32 += x[1] * ba[2]; // b[2] * x[i-2] + + // Update state (input part). + x[1] = x[0]; + x[0] = data[i]; + + // Update state (filtered part). + y[2] = y[0]; + y[3] = y[1]; + y[0] = static_cast<int16_t>(tmp_int32 >> 13); + y[1] = static_cast<int16_t>( + (tmp_int32 - (static_cast<int32_t>(y[0]) << 13)) << 2); + + // Rounding in Q12, i.e. add 2^11. + tmp_int32 += 2048; + + // Saturate (to 2^27) so that the HP filtered signal does not overflow. + tmp_int32 = WEBRTC_SPL_SAT(static_cast<int32_t>(134217727), + tmp_int32, + static_cast<int32_t>(-134217728)); + + // Convert back to Q0 and use rounding. 
+ data[i] = static_cast<int16_t>(tmp_int32 >> 12); + } + } -typedef FilterState Handle; + private: + const int16_t* const ba_ = nullptr; + int16_t x_[2]; + int16_t y_[4]; +}; -HighPassFilterImpl::HighPassFilterImpl(const AudioProcessing* apm, - CriticalSectionWrapper* crit) - : ProcessingComponent(), - apm_(apm), - crit_(crit) {} +HighPassFilterImpl::HighPassFilterImpl(rtc::CriticalSection* crit) + : crit_(crit) { + RTC_DCHECK(crit_); +} HighPassFilterImpl::~HighPassFilterImpl() {} -int HighPassFilterImpl::ProcessCaptureAudio(AudioBuffer* audio) { - int err = apm_->kNoError; - - if (!is_component_enabled()) { - return apm_->kNoError; +void HighPassFilterImpl::Initialize(size_t channels, int sample_rate_hz) { + std::vector<rtc::scoped_ptr<BiquadFilter>> new_filters(channels); + for (size_t i = 0; i < channels; i++) { + new_filters[i].reset(new BiquadFilter(sample_rate_hz)); } + rtc::CritScope cs(crit_); + filters_.swap(new_filters); +} - assert(audio->num_frames_per_band() <= 160); - - for (int i = 0; i < num_handles(); i++) { - Handle* my_handle = static_cast<Handle*>(handle(i)); - err = Filter(my_handle, - audio->split_bands(i)[kBand0To8kHz], - audio->num_frames_per_band()); - - if (err != apm_->kNoError) { - return GetHandleError(my_handle); - } +void HighPassFilterImpl::ProcessCaptureAudio(AudioBuffer* audio) { + RTC_DCHECK(audio); + rtc::CritScope cs(crit_); + if (!enabled_) { + return; } - return apm_->kNoError; + RTC_DCHECK_GE(160u, audio->num_frames_per_band()); + RTC_DCHECK_EQ(filters_.size(), audio->num_channels()); + for (size_t i = 0; i < filters_.size(); i++) { + filters_[i]->Process(audio->split_bands(i)[kBand0To8kHz], + audio->num_frames_per_band()); + } } int HighPassFilterImpl::Enable(bool enable) { - CriticalSectionScoped crit_scoped(crit_); - return EnableComponent(enable); + rtc::CritScope cs(crit_); + if (!enabled_ && enable) { + for (auto& filter : filters_) { + filter->Reset(); + } + } + enabled_ = enable; + return AudioProcessing::kNoError; } 
bool HighPassFilterImpl::is_enabled() const { - return is_component_enabled(); -} - -void* HighPassFilterImpl::CreateHandle() const { - return new FilterState; -} - -void HighPassFilterImpl::DestroyHandle(void* handle) const { - delete static_cast<Handle*>(handle); -} - -int HighPassFilterImpl::InitializeHandle(void* handle) const { - return InitializeFilter(static_cast<Handle*>(handle), - apm_->proc_sample_rate_hz()); -} - -int HighPassFilterImpl::ConfigureHandle(void* /*handle*/) const { - return apm_->kNoError; // Not configurable. -} - -int HighPassFilterImpl::num_handles_required() const { - return apm_->num_output_channels(); -} - -int HighPassFilterImpl::GetHandleError(void* handle) const { - // The component has no detailed errors. - assert(handle != NULL); - return apm_->kUnspecifiedError; + rtc::CritScope cs(crit_); + return enabled_; } } // namespace webrtc diff --git a/webrtc/modules/audio_processing/high_pass_filter_impl.h b/webrtc/modules/audio_processing/high_pass_filter_impl.h index 90b393e903..0e985bac7a 100644 --- a/webrtc/modules/audio_processing/high_pass_filter_impl.h +++ b/webrtc/modules/audio_processing/high_pass_filter_impl.h @@ -11,39 +11,34 @@ #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_HIGH_PASS_FILTER_IMPL_H_ #define WEBRTC_MODULES_AUDIO_PROCESSING_HIGH_PASS_FILTER_IMPL_H_ +#include "webrtc/base/constructormagic.h" +#include "webrtc/base/criticalsection.h" +#include "webrtc/base/scoped_ptr.h" #include "webrtc/modules/audio_processing/include/audio_processing.h" -#include "webrtc/modules/audio_processing/processing_component.h" namespace webrtc { class AudioBuffer; -class CriticalSectionWrapper; -class HighPassFilterImpl : public HighPassFilter, - public ProcessingComponent { +class HighPassFilterImpl : public HighPassFilter { public: - HighPassFilterImpl(const AudioProcessing* apm, CriticalSectionWrapper* crit); - virtual ~HighPassFilterImpl(); + explicit HighPassFilterImpl(rtc::CriticalSection* crit); + ~HighPassFilterImpl() override; - int 
ProcessCaptureAudio(AudioBuffer* audio); + // TODO(peah): Fold into ctor, once public API is removed. + void Initialize(size_t channels, int sample_rate_hz); + void ProcessCaptureAudio(AudioBuffer* audio); // HighPassFilter implementation. + int Enable(bool enable) override; bool is_enabled() const override; private: - // HighPassFilter implementation. - int Enable(bool enable) override; - - // ProcessingComponent implementation. - void* CreateHandle() const override; - int InitializeHandle(void* handle) const override; - int ConfigureHandle(void* handle) const override; - void DestroyHandle(void* handle) const override; - int num_handles_required() const override; - int GetHandleError(void* handle) const override; - - const AudioProcessing* apm_; - CriticalSectionWrapper* crit_; + class BiquadFilter; + rtc::CriticalSection* const crit_ = nullptr; + bool enabled_ GUARDED_BY(crit_) = false; + std::vector<rtc::scoped_ptr<BiquadFilter>> filters_ GUARDED_BY(crit_); + RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(HighPassFilterImpl); }; } // namespace webrtc diff --git a/webrtc/modules/audio_processing/include/audio_processing.h b/webrtc/modules/audio_processing/include/audio_processing.h index c8ddc6a483..9a3a4b32d5 100644 --- a/webrtc/modules/audio_processing/include/audio_processing.h +++ b/webrtc/modules/audio_processing/include/audio_processing.h @@ -65,6 +65,7 @@ class VoiceDetection; struct ExtendedFilter { ExtendedFilter() : enabled(false) {} explicit ExtendedFilter(bool enabled) : enabled(enabled) {} + static const ConfigOptionID identifier = ConfigOptionID::kExtendedFilter; bool enabled; }; @@ -76,6 +77,7 @@ struct ExtendedFilter { struct DelayAgnostic { DelayAgnostic() : enabled(false) {} explicit DelayAgnostic(bool enabled) : enabled(enabled) {} + static const ConfigOptionID identifier = ConfigOptionID::kDelayAgnostic; bool enabled; }; @@ -96,6 +98,7 @@ struct ExperimentalAgc { : enabled(enabled), startup_min_volume(kAgcStartupMinVolume) {} ExperimentalAgc(bool 
enabled, int startup_min_volume) : enabled(enabled), startup_min_volume(startup_min_volume) {} + static const ConfigOptionID identifier = ConfigOptionID::kExperimentalAgc; bool enabled; int startup_min_volume; }; @@ -105,6 +108,7 @@ struct ExperimentalAgc { struct ExperimentalNs { ExperimentalNs() : enabled(false) {} explicit ExperimentalNs(bool enabled) : enabled(enabled) {} + static const ConfigOptionID identifier = ConfigOptionID::kExperimentalNs; bool enabled; }; @@ -127,6 +131,7 @@ struct Beamforming { : enabled(enabled), array_geometry(array_geometry), target_direction(target_direction) {} + static const ConfigOptionID identifier = ConfigOptionID::kBeamforming; const bool enabled; const std::vector<Point> array_geometry; const SphericalPointf target_direction; @@ -141,6 +146,7 @@ struct Beamforming { struct Intelligibility { Intelligibility() : enabled(false) {} explicit Intelligibility(bool enabled) : enabled(enabled) {} + static const ConfigOptionID identifier = ConfigOptionID::kIntelligibility; bool enabled; }; @@ -279,13 +285,18 @@ class AudioProcessing { // ensures the options are applied immediately. virtual void SetExtraOptions(const Config& config) = 0; + // TODO(peah): Remove after voice engine no longer requires it to resample + // the reverse stream to the forward rate. + virtual int input_sample_rate_hz() const = 0; + // TODO(ajm): Only intended for internal use. Make private and friend the // necessary classes? virtual int proc_sample_rate_hz() const = 0; virtual int proc_split_sample_rate_hz() const = 0; - virtual int num_input_channels() const = 0; - virtual int num_output_channels() const = 0; - virtual int num_reverse_channels() const = 0; + virtual size_t num_input_channels() const = 0; + virtual size_t num_proc_channels() const = 0; + virtual size_t num_output_channels() const = 0; + virtual size_t num_reverse_channels() const = 0; // Set to true when the output of AudioProcessing will be muted or in some // other way not used. 
Ideally, the captured audio would still be processed, @@ -497,7 +508,7 @@ class StreamConfig { // is true, the last channel in any corresponding list of // channels is the keyboard channel. StreamConfig(int sample_rate_hz = 0, - int num_channels = 0, + size_t num_channels = 0, bool has_keyboard = false) : sample_rate_hz_(sample_rate_hz), num_channels_(num_channels), @@ -508,14 +519,14 @@ class StreamConfig { sample_rate_hz_ = value; num_frames_ = calculate_frames(value); } - void set_num_channels(int value) { num_channels_ = value; } + void set_num_channels(size_t value) { num_channels_ = value; } void set_has_keyboard(bool value) { has_keyboard_ = value; } int sample_rate_hz() const { return sample_rate_hz_; } // The number of channels in the stream, not including the keyboard channel if // present. - int num_channels() const { return num_channels_; } + size_t num_channels() const { return num_channels_; } bool has_keyboard() const { return has_keyboard_; } size_t num_frames() const { return num_frames_; } @@ -536,7 +547,7 @@ class StreamConfig { } int sample_rate_hz_; - int num_channels_; + size_t num_channels_; bool has_keyboard_; size_t num_frames_; }; diff --git a/webrtc/modules/audio_processing/include/mock_audio_processing.h b/webrtc/modules/audio_processing/include/mock_audio_processing.h index 4ff52baf1c..9e1f2d5861 100644 --- a/webrtc/modules/audio_processing/include/mock_audio_processing.h +++ b/webrtc/modules/audio_processing/include/mock_audio_processing.h @@ -201,11 +201,11 @@ class MockAudioProcessing : public AudioProcessing { MOCK_CONST_METHOD0(proc_split_sample_rate_hz, int()); MOCK_CONST_METHOD0(num_input_channels, - int()); + size_t()); MOCK_CONST_METHOD0(num_output_channels, - int()); + size_t()); MOCK_CONST_METHOD0(num_reverse_channels, - int()); + size_t()); MOCK_METHOD1(set_output_will_be_muted, void(bool muted)); MOCK_CONST_METHOD0(output_will_be_muted, diff --git a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc 
b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc index d014ce060c..fe964aba8c 100644 --- a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc +++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc @@ -54,12 +54,12 @@ IntelligibilityEnhancer::TransformCallback::TransformCallback( void IntelligibilityEnhancer::TransformCallback::ProcessAudioBlock( const complex<float>* const* in_block, - int in_channels, + size_t in_channels, size_t frames, - int /* out_channels */, + size_t /* out_channels */, complex<float>* const* out_block) { RTC_DCHECK_EQ(parent_->freqs_, frames); - for (int i = 0; i < in_channels; ++i) { + for (size_t i = 0; i < in_channels; ++i) { parent_->DispatchAudio(source_, in_block[i], out_block[i]); } } @@ -129,7 +129,7 @@ IntelligibilityEnhancer::IntelligibilityEnhancer(const Config& config) void IntelligibilityEnhancer::ProcessRenderAudio(float* const* audio, int sample_rate_hz, - int num_channels) { + size_t num_channels) { RTC_CHECK_EQ(sample_rate_hz_, sample_rate_hz); RTC_CHECK_EQ(num_render_channels_, num_channels); @@ -138,7 +138,7 @@ void IntelligibilityEnhancer::ProcessRenderAudio(float* const* audio, } if (active_) { - for (int i = 0; i < num_render_channels_; ++i) { + for (size_t i = 0; i < num_render_channels_; ++i) { memcpy(audio[i], temp_render_out_buffer_.channels()[i], chunk_length_ * sizeof(**audio)); } @@ -147,7 +147,7 @@ void IntelligibilityEnhancer::ProcessRenderAudio(float* const* audio, void IntelligibilityEnhancer::AnalyzeCaptureAudio(float* const* audio, int sample_rate_hz, - int num_channels) { + size_t num_channels) { RTC_CHECK_EQ(sample_rate_hz_, sample_rate_hz); RTC_CHECK_EQ(num_capture_channels_, num_channels); diff --git a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h index 1e9e35ac2a..1eb22342ad 100644 --- 
a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h +++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h @@ -47,8 +47,8 @@ class IntelligibilityEnhancer { gain_change_limit(0.1f), rho(0.02f) {} int sample_rate_hz; - int num_capture_channels; - int num_render_channels; + size_t num_capture_channels; + size_t num_render_channels; intelligibility::VarianceArray::StepType var_type; float var_decay_rate; size_t var_window_size; @@ -63,12 +63,12 @@ class IntelligibilityEnhancer { // Reads and processes chunk of noise stream in time domain. void AnalyzeCaptureAudio(float* const* audio, int sample_rate_hz, - int num_channels); + size_t num_channels); // Reads chunk of speech in time domain and updates with modified signal. void ProcessRenderAudio(float* const* audio, int sample_rate_hz, - int num_channels); + size_t num_channels); bool active() const; private: @@ -85,9 +85,9 @@ class IntelligibilityEnhancer { // All in frequency domain, receives input |in_block|, applies // intelligibility enhancement, and writes result to |out_block|. void ProcessAudioBlock(const std::complex<float>* const* in_block, - int in_channels, + size_t in_channels, size_t frames, - int out_channels, + size_t out_channels, std::complex<float>* const* out_block) override; private: @@ -144,8 +144,8 @@ class IntelligibilityEnhancer { const size_t bank_size_; // Num ERB filters. const int sample_rate_hz_; const int erb_resolution_; - const int num_capture_channels_; - const int num_render_channels_; + const size_t num_capture_channels_; + const size_t num_render_channels_; const int analysis_rate_; // Num blocks before gains recalculated. const bool active_; // Whether render gains are being updated. 
diff --git a/webrtc/modules/audio_processing/intelligibility/test/intelligibility_proc.cc b/webrtc/modules/audio_processing/intelligibility/test/intelligibility_proc.cc index 27d0ab48bb..4d2f5f4c5d 100644 --- a/webrtc/modules/audio_processing/intelligibility/test/intelligibility_proc.cc +++ b/webrtc/modules/audio_processing/intelligibility/test/intelligibility_proc.cc @@ -68,7 +68,7 @@ DEFINE_string(out_file, "Enhanced output. Use '-' to " "play through aplay immediately."); -const int kNumChannels = 1; +const size_t kNumChannels = 1; // void function for gtest void void_main(int argc, char* argv[]) { diff --git a/webrtc/modules/audio_processing/level_estimator_impl.cc b/webrtc/modules/audio_processing/level_estimator_impl.cc index 35fe697c2d..187873e33e 100644 --- a/webrtc/modules/audio_processing/level_estimator_impl.cc +++ b/webrtc/modules/audio_processing/level_estimator_impl.cc @@ -11,76 +11,55 @@ #include "webrtc/modules/audio_processing/level_estimator_impl.h" #include "webrtc/modules/audio_processing/audio_buffer.h" -#include "webrtc/modules/audio_processing/include/audio_processing.h" #include "webrtc/modules/audio_processing/rms_level.h" #include "webrtc/system_wrappers/include/critical_section_wrapper.h" namespace webrtc { -LevelEstimatorImpl::LevelEstimatorImpl(const AudioProcessing* apm, - CriticalSectionWrapper* crit) - : ProcessingComponent(), - crit_(crit) {} +LevelEstimatorImpl::LevelEstimatorImpl(rtc::CriticalSection* crit) + : crit_(crit), rms_(new RMSLevel()) { + RTC_DCHECK(crit); +} LevelEstimatorImpl::~LevelEstimatorImpl() {} -int LevelEstimatorImpl::ProcessStream(AudioBuffer* audio) { - if (!is_component_enabled()) { - return AudioProcessing::kNoError; - } +void LevelEstimatorImpl::Initialize() { + rtc::CritScope cs(crit_); + rms_->Reset(); +} - RMSLevel* rms_level = static_cast<RMSLevel*>(handle(0)); - for (int i = 0; i < audio->num_channels(); ++i) { - rms_level->Process(audio->channels_const()[i], - audio->num_frames()); +void 
LevelEstimatorImpl::ProcessStream(AudioBuffer* audio) { + RTC_DCHECK(audio); + rtc::CritScope cs(crit_); + if (!enabled_) { + return; } - return AudioProcessing::kNoError; + for (size_t i = 0; i < audio->num_channels(); i++) { + rms_->Process(audio->channels_const()[i], audio->num_frames()); + } } int LevelEstimatorImpl::Enable(bool enable) { - CriticalSectionScoped crit_scoped(crit_); - return EnableComponent(enable); + rtc::CritScope cs(crit_); + if (enable && !enabled_) { + rms_->Reset(); + } + enabled_ = enable; + return AudioProcessing::kNoError; } bool LevelEstimatorImpl::is_enabled() const { - return is_component_enabled(); + rtc::CritScope cs(crit_); + return enabled_; } int LevelEstimatorImpl::RMS() { - if (!is_component_enabled()) { + rtc::CritScope cs(crit_); + if (!enabled_) { return AudioProcessing::kNotEnabledError; } - RMSLevel* rms_level = static_cast<RMSLevel*>(handle(0)); - return rms_level->RMS(); -} - -// The ProcessingComponent implementation is pretty weird in this class since -// we have only a single instance of the trivial underlying component. 
-void* LevelEstimatorImpl::CreateHandle() const { - return new RMSLevel; -} - -void LevelEstimatorImpl::DestroyHandle(void* handle) const { - delete static_cast<RMSLevel*>(handle); + return rms_->RMS(); } - -int LevelEstimatorImpl::InitializeHandle(void* handle) const { - static_cast<RMSLevel*>(handle)->Reset(); - return AudioProcessing::kNoError; -} - -int LevelEstimatorImpl::ConfigureHandle(void* /*handle*/) const { - return AudioProcessing::kNoError; -} - -int LevelEstimatorImpl::num_handles_required() const { - return 1; -} - -int LevelEstimatorImpl::GetHandleError(void* /*handle*/) const { - return AudioProcessing::kUnspecifiedError; -} - } // namespace webrtc diff --git a/webrtc/modules/audio_processing/level_estimator_impl.h b/webrtc/modules/audio_processing/level_estimator_impl.h index 0d0050c7e7..4401da37e4 100644 --- a/webrtc/modules/audio_processing/level_estimator_impl.h +++ b/webrtc/modules/audio_processing/level_estimator_impl.h @@ -11,43 +11,36 @@ #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_ESTIMATOR_IMPL_H_ #define WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_ESTIMATOR_IMPL_H_ +#include "webrtc/base/constructormagic.h" +#include "webrtc/base/criticalsection.h" +#include "webrtc/base/scoped_ptr.h" #include "webrtc/modules/audio_processing/include/audio_processing.h" -#include "webrtc/modules/audio_processing/processing_component.h" -#include "webrtc/modules/audio_processing/rms_level.h" namespace webrtc { class AudioBuffer; -class CriticalSectionWrapper; +class RMSLevel; -class LevelEstimatorImpl : public LevelEstimator, - public ProcessingComponent { +class LevelEstimatorImpl : public LevelEstimator { public: - LevelEstimatorImpl(const AudioProcessing* apm, - CriticalSectionWrapper* crit); - virtual ~LevelEstimatorImpl(); + explicit LevelEstimatorImpl(rtc::CriticalSection* crit); + ~LevelEstimatorImpl() override; - int ProcessStream(AudioBuffer* audio); + // TODO(peah): Fold into ctor, once public API is removed. 
+ void Initialize(); + void ProcessStream(AudioBuffer* audio); // LevelEstimator implementation. - bool is_enabled() const override; - - private: - // LevelEstimator implementation. int Enable(bool enable) override; + bool is_enabled() const override; int RMS() override; - // ProcessingComponent implementation. - void* CreateHandle() const override; - int InitializeHandle(void* handle) const override; - int ConfigureHandle(void* handle) const override; - void DestroyHandle(void* handle) const override; - int num_handles_required() const override; - int GetHandleError(void* handle) const override; - - CriticalSectionWrapper* crit_; + private: + rtc::CriticalSection* const crit_ = nullptr; + bool enabled_ GUARDED_BY(crit_) = false; + rtc::scoped_ptr<RMSLevel> rms_ GUARDED_BY(crit_); + RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(LevelEstimatorImpl); }; - } // namespace webrtc #endif // WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_ESTIMATOR_IMPL_H_ diff --git a/webrtc/modules/audio_processing/logging/aec_logging.h b/webrtc/modules/audio_processing/logging/aec_logging.h index 3cf9ff89ed..b062913be2 100644 --- a/webrtc/modules/audio_processing/logging/aec_logging.h +++ b/webrtc/modules/audio_processing/logging/aec_logging.h @@ -43,6 +43,20 @@ (void) fwrite(data, data_size, 1, file); \ } while (0) +// Dumps a raw scalar int32 to file. +#define RTC_AEC_DEBUG_RAW_WRITE_SCALAR_INT32(file, data) \ + do { \ + int32_t value_to_store = data; \ + (void) fwrite(&value_to_store, sizeof(value_to_store), 1, file); \ + } while (0) + +// Dumps a raw scalar double to file. +#define RTC_AEC_DEBUG_RAW_WRITE_SCALAR_DOUBLE(file, data) \ + do { \ + double value_to_store = data; \ + (void) fwrite(&value_to_store, sizeof(value_to_store), 1, file); \ + } while (0) + // Opens a raw data file for writing using the specified sample rate. 
#define RTC_AEC_DEBUG_RAW_OPEN(name, instance_counter, file) \ do { \ @@ -73,6 +87,14 @@ do { \ } while (0) +#define RTC_AEC_DEBUG_RAW_WRITE_SCALAR_INT32(file, data) \ + do { \ + } while (0) + +#define RTC_AEC_DEBUG_RAW_WRITE_SCALAR_DOUBLE(file, data) \ + do { \ + } while (0) + #define RTC_AEC_DEBUG_RAW_OPEN(file, name, instance_counter) \ do { \ } while (0) diff --git a/webrtc/modules/audio_processing/noise_suppression_impl.cc b/webrtc/modules/audio_processing/noise_suppression_impl.cc index 65ec3c445e..de7e856676 100644 --- a/webrtc/modules/audio_processing/noise_suppression_impl.cc +++ b/webrtc/modules/audio_processing/noise_suppression_impl.cc @@ -10,172 +10,166 @@ #include "webrtc/modules/audio_processing/noise_suppression_impl.h" -#include <assert.h> - #include "webrtc/modules/audio_processing/audio_buffer.h" #if defined(WEBRTC_NS_FLOAT) -#include "webrtc/modules/audio_processing/ns/include/noise_suppression.h" +#include "webrtc/modules/audio_processing/ns/noise_suppression.h" +#define NS_CREATE WebRtcNs_Create +#define NS_FREE WebRtcNs_Free +#define NS_INIT WebRtcNs_Init +#define NS_SET_POLICY WebRtcNs_set_policy +typedef NsHandle NsState; #elif defined(WEBRTC_NS_FIXED) -#include "webrtc/modules/audio_processing/ns/include/noise_suppression_x.h" +#include "webrtc/modules/audio_processing/ns/noise_suppression_x.h" +#define NS_CREATE WebRtcNsx_Create +#define NS_FREE WebRtcNsx_Free +#define NS_INIT WebRtcNsx_Init +#define NS_SET_POLICY WebRtcNsx_set_policy +typedef NsxHandle NsState; #endif -#include "webrtc/system_wrappers/include/critical_section_wrapper.h" - namespace webrtc { - -#if defined(WEBRTC_NS_FLOAT) -typedef NsHandle Handle; -#elif defined(WEBRTC_NS_FIXED) -typedef NsxHandle Handle; -#endif - -namespace { -int MapSetting(NoiseSuppression::Level level) { - switch (level) { - case NoiseSuppression::kLow: - return 0; - case NoiseSuppression::kModerate: - return 1; - case NoiseSuppression::kHigh: - return 2; - case NoiseSuppression::kVeryHigh: - return 
3; +class NoiseSuppressionImpl::Suppressor { + public: + explicit Suppressor(int sample_rate_hz) { + state_ = NS_CREATE(); + RTC_CHECK(state_); + int error = NS_INIT(state_, sample_rate_hz); + RTC_DCHECK_EQ(0, error); + } + ~Suppressor() { + NS_FREE(state_); } - assert(false); - return -1; + NsState* state() { return state_; } + private: + NsState* state_ = nullptr; + RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(Suppressor); +}; + +NoiseSuppressionImpl::NoiseSuppressionImpl(rtc::CriticalSection* crit) + : crit_(crit) { + RTC_DCHECK(crit); } -} // namespace - -NoiseSuppressionImpl::NoiseSuppressionImpl(const AudioProcessing* apm, - CriticalSectionWrapper* crit) - : ProcessingComponent(), - apm_(apm), - crit_(crit), - level_(kModerate) {} NoiseSuppressionImpl::~NoiseSuppressionImpl() {} -int NoiseSuppressionImpl::AnalyzeCaptureAudio(AudioBuffer* audio) { -#if defined(WEBRTC_NS_FLOAT) - if (!is_component_enabled()) { - return apm_->kNoError; +void NoiseSuppressionImpl::Initialize(size_t channels, int sample_rate_hz) { + rtc::CritScope cs(crit_); + channels_ = channels; + sample_rate_hz_ = sample_rate_hz; + std::vector<rtc::scoped_ptr<Suppressor>> new_suppressors; + if (enabled_) { + new_suppressors.resize(channels); + for (size_t i = 0; i < channels; i++) { + new_suppressors[i].reset(new Suppressor(sample_rate_hz)); + } } - assert(audio->num_frames_per_band() <= 160); - assert(audio->num_channels() == num_handles()); + suppressors_.swap(new_suppressors); + set_level(level_); +} - for (int i = 0; i < num_handles(); ++i) { - Handle* my_handle = static_cast<Handle*>(handle(i)); +void NoiseSuppressionImpl::AnalyzeCaptureAudio(AudioBuffer* audio) { + RTC_DCHECK(audio); +#if defined(WEBRTC_NS_FLOAT) + rtc::CritScope cs(crit_); + if (!enabled_) { + return; + } - WebRtcNs_Analyze(my_handle, audio->split_bands_const_f(i)[kBand0To8kHz]); + RTC_DCHECK_GE(160u, audio->num_frames_per_band()); + RTC_DCHECK_EQ(suppressors_.size(), audio->num_channels()); + for (size_t i = 0; i < 
suppressors_.size(); i++) { + WebRtcNs_Analyze(suppressors_[i]->state(), + audio->split_bands_const_f(i)[kBand0To8kHz]); } #endif - return apm_->kNoError; } -int NoiseSuppressionImpl::ProcessCaptureAudio(AudioBuffer* audio) { - if (!is_component_enabled()) { - return apm_->kNoError; +void NoiseSuppressionImpl::ProcessCaptureAudio(AudioBuffer* audio) { + RTC_DCHECK(audio); + rtc::CritScope cs(crit_); + if (!enabled_) { + return; } - assert(audio->num_frames_per_band() <= 160); - assert(audio->num_channels() == num_handles()); - for (int i = 0; i < num_handles(); ++i) { - Handle* my_handle = static_cast<Handle*>(handle(i)); + RTC_DCHECK_GE(160u, audio->num_frames_per_band()); + RTC_DCHECK_EQ(suppressors_.size(), audio->num_channels()); + for (size_t i = 0; i < suppressors_.size(); i++) { #if defined(WEBRTC_NS_FLOAT) - WebRtcNs_Process(my_handle, + WebRtcNs_Process(suppressors_[i]->state(), audio->split_bands_const_f(i), audio->num_bands(), audio->split_bands_f(i)); #elif defined(WEBRTC_NS_FIXED) - WebRtcNsx_Process(my_handle, + WebRtcNsx_Process(suppressors_[i]->state(), audio->split_bands_const(i), audio->num_bands(), audio->split_bands(i)); #endif } - return apm_->kNoError; } int NoiseSuppressionImpl::Enable(bool enable) { - CriticalSectionScoped crit_scoped(crit_); - return EnableComponent(enable); + rtc::CritScope cs(crit_); + if (enabled_ != enable) { + enabled_ = enable; + Initialize(channels_, sample_rate_hz_); + } + return AudioProcessing::kNoError; } bool NoiseSuppressionImpl::is_enabled() const { - return is_component_enabled(); + rtc::CritScope cs(crit_); + return enabled_; } int NoiseSuppressionImpl::set_level(Level level) { - CriticalSectionScoped crit_scoped(crit_); - if (MapSetting(level) == -1) { - return apm_->kBadParameterError; + int policy = 1; + switch (level) { + case NoiseSuppression::kLow: + policy = 0; + break; + case NoiseSuppression::kModerate: + policy = 1; + break; + case NoiseSuppression::kHigh: + policy = 2; + break; + case 
NoiseSuppression::kVeryHigh: + policy = 3; + break; + default: + RTC_NOTREACHED(); } - + rtc::CritScope cs(crit_); level_ = level; - return Configure(); + for (auto& suppressor : suppressors_) { + int error = NS_SET_POLICY(suppressor->state(), policy); + RTC_DCHECK_EQ(0, error); + } + return AudioProcessing::kNoError; } NoiseSuppression::Level NoiseSuppressionImpl::level() const { + rtc::CritScope cs(crit_); return level_; } float NoiseSuppressionImpl::speech_probability() const { + rtc::CritScope cs(crit_); #if defined(WEBRTC_NS_FLOAT) float probability_average = 0.0f; - for (int i = 0; i < num_handles(); i++) { - Handle* my_handle = static_cast<Handle*>(handle(i)); - probability_average += WebRtcNs_prior_speech_probability(my_handle); + for (auto& suppressor : suppressors_) { + probability_average += + WebRtcNs_prior_speech_probability(suppressor->state()); } - return probability_average / num_handles(); + if (!suppressors_.empty()) { + probability_average /= suppressors_.size(); + } + return probability_average; #elif defined(WEBRTC_NS_FIXED) + // TODO(peah): Returning error code as a float! Remove this. // Currently not available for the fixed point implementation. 
- return apm_->kUnsupportedFunctionError; + return AudioProcessing::kUnsupportedFunctionError; #endif } - -void* NoiseSuppressionImpl::CreateHandle() const { -#if defined(WEBRTC_NS_FLOAT) - return WebRtcNs_Create(); -#elif defined(WEBRTC_NS_FIXED) - return WebRtcNsx_Create(); -#endif -} - -void NoiseSuppressionImpl::DestroyHandle(void* handle) const { -#if defined(WEBRTC_NS_FLOAT) - WebRtcNs_Free(static_cast<Handle*>(handle)); -#elif defined(WEBRTC_NS_FIXED) - WebRtcNsx_Free(static_cast<Handle*>(handle)); -#endif -} - -int NoiseSuppressionImpl::InitializeHandle(void* handle) const { -#if defined(WEBRTC_NS_FLOAT) - return WebRtcNs_Init(static_cast<Handle*>(handle), - apm_->proc_sample_rate_hz()); -#elif defined(WEBRTC_NS_FIXED) - return WebRtcNsx_Init(static_cast<Handle*>(handle), - apm_->proc_sample_rate_hz()); -#endif -} - -int NoiseSuppressionImpl::ConfigureHandle(void* handle) const { -#if defined(WEBRTC_NS_FLOAT) - return WebRtcNs_set_policy(static_cast<Handle*>(handle), - MapSetting(level_)); -#elif defined(WEBRTC_NS_FIXED) - return WebRtcNsx_set_policy(static_cast<Handle*>(handle), - MapSetting(level_)); -#endif -} - -int NoiseSuppressionImpl::num_handles_required() const { - return apm_->num_output_channels(); -} - -int NoiseSuppressionImpl::GetHandleError(void* handle) const { - // The NS has no get_error() function. 
- assert(handle != NULL); - return apm_->kUnspecifiedError; -} } // namespace webrtc diff --git a/webrtc/modules/audio_processing/noise_suppression_impl.h b/webrtc/modules/audio_processing/noise_suppression_impl.h index 76a39b8e09..debbc61bc9 100644 --- a/webrtc/modules/audio_processing/noise_suppression_impl.h +++ b/webrtc/modules/audio_processing/noise_suppression_impl.h @@ -11,47 +11,42 @@ #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NOISE_SUPPRESSION_IMPL_H_ #define WEBRTC_MODULES_AUDIO_PROCESSING_NOISE_SUPPRESSION_IMPL_H_ +#include "webrtc/base/constructormagic.h" +#include "webrtc/base/criticalsection.h" +#include "webrtc/base/scoped_ptr.h" #include "webrtc/modules/audio_processing/include/audio_processing.h" -#include "webrtc/modules/audio_processing/processing_component.h" namespace webrtc { class AudioBuffer; -class CriticalSectionWrapper; -class NoiseSuppressionImpl : public NoiseSuppression, - public ProcessingComponent { +class NoiseSuppressionImpl : public NoiseSuppression { public: - NoiseSuppressionImpl(const AudioProcessing* apm, - CriticalSectionWrapper* crit); - virtual ~NoiseSuppressionImpl(); + explicit NoiseSuppressionImpl(rtc::CriticalSection* crit); + ~NoiseSuppressionImpl() override; - int AnalyzeCaptureAudio(AudioBuffer* audio); - int ProcessCaptureAudio(AudioBuffer* audio); + // TODO(peah): Fold into ctor, once public API is removed. + void Initialize(size_t channels, int sample_rate_hz); + void AnalyzeCaptureAudio(AudioBuffer* audio); + void ProcessCaptureAudio(AudioBuffer* audio); // NoiseSuppression implementation. + int Enable(bool enable) override; bool is_enabled() const override; - float speech_probability() const override; + int set_level(Level level) override; Level level() const override; + float speech_probability() const override; private: - // NoiseSuppression implementation. - int Enable(bool enable) override; - int set_level(Level level) override; - - // ProcessingComponent implementation. 
- void* CreateHandle() const override; - int InitializeHandle(void* handle) const override; - int ConfigureHandle(void* handle) const override; - void DestroyHandle(void* handle) const override; - int num_handles_required() const override; - int GetHandleError(void* handle) const override; - - const AudioProcessing* apm_; - CriticalSectionWrapper* crit_; - Level level_; + class Suppressor; + rtc::CriticalSection* const crit_; + bool enabled_ GUARDED_BY(crit_) = false; + Level level_ GUARDED_BY(crit_) = kModerate; + size_t channels_ GUARDED_BY(crit_) = 0; + int sample_rate_hz_ GUARDED_BY(crit_) = 0; + std::vector<rtc::scoped_ptr<Suppressor>> suppressors_ GUARDED_BY(crit_); + RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(NoiseSuppressionImpl); }; - } // namespace webrtc #endif // WEBRTC_MODULES_AUDIO_PROCESSING_NOISE_SUPPRESSION_IMPL_H_ diff --git a/webrtc/modules/audio_processing/ns/noise_suppression.c b/webrtc/modules/audio_processing/ns/noise_suppression.c index 13f1b2d6dc..dd05e0ab3d 100644 --- a/webrtc/modules/audio_processing/ns/noise_suppression.c +++ b/webrtc/modules/audio_processing/ns/noise_suppression.c @@ -8,7 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include "webrtc/modules/audio_processing/ns/include/noise_suppression.h" +#include "webrtc/modules/audio_processing/ns/noise_suppression.h" #include <stdlib.h> #include <string.h> diff --git a/webrtc/modules/audio_processing/ns/include/noise_suppression.h b/webrtc/modules/audio_processing/ns/noise_suppression.h index 9dac56bdee..8018118b60 100644 --- a/webrtc/modules/audio_processing/ns/include/noise_suppression.h +++ b/webrtc/modules/audio_processing/ns/noise_suppression.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_INCLUDE_NOISE_SUPPRESSION_H_ -#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_INCLUDE_NOISE_SUPPRESSION_H_ +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_NOISE_SUPPRESSION_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_NOISE_SUPPRESSION_H_ #include <stddef.h> @@ -113,4 +113,4 @@ float WebRtcNs_prior_speech_probability(NsHandle* handle); } #endif -#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_INCLUDE_NOISE_SUPPRESSION_H_ +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_NOISE_SUPPRESSION_H_ diff --git a/webrtc/modules/audio_processing/ns/noise_suppression_x.c b/webrtc/modules/audio_processing/ns/noise_suppression_x.c index 150fe608dd..0a5ba13300 100644 --- a/webrtc/modules/audio_processing/ns/noise_suppression_x.c +++ b/webrtc/modules/audio_processing/ns/noise_suppression_x.c @@ -8,7 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include "webrtc/modules/audio_processing/ns/include/noise_suppression_x.h" +#include "webrtc/modules/audio_processing/ns/noise_suppression_x.h" #include <stdlib.h> diff --git a/webrtc/modules/audio_processing/ns/include/noise_suppression_x.h b/webrtc/modules/audio_processing/ns/noise_suppression_x.h index 88fe4cd635..02b44cc091 100644 --- a/webrtc/modules/audio_processing/ns/include/noise_suppression_x.h +++ b/webrtc/modules/audio_processing/ns/noise_suppression_x.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_INCLUDE_NOISE_SUPPRESSION_X_H_ -#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_INCLUDE_NOISE_SUPPRESSION_X_H_ +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_NOISE_SUPPRESSION_X_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_NOISE_SUPPRESSION_X_H_ #include "webrtc/typedefs.h" @@ -85,4 +85,4 @@ void WebRtcNsx_Process(NsxHandle* nsxInst, } #endif -#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_INCLUDE_NOISE_SUPPRESSION_X_H_ +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_NOISE_SUPPRESSION_X_H_ diff --git a/webrtc/modules/audio_processing/ns/ns_core.c b/webrtc/modules/audio_processing/ns/ns_core.c index 1d6091400e..5ce64cee29 100644 --- a/webrtc/modules/audio_processing/ns/ns_core.c +++ b/webrtc/modules/audio_processing/ns/ns_core.c @@ -15,7 +15,7 @@ #include "webrtc/common_audio/fft4g.h" #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" -#include "webrtc/modules/audio_processing/ns/include/noise_suppression.h" +#include "webrtc/modules/audio_processing/ns/noise_suppression.h" #include "webrtc/modules/audio_processing/ns/ns_core.h" #include "webrtc/modules/audio_processing/ns/windows_private.h" diff --git a/webrtc/modules/audio_processing/ns/nsx_core.c b/webrtc/modules/audio_processing/ns/nsx_core.c index 71445792f5..25f16d26ab 100644 --- a/webrtc/modules/audio_processing/ns/nsx_core.c +++ b/webrtc/modules/audio_processing/ns/nsx_core.c @@ -8,7 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#include "webrtc/modules/audio_processing/ns/include/noise_suppression_x.h" +#include "webrtc/modules/audio_processing/ns/noise_suppression_x.h" #include <assert.h> #include <math.h> diff --git a/webrtc/modules/audio_processing/ns/nsx_core_c.c b/webrtc/modules/audio_processing/ns/nsx_core_c.c index 14322d38cc..da7aa3d5db 100644 --- a/webrtc/modules/audio_processing/ns/nsx_core_c.c +++ b/webrtc/modules/audio_processing/ns/nsx_core_c.c @@ -10,7 +10,7 @@ #include <assert.h> -#include "webrtc/modules/audio_processing/ns/include/noise_suppression_x.h" +#include "webrtc/modules/audio_processing/ns/noise_suppression_x.h" #include "webrtc/modules/audio_processing/ns/nsx_core.h" #include "webrtc/modules/audio_processing/ns/nsx_defines.h" diff --git a/webrtc/modules/audio_processing/ns/nsx_core_mips.c b/webrtc/modules/audio_processing/ns/nsx_core_mips.c index d99be8720b..7688d82d78 100644 --- a/webrtc/modules/audio_processing/ns/nsx_core_mips.c +++ b/webrtc/modules/audio_processing/ns/nsx_core_mips.c @@ -11,7 +11,7 @@ #include <assert.h> #include <string.h> -#include "webrtc/modules/audio_processing/ns/include/noise_suppression_x.h" +#include "webrtc/modules/audio_processing/ns/noise_suppression_x.h" #include "webrtc/modules/audio_processing/ns/nsx_core.h" static const int16_t kIndicatorTable[17] = { diff --git a/webrtc/modules/audio_processing/processing_component.cc b/webrtc/modules/audio_processing/processing_component.cc index 9e16d7c4ee..7abd8e2100 100644 --- a/webrtc/modules/audio_processing/processing_component.cc +++ b/webrtc/modules/audio_processing/processing_component.cc @@ -55,12 +55,12 @@ bool ProcessingComponent::is_component_enabled() const { return enabled_; } -void* ProcessingComponent::handle(int index) const { +void* ProcessingComponent::handle(size_t index) const { assert(index < num_handles_); return handles_[index]; } -int ProcessingComponent::num_handles() const { +size_t ProcessingComponent::num_handles() const { return num_handles_; } @@ -70,12 
+70,12 @@ int ProcessingComponent::Initialize() { } num_handles_ = num_handles_required(); - if (num_handles_ > static_cast<int>(handles_.size())) { + if (num_handles_ > handles_.size()) { handles_.resize(num_handles_, NULL); } - assert(static_cast<int>(handles_.size()) >= num_handles_); - for (int i = 0; i < num_handles_; i++) { + assert(handles_.size() >= num_handles_); + for (size_t i = 0; i < num_handles_; i++) { if (handles_[i] == NULL) { handles_[i] = CreateHandle(); if (handles_[i] == NULL) { @@ -98,8 +98,8 @@ int ProcessingComponent::Configure() { return AudioProcessing::kNoError; } - assert(static_cast<int>(handles_.size()) >= num_handles_); - for (int i = 0; i < num_handles_; i++) { + assert(handles_.size() >= num_handles_); + for (size_t i = 0; i < num_handles_; i++) { int err = ConfigureHandle(handles_[i]); if (err != AudioProcessing::kNoError) { return GetHandleError(handles_[i]); diff --git a/webrtc/modules/audio_processing/processing_component.h b/webrtc/modules/audio_processing/processing_component.h index 8ee3ac6c7d..577f1570ad 100644 --- a/webrtc/modules/audio_processing/processing_component.h +++ b/webrtc/modules/audio_processing/processing_component.h @@ -17,6 +17,22 @@ namespace webrtc { +// Functor to use when supplying a verifier function for the queue item +// verifcation. 
+template <typename T> +class RenderQueueItemVerifier { + public: + explicit RenderQueueItemVerifier(size_t minimum_capacity) + : minimum_capacity_(minimum_capacity) {} + + bool operator()(const std::vector<T>& v) const { + return v.capacity() >= minimum_capacity_; + } + + private: + size_t minimum_capacity_; +}; + class ProcessingComponent { public: ProcessingComponent(); @@ -31,21 +47,21 @@ class ProcessingComponent { protected: virtual int Configure(); int EnableComponent(bool enable); - void* handle(int index) const; - int num_handles() const; + void* handle(size_t index) const; + size_t num_handles() const; private: virtual void* CreateHandle() const = 0; virtual int InitializeHandle(void* handle) const = 0; virtual int ConfigureHandle(void* handle) const = 0; virtual void DestroyHandle(void* handle) const = 0; - virtual int num_handles_required() const = 0; + virtual size_t num_handles_required() const = 0; virtual int GetHandleError(void* handle) const = 0; std::vector<void*> handles_; bool initialized_; bool enabled_; - int num_handles_; + size_t num_handles_; }; } // namespace webrtc diff --git a/webrtc/modules/audio_processing/splitting_filter.cc b/webrtc/modules/audio_processing/splitting_filter.cc index 60427e2db6..46cc9352c2 100644 --- a/webrtc/modules/audio_processing/splitting_filter.cc +++ b/webrtc/modules/audio_processing/splitting_filter.cc @@ -16,7 +16,7 @@ namespace webrtc { -SplittingFilter::SplittingFilter(int num_channels, +SplittingFilter::SplittingFilter(size_t num_channels, size_t num_bands, size_t num_frames) : num_bands_(num_bands) { @@ -24,7 +24,7 @@ SplittingFilter::SplittingFilter(int num_channels, if (num_bands_ == 2) { two_bands_states_.resize(num_channels); } else if (num_bands_ == 3) { - for (int i = 0; i < num_channels; ++i) { + for (size_t i = 0; i < num_channels; ++i) { three_band_filter_banks_.push_back(new ThreeBandFilterBank(num_frames)); } } @@ -58,8 +58,7 @@ void SplittingFilter::Synthesis(const IFChannelBuffer* bands, 
void SplittingFilter::TwoBandsAnalysis(const IFChannelBuffer* data, IFChannelBuffer* bands) { - RTC_DCHECK_EQ(static_cast<int>(two_bands_states_.size()), - data->num_channels()); + RTC_DCHECK_EQ(two_bands_states_.size(), data->num_channels()); for (size_t i = 0; i < two_bands_states_.size(); ++i) { WebRtcSpl_AnalysisQMF(data->ibuf_const()->channels()[i], data->num_frames(), @@ -72,8 +71,7 @@ void SplittingFilter::TwoBandsAnalysis(const IFChannelBuffer* data, void SplittingFilter::TwoBandsSynthesis(const IFChannelBuffer* bands, IFChannelBuffer* data) { - RTC_DCHECK_EQ(static_cast<int>(two_bands_states_.size()), - data->num_channels()); + RTC_DCHECK_EQ(two_bands_states_.size(), data->num_channels()); for (size_t i = 0; i < two_bands_states_.size(); ++i) { WebRtcSpl_SynthesisQMF(bands->ibuf_const()->channels(0)[i], bands->ibuf_const()->channels(1)[i], @@ -86,8 +84,7 @@ void SplittingFilter::TwoBandsSynthesis(const IFChannelBuffer* bands, void SplittingFilter::ThreeBandsAnalysis(const IFChannelBuffer* data, IFChannelBuffer* bands) { - RTC_DCHECK_EQ(static_cast<int>(three_band_filter_banks_.size()), - data->num_channels()); + RTC_DCHECK_EQ(three_band_filter_banks_.size(), data->num_channels()); for (size_t i = 0; i < three_band_filter_banks_.size(); ++i) { three_band_filter_banks_[i]->Analysis(data->fbuf_const()->channels()[i], data->num_frames(), @@ -97,8 +94,7 @@ void SplittingFilter::ThreeBandsAnalysis(const IFChannelBuffer* data, void SplittingFilter::ThreeBandsSynthesis(const IFChannelBuffer* bands, IFChannelBuffer* data) { - RTC_DCHECK_EQ(static_cast<int>(three_band_filter_banks_.size()), - data->num_channels()); + RTC_DCHECK_EQ(three_band_filter_banks_.size(), data->num_channels()); for (size_t i = 0; i < three_band_filter_banks_.size(); ++i) { three_band_filter_banks_[i]->Synthesis(bands->fbuf_const()->bands(i), bands->num_frames_per_band(), diff --git a/webrtc/modules/audio_processing/splitting_filter.h b/webrtc/modules/audio_processing/splitting_filter.h index 
4698d3fe2b..6b81c2fb05 100644 --- a/webrtc/modules/audio_processing/splitting_filter.h +++ b/webrtc/modules/audio_processing/splitting_filter.h @@ -45,7 +45,7 @@ struct TwoBandsStates { // used. class SplittingFilter { public: - SplittingFilter(int num_channels, size_t num_bands, size_t num_frames); + SplittingFilter(size_t num_channels, size_t num_bands, size_t num_frames); void Analysis(const IFChannelBuffer* data, IFChannelBuffer* bands); void Synthesis(const IFChannelBuffer* bands, IFChannelBuffer* data); diff --git a/webrtc/modules/audio_processing/test/audio_file_processor.cc b/webrtc/modules/audio_processing/test/audio_file_processor.cc new file mode 100644 index 0000000000..56e9b4b96f --- /dev/null +++ b/webrtc/modules/audio_processing/test/audio_file_processor.cc @@ -0,0 +1,180 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_processing/test/audio_file_processor.h" + +#include <algorithm> +#include <utility> + +#include "webrtc/base/checks.h" +#include "webrtc/modules/audio_processing/test/protobuf_utils.h" + +using rtc::scoped_ptr; +using rtc::CheckedDivExact; +using std::vector; +using webrtc::audioproc::Event; +using webrtc::audioproc::Init; +using webrtc::audioproc::ReverseStream; +using webrtc::audioproc::Stream; + +namespace webrtc { +namespace { + +// Returns a StreamConfig corresponding to file. +StreamConfig GetStreamConfig(const WavFile& file) { + return StreamConfig(file.sample_rate(), file.num_channels()); +} + +// Returns a ChannelBuffer corresponding to file. 
+ChannelBuffer<float> GetChannelBuffer(const WavFile& file) { + return ChannelBuffer<float>( + CheckedDivExact(file.sample_rate(), AudioFileProcessor::kChunksPerSecond), + file.num_channels()); +} + +} // namespace + +WavFileProcessor::WavFileProcessor(scoped_ptr<AudioProcessing> ap, + scoped_ptr<WavReader> in_file, + scoped_ptr<WavWriter> out_file) + : ap_(std::move(ap)), + in_buf_(GetChannelBuffer(*in_file)), + out_buf_(GetChannelBuffer(*out_file)), + input_config_(GetStreamConfig(*in_file)), + output_config_(GetStreamConfig(*out_file)), + buffer_reader_(std::move(in_file)), + buffer_writer_(std::move(out_file)) {} + +bool WavFileProcessor::ProcessChunk() { + if (!buffer_reader_.Read(&in_buf_)) { + return false; + } + { + const auto st = ScopedTimer(mutable_proc_time()); + RTC_CHECK_EQ(kNoErr, + ap_->ProcessStream(in_buf_.channels(), input_config_, + output_config_, out_buf_.channels())); + } + buffer_writer_.Write(out_buf_); + return true; +} + +AecDumpFileProcessor::AecDumpFileProcessor(scoped_ptr<AudioProcessing> ap, + FILE* dump_file, + scoped_ptr<WavWriter> out_file) + : ap_(std::move(ap)), + dump_file_(dump_file), + out_buf_(GetChannelBuffer(*out_file)), + output_config_(GetStreamConfig(*out_file)), + buffer_writer_(std::move(out_file)) { + RTC_CHECK(dump_file_) << "Could not open dump file for reading."; +} + +AecDumpFileProcessor::~AecDumpFileProcessor() { + fclose(dump_file_); +} + +bool AecDumpFileProcessor::ProcessChunk() { + Event event_msg; + + // Continue until we process our first Stream message. 
+ do { + if (!ReadMessageFromFile(dump_file_, &event_msg)) { + return false; + } + + if (event_msg.type() == Event::INIT) { + RTC_CHECK(event_msg.has_init()); + HandleMessage(event_msg.init()); + + } else if (event_msg.type() == Event::STREAM) { + RTC_CHECK(event_msg.has_stream()); + HandleMessage(event_msg.stream()); + + } else if (event_msg.type() == Event::REVERSE_STREAM) { + RTC_CHECK(event_msg.has_reverse_stream()); + HandleMessage(event_msg.reverse_stream()); + } + } while (event_msg.type() != Event::STREAM); + + return true; +} + +void AecDumpFileProcessor::HandleMessage(const Init& msg) { + RTC_CHECK(msg.has_sample_rate()); + RTC_CHECK(msg.has_num_input_channels()); + RTC_CHECK(msg.has_num_reverse_channels()); + + in_buf_.reset(new ChannelBuffer<float>( + CheckedDivExact(msg.sample_rate(), kChunksPerSecond), + msg.num_input_channels())); + const int reverse_sample_rate = msg.has_reverse_sample_rate() + ? msg.reverse_sample_rate() + : msg.sample_rate(); + reverse_buf_.reset(new ChannelBuffer<float>( + CheckedDivExact(reverse_sample_rate, kChunksPerSecond), + msg.num_reverse_channels())); + input_config_ = StreamConfig(msg.sample_rate(), msg.num_input_channels()); + reverse_config_ = + StreamConfig(reverse_sample_rate, msg.num_reverse_channels()); + + const ProcessingConfig config = { + {input_config_, output_config_, reverse_config_, reverse_config_}}; + RTC_CHECK_EQ(kNoErr, ap_->Initialize(config)); +} + +void AecDumpFileProcessor::HandleMessage(const Stream& msg) { + RTC_CHECK(!msg.has_input_data()); + RTC_CHECK_EQ(in_buf_->num_channels(), + static_cast<size_t>(msg.input_channel_size())); + + for (int i = 0; i < msg.input_channel_size(); ++i) { + RTC_CHECK_EQ(in_buf_->num_frames() * sizeof(*in_buf_->channels()[i]), + msg.input_channel(i).size()); + std::memcpy(in_buf_->channels()[i], msg.input_channel(i).data(), + msg.input_channel(i).size()); + } + { + const auto st = ScopedTimer(mutable_proc_time()); + RTC_CHECK_EQ(kNoErr, 
ap_->set_stream_delay_ms(msg.delay())); + ap_->echo_cancellation()->set_stream_drift_samples(msg.drift()); + if (msg.has_keypress()) { + ap_->set_stream_key_pressed(msg.keypress()); + } + RTC_CHECK_EQ(kNoErr, + ap_->ProcessStream(in_buf_->channels(), input_config_, + output_config_, out_buf_.channels())); + } + + buffer_writer_.Write(out_buf_); +} + +void AecDumpFileProcessor::HandleMessage(const ReverseStream& msg) { + RTC_CHECK(!msg.has_data()); + RTC_CHECK_EQ(reverse_buf_->num_channels(), + static_cast<size_t>(msg.channel_size())); + + for (int i = 0; i < msg.channel_size(); ++i) { + RTC_CHECK_EQ(reverse_buf_->num_frames() * sizeof(*in_buf_->channels()[i]), + msg.channel(i).size()); + std::memcpy(reverse_buf_->channels()[i], msg.channel(i).data(), + msg.channel(i).size()); + } + { + const auto st = ScopedTimer(mutable_proc_time()); + // TODO(ajm): This currently discards the processed output, which is needed + // for e.g. intelligibility enhancement. + RTC_CHECK_EQ(kNoErr, ap_->ProcessReverseStream( + reverse_buf_->channels(), reverse_config_, + reverse_config_, reverse_buf_->channels())); + } +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_processing/test/audio_file_processor.h b/webrtc/modules/audio_processing/test/audio_file_processor.h new file mode 100644 index 0000000000..a3153b2244 --- /dev/null +++ b/webrtc/modules/audio_processing/test/audio_file_processor.h @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TEST_AUDIO_FILE_PROCESSOR_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_TEST_AUDIO_FILE_PROCESSOR_H_ + +#include <algorithm> +#include <limits> +#include <vector> + +#include "webrtc/base/scoped_ptr.h" +#include "webrtc/common_audio/channel_buffer.h" +#include "webrtc/common_audio/wav_file.h" +#include "webrtc/modules/audio_processing/include/audio_processing.h" +#include "webrtc/modules/audio_processing/test/test_utils.h" +#include "webrtc/system_wrappers/include/tick_util.h" + +#ifdef WEBRTC_ANDROID_PLATFORM_BUILD +#include "external/webrtc/webrtc/modules/audio_processing/debug.pb.h" +#else +#include "webrtc/audio_processing/debug.pb.h" +#endif + +namespace webrtc { + +// Holds a few statistics about a series of TickIntervals. +struct TickIntervalStats { + TickIntervalStats() : min(std::numeric_limits<int64_t>::max()) {} + TickInterval sum; + TickInterval max; + TickInterval min; +}; + +// Interface for processing an input file with an AudioProcessing instance and +// dumping the results to an output file. +class AudioFileProcessor { + public: + static const int kChunksPerSecond = 1000 / AudioProcessing::kChunkSizeMs; + + virtual ~AudioFileProcessor() {} + + // Processes one AudioProcessing::kChunkSizeMs of data from the input file and + // writes to the output file. + virtual bool ProcessChunk() = 0; + + // Returns the execution time of all AudioProcessing calls. + const TickIntervalStats& proc_time() const { return proc_time_; } + + protected: + // RAII class for execution time measurement. Updates the provided + // TickIntervalStats based on the time between ScopedTimer creation and + // leaving the enclosing scope. 
+ class ScopedTimer { + public: + explicit ScopedTimer(TickIntervalStats* proc_time) + : proc_time_(proc_time), start_time_(TickTime::Now()) {} + + ~ScopedTimer() { + TickInterval interval = TickTime::Now() - start_time_; + proc_time_->sum += interval; + proc_time_->max = std::max(proc_time_->max, interval); + proc_time_->min = std::min(proc_time_->min, interval); + } + + private: + TickIntervalStats* const proc_time_; + TickTime start_time_; + }; + + TickIntervalStats* mutable_proc_time() { return &proc_time_; } + + private: + TickIntervalStats proc_time_; +}; + +// Used to read from and write to WavFile objects. +class WavFileProcessor final : public AudioFileProcessor { + public: + // Takes ownership of all parameters. + WavFileProcessor(rtc::scoped_ptr<AudioProcessing> ap, + rtc::scoped_ptr<WavReader> in_file, + rtc::scoped_ptr<WavWriter> out_file); + virtual ~WavFileProcessor() {} + + // Processes one chunk from the WAV input and writes to the WAV output. + bool ProcessChunk() override; + + private: + rtc::scoped_ptr<AudioProcessing> ap_; + + ChannelBuffer<float> in_buf_; + ChannelBuffer<float> out_buf_; + const StreamConfig input_config_; + const StreamConfig output_config_; + ChannelBufferWavReader buffer_reader_; + ChannelBufferWavWriter buffer_writer_; +}; + +// Used to read from an aecdump file and write to a WavWriter. +class AecDumpFileProcessor final : public AudioFileProcessor { + public: + // Takes ownership of all parameters. + AecDumpFileProcessor(rtc::scoped_ptr<AudioProcessing> ap, + FILE* dump_file, + rtc::scoped_ptr<WavWriter> out_file); + + virtual ~AecDumpFileProcessor(); + + // Processes messages from the aecdump file until the first Stream message is + // completed. Passes other data from the aecdump messages as appropriate. 
+ bool ProcessChunk() override; + + private: + void HandleMessage(const webrtc::audioproc::Init& msg); + void HandleMessage(const webrtc::audioproc::Stream& msg); + void HandleMessage(const webrtc::audioproc::ReverseStream& msg); + + rtc::scoped_ptr<AudioProcessing> ap_; + FILE* dump_file_; + + rtc::scoped_ptr<ChannelBuffer<float>> in_buf_; + rtc::scoped_ptr<ChannelBuffer<float>> reverse_buf_; + ChannelBuffer<float> out_buf_; + StreamConfig input_config_; + StreamConfig reverse_config_; + const StreamConfig output_config_; + ChannelBufferWavWriter buffer_writer_; +}; + +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TEST_AUDIO_FILE_PROCESSOR_H_ diff --git a/webrtc/modules/audio_processing/test/audio_processing_unittest.cc b/webrtc/modules/audio_processing/test/audio_processing_unittest.cc index c013a369fe..324cb7bec6 100644 --- a/webrtc/modules/audio_processing/test/audio_processing_unittest.cc +++ b/webrtc/modules/audio_processing/test/audio_processing_unittest.cc @@ -14,6 +14,7 @@ #include <limits> #include <queue> +#include "webrtc/base/arraysize.h" #include "webrtc/base/scoped_ptr.h" #include "webrtc/common_audio/include/audio_util.h" #include "webrtc/common_audio/resampler/include/push_resampler.h" @@ -23,11 +24,10 @@ #include "webrtc/modules/audio_processing/include/audio_processing.h" #include "webrtc/modules/audio_processing/test/protobuf_utils.h" #include "webrtc/modules/audio_processing/test/test_utils.h" -#include "webrtc/modules/interface/module_common_types.h" +#include "webrtc/modules/include/module_common_types.h" #include "webrtc/system_wrappers/include/event_wrapper.h" #include "webrtc/system_wrappers/include/trace.h" #include "webrtc/test/testsupport/fileutils.h" -#include "webrtc/test/testsupport/gtest_disable.h" #ifdef WEBRTC_ANDROID_PLATFORM_BUILD #include "gtest/gtest.h" #include "external/webrtc/webrtc/modules/audio_processing/test/unittest.pb.h" @@ -53,11 +53,8 @@ namespace { // file. This is the typical case. 
When the file should be updated, it can // be set to true with the command-line switch --write_ref_data. bool write_ref_data = false; -const int kChannels[] = {1, 2}; -const size_t kChannelsSize = sizeof(kChannels) / sizeof(*kChannels); - +const google::protobuf::int32 kChannels[] = {1, 2}; const int kSampleRates[] = {8000, 16000, 32000, 48000}; -const size_t kSampleRatesSize = sizeof(kSampleRates) / sizeof(*kSampleRates); #if defined(WEBRTC_AUDIOPROC_FIXED_PROFILE) // AECM doesn't support super-wb. @@ -65,8 +62,6 @@ const int kProcessSampleRates[] = {8000, 16000}; #elif defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE) const int kProcessSampleRates[] = {8000, 16000, 32000, 48000}; #endif -const size_t kProcessSampleRatesSize = sizeof(kProcessSampleRates) / - sizeof(*kProcessSampleRates); enum StreamDirection { kForward = 0, kReverse }; @@ -77,7 +72,7 @@ void ConvertToFloat(const int16_t* int_data, ChannelBuffer<float>* cb) { cb->num_frames(), cb->num_channels(), cb_int.channels()); - for (int i = 0; i < cb->num_channels(); ++i) { + for (size_t i = 0; i < cb->num_channels(); ++i) { S16ToFloat(cb_int.channels()[i], cb->num_frames(), cb->channels()[i]); @@ -89,7 +84,7 @@ void ConvertToFloat(const AudioFrame& frame, ChannelBuffer<float>* cb) { } // Number of channels including the keyboard channel. 
-int TotalChannelsFromLayout(AudioProcessing::ChannelLayout layout) { +size_t TotalChannelsFromLayout(AudioProcessing::ChannelLayout layout) { switch (layout) { case AudioProcessing::kMono: return 1; @@ -100,7 +95,7 @@ int TotalChannelsFromLayout(AudioProcessing::ChannelLayout layout) { return 3; } assert(false); - return -1; + return 0; } int TruncateToMultipleOf10(int value) { @@ -108,25 +103,25 @@ int TruncateToMultipleOf10(int value) { } void MixStereoToMono(const float* stereo, float* mono, - int samples_per_channel) { - for (int i = 0; i < samples_per_channel; ++i) + size_t samples_per_channel) { + for (size_t i = 0; i < samples_per_channel; ++i) mono[i] = (stereo[i * 2] + stereo[i * 2 + 1]) / 2; } void MixStereoToMono(const int16_t* stereo, int16_t* mono, - int samples_per_channel) { - for (int i = 0; i < samples_per_channel; ++i) + size_t samples_per_channel) { + for (size_t i = 0; i < samples_per_channel; ++i) mono[i] = (stereo[i * 2] + stereo[i * 2 + 1]) >> 1; } -void CopyLeftToRightChannel(int16_t* stereo, int samples_per_channel) { - for (int i = 0; i < samples_per_channel; i++) { +void CopyLeftToRightChannel(int16_t* stereo, size_t samples_per_channel) { + for (size_t i = 0; i < samples_per_channel; i++) { stereo[i * 2 + 1] = stereo[i * 2]; } } -void VerifyChannelsAreEqual(int16_t* stereo, int samples_per_channel) { - for (int i = 0; i < samples_per_channel; i++) { +void VerifyChannelsAreEqual(int16_t* stereo, size_t samples_per_channel) { + for (size_t i = 0; i < samples_per_channel; i++) { EXPECT_EQ(stereo[i * 2 + 1], stereo[i * 2]); } } @@ -139,7 +134,7 @@ void SetFrameTo(AudioFrame* frame, int16_t value) { } void SetFrameTo(AudioFrame* frame, int16_t left, int16_t right) { - ASSERT_EQ(2, frame->num_channels_); + ASSERT_EQ(2u, frame->num_channels_); for (size_t i = 0; i < frame->samples_per_channel_ * 2; i += 2) { frame->data_[i] = left; frame->data_[i + 1] = right; @@ -199,9 +194,9 @@ T AbsValue(T a) { } int16_t MaxAudioFrame(const AudioFrame& 
frame) { - const int length = frame.samples_per_channel_ * frame.num_channels_; + const size_t length = frame.samples_per_channel_ * frame.num_channels_; int16_t max_data = AbsValue(frame.data_[0]); - for (int i = 1; i < length; i++) { + for (size_t i = 1; i < length; i++) { max_data = std::max(max_data, AbsValue(frame.data_[i])); } @@ -264,10 +259,10 @@ std::string OutputFilePath(std::string name, int output_rate, int reverse_input_rate, int reverse_output_rate, - int num_input_channels, - int num_output_channels, - int num_reverse_input_channels, - int num_reverse_output_channels, + size_t num_input_channels, + size_t num_output_channels, + size_t num_reverse_input_channels, + size_t num_reverse_output_channels, StreamDirection file_direction) { std::ostringstream ss; ss << name << "_i" << num_input_channels << "_" << input_rate / 1000 << "_ir" @@ -362,9 +357,9 @@ class ApmTest : public ::testing::Test { void Init(int sample_rate_hz, int output_sample_rate_hz, int reverse_sample_rate_hz, - int num_input_channels, - int num_output_channels, - int num_reverse_channels, + size_t num_input_channels, + size_t num_output_channels, + size_t num_reverse_channels, bool open_output_file); void Init(AudioProcessing* ap); void EnableAllComponents(); @@ -377,12 +372,12 @@ class ApmTest : public ::testing::Test { void ProcessDelayVerificationTest(int delay_ms, int system_delay_ms, int delay_min, int delay_max); void TestChangingChannelsInt16Interface( - int num_channels, + size_t num_channels, AudioProcessing::Error expected_return); - void TestChangingForwardChannels(int num_in_channels, - int num_out_channels, + void TestChangingForwardChannels(size_t num_in_channels, + size_t num_out_channels, AudioProcessing::Error expected_return); - void TestChangingReverseChannels(int num_rev_channels, + void TestChangingReverseChannels(size_t num_rev_channels, AudioProcessing::Error expected_return); void RunQuantizedVolumeDoesNotGetStuckTest(int sample_rate); void 
RunManualVolumeChangeIsPossibleTest(int sample_rate); @@ -403,7 +398,7 @@ class ApmTest : public ::testing::Test { rtc::scoped_ptr<ChannelBuffer<float> > float_cb_; rtc::scoped_ptr<ChannelBuffer<float> > revfloat_cb_; int output_sample_rate_hz_; - int num_output_channels_; + size_t num_output_channels_; FILE* far_file_; FILE* near_file_; FILE* out_file_; @@ -487,9 +482,9 @@ void ApmTest::Init(AudioProcessing* ap) { void ApmTest::Init(int sample_rate_hz, int output_sample_rate_hz, int reverse_sample_rate_hz, - int num_input_channels, - int num_output_channels, - int num_reverse_channels, + size_t num_input_channels, + size_t num_output_channels, + size_t num_reverse_channels, bool open_output_file) { SetContainerFormat(sample_rate_hz, num_input_channels, frame_, &float_cb_); output_sample_rate_hz_ = output_sample_rate_hz; @@ -821,7 +816,7 @@ TEST_F(ApmTest, DelayOffsetWithLimitsIsSetProperly) { } void ApmTest::TestChangingChannelsInt16Interface( - int num_channels, + size_t num_channels, AudioProcessing::Error expected_return) { frame_->num_channels_ = num_channels; EXPECT_EQ(expected_return, apm_->ProcessStream(frame_)); @@ -829,8 +824,8 @@ void ApmTest::TestChangingChannelsInt16Interface( } void ApmTest::TestChangingForwardChannels( - int num_in_channels, - int num_out_channels, + size_t num_in_channels, + size_t num_out_channels, AudioProcessing::Error expected_return) { const StreamConfig input_stream = {frame_->sample_rate_hz_, num_in_channels}; const StreamConfig output_stream = {output_sample_rate_hz_, num_out_channels}; @@ -841,7 +836,7 @@ void ApmTest::TestChangingForwardChannels( } void ApmTest::TestChangingReverseChannels( - int num_rev_channels, + size_t num_rev_channels, AudioProcessing::Error expected_return) { const ProcessingConfig processing_config = { {{frame_->sample_rate_hz_, apm_->num_input_channels()}, @@ -862,11 +857,11 @@ TEST_F(ApmTest, ChannelsInt16Interface) { TestChangingChannelsInt16Interface(0, apm_->kBadNumberChannelsError); - for (int 
i = 1; i < 4; i++) { + for (size_t i = 1; i < 4; i++) { TestChangingChannelsInt16Interface(i, kNoErr); EXPECT_EQ(i, apm_->num_input_channels()); // We always force the number of reverse channels used for processing to 1. - EXPECT_EQ(1, apm_->num_reverse_channels()); + EXPECT_EQ(1u, apm_->num_reverse_channels()); } } @@ -877,8 +872,8 @@ TEST_F(ApmTest, Channels) { TestChangingForwardChannels(0, 1, apm_->kBadNumberChannelsError); TestChangingReverseChannels(0, apm_->kBadNumberChannelsError); - for (int i = 1; i < 4; ++i) { - for (int j = 0; j < 1; ++j) { + for (size_t i = 1; i < 4; ++i) { + for (size_t j = 0; j < 1; ++j) { // Output channels much be one or match input channels. if (j == 1 || i == j) { TestChangingForwardChannels(i, j, kNoErr); @@ -887,7 +882,7 @@ TEST_F(ApmTest, Channels) { EXPECT_EQ(i, apm_->num_input_channels()); EXPECT_EQ(j, apm_->num_output_channels()); // The number of reverse channels used for processing to is always 1. - EXPECT_EQ(1, apm_->num_reverse_channels()); + EXPECT_EQ(1u, apm_->num_reverse_channels()); } else { TestChangingForwardChannels(i, j, AudioProcessing::kBadNumberChannelsError); @@ -902,7 +897,7 @@ TEST_F(ApmTest, SampleRatesInt) { EXPECT_EQ(apm_->kBadSampleRateError, ProcessStreamChooser(kIntFormat)); // Testing valid sample rates int fs[] = {8000, 16000, 32000, 48000}; - for (size_t i = 0; i < sizeof(fs) / sizeof(*fs); i++) { + for (size_t i = 0; i < arraysize(fs); i++) { SetContainerFormat(fs[i], 2, frame_, &float_cb_); EXPECT_NOERR(ProcessStreamChooser(kIntFormat)); } @@ -921,7 +916,7 @@ TEST_F(ApmTest, EchoCancellation) { EchoCancellation::kModerateSuppression, EchoCancellation::kHighSuppression, }; - for (size_t i = 0; i < sizeof(level)/sizeof(*level); i++) { + for (size_t i = 0; i < arraysize(level); i++) { EXPECT_EQ(apm_->kNoError, apm_->echo_cancellation()->set_suppression_level(level[i])); EXPECT_EQ(level[i], @@ -998,7 +993,7 @@ TEST_F(ApmTest, DISABLED_EchoCancellationReportsCorrectDelays) { // Test a couple of 
corner cases and verify that the estimated delay is // within a valid region (set to +-1.5 blocks). Note that these cases are // sampling frequency dependent. - for (size_t i = 0; i < kProcessSampleRatesSize; i++) { + for (size_t i = 0; i < arraysize(kProcessSampleRates); i++) { Init(kProcessSampleRates[i], kProcessSampleRates[i], kProcessSampleRates[i], @@ -1070,7 +1065,7 @@ TEST_F(ApmTest, EchoControlMobile) { EchoControlMobile::kSpeakerphone, EchoControlMobile::kLoudSpeakerphone, }; - for (size_t i = 0; i < sizeof(mode)/sizeof(*mode); i++) { + for (size_t i = 0; i < arraysize(mode); i++) { EXPECT_EQ(apm_->kNoError, apm_->echo_control_mobile()->set_routing_mode(mode[i])); EXPECT_EQ(mode[i], @@ -1135,7 +1130,7 @@ TEST_F(ApmTest, GainControl) { GainControl::kAdaptiveDigital, GainControl::kFixedDigital }; - for (size_t i = 0; i < sizeof(mode)/sizeof(*mode); i++) { + for (size_t i = 0; i < arraysize(mode); i++) { EXPECT_EQ(apm_->kNoError, apm_->gain_control()->set_mode(mode[i])); EXPECT_EQ(mode[i], apm_->gain_control()->mode()); @@ -1151,7 +1146,7 @@ TEST_F(ApmTest, GainControl) { apm_->gain_control()->target_level_dbfs())); int level_dbfs[] = {0, 6, 31}; - for (size_t i = 0; i < sizeof(level_dbfs)/sizeof(*level_dbfs); i++) { + for (size_t i = 0; i < arraysize(level_dbfs); i++) { EXPECT_EQ(apm_->kNoError, apm_->gain_control()->set_target_level_dbfs(level_dbfs[i])); EXPECT_EQ(level_dbfs[i], apm_->gain_control()->target_level_dbfs()); @@ -1169,7 +1164,7 @@ TEST_F(ApmTest, GainControl) { apm_->gain_control()->compression_gain_db())); int gain_db[] = {0, 10, 90}; - for (size_t i = 0; i < sizeof(gain_db)/sizeof(*gain_db); i++) { + for (size_t i = 0; i < arraysize(gain_db); i++) { EXPECT_EQ(apm_->kNoError, apm_->gain_control()->set_compression_gain_db(gain_db[i])); EXPECT_EQ(gain_db[i], apm_->gain_control()->compression_gain_db()); @@ -1200,14 +1195,14 @@ TEST_F(ApmTest, GainControl) { apm_->gain_control()->analog_level_maximum())); int min_level[] = {0, 255, 1024}; - for 
(size_t i = 0; i < sizeof(min_level)/sizeof(*min_level); i++) { + for (size_t i = 0; i < arraysize(min_level); i++) { EXPECT_EQ(apm_->kNoError, apm_->gain_control()->set_analog_level_limits(min_level[i], 1024)); EXPECT_EQ(min_level[i], apm_->gain_control()->analog_level_minimum()); } int max_level[] = {0, 1024, 65535}; - for (size_t i = 0; i < sizeof(min_level)/sizeof(*min_level); i++) { + for (size_t i = 0; i < arraysize(min_level); i++) { EXPECT_EQ(apm_->kNoError, apm_->gain_control()->set_analog_level_limits(0, max_level[i])); EXPECT_EQ(max_level[i], apm_->gain_control()->analog_level_maximum()); @@ -1246,7 +1241,7 @@ void ApmTest::RunQuantizedVolumeDoesNotGetStuckTest(int sample_rate) { // Verifies that despite volume slider quantization, the AGC can continue to // increase its volume. TEST_F(ApmTest, QuantizedVolumeDoesNotGetStuck) { - for (size_t i = 0; i < kSampleRatesSize; ++i) { + for (size_t i = 0; i < arraysize(kSampleRates); ++i) { RunQuantizedVolumeDoesNotGetStuckTest(kSampleRates[i]); } } @@ -1291,7 +1286,7 @@ void ApmTest::RunManualVolumeChangeIsPossibleTest(int sample_rate) { } TEST_F(ApmTest, ManualVolumeChangeIsPossible) { - for (size_t i = 0; i < kSampleRatesSize; ++i) { + for (size_t i = 0; i < arraysize(kSampleRates); ++i) { RunManualVolumeChangeIsPossibleTest(kSampleRates[i]); } } @@ -1299,11 +1294,11 @@ TEST_F(ApmTest, ManualVolumeChangeIsPossible) { #if !defined(WEBRTC_ANDROID) && !defined(WEBRTC_IOS) TEST_F(ApmTest, AgcOnlyAdaptsWhenTargetSignalIsPresent) { const int kSampleRateHz = 16000; - const int kSamplesPerChannel = - AudioProcessing::kChunkSizeMs * kSampleRateHz / 1000; - const int kNumInputChannels = 2; - const int kNumOutputChannels = 1; - const int kNumChunks = 700; + const size_t kSamplesPerChannel = + static_cast<size_t>(AudioProcessing::kChunkSizeMs * kSampleRateHz / 1000); + const size_t kNumInputChannels = 2; + const size_t kNumOutputChannels = 1; + const size_t kNumChunks = 700; const float kScaleFactor = 0.25f; Config 
config; std::vector<webrtc::Point> geometry; @@ -1317,8 +1312,8 @@ TEST_F(ApmTest, AgcOnlyAdaptsWhenTargetSignalIsPresent) { EXPECT_EQ(kNoErr, apm->gain_control()->Enable(true)); ChannelBuffer<float> src_buf(kSamplesPerChannel, kNumInputChannels); ChannelBuffer<float> dest_buf(kSamplesPerChannel, kNumOutputChannels); - const int max_length = kSamplesPerChannel * std::max(kNumInputChannels, - kNumOutputChannels); + const size_t max_length = kSamplesPerChannel * std::max(kNumInputChannels, + kNumOutputChannels); rtc::scoped_ptr<int16_t[]> int_data(new int16_t[max_length]); rtc::scoped_ptr<float[]> float_data(new float[max_length]); std::string filename = ResourceFilePath("far", kSampleRateHz); @@ -1330,13 +1325,13 @@ TEST_F(ApmTest, AgcOnlyAdaptsWhenTargetSignalIsPresent) { bool is_target = false; EXPECT_CALL(*beamformer, is_target_present()) .WillRepeatedly(testing::ReturnPointee(&is_target)); - for (int i = 0; i < kNumChunks; ++i) { + for (size_t i = 0; i < kNumChunks; ++i) { ASSERT_TRUE(ReadChunk(far_file, int_data.get(), float_data.get(), &src_buf)); - for (int j = 0; j < kNumInputChannels; ++j) { - for (int k = 0; k < kSamplesPerChannel; ++k) { + for (size_t j = 0; j < kNumInputChannels; ++j) { + for (size_t k = 0; k < kSamplesPerChannel; ++k) { src_buf.channels()[j][k] *= kScaleFactor; } } @@ -1355,13 +1350,13 @@ TEST_F(ApmTest, AgcOnlyAdaptsWhenTargetSignalIsPresent) { apm->gain_control()->compression_gain_db()); rewind(far_file); is_target = true; - for (int i = 0; i < kNumChunks; ++i) { + for (size_t i = 0; i < kNumChunks; ++i) { ASSERT_TRUE(ReadChunk(far_file, int_data.get(), float_data.get(), &src_buf)); - for (int j = 0; j < kNumInputChannels; ++j) { - for (int k = 0; k < kSamplesPerChannel; ++k) { + for (size_t j = 0; j < kNumInputChannels; ++j) { + for (size_t k = 0; k < kSamplesPerChannel; ++k) { src_buf.channels()[j][k] *= kScaleFactor; } } @@ -1390,7 +1385,7 @@ TEST_F(ApmTest, NoiseSuppression) { NoiseSuppression::kHigh, NoiseSuppression::kVeryHigh 
}; - for (size_t i = 0; i < sizeof(level)/sizeof(*level); i++) { + for (size_t i = 0; i < arraysize(level); i++) { EXPECT_EQ(apm_->kNoError, apm_->noise_suppression()->set_level(level[i])); EXPECT_EQ(level[i], apm_->noise_suppression()->level()); @@ -1492,7 +1487,7 @@ TEST_F(ApmTest, VoiceDetection) { VoiceDetection::kModerateLikelihood, VoiceDetection::kHighLikelihood }; - for (size_t i = 0; i < sizeof(likelihood)/sizeof(*likelihood); i++) { + for (size_t i = 0; i < arraysize(likelihood); i++) { EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->set_likelihood(likelihood[i])); EXPECT_EQ(likelihood[i], apm_->voice_detection()->likelihood()); @@ -1524,7 +1519,7 @@ TEST_F(ApmTest, VoiceDetection) { AudioFrame::kVadPassive, AudioFrame::kVadUnknown }; - for (size_t i = 0; i < sizeof(activity)/sizeof(*activity); i++) { + for (size_t i = 0; i < arraysize(activity); i++) { frame_->vad_activity_ = activity[i]; EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); EXPECT_EQ(activity[i], frame_->vad_activity_); @@ -1550,7 +1545,7 @@ TEST_F(ApmTest, AllProcessingDisabledByDefault) { } TEST_F(ApmTest, NoProcessingWhenAllComponentsDisabled) { - for (size_t i = 0; i < kSampleRatesSize; i++) { + for (size_t i = 0; i < arraysize(kSampleRates); i++) { Init(kSampleRates[i], kSampleRates[i], kSampleRates[i], 2, 2, 2, false); SetFrameTo(frame_, 1000, 2000); AudioFrame frame_copy; @@ -1602,7 +1597,7 @@ TEST_F(ApmTest, NoProcessingWhenAllComponentsDisabledFloat) { TEST_F(ApmTest, IdenticalInputChannelsResultInIdenticalOutputChannels) { EnableAllComponents(); - for (size_t i = 0; i < kProcessSampleRatesSize; i++) { + for (size_t i = 0; i < arraysize(kProcessSampleRates); i++) { Init(kProcessSampleRates[i], kProcessSampleRates[i], kProcessSampleRates[i], @@ -1751,7 +1746,8 @@ void ApmTest::ProcessDebugDump(const std::string& in_filename, const audioproc::ReverseStream msg = event_msg.reverse_stream(); if (msg.channel_size() > 0) { - ASSERT_EQ(revframe_->num_channels_, 
msg.channel_size()); + ASSERT_EQ(revframe_->num_channels_, + static_cast<size_t>(msg.channel_size())); for (int i = 0; i < msg.channel_size(); ++i) { memcpy(revfloat_cb_->channels()[i], msg.channel(i).data(), @@ -1781,7 +1777,8 @@ void ApmTest::ProcessDebugDump(const std::string& in_filename, } if (msg.input_channel_size() > 0) { - ASSERT_EQ(frame_->num_channels_, msg.input_channel_size()); + ASSERT_EQ(frame_->num_channels_, + static_cast<size_t>(msg.input_channel_size())); for (int i = 0; i < msg.input_channel_size(); ++i) { memcpy(float_cb_->channels()[i], msg.input_channel(i).data(), @@ -1939,11 +1936,14 @@ TEST_F(ApmTest, FloatAndIntInterfacesGiveSimilarResults) { if (test->num_input_channels() != test->num_output_channels()) continue; - const int num_render_channels = test->num_reverse_channels(); - const int num_input_channels = test->num_input_channels(); - const int num_output_channels = test->num_output_channels(); - const int samples_per_channel = test->sample_rate() * - AudioProcessing::kChunkSizeMs / 1000; + const size_t num_render_channels = + static_cast<size_t>(test->num_reverse_channels()); + const size_t num_input_channels = + static_cast<size_t>(test->num_input_channels()); + const size_t num_output_channels = + static_cast<size_t>(test->num_output_channels()); + const size_t samples_per_channel = static_cast<size_t>( + test->sample_rate() * AudioProcessing::kChunkSizeMs / 1000); Init(test->sample_rate(), test->sample_rate(), test->sample_rate(), num_input_channels, num_output_channels, num_render_channels, true); @@ -1984,7 +1984,7 @@ TEST_F(ApmTest, FloatAndIntInterfacesGiveSimilarResults) { test->sample_rate(), LayoutFromChannels(num_output_channels), float_cb_->channels())); - for (int j = 0; j < num_output_channels; ++j) { + for (size_t j = 0; j < num_output_channels; ++j) { FloatToS16(float_cb_->channels()[j], samples_per_channel, output_cb.channels()[j]); @@ -2017,7 +2017,7 @@ TEST_F(ApmTest, FloatAndIntInterfacesGiveSimilarResults) { 
0.01); // Reset in case of downmixing. - frame_->num_channels_ = test->num_input_channels(); + frame_->num_channels_ = static_cast<size_t>(test->num_input_channels()); } rewind(far_file_); rewind(near_file_); @@ -2035,9 +2035,9 @@ TEST_F(ApmTest, Process) { OpenFileAndReadMessage(ref_filename_, &ref_data); } else { // Write the desired tests to the protobuf reference file. - for (size_t i = 0; i < kChannelsSize; i++) { - for (size_t j = 0; j < kChannelsSize; j++) { - for (size_t l = 0; l < kProcessSampleRatesSize; l++) { + for (size_t i = 0; i < arraysize(kChannels); i++) { + for (size_t j = 0; j < arraysize(kChannels); j++) { + for (size_t l = 0; l < arraysize(kProcessSampleRates); l++) { audioproc::Test* test = ref_data.add_test(); test->set_num_reverse_channels(kChannels[i]); test->set_num_input_channels(kChannels[j]); @@ -2078,9 +2078,9 @@ TEST_F(ApmTest, Process) { Init(test->sample_rate(), test->sample_rate(), test->sample_rate(), - test->num_input_channels(), - test->num_output_channels(), - test->num_reverse_channels(), + static_cast<size_t>(test->num_input_channels()), + static_cast<size_t>(test->num_output_channels()), + static_cast<size_t>(test->num_reverse_channels()), true); int frame_count = 0; @@ -2105,7 +2105,8 @@ TEST_F(ApmTest, Process) { EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); // Ensure the frame was downmixed properly. - EXPECT_EQ(test->num_output_channels(), frame_->num_channels_); + EXPECT_EQ(static_cast<size_t>(test->num_output_channels()), + frame_->num_channels_); max_output_average += MaxAudioFrame(*frame_); @@ -2135,7 +2136,7 @@ TEST_F(ApmTest, Process) { ASSERT_EQ(frame_size, write_count); // Reset in case of downmixing. 
- frame_->num_channels_ = test->num_input_channels(); + frame_->num_channels_ = static_cast<size_t>(test->num_input_channels()); frame_count++; } max_output_average /= frame_count; @@ -2264,12 +2265,11 @@ TEST_F(ApmTest, NoErrorsWithKeyboardChannel) { {AudioProcessing::kStereoAndKeyboard, AudioProcessing::kMono}, {AudioProcessing::kStereoAndKeyboard, AudioProcessing::kStereo}, }; - size_t channel_format_size = sizeof(cf) / sizeof(*cf); rtc::scoped_ptr<AudioProcessing> ap(AudioProcessing::Create()); // Enable one component just to ensure some processing takes place. ap->noise_suppression()->Enable(true); - for (size_t i = 0; i < channel_format_size; ++i) { + for (size_t i = 0; i < arraysize(cf); ++i) { const int in_rate = 44100; const int out_rate = 48000; ChannelBuffer<float> in_cb(SamplesFromRate(in_rate), @@ -2296,7 +2296,7 @@ TEST_F(ApmTest, NoErrorsWithKeyboardChannel) { // error results to the supplied accumulators. void UpdateBestSNR(const float* ref, const float* test, - int length, + size_t length, int expected_delay, double* variance_acc, double* sq_error_acc) { @@ -2308,7 +2308,7 @@ void UpdateBestSNR(const float* ref, ++delay) { double sq_error = 0; double variance = 0; - for (int i = 0; i < length - delay; ++i) { + for (size_t i = 0; i < length - delay; ++i) { double error = test[i + delay] - ref[i]; sq_error += error * error; variance += ref[i] * ref[i]; @@ -2360,14 +2360,10 @@ class AudioProcessingTest static void SetUpTestCase() { // Create all needed output reference files. 
const int kNativeRates[] = {8000, 16000, 32000, 48000}; - const size_t kNativeRatesSize = - sizeof(kNativeRates) / sizeof(*kNativeRates); - const int kNumChannels[] = {1, 2}; - const size_t kNumChannelsSize = - sizeof(kNumChannels) / sizeof(*kNumChannels); - for (size_t i = 0; i < kNativeRatesSize; ++i) { - for (size_t j = 0; j < kNumChannelsSize; ++j) { - for (size_t k = 0; k < kNumChannelsSize; ++k) { + const size_t kNumChannels[] = {1, 2}; + for (size_t i = 0; i < arraysize(kNativeRates); ++i) { + for (size_t j = 0; j < arraysize(kNumChannels); ++j) { + for (size_t k = 0; k < arraysize(kNumChannels); ++k) { // The reference files always have matching input and output channels. ProcessFormat(kNativeRates[i], kNativeRates[i], kNativeRates[i], kNativeRates[i], kNumChannels[j], kNumChannels[j], @@ -2388,10 +2384,10 @@ class AudioProcessingTest int output_rate, int reverse_input_rate, int reverse_output_rate, - int num_input_channels, - int num_output_channels, - int num_reverse_input_channels, - int num_reverse_output_channels, + size_t num_input_channels, + size_t num_output_channels, + size_t num_reverse_input_channels, + size_t num_reverse_output_channels, std::string output_file_prefix) { Config config; config.Set<ExperimentalAgc>(new ExperimentalAgc(false)); @@ -2466,18 +2462,19 @@ class AudioProcessingTest // Dump forward output to file. Interleave(out_cb.channels(), out_cb.num_frames(), out_cb.num_channels(), float_data.get()); - int out_length = out_cb.num_channels() * out_cb.num_frames(); + size_t out_length = out_cb.num_channels() * out_cb.num_frames(); - ASSERT_EQ(static_cast<size_t>(out_length), + ASSERT_EQ(out_length, fwrite(float_data.get(), sizeof(float_data[0]), out_length, out_file)); // Dump reverse output to file. 
Interleave(rev_out_cb.channels(), rev_out_cb.num_frames(), rev_out_cb.num_channels(), float_data.get()); - int rev_out_length = rev_out_cb.num_channels() * rev_out_cb.num_frames(); + size_t rev_out_length = + rev_out_cb.num_channels() * rev_out_cb.num_frames(); - ASSERT_EQ(static_cast<size_t>(rev_out_length), + ASSERT_EQ(rev_out_length, fwrite(float_data.get(), sizeof(float_data[0]), rev_out_length, rev_out_file)); @@ -2513,9 +2510,8 @@ TEST_P(AudioProcessingTest, Formats) { {2, 2, 1, 1}, {2, 2, 2, 2}, }; - size_t channel_format_size = sizeof(cf) / sizeof(*cf); - for (size_t i = 0; i < channel_format_size; ++i) { + for (size_t i = 0; i < arraysize(cf); ++i) { ProcessFormat(input_rate_, output_rate_, reverse_input_rate_, reverse_output_rate_, cf[i].num_input, cf[i].num_output, cf[i].num_reverse_input, cf[i].num_reverse_output, "out"); @@ -2565,8 +2561,8 @@ TEST_P(AudioProcessingTest, Formats) { ASSERT_TRUE(out_file != NULL); ASSERT_TRUE(ref_file != NULL); - const int ref_length = SamplesFromRate(ref_rate) * out_num; - const int out_length = SamplesFromRate(out_rate) * out_num; + const size_t ref_length = SamplesFromRate(ref_rate) * out_num; + const size_t out_length = SamplesFromRate(out_rate) * out_num; // Data from the reference file. rtc::scoped_ptr<float[]> ref_data(new float[ref_length]); // Data from the output file. @@ -2606,8 +2602,9 @@ TEST_P(AudioProcessingTest, Formats) { if (out_rate != ref_rate) { // Resample the output back to its internal processing rate if // necssary. - ASSERT_EQ(ref_length, resampler.Resample(out_ptr, out_length, - cmp_data.get(), ref_length)); + ASSERT_EQ(ref_length, + static_cast<size_t>(resampler.Resample( + out_ptr, out_length, cmp_data.get(), ref_length))); out_ptr = cmp_data.get(); } @@ -2752,9 +2749,5 @@ INSTANTIATE_TEST_CASE_P( std::tr1::make_tuple(16000, 16000, 16000, 16000, 0, 0))); #endif -// TODO(henrike): re-implement functionality lost when removing the old main -// function. 
See -// https://code.google.com/p/webrtc/issues/detail?id=1981 - } // namespace } // namespace webrtc diff --git a/webrtc/modules/audio_processing/test/audioproc_float.cc b/webrtc/modules/audio_processing/test/audioproc_float.cc index 811e9070fa..a489d255c8 100644 --- a/webrtc/modules/audio_processing/test/audioproc_float.cc +++ b/webrtc/modules/audio_processing/test/audioproc_float.cc @@ -9,35 +9,50 @@ */ #include <stdio.h> +#include <iostream> #include <sstream> #include <string> +#include <utility> #include "gflags/gflags.h" #include "webrtc/base/checks.h" +#include "webrtc/base/format_macros.h" #include "webrtc/base/scoped_ptr.h" #include "webrtc/common_audio/channel_buffer.h" #include "webrtc/common_audio/wav_file.h" #include "webrtc/modules/audio_processing/include/audio_processing.h" +#include "webrtc/modules/audio_processing/test/audio_file_processor.h" #include "webrtc/modules/audio_processing/test/protobuf_utils.h" #include "webrtc/modules/audio_processing/test/test_utils.h" #include "webrtc/system_wrappers/include/tick_util.h" #include "webrtc/test/testsupport/trace_to_stderr.h" -DEFINE_string(dump, "", "The name of the debug dump file to read from."); -DEFINE_string(i, "", "The name of the input file to read from."); -DEFINE_string(i_rev, "", "The name of the reverse input file to read from."); -DEFINE_string(o, "out.wav", "Name of the output file to write to."); -DEFINE_string(o_rev, - "out_rev.wav", - "Name of the reverse output file to write to."); -DEFINE_int32(out_channels, 0, "Number of output channels. Defaults to input."); -DEFINE_int32(out_sample_rate, 0, - "Output sample rate in Hz. 
Defaults to input."); +namespace { + +bool ValidateOutChannels(const char* flagname, int32_t value) { + return value >= 0; +} + +} // namespace + +DEFINE_string(dump, "", "Name of the aecdump debug file to read from."); +DEFINE_string(i, "", "Name of the capture input stream file to read from."); +DEFINE_string( + o, + "out.wav", + "Name of the output file to write the processed capture stream to."); +DEFINE_int32(out_channels, 1, "Number of output channels."); +const bool out_channels_dummy = + google::RegisterFlagValidator(&FLAGS_out_channels, &ValidateOutChannels); +DEFINE_int32(out_sample_rate, 48000, "Output sample rate in Hz."); DEFINE_string(mic_positions, "", "Space delimited cartesian coordinates of microphones in meters. " "The coordinates of each point are contiguous. " "For a two element array: \"x1 y1 z1 x2 y2 z2\""); -DEFINE_double(target_angle_degrees, 90, "The azimuth of the target in radians"); +DEFINE_double( + target_angle_degrees, + 90, + "The azimuth of the target in degrees. Only applies to beamforming."); DEFINE_bool(aec, false, "Enable echo cancellation."); DEFINE_bool(agc, false, "Enable automatic gain control."); @@ -64,15 +79,6 @@ const char kUsage[] = "All components are disabled by default. If any bi-directional components\n" "are enabled, only debug dump files are permitted."; -// Returns a StreamConfig corresponding to wav_file if it's non-nullptr. -// Otherwise returns a default initialized StreamConfig. 
-StreamConfig MakeStreamConfig(const WavFile* wav_file) { - if (wav_file) { - return {wav_file->sample_rate(), wav_file->num_channels()}; - } - return {}; -} - } // namespace int main(int argc, char* argv[]) { @@ -84,158 +90,75 @@ int main(int argc, char* argv[]) { "An input file must be specified with either -i or -dump.\n"); return 1; } - if (!FLAGS_dump.empty()) { - fprintf(stderr, "FIXME: the -dump option is not yet implemented.\n"); + if (FLAGS_dump.empty() && (FLAGS_aec || FLAGS_ie)) { + fprintf(stderr, "-aec and -ie require a -dump file.\n"); + return 1; + } + if (FLAGS_ie) { + fprintf(stderr, + "FIXME(ajm): The intelligibility enhancer output is not dumped.\n"); return 1; } test::TraceToStderr trace_to_stderr(true); - WavReader in_file(FLAGS_i); - // If the output format is uninitialized, use the input format. - const int out_channels = - FLAGS_out_channels ? FLAGS_out_channels : in_file.num_channels(); - const int out_sample_rate = - FLAGS_out_sample_rate ? FLAGS_out_sample_rate : in_file.sample_rate(); - WavWriter out_file(FLAGS_o, out_sample_rate, out_channels); - Config config; - config.Set<ExperimentalNs>(new ExperimentalNs(FLAGS_ts || FLAGS_all)); - config.Set<Intelligibility>(new Intelligibility(FLAGS_ie || FLAGS_all)); - if (FLAGS_bf || FLAGS_all) { - const size_t num_mics = in_file.num_channels(); - const std::vector<Point> array_geometry = - ParseArrayGeometry(FLAGS_mic_positions, num_mics); - RTC_CHECK_EQ(array_geometry.size(), num_mics); - + if (FLAGS_mic_positions.empty()) { + fprintf(stderr, "-mic_positions must be specified when -bf is used.\n"); + return 1; + } config.Set<Beamforming>(new Beamforming( - true, array_geometry, + true, ParseArrayGeometry(FLAGS_mic_positions), SphericalPointf(DegreesToRadians(FLAGS_target_angle_degrees), 0.f, 1.f))); } + config.Set<ExperimentalNs>(new ExperimentalNs(FLAGS_ts || FLAGS_all)); + config.Set<Intelligibility>(new Intelligibility(FLAGS_ie || FLAGS_all)); rtc::scoped_ptr<AudioProcessing> 
ap(AudioProcessing::Create(config)); - if (!FLAGS_dump.empty()) { - RTC_CHECK_EQ(kNoErr, - ap->echo_cancellation()->Enable(FLAGS_aec || FLAGS_all)); - } else if (FLAGS_aec) { - fprintf(stderr, "-aec requires a -dump file.\n"); - return -1; - } - bool process_reverse = !FLAGS_i_rev.empty(); + RTC_CHECK_EQ(kNoErr, ap->echo_cancellation()->Enable(FLAGS_aec || FLAGS_all)); RTC_CHECK_EQ(kNoErr, ap->gain_control()->Enable(FLAGS_agc || FLAGS_all)); - RTC_CHECK_EQ(kNoErr, - ap->gain_control()->set_mode(GainControl::kFixedDigital)); RTC_CHECK_EQ(kNoErr, ap->high_pass_filter()->Enable(FLAGS_hpf || FLAGS_all)); RTC_CHECK_EQ(kNoErr, ap->noise_suppression()->Enable(FLAGS_ns || FLAGS_all)); - if (FLAGS_ns_level != -1) + if (FLAGS_ns_level != -1) { RTC_CHECK_EQ(kNoErr, ap->noise_suppression()->set_level( static_cast<NoiseSuppression::Level>(FLAGS_ns_level))); - - printf("Input file: %s\nChannels: %d, Sample rate: %d Hz\n\n", - FLAGS_i.c_str(), in_file.num_channels(), in_file.sample_rate()); - printf("Output file: %s\nChannels: %d, Sample rate: %d Hz\n\n", - FLAGS_o.c_str(), out_file.num_channels(), out_file.sample_rate()); - - ChannelBuffer<float> in_buf( - rtc::CheckedDivExact(in_file.sample_rate(), kChunksPerSecond), - in_file.num_channels()); - ChannelBuffer<float> out_buf( - rtc::CheckedDivExact(out_file.sample_rate(), kChunksPerSecond), - out_file.num_channels()); - - std::vector<float> in_interleaved(in_buf.size()); - std::vector<float> out_interleaved(out_buf.size()); - - rtc::scoped_ptr<WavReader> in_rev_file; - rtc::scoped_ptr<WavWriter> out_rev_file; - rtc::scoped_ptr<ChannelBuffer<float>> in_rev_buf; - rtc::scoped_ptr<ChannelBuffer<float>> out_rev_buf; - std::vector<float> in_rev_interleaved; - std::vector<float> out_rev_interleaved; - if (process_reverse) { - in_rev_file.reset(new WavReader(FLAGS_i_rev)); - out_rev_file.reset(new WavWriter(FLAGS_o_rev, in_rev_file->sample_rate(), - in_rev_file->num_channels())); - printf("In rev file: %s\nChannels: %d, Sample rate: %d 
Hz\n\n", - FLAGS_i_rev.c_str(), in_rev_file->num_channels(), - in_rev_file->sample_rate()); - printf("Out rev file: %s\nChannels: %d, Sample rate: %d Hz\n\n", - FLAGS_o_rev.c_str(), out_rev_file->num_channels(), - out_rev_file->sample_rate()); - in_rev_buf.reset(new ChannelBuffer<float>( - rtc::CheckedDivExact(in_rev_file->sample_rate(), kChunksPerSecond), - in_rev_file->num_channels())); - in_rev_interleaved.resize(in_rev_buf->size()); - out_rev_buf.reset(new ChannelBuffer<float>( - rtc::CheckedDivExact(out_rev_file->sample_rate(), kChunksPerSecond), - out_rev_file->num_channels())); - out_rev_interleaved.resize(out_rev_buf->size()); + } + ap->set_stream_key_pressed(FLAGS_ts); + + rtc::scoped_ptr<AudioFileProcessor> processor; + auto out_file = rtc_make_scoped_ptr(new WavWriter( + FLAGS_o, FLAGS_out_sample_rate, static_cast<size_t>(FLAGS_out_channels))); + std::cout << FLAGS_o << ": " << out_file->FormatAsString() << std::endl; + if (FLAGS_dump.empty()) { + auto in_file = rtc_make_scoped_ptr(new WavReader(FLAGS_i)); + std::cout << FLAGS_i << ": " << in_file->FormatAsString() << std::endl; + processor.reset(new WavFileProcessor(std::move(ap), std::move(in_file), + std::move(out_file))); + + } else { + processor.reset(new AecDumpFileProcessor( + std::move(ap), fopen(FLAGS_dump.c_str(), "rb"), std::move(out_file))); } - TickTime processing_start_time; - TickInterval accumulated_time; int num_chunks = 0; - - const auto input_config = MakeStreamConfig(&in_file); - const auto output_config = MakeStreamConfig(&out_file); - const auto reverse_input_config = MakeStreamConfig(in_rev_file.get()); - const auto reverse_output_config = MakeStreamConfig(out_rev_file.get()); - - while (in_file.ReadSamples(in_interleaved.size(), - &in_interleaved[0]) == in_interleaved.size()) { - // Have logs display the file time rather than wallclock time. 
+ while (processor->ProcessChunk()) { trace_to_stderr.SetTimeSeconds(num_chunks * 1.f / kChunksPerSecond); - FloatS16ToFloat(&in_interleaved[0], in_interleaved.size(), - &in_interleaved[0]); - Deinterleave(&in_interleaved[0], in_buf.num_frames(), - in_buf.num_channels(), in_buf.channels()); - if (process_reverse) { - in_rev_file->ReadSamples(in_rev_interleaved.size(), - in_rev_interleaved.data()); - FloatS16ToFloat(in_rev_interleaved.data(), in_rev_interleaved.size(), - in_rev_interleaved.data()); - Deinterleave(in_rev_interleaved.data(), in_rev_buf->num_frames(), - in_rev_buf->num_channels(), in_rev_buf->channels()); - } - - if (FLAGS_perf) { - processing_start_time = TickTime::Now(); - } - RTC_CHECK_EQ(kNoErr, ap->ProcessStream(in_buf.channels(), input_config, - output_config, out_buf.channels())); - if (process_reverse) { - RTC_CHECK_EQ(kNoErr, ap->ProcessReverseStream( - in_rev_buf->channels(), reverse_input_config, - reverse_output_config, out_rev_buf->channels())); - } - if (FLAGS_perf) { - accumulated_time += TickTime::Now() - processing_start_time; - } - - Interleave(out_buf.channels(), out_buf.num_frames(), - out_buf.num_channels(), &out_interleaved[0]); - FloatToFloatS16(&out_interleaved[0], out_interleaved.size(), - &out_interleaved[0]); - out_file.WriteSamples(&out_interleaved[0], out_interleaved.size()); - if (process_reverse) { - Interleave(out_rev_buf->channels(), out_rev_buf->num_frames(), - out_rev_buf->num_channels(), out_rev_interleaved.data()); - FloatToFloatS16(out_rev_interleaved.data(), out_rev_interleaved.size(), - out_rev_interleaved.data()); - out_rev_file->WriteSamples(out_rev_interleaved.data(), - out_rev_interleaved.size()); - } - num_chunks++; + ++num_chunks; } + if (FLAGS_perf) { - int64_t execution_time_ms = accumulated_time.Milliseconds(); - printf("\nExecution time: %.3f s\nFile time: %.2f s\n" - "Time per chunk: %.3f ms\n", - execution_time_ms * 0.001f, num_chunks * 1.f / kChunksPerSecond, - execution_time_ms * 1.f / num_chunks); 
+ const auto& proc_time = processor->proc_time(); + int64_t exec_time_us = proc_time.sum.Microseconds(); + printf( + "\nExecution time: %.3f s, File time: %.2f s\n" + "Time per chunk (mean, max, min):\n%.0f us, %.0f us, %.0f us\n", + exec_time_us * 1e-6, num_chunks * 1.f / kChunksPerSecond, + exec_time_us * 1.f / num_chunks, 1.f * proc_time.max.Microseconds(), + 1.f * proc_time.min.Microseconds()); } + return 0; } diff --git a/webrtc/modules/audio_processing/test/debug_dump_test.cc b/webrtc/modules/audio_processing/test/debug_dump_test.cc new file mode 100644 index 0000000000..005faa0f44 --- /dev/null +++ b/webrtc/modules/audio_processing/test/debug_dump_test.cc @@ -0,0 +1,612 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <stddef.h> // size_t +#include <string> +#include <vector> + +#include "testing/gtest/include/gtest/gtest.h" +#include "webrtc/audio_processing/debug.pb.h" +#include "webrtc/base/checks.h" +#include "webrtc/base/scoped_ptr.h" +#include "webrtc/common_audio/channel_buffer.h" +#include "webrtc/modules/audio_coding/neteq/tools/resample_input_audio_file.h" +#include "webrtc/modules/audio_processing/include/audio_processing.h" +#include "webrtc/modules/audio_processing/test/protobuf_utils.h" +#include "webrtc/modules/audio_processing/test/test_utils.h" +#include "webrtc/test/testsupport/fileutils.h" + +namespace webrtc { +namespace test { + +namespace { + +void MaybeResetBuffer(rtc::scoped_ptr<ChannelBuffer<float>>* buffer, + const StreamConfig& config) { + auto& buffer_ref = *buffer; + if (!buffer_ref.get() || buffer_ref->num_frames() != config.num_frames() || + buffer_ref->num_channels() != config.num_channels()) { + buffer_ref.reset(new ChannelBuffer<float>(config.num_frames(), + config.num_channels())); + } +} + +class DebugDumpGenerator { + public: + DebugDumpGenerator(const std::string& input_file_name, + int input_file_rate_hz, + int input_channels, + const std::string& reverse_file_name, + int reverse_file_rate_hz, + int reverse_channels, + const Config& config, + const std::string& dump_file_name); + + // Constructor that uses default input files. + explicit DebugDumpGenerator(const Config& config); + + ~DebugDumpGenerator(); + + // Changes the sample rate of the input audio to the APM. + void SetInputRate(int rate_hz); + + // Sets if converts stereo input signal to mono by discarding other channels. + void ForceInputMono(bool mono); + + // Changes the sample rate of the reverse audio to the APM. + void SetReverseRate(int rate_hz); + + // Sets if converts stereo reverse signal to mono by discarding other + // channels. + void ForceReverseMono(bool mono); + + // Sets the required sample rate of the APM output. 
+ void SetOutputRate(int rate_hz); + + // Sets the required channels of the APM output. + void SetOutputChannels(int channels); + + std::string dump_file_name() const { return dump_file_name_; } + + void StartRecording(); + void Process(size_t num_blocks); + void StopRecording(); + AudioProcessing* apm() const { return apm_.get(); } + + private: + static void ReadAndDeinterleave(ResampleInputAudioFile* audio, int channels, + const StreamConfig& config, + float* const* buffer); + + // APM input/output settings. + StreamConfig input_config_; + StreamConfig reverse_config_; + StreamConfig output_config_; + + // Input file format. + const std::string input_file_name_; + ResampleInputAudioFile input_audio_; + const int input_file_channels_; + + // Reverse file format. + const std::string reverse_file_name_; + ResampleInputAudioFile reverse_audio_; + const int reverse_file_channels_; + + // Buffer for APM input/output. + rtc::scoped_ptr<ChannelBuffer<float>> input_; + rtc::scoped_ptr<ChannelBuffer<float>> reverse_; + rtc::scoped_ptr<ChannelBuffer<float>> output_; + + rtc::scoped_ptr<AudioProcessing> apm_; + + const std::string dump_file_name_; +}; + +DebugDumpGenerator::DebugDumpGenerator(const std::string& input_file_name, + int input_rate_hz, + int input_channels, + const std::string& reverse_file_name, + int reverse_rate_hz, + int reverse_channels, + const Config& config, + const std::string& dump_file_name) + : input_config_(input_rate_hz, input_channels), + reverse_config_(reverse_rate_hz, reverse_channels), + output_config_(input_rate_hz, input_channels), + input_audio_(input_file_name, input_rate_hz, input_rate_hz), + input_file_channels_(input_channels), + reverse_audio_(reverse_file_name, reverse_rate_hz, reverse_rate_hz), + reverse_file_channels_(reverse_channels), + input_(new ChannelBuffer<float>(input_config_.num_frames(), + input_config_.num_channels())), + reverse_(new ChannelBuffer<float>(reverse_config_.num_frames(), + reverse_config_.num_channels())), + 
output_(new ChannelBuffer<float>(output_config_.num_frames(), + output_config_.num_channels())), + apm_(AudioProcessing::Create(config)), + dump_file_name_(dump_file_name) { +} + +DebugDumpGenerator::DebugDumpGenerator(const Config& config) + : DebugDumpGenerator(ResourcePath("near32_stereo", "pcm"), 32000, 2, + ResourcePath("far32_stereo", "pcm"), 32000, 2, + config, + TempFilename(OutputPath(), "debug_aec")) { +} + +DebugDumpGenerator::~DebugDumpGenerator() { + remove(dump_file_name_.c_str()); +} + +void DebugDumpGenerator::SetInputRate(int rate_hz) { + input_audio_.set_output_rate_hz(rate_hz); + input_config_.set_sample_rate_hz(rate_hz); + MaybeResetBuffer(&input_, input_config_); +} + +void DebugDumpGenerator::ForceInputMono(bool mono) { + const int channels = mono ? 1 : input_file_channels_; + input_config_.set_num_channels(channels); + MaybeResetBuffer(&input_, input_config_); +} + +void DebugDumpGenerator::SetReverseRate(int rate_hz) { + reverse_audio_.set_output_rate_hz(rate_hz); + reverse_config_.set_sample_rate_hz(rate_hz); + MaybeResetBuffer(&reverse_, reverse_config_); +} + +void DebugDumpGenerator::ForceReverseMono(bool mono) { + const int channels = mono ? 
1 : reverse_file_channels_; + reverse_config_.set_num_channels(channels); + MaybeResetBuffer(&reverse_, reverse_config_); +} + +void DebugDumpGenerator::SetOutputRate(int rate_hz) { + output_config_.set_sample_rate_hz(rate_hz); + MaybeResetBuffer(&output_, output_config_); +} + +void DebugDumpGenerator::SetOutputChannels(int channels) { + output_config_.set_num_channels(channels); + MaybeResetBuffer(&output_, output_config_); +} + +void DebugDumpGenerator::StartRecording() { + apm_->StartDebugRecording(dump_file_name_.c_str()); +} + +void DebugDumpGenerator::Process(size_t num_blocks) { + for (size_t i = 0; i < num_blocks; ++i) { + ReadAndDeinterleave(&reverse_audio_, reverse_file_channels_, + reverse_config_, reverse_->channels()); + ReadAndDeinterleave(&input_audio_, input_file_channels_, input_config_, + input_->channels()); + RTC_CHECK_EQ(AudioProcessing::kNoError, apm_->set_stream_delay_ms(100)); + apm_->set_stream_key_pressed(i % 10 == 9); + RTC_CHECK_EQ(AudioProcessing::kNoError, + apm_->ProcessStream(input_->channels(), input_config_, + output_config_, output_->channels())); + + RTC_CHECK_EQ(AudioProcessing::kNoError, + apm_->ProcessReverseStream(reverse_->channels(), + reverse_config_, + reverse_config_, + reverse_->channels())); + } +} + +void DebugDumpGenerator::StopRecording() { + apm_->StopDebugRecording(); +} + +void DebugDumpGenerator::ReadAndDeinterleave(ResampleInputAudioFile* audio, + int channels, + const StreamConfig& config, + float* const* buffer) { + const size_t num_frames = config.num_frames(); + const int out_channels = config.num_channels(); + + std::vector<int16_t> signal(channels * num_frames); + + audio->Read(num_frames * channels, &signal[0]); + + // We only allow reducing number of channels by discarding some channels. 
+ RTC_CHECK_LE(out_channels, channels); + for (int channel = 0; channel < out_channels; ++channel) { + for (size_t i = 0; i < num_frames; ++i) { + buffer[channel][i] = S16ToFloat(signal[i * channels + channel]); + } + } +} + +} // namespace + +class DebugDumpTest : public ::testing::Test { + public: + DebugDumpTest(); + + // VerifyDebugDump replays a debug dump using APM and verifies that the result + // is bit-exact-identical to the output channel in the dump. This is only + // guaranteed if the debug dump is started on the first frame. + void VerifyDebugDump(const std::string& dump_file_name); + + private: + // Following functions are facilities for replaying debug dumps. + void OnInitEvent(const audioproc::Init& msg); + void OnStreamEvent(const audioproc::Stream& msg); + void OnReverseStreamEvent(const audioproc::ReverseStream& msg); + void OnConfigEvent(const audioproc::Config& msg); + + void MaybeRecreateApm(const audioproc::Config& msg); + void ConfigureApm(const audioproc::Config& msg); + + // Buffer for APM input/output. + rtc::scoped_ptr<ChannelBuffer<float>> input_; + rtc::scoped_ptr<ChannelBuffer<float>> reverse_; + rtc::scoped_ptr<ChannelBuffer<float>> output_; + + rtc::scoped_ptr<AudioProcessing> apm_; + + StreamConfig input_config_; + StreamConfig reverse_config_; + StreamConfig output_config_; +}; + +DebugDumpTest::DebugDumpTest() + : input_(nullptr), // will be created upon usage. 
+ reverse_(nullptr), + output_(nullptr), + apm_(nullptr) { +} + +void DebugDumpTest::VerifyDebugDump(const std::string& in_filename) { + FILE* in_file = fopen(in_filename.c_str(), "rb"); + ASSERT_TRUE(in_file); + audioproc::Event event_msg; + + while (ReadMessageFromFile(in_file, &event_msg)) { + switch (event_msg.type()) { + case audioproc::Event::INIT: + OnInitEvent(event_msg.init()); + break; + case audioproc::Event::STREAM: + OnStreamEvent(event_msg.stream()); + break; + case audioproc::Event::REVERSE_STREAM: + OnReverseStreamEvent(event_msg.reverse_stream()); + break; + case audioproc::Event::CONFIG: + OnConfigEvent(event_msg.config()); + break; + case audioproc::Event::UNKNOWN_EVENT: + // We do not expect receive UNKNOWN event currently. + FAIL(); + } + } + fclose(in_file); +} + +// OnInitEvent reset the input/output/reserve channel format. +void DebugDumpTest::OnInitEvent(const audioproc::Init& msg) { + ASSERT_TRUE(msg.has_num_input_channels()); + ASSERT_TRUE(msg.has_output_sample_rate()); + ASSERT_TRUE(msg.has_num_output_channels()); + ASSERT_TRUE(msg.has_reverse_sample_rate()); + ASSERT_TRUE(msg.has_num_reverse_channels()); + + input_config_ = StreamConfig(msg.sample_rate(), msg.num_input_channels()); + output_config_ = + StreamConfig(msg.output_sample_rate(), msg.num_output_channels()); + reverse_config_ = + StreamConfig(msg.reverse_sample_rate(), msg.num_reverse_channels()); + + MaybeResetBuffer(&input_, input_config_); + MaybeResetBuffer(&output_, output_config_); + MaybeResetBuffer(&reverse_, reverse_config_); +} + +// OnStreamEvent replays an input signal and verifies the output. +void DebugDumpTest::OnStreamEvent(const audioproc::Stream& msg) { + // APM should have been created. 
+ ASSERT_TRUE(apm_.get()); + + EXPECT_NOERR(apm_->gain_control()->set_stream_analog_level(msg.level())); + EXPECT_NOERR(apm_->set_stream_delay_ms(msg.delay())); + apm_->echo_cancellation()->set_stream_drift_samples(msg.drift()); + if (msg.has_keypress()) + apm_->set_stream_key_pressed(msg.keypress()); + else + apm_->set_stream_key_pressed(true); + + ASSERT_EQ(input_config_.num_channels(), + static_cast<size_t>(msg.input_channel_size())); + ASSERT_EQ(input_config_.num_frames() * sizeof(float), + msg.input_channel(0).size()); + + for (int i = 0; i < msg.input_channel_size(); ++i) { + memcpy(input_->channels()[i], msg.input_channel(i).data(), + msg.input_channel(i).size()); + } + + ASSERT_EQ(AudioProcessing::kNoError, + apm_->ProcessStream(input_->channels(), input_config_, + output_config_, output_->channels())); + + // Check that output of APM is bit-exact to the output in the dump. + ASSERT_EQ(output_config_.num_channels(), + static_cast<size_t>(msg.output_channel_size())); + ASSERT_EQ(output_config_.num_frames() * sizeof(float), + msg.output_channel(0).size()); + for (int i = 0; i < msg.output_channel_size(); ++i) { + ASSERT_EQ(0, memcmp(output_->channels()[i], msg.output_channel(i).data(), + msg.output_channel(i).size())); + } +} + +void DebugDumpTest::OnReverseStreamEvent(const audioproc::ReverseStream& msg) { + // APM should have been created. 
+ ASSERT_TRUE(apm_.get()); + + ASSERT_GT(msg.channel_size(), 0); + ASSERT_EQ(reverse_config_.num_channels(), + static_cast<size_t>(msg.channel_size())); + ASSERT_EQ(reverse_config_.num_frames() * sizeof(float), + msg.channel(0).size()); + + for (int i = 0; i < msg.channel_size(); ++i) { + memcpy(reverse_->channels()[i], msg.channel(i).data(), + msg.channel(i).size()); + } + + ASSERT_EQ(AudioProcessing::kNoError, + apm_->ProcessReverseStream(reverse_->channels(), + reverse_config_, + reverse_config_, + reverse_->channels())); +} + +void DebugDumpTest::OnConfigEvent(const audioproc::Config& msg) { + MaybeRecreateApm(msg); + ConfigureApm(msg); +} + +void DebugDumpTest::MaybeRecreateApm(const audioproc::Config& msg) { + // These configurations cannot be changed on the fly. + Config config; + ASSERT_TRUE(msg.has_aec_delay_agnostic_enabled()); + config.Set<DelayAgnostic>( + new DelayAgnostic(msg.aec_delay_agnostic_enabled())); + + ASSERT_TRUE(msg.has_noise_robust_agc_enabled()); + config.Set<ExperimentalAgc>( + new ExperimentalAgc(msg.noise_robust_agc_enabled())); + + ASSERT_TRUE(msg.has_transient_suppression_enabled()); + config.Set<ExperimentalNs>( + new ExperimentalNs(msg.transient_suppression_enabled())); + + ASSERT_TRUE(msg.has_aec_extended_filter_enabled()); + config.Set<ExtendedFilter>(new ExtendedFilter( + msg.aec_extended_filter_enabled())); + + // We only create APM once, since changes on these fields should not + // happen in current implementation. + if (!apm_.get()) { + apm_.reset(AudioProcessing::Create(config)); + } +} + +void DebugDumpTest::ConfigureApm(const audioproc::Config& msg) { + // AEC configs. 
+ ASSERT_TRUE(msg.has_aec_enabled()); + EXPECT_EQ(AudioProcessing::kNoError, + apm_->echo_cancellation()->Enable(msg.aec_enabled())); + + ASSERT_TRUE(msg.has_aec_drift_compensation_enabled()); + EXPECT_EQ(AudioProcessing::kNoError, + apm_->echo_cancellation()->enable_drift_compensation( + msg.aec_drift_compensation_enabled())); + + ASSERT_TRUE(msg.has_aec_suppression_level()); + EXPECT_EQ(AudioProcessing::kNoError, + apm_->echo_cancellation()->set_suppression_level( + static_cast<EchoCancellation::SuppressionLevel>( + msg.aec_suppression_level()))); + + // AECM configs. + ASSERT_TRUE(msg.has_aecm_enabled()); + EXPECT_EQ(AudioProcessing::kNoError, + apm_->echo_control_mobile()->Enable(msg.aecm_enabled())); + + ASSERT_TRUE(msg.has_aecm_comfort_noise_enabled()); + EXPECT_EQ(AudioProcessing::kNoError, + apm_->echo_control_mobile()->enable_comfort_noise( + msg.aecm_comfort_noise_enabled())); + + ASSERT_TRUE(msg.has_aecm_routing_mode()); + EXPECT_EQ(AudioProcessing::kNoError, + apm_->echo_control_mobile()->set_routing_mode( + static_cast<EchoControlMobile::RoutingMode>( + msg.aecm_routing_mode()))); + + // AGC configs. + ASSERT_TRUE(msg.has_agc_enabled()); + EXPECT_EQ(AudioProcessing::kNoError, + apm_->gain_control()->Enable(msg.agc_enabled())); + + ASSERT_TRUE(msg.has_agc_mode()); + EXPECT_EQ(AudioProcessing::kNoError, + apm_->gain_control()->set_mode( + static_cast<GainControl::Mode>(msg.agc_mode()))); + + ASSERT_TRUE(msg.has_agc_limiter_enabled()); + EXPECT_EQ(AudioProcessing::kNoError, + apm_->gain_control()->enable_limiter(msg.agc_limiter_enabled())); + + // HPF configs. + ASSERT_TRUE(msg.has_hpf_enabled()); + EXPECT_EQ(AudioProcessing::kNoError, + apm_->high_pass_filter()->Enable(msg.hpf_enabled())); + + // NS configs. 
+ ASSERT_TRUE(msg.has_ns_enabled()); + EXPECT_EQ(AudioProcessing::kNoError, + apm_->noise_suppression()->Enable(msg.ns_enabled())); + + ASSERT_TRUE(msg.has_ns_level()); + EXPECT_EQ(AudioProcessing::kNoError, + apm_->noise_suppression()->set_level( + static_cast<NoiseSuppression::Level>(msg.ns_level()))); +} + +TEST_F(DebugDumpTest, SimpleCase) { + Config config; + DebugDumpGenerator generator(config); + generator.StartRecording(); + generator.Process(100); + generator.StopRecording(); + VerifyDebugDump(generator.dump_file_name()); +} + +TEST_F(DebugDumpTest, ChangeInputFormat) { + Config config; + DebugDumpGenerator generator(config); + generator.StartRecording(); + generator.Process(100); + generator.SetInputRate(48000); + + generator.ForceInputMono(true); + // Number of output channel should not be larger than that of input. APM will + // fail otherwise. + generator.SetOutputChannels(1); + + generator.Process(100); + generator.StopRecording(); + VerifyDebugDump(generator.dump_file_name()); +} + +TEST_F(DebugDumpTest, ChangeReverseFormat) { + Config config; + DebugDumpGenerator generator(config); + generator.StartRecording(); + generator.Process(100); + generator.SetReverseRate(48000); + generator.ForceReverseMono(true); + generator.Process(100); + generator.StopRecording(); + VerifyDebugDump(generator.dump_file_name()); +} + +TEST_F(DebugDumpTest, ChangeOutputFormat) { + Config config; + DebugDumpGenerator generator(config); + generator.StartRecording(); + generator.Process(100); + generator.SetOutputRate(48000); + generator.SetOutputChannels(1); + generator.Process(100); + generator.StopRecording(); + VerifyDebugDump(generator.dump_file_name()); +} + +TEST_F(DebugDumpTest, ToggleAec) { + Config config; + DebugDumpGenerator generator(config); + generator.StartRecording(); + generator.Process(100); + + EchoCancellation* aec = generator.apm()->echo_cancellation(); + EXPECT_EQ(AudioProcessing::kNoError, aec->Enable(!aec->is_enabled())); + + generator.Process(100); + 
generator.StopRecording(); + VerifyDebugDump(generator.dump_file_name()); +} + +TEST_F(DebugDumpTest, ToggleDelayAgnosticAec) { + Config config; + config.Set<DelayAgnostic>(new DelayAgnostic(true)); + DebugDumpGenerator generator(config); + generator.StartRecording(); + generator.Process(100); + + EchoCancellation* aec = generator.apm()->echo_cancellation(); + EXPECT_EQ(AudioProcessing::kNoError, aec->Enable(!aec->is_enabled())); + + generator.Process(100); + generator.StopRecording(); + VerifyDebugDump(generator.dump_file_name()); +} + +TEST_F(DebugDumpTest, ToggleAecLevel) { + Config config; + DebugDumpGenerator generator(config); + EchoCancellation* aec = generator.apm()->echo_cancellation(); + EXPECT_EQ(AudioProcessing::kNoError, aec->Enable(true)); + EXPECT_EQ(AudioProcessing::kNoError, + aec->set_suppression_level(EchoCancellation::kLowSuppression)); + generator.StartRecording(); + generator.Process(100); + + EXPECT_EQ(AudioProcessing::kNoError, + aec->set_suppression_level(EchoCancellation::kHighSuppression)); + generator.Process(100); + generator.StopRecording(); + VerifyDebugDump(generator.dump_file_name()); +} + +#if defined(WEBRTC_ANDROID) +// AGC may not be supported on Android. 
+#define MAYBE_ToggleAgc DISABLED_ToggleAgc +#else +#define MAYBE_ToggleAgc ToggleAgc +#endif +TEST_F(DebugDumpTest, MAYBE_ToggleAgc) { + Config config; + DebugDumpGenerator generator(config); + generator.StartRecording(); + generator.Process(100); + + GainControl* agc = generator.apm()->gain_control(); + EXPECT_EQ(AudioProcessing::kNoError, agc->Enable(!agc->is_enabled())); + + generator.Process(100); + generator.StopRecording(); + VerifyDebugDump(generator.dump_file_name()); +} + +TEST_F(DebugDumpTest, ToggleNs) { + Config config; + DebugDumpGenerator generator(config); + generator.StartRecording(); + generator.Process(100); + + NoiseSuppression* ns = generator.apm()->noise_suppression(); + EXPECT_EQ(AudioProcessing::kNoError, ns->Enable(!ns->is_enabled())); + + generator.Process(100); + generator.StopRecording(); + VerifyDebugDump(generator.dump_file_name()); +} + +TEST_F(DebugDumpTest, TransientSuppressionOn) { + Config config; + config.Set<ExperimentalNs>(new ExperimentalNs(true)); + DebugDumpGenerator generator(config); + generator.StartRecording(); + generator.Process(100); + generator.StopRecording(); + VerifyDebugDump(generator.dump_file_name()); +} + +} // namespace test +} // namespace webrtc diff --git a/webrtc/modules/audio_processing/test/process_test.cc b/webrtc/modules/audio_processing/test/process_test.cc index 43165404c8..6e20a787e7 100644 --- a/webrtc/modules/audio_processing/test/process_test.cc +++ b/webrtc/modules/audio_processing/test/process_test.cc @@ -17,12 +17,13 @@ #include <algorithm> +#include "webrtc/base/format_macros.h" #include "webrtc/base/scoped_ptr.h" #include "webrtc/common.h" #include "webrtc/modules/audio_processing/include/audio_processing.h" #include "webrtc/modules/audio_processing/test/protobuf_utils.h" #include "webrtc/modules/audio_processing/test/test_utils.h" -#include "webrtc/modules/interface/module_common_types.h" +#include "webrtc/modules/include/module_common_types.h" #include 
"webrtc/system_wrappers/include/cpu_features_wrapper.h" #include "webrtc/system_wrappers/include/tick_util.h" #include "webrtc/test/testsupport/fileutils.h" @@ -159,9 +160,9 @@ void void_main(int argc, char* argv[]) { int32_t sample_rate_hz = 16000; - int num_capture_input_channels = 1; - int num_capture_output_channels = 1; - int num_render_channels = 1; + size_t num_capture_input_channels = 1; + size_t num_capture_output_channels = 1; + size_t num_render_channels = 1; int samples_per_channel = sample_rate_hz / 100; @@ -207,14 +208,14 @@ void void_main(int argc, char* argv[]) { } else if (strcmp(argv[i], "-ch") == 0) { i++; ASSERT_LT(i + 1, argc) << "Specify number of channels after -ch"; - ASSERT_EQ(1, sscanf(argv[i], "%d", &num_capture_input_channels)); + ASSERT_EQ(1, sscanf(argv[i], "%" PRIuS, &num_capture_input_channels)); i++; - ASSERT_EQ(1, sscanf(argv[i], "%d", &num_capture_output_channels)); + ASSERT_EQ(1, sscanf(argv[i], "%" PRIuS, &num_capture_output_channels)); } else if (strcmp(argv[i], "-rch") == 0) { i++; ASSERT_LT(i, argc) << "Specify number of channels after -rch"; - ASSERT_EQ(1, sscanf(argv[i], "%d", &num_render_channels)); + ASSERT_EQ(1, sscanf(argv[i], "%" PRIuS, &num_render_channels)); } else if (strcmp(argv[i], "-aec") == 0) { ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(true)); @@ -447,10 +448,10 @@ void void_main(int argc, char* argv[]) { if (verbose) { printf("Sample rate: %d Hz\n", sample_rate_hz); - printf("Primary channels: %d (in), %d (out)\n", + printf("Primary channels: %" PRIuS " (in), %" PRIuS " (out)\n", num_capture_input_channels, num_capture_output_channels); - printf("Reverse channels: %d \n", num_render_channels); + printf("Reverse channels: %" PRIuS "\n", num_render_channels); } const std::string out_path = webrtc::test::OutputPath(); @@ -601,14 +602,18 @@ void void_main(int argc, char* argv[]) { if (msg.has_output_sample_rate()) { output_sample_rate = msg.output_sample_rate(); } - output_layout = 
LayoutFromChannels(msg.num_output_channels()); - ASSERT_EQ(kNoErr, apm->Initialize( - msg.sample_rate(), - output_sample_rate, - reverse_sample_rate, - LayoutFromChannels(msg.num_input_channels()), - output_layout, - LayoutFromChannels(msg.num_reverse_channels()))); + output_layout = + LayoutFromChannels(static_cast<size_t>(msg.num_output_channels())); + ASSERT_EQ(kNoErr, + apm->Initialize( + msg.sample_rate(), + output_sample_rate, + reverse_sample_rate, + LayoutFromChannels( + static_cast<size_t>(msg.num_input_channels())), + output_layout, + LayoutFromChannels( + static_cast<size_t>(msg.num_reverse_channels())))); samples_per_channel = msg.sample_rate() / 100; far_frame.sample_rate_hz_ = reverse_sample_rate; @@ -636,11 +641,11 @@ void void_main(int argc, char* argv[]) { } if (!raw_output) { - // The WAV file needs to be reset every time, because it cant change - // it's sample rate or number of channels. - output_wav_file.reset(new WavWriter(out_filename + ".wav", - output_sample_rate, - msg.num_output_channels())); + // The WAV file needs to be reset every time, because it can't change + // its sample rate or number of channels. + output_wav_file.reset(new WavWriter( + out_filename + ".wav", output_sample_rate, + static_cast<size_t>(msg.num_output_channels()))); } } else if (event_msg.type() == Event::REVERSE_STREAM) { @@ -1049,7 +1054,9 @@ void void_main(int argc, char* argv[]) { } } } - printf("100%% complete\r"); + if (progress) { + printf("100%% complete\r"); + } if (aecm_echo_path_out_file != NULL) { const size_t path_size = diff --git a/webrtc/modules/audio_processing/test/test_utils.cc b/webrtc/modules/audio_processing/test/test_utils.cc index 1b9ac3ce4c..0bd70126ae 100644 --- a/webrtc/modules/audio_processing/test/test_utils.cc +++ b/webrtc/modules/audio_processing/test/test_utils.cc @@ -8,6 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ +#include <utility> + #include "webrtc/base/checks.h" #include "webrtc/modules/audio_processing/test/test_utils.h" @@ -31,6 +33,35 @@ void RawFile::WriteSamples(const float* samples, size_t num_samples) { fwrite(samples, sizeof(*samples), num_samples, file_handle_); } +ChannelBufferWavReader::ChannelBufferWavReader(rtc::scoped_ptr<WavReader> file) + : file_(std::move(file)) {} + +bool ChannelBufferWavReader::Read(ChannelBuffer<float>* buffer) { + RTC_CHECK_EQ(file_->num_channels(), buffer->num_channels()); + interleaved_.resize(buffer->size()); + if (file_->ReadSamples(interleaved_.size(), &interleaved_[0]) != + interleaved_.size()) { + return false; + } + + FloatS16ToFloat(&interleaved_[0], interleaved_.size(), &interleaved_[0]); + Deinterleave(&interleaved_[0], buffer->num_frames(), buffer->num_channels(), + buffer->channels()); + return true; +} + +ChannelBufferWavWriter::ChannelBufferWavWriter(rtc::scoped_ptr<WavWriter> file) + : file_(std::move(file)) {} + +void ChannelBufferWavWriter::Write(const ChannelBuffer<float>& buffer) { + RTC_CHECK_EQ(file_->num_channels(), buffer.num_channels()); + interleaved_.resize(buffer.size()); + Interleave(buffer.channels(), buffer.num_frames(), buffer.num_channels(), + &interleaved_[0]); + FloatToFloatS16(&interleaved_[0], interleaved_.size(), &interleaved_[0]); + file_->WriteSamples(&interleaved_[0], interleaved_.size()); +} + void WriteIntData(const int16_t* data, size_t length, WavWriter* wav_file, @@ -44,8 +75,8 @@ void WriteIntData(const int16_t* data, } void WriteFloatData(const float* const* data, - int samples_per_channel, - int num_channels, + size_t samples_per_channel, + size_t num_channels, WavWriter* wav_file, RawFile* raw_file) { size_t length = num_channels * samples_per_channel; @@ -74,8 +105,8 @@ FILE* OpenFile(const std::string& filename, const char* mode) { return file; } -int SamplesFromRate(int rate) { - return AudioProcessing::kChunkSizeMs * rate / 1000; +size_t SamplesFromRate(int rate) { + return 
static_cast<size_t>(AudioProcessing::kChunkSizeMs * rate / 1000); } void SetFrameSampleRate(AudioFrame* frame, @@ -85,35 +116,39 @@ void SetFrameSampleRate(AudioFrame* frame, sample_rate_hz / 1000; } -AudioProcessing::ChannelLayout LayoutFromChannels(int num_channels) { +AudioProcessing::ChannelLayout LayoutFromChannels(size_t num_channels) { switch (num_channels) { case 1: return AudioProcessing::kMono; case 2: return AudioProcessing::kStereo; default: - assert(false); + RTC_CHECK(false); return AudioProcessing::kMono; } } -std::vector<Point> ParseArrayGeometry(const std::string& mic_positions, - size_t num_mics) { +std::vector<Point> ParseArrayGeometry(const std::string& mic_positions) { const std::vector<float> values = ParseList<float>(mic_positions); - RTC_CHECK_EQ(values.size(), 3 * num_mics) - << "Could not parse mic_positions or incorrect number of points."; + const size_t num_mics = + rtc::CheckedDivExact(values.size(), static_cast<size_t>(3)); + RTC_CHECK_GT(num_mics, 0u) << "mic_positions is not large enough."; std::vector<Point> result; result.reserve(num_mics); for (size_t i = 0; i < values.size(); i += 3) { - double x = values[i + 0]; - double y = values[i + 1]; - double z = values[i + 2]; - result.push_back(Point(x, y, z)); + result.push_back(Point(values[i + 0], values[i + 1], values[i + 2])); } return result; } +std::vector<Point> ParseArrayGeometry(const std::string& mic_positions, + size_t num_mics) { + std::vector<Point> result = ParseArrayGeometry(mic_positions); + RTC_CHECK_EQ(result.size(), num_mics) + << "Could not parse mic_positions or incorrect number of points."; + return result; +} } // namespace webrtc diff --git a/webrtc/modules/audio_processing/test/test_utils.h b/webrtc/modules/audio_processing/test/test_utils.h index 8dd380b15d..e23beb66f4 100644 --- a/webrtc/modules/audio_processing/test/test_utils.h +++ b/webrtc/modules/audio_processing/test/test_utils.h @@ -22,7 +22,7 @@ #include "webrtc/common_audio/channel_buffer.h" #include 
"webrtc/common_audio/wav_file.h" #include "webrtc/modules/audio_processing/include/audio_processing.h" -#include "webrtc/modules/interface/module_common_types.h" +#include "webrtc/modules/include/module_common_types.h" namespace webrtc { @@ -43,28 +43,57 @@ class RawFile final { RTC_DISALLOW_COPY_AND_ASSIGN(RawFile); }; +// Reads ChannelBuffers from a provided WavReader. +class ChannelBufferWavReader final { + public: + explicit ChannelBufferWavReader(rtc::scoped_ptr<WavReader> file); + + // Reads data from the file according to the |buffer| format. Returns false if + // a full buffer can't be read from the file. + bool Read(ChannelBuffer<float>* buffer); + + private: + rtc::scoped_ptr<WavReader> file_; + std::vector<float> interleaved_; + + RTC_DISALLOW_COPY_AND_ASSIGN(ChannelBufferWavReader); +}; + +// Writes ChannelBuffers to a provided WavWriter. +class ChannelBufferWavWriter final { + public: + explicit ChannelBufferWavWriter(rtc::scoped_ptr<WavWriter> file); + void Write(const ChannelBuffer<float>& buffer); + + private: + rtc::scoped_ptr<WavWriter> file_; + std::vector<float> interleaved_; + + RTC_DISALLOW_COPY_AND_ASSIGN(ChannelBufferWavWriter); +}; + void WriteIntData(const int16_t* data, size_t length, WavWriter* wav_file, RawFile* raw_file); void WriteFloatData(const float* const* data, - int samples_per_channel, - int num_channels, + size_t samples_per_channel, + size_t num_channels, WavWriter* wav_file, RawFile* raw_file); // Exits on failure; do not use in unit tests. 
FILE* OpenFile(const std::string& filename, const char* mode); -int SamplesFromRate(int rate); +size_t SamplesFromRate(int rate); void SetFrameSampleRate(AudioFrame* frame, int sample_rate_hz); template <typename T> void SetContainerFormat(int sample_rate_hz, - int num_channels, + size_t num_channels, AudioFrame* frame, rtc::scoped_ptr<ChannelBuffer<T> >* cb) { SetFrameSampleRate(frame, sample_rate_hz); @@ -72,14 +101,14 @@ void SetContainerFormat(int sample_rate_hz, cb->reset(new ChannelBuffer<T>(frame->samples_per_channel_, num_channels)); } -AudioProcessing::ChannelLayout LayoutFromChannels(int num_channels); +AudioProcessing::ChannelLayout LayoutFromChannels(size_t num_channels); template <typename T> -float ComputeSNR(const T* ref, const T* test, int length, float* variance) { +float ComputeSNR(const T* ref, const T* test, size_t length, float* variance) { float mse = 0; float mean = 0; *variance = 0; - for (int i = 0; i < length; ++i) { + for (size_t i = 0; i < length; ++i) { T error = ref[i] - test[i]; mse += error * error; *variance += ref[i] * ref[i]; @@ -118,6 +147,9 @@ std::vector<T> ParseList(const std::string& to_parse) { std::vector<Point> ParseArrayGeometry(const std::string& mic_positions, size_t num_mics); +// Same as above, but without the num_mics check for when it isn't available. 
+std::vector<Point> ParseArrayGeometry(const std::string& mic_positions); + } // namespace webrtc #endif // WEBRTC_MODULES_AUDIO_PROCESSING_TEST_TEST_UTILS_H_ diff --git a/webrtc/modules/audio_processing/test/unpack.cc b/webrtc/modules/audio_processing/test/unpack.cc index 24578e240c..8b2b082f97 100644 --- a/webrtc/modules/audio_processing/test/unpack.cc +++ b/webrtc/modules/audio_processing/test/unpack.cc @@ -17,6 +17,7 @@ #include "gflags/gflags.h" #include "webrtc/audio_processing/debug.pb.h" +#include "webrtc/base/format_macros.h" #include "webrtc/base/scoped_ptr.h" #include "webrtc/modules/audio_processing/test/protobuf_utils.h" #include "webrtc/modules/audio_processing/test/test_utils.h" @@ -76,12 +77,12 @@ int do_main(int argc, char* argv[]) { Event event_msg; int frame_count = 0; - int reverse_samples_per_channel = 0; - int input_samples_per_channel = 0; - int output_samples_per_channel = 0; - int num_reverse_channels = 0; - int num_input_channels = 0; - int num_output_channels = 0; + size_t reverse_samples_per_channel = 0; + size_t input_samples_per_channel = 0; + size_t output_samples_per_channel = 0; + size_t num_reverse_channels = 0; + size_t num_input_channels = 0; + size_t num_output_channels = 0; rtc::scoped_ptr<WavWriter> reverse_wav_file; rtc::scoped_ptr<WavWriter> input_wav_file; rtc::scoped_ptr<WavWriter> output_wav_file; @@ -117,7 +118,7 @@ int do_main(int argc, char* argv[]) { } rtc::scoped_ptr<const float* []> data( new const float* [num_reverse_channels]); - for (int i = 0; i < num_reverse_channels; ++i) { + for (size_t i = 0; i < num_reverse_channels; ++i) { data[i] = reinterpret_cast<const float*>(msg.channel(i).data()); } WriteFloatData(data.get(), @@ -148,7 +149,7 @@ int do_main(int argc, char* argv[]) { } rtc::scoped_ptr<const float* []> data( new const float* [num_input_channels]); - for (int i = 0; i < num_input_channels; ++i) { + for (size_t i = 0; i < num_input_channels; ++i) { data[i] = reinterpret_cast<const 
float*>(msg.input_channel(i).data()); } WriteFloatData(data.get(), @@ -172,7 +173,7 @@ int do_main(int argc, char* argv[]) { } rtc::scoped_ptr<const float* []> data( new const float* [num_output_channels]); - for (int i = 0; i < num_output_channels; ++i) { + for (size_t i = 0; i < num_output_channels; ++i) { data[i] = reinterpret_cast<const float*>(msg.output_channel(i).data()); } @@ -268,11 +269,14 @@ int do_main(int argc, char* argv[]) { " Reverse sample rate: %d\n", reverse_sample_rate); num_input_channels = msg.num_input_channels(); - fprintf(settings_file, " Input channels: %d\n", num_input_channels); + fprintf(settings_file, " Input channels: %" PRIuS "\n", + num_input_channels); num_output_channels = msg.num_output_channels(); - fprintf(settings_file, " Output channels: %d\n", num_output_channels); + fprintf(settings_file, " Output channels: %" PRIuS "\n", + num_output_channels); num_reverse_channels = msg.num_reverse_channels(); - fprintf(settings_file, " Reverse channels: %d\n", num_reverse_channels); + fprintf(settings_file, " Reverse channels: %" PRIuS "\n", + num_reverse_channels); fprintf(settings_file, "\n"); @@ -283,9 +287,12 @@ int do_main(int argc, char* argv[]) { output_sample_rate = input_sample_rate; } - reverse_samples_per_channel = reverse_sample_rate / 100; - input_samples_per_channel = input_sample_rate / 100; - output_samples_per_channel = output_sample_rate / 100; + reverse_samples_per_channel = + static_cast<size_t>(reverse_sample_rate / 100); + input_samples_per_channel = + static_cast<size_t>(input_sample_rate / 100); + output_samples_per_channel = + static_cast<size_t>(output_sample_rate / 100); if (!FLAGS_raw) { // The WAV files need to be reset every time, because they cant change diff --git a/webrtc/modules/audio_processing/transient/file_utils_unittest.cc b/webrtc/modules/audio_processing/transient/file_utils_unittest.cc index 7a035d2b41..7fb7d2d6a9 100644 --- a/webrtc/modules/audio_processing/transient/file_utils_unittest.cc +++ 
b/webrtc/modules/audio_processing/transient/file_utils_unittest.cc @@ -17,7 +17,6 @@ #include "webrtc/base/scoped_ptr.h" #include "webrtc/system_wrappers/include/file_wrapper.h" #include "webrtc/test/testsupport/fileutils.h" -#include "webrtc/test/testsupport/gtest_disable.h" #include "webrtc/typedefs.h" namespace webrtc { @@ -59,7 +58,12 @@ class TransientFileUtilsTest: public ::testing::Test { const std::string kTestFileNamef; }; -TEST_F(TransientFileUtilsTest, DISABLED_ON_IOS(ConvertByteArrayToFloat)) { +#if defined(WEBRTC_IOS) +#define MAYBE_ConvertByteArrayToFloat DISABLED_ConvertByteArrayToFloat +#else +#define MAYBE_ConvertByteArrayToFloat ConvertByteArrayToFloat +#endif +TEST_F(TransientFileUtilsTest, MAYBE_ConvertByteArrayToFloat) { float value = 0.0; EXPECT_EQ(0, ConvertByteArrayToFloat(kPiBytesf, &value)); @@ -72,7 +76,12 @@ TEST_F(TransientFileUtilsTest, DISABLED_ON_IOS(ConvertByteArrayToFloat)) { EXPECT_FLOAT_EQ(kAvogadro, value); } -TEST_F(TransientFileUtilsTest, DISABLED_ON_IOS(ConvertByteArrayToDouble)) { +#if defined(WEBRTC_IOS) +#define MAYBE_ConvertByteArrayToDouble DISABLED_ConvertByteArrayToDouble +#else +#define MAYBE_ConvertByteArrayToDouble ConvertByteArrayToDouble +#endif +TEST_F(TransientFileUtilsTest, MAYBE_ConvertByteArrayToDouble) { double value = 0.0; EXPECT_EQ(0, ConvertByteArrayToDouble(kPiBytes, &value)); @@ -85,7 +94,12 @@ TEST_F(TransientFileUtilsTest, DISABLED_ON_IOS(ConvertByteArrayToDouble)) { EXPECT_DOUBLE_EQ(kAvogadro, value); } -TEST_F(TransientFileUtilsTest, DISABLED_ON_IOS(ConvertFloatToByteArray)) { +#if defined(WEBRTC_IOS) +#define MAYBE_ConvertFloatToByteArray DISABLED_ConvertFloatToByteArray +#else +#define MAYBE_ConvertFloatToByteArray ConvertFloatToByteArray +#endif +TEST_F(TransientFileUtilsTest, MAYBE_ConvertFloatToByteArray) { rtc::scoped_ptr<uint8_t[]> bytes(new uint8_t[4]); EXPECT_EQ(0, ConvertFloatToByteArray(kPi, bytes.get())); @@ -98,7 +112,12 @@ TEST_F(TransientFileUtilsTest, 
DISABLED_ON_IOS(ConvertFloatToByteArray)) { EXPECT_EQ(0, memcmp(bytes.get(), kAvogadroBytesf, 4)); } -TEST_F(TransientFileUtilsTest, DISABLED_ON_IOS(ConvertDoubleToByteArray)) { +#if defined(WEBRTC_IOS) +#define MAYBE_ConvertDoubleToByteArray DISABLED_ConvertDoubleToByteArray +#else +#define MAYBE_ConvertDoubleToByteArray ConvertDoubleToByteArray +#endif +TEST_F(TransientFileUtilsTest, MAYBE_ConvertDoubleToByteArray) { rtc::scoped_ptr<uint8_t[]> bytes(new uint8_t[8]); EXPECT_EQ(0, ConvertDoubleToByteArray(kPi, bytes.get())); @@ -111,7 +130,12 @@ TEST_F(TransientFileUtilsTest, DISABLED_ON_IOS(ConvertDoubleToByteArray)) { EXPECT_EQ(0, memcmp(bytes.get(), kAvogadroBytes, 8)); } -TEST_F(TransientFileUtilsTest, DISABLED_ON_IOS(ReadInt16BufferFromFile)) { +#if defined(WEBRTC_IOS) +#define MAYBE_ReadInt16BufferFromFile DISABLED_ReadInt16BufferFromFile +#else +#define MAYBE_ReadInt16BufferFromFile ReadInt16BufferFromFile +#endif +TEST_F(TransientFileUtilsTest, MAYBE_ReadInt16BufferFromFile) { std::string test_filename = kTestFileName; rtc::scoped_ptr<FileWrapper> file(FileWrapper::Create()); @@ -149,8 +173,13 @@ TEST_F(TransientFileUtilsTest, DISABLED_ON_IOS(ReadInt16BufferFromFile)) { EXPECT_EQ(17631, buffer[kBufferLength - 1]); } -TEST_F(TransientFileUtilsTest, - DISABLED_ON_IOS(ReadInt16FromFileToFloatBuffer)) { +#if defined(WEBRTC_IOS) +#define MAYBE_ReadInt16FromFileToFloatBuffer \ + DISABLED_ReadInt16FromFileToFloatBuffer +#else +#define MAYBE_ReadInt16FromFileToFloatBuffer ReadInt16FromFileToFloatBuffer +#endif +TEST_F(TransientFileUtilsTest, MAYBE_ReadInt16FromFileToFloatBuffer) { std::string test_filename = kTestFileName; rtc::scoped_ptr<FileWrapper> file(FileWrapper::Create()); @@ -191,8 +220,13 @@ TEST_F(TransientFileUtilsTest, EXPECT_DOUBLE_EQ(17631, buffer[kBufferLength - 1]); } -TEST_F(TransientFileUtilsTest, - DISABLED_ON_IOS(ReadInt16FromFileToDoubleBuffer)) { +#if defined(WEBRTC_IOS) +#define MAYBE_ReadInt16FromFileToDoubleBuffer \ + 
DISABLED_ReadInt16FromFileToDoubleBuffer +#else +#define MAYBE_ReadInt16FromFileToDoubleBuffer ReadInt16FromFileToDoubleBuffer +#endif +TEST_F(TransientFileUtilsTest, MAYBE_ReadInt16FromFileToDoubleBuffer) { std::string test_filename = kTestFileName; rtc::scoped_ptr<FileWrapper> file(FileWrapper::Create()); @@ -232,7 +266,12 @@ TEST_F(TransientFileUtilsTest, EXPECT_DOUBLE_EQ(17631, buffer[kBufferLength - 1]); } -TEST_F(TransientFileUtilsTest, DISABLED_ON_IOS(ReadFloatBufferFromFile)) { +#if defined(WEBRTC_IOS) +#define MAYBE_ReadFloatBufferFromFile DISABLED_ReadFloatBufferFromFile +#else +#define MAYBE_ReadFloatBufferFromFile ReadFloatBufferFromFile +#endif +TEST_F(TransientFileUtilsTest, MAYBE_ReadFloatBufferFromFile) { std::string test_filename = kTestFileNamef; rtc::scoped_ptr<FileWrapper> file(FileWrapper::Create()); @@ -269,7 +308,12 @@ TEST_F(TransientFileUtilsTest, DISABLED_ON_IOS(ReadFloatBufferFromFile)) { EXPECT_FLOAT_EQ(kAvogadro, buffer[2]); } -TEST_F(TransientFileUtilsTest, DISABLED_ON_IOS(ReadDoubleBufferFromFile)) { +#if defined(WEBRTC_IOS) +#define MAYBE_ReadDoubleBufferFromFile DISABLED_ReadDoubleBufferFromFile +#else +#define MAYBE_ReadDoubleBufferFromFile ReadDoubleBufferFromFile +#endif +TEST_F(TransientFileUtilsTest, MAYBE_ReadDoubleBufferFromFile) { std::string test_filename = kTestFileName; rtc::scoped_ptr<FileWrapper> file(FileWrapper::Create()); @@ -306,7 +350,12 @@ TEST_F(TransientFileUtilsTest, DISABLED_ON_IOS(ReadDoubleBufferFromFile)) { EXPECT_DOUBLE_EQ(kAvogadro, buffer[2]); } -TEST_F(TransientFileUtilsTest, DISABLED_ON_IOS(WriteInt16BufferToFile)) { +#if defined(WEBRTC_IOS) +#define MAYBE_WriteInt16BufferToFile DISABLED_WriteInt16BufferToFile +#else +#define MAYBE_WriteInt16BufferToFile WriteInt16BufferToFile +#endif +TEST_F(TransientFileUtilsTest, MAYBE_WriteInt16BufferToFile) { rtc::scoped_ptr<FileWrapper> file(FileWrapper::Create()); std::string kOutFileName = test::TempFilename(test::OutputPath(), @@ -348,7 +397,12 @@ 
TEST_F(TransientFileUtilsTest, DISABLED_ON_IOS(WriteInt16BufferToFile)) { kBufferLength * sizeof(written_buffer[0]))); } -TEST_F(TransientFileUtilsTest, DISABLED_ON_IOS(WriteFloatBufferToFile)) { +#if defined(WEBRTC_IOS) +#define MAYBE_WriteFloatBufferToFile DISABLED_WriteFloatBufferToFile +#else +#define MAYBE_WriteFloatBufferToFile WriteFloatBufferToFile +#endif +TEST_F(TransientFileUtilsTest, MAYBE_WriteFloatBufferToFile) { rtc::scoped_ptr<FileWrapper> file(FileWrapper::Create()); std::string kOutFileName = test::TempFilename(test::OutputPath(), @@ -390,7 +444,12 @@ TEST_F(TransientFileUtilsTest, DISABLED_ON_IOS(WriteFloatBufferToFile)) { kBufferLength * sizeof(written_buffer[0]))); } -TEST_F(TransientFileUtilsTest, DISABLED_ON_IOS(WriteDoubleBufferToFile)) { +#if defined(WEBRTC_IOS) +#define MAYBE_WriteDoubleBufferToFile DISABLED_WriteDoubleBufferToFile +#else +#define MAYBE_WriteDoubleBufferToFile WriteDoubleBufferToFile +#endif +TEST_F(TransientFileUtilsTest, MAYBE_WriteDoubleBufferToFile) { rtc::scoped_ptr<FileWrapper> file(FileWrapper::Create()); std::string kOutFileName = test::TempFilename(test::OutputPath(), @@ -432,7 +491,12 @@ TEST_F(TransientFileUtilsTest, DISABLED_ON_IOS(WriteDoubleBufferToFile)) { kBufferLength * sizeof(written_buffer[0]))); } -TEST_F(TransientFileUtilsTest, DISABLED_ON_IOS(ExpectedErrorReturnValues)) { +#if defined(WEBRTC_IOS) +#define MAYBE_ExpectedErrorReturnValues DISABLED_ExpectedErrorReturnValues +#else +#define MAYBE_ExpectedErrorReturnValues ExpectedErrorReturnValues +#endif +TEST_F(TransientFileUtilsTest, MAYBE_ExpectedErrorReturnValues) { std::string test_filename = kTestFileName; double value; diff --git a/webrtc/modules/audio_processing/transient/transient_detector_unittest.cc b/webrtc/modules/audio_processing/transient/transient_detector_unittest.cc index 6a70a3f92c..b60077510b 100644 --- a/webrtc/modules/audio_processing/transient/transient_detector_unittest.cc +++ 
b/webrtc/modules/audio_processing/transient/transient_detector_unittest.cc @@ -19,8 +19,7 @@ #include "webrtc/modules/audio_processing/transient/file_utils.h" #include "webrtc/system_wrappers/include/file_wrapper.h" #include "webrtc/test/testsupport/fileutils.h" -#include "webrtc/test/testsupport/gtest_disable.h" - #include "webrtc/typedefs.h" +#include "webrtc/typedefs.h" namespace webrtc { @@ -37,7 +36,11 @@ static const size_t kNumberOfSampleRates = // The files contain all the results in double precision (Little endian). // The audio files used with different sample rates are stored in the same // directory. -TEST(TransientDetectorTest, DISABLED_ON_IOS(CorrectnessBasedOnFiles)) { +#if defined(WEBRTC_IOS) +TEST(TransientDetectorTest, DISABLED_CorrectnessBasedOnFiles) { +#else +TEST(TransientDetectorTest, CorrectnessBasedOnFiles) { +#endif for (size_t i = 0; i < kNumberOfSampleRates; ++i) { int sample_rate_hz = kSampleRatesHz[i]; diff --git a/webrtc/modules/audio_processing/transient/transient_suppression_test.cc b/webrtc/modules/audio_processing/transient/transient_suppression_test.cc index 506abaf203..b7b7595abf 100644 --- a/webrtc/modules/audio_processing/transient/transient_suppression_test.cc +++ b/webrtc/modules/audio_processing/transient/transient_suppression_test.cc @@ -19,7 +19,7 @@ #include "webrtc/base/scoped_ptr.h" #include "webrtc/common_audio/include/audio_util.h" #include "webrtc/modules/audio_processing/agc/agc.h" -#include "webrtc/modules/interface/module_common_types.h" +#include "webrtc/modules/include/module_common_types.h" #include "webrtc/test/testsupport/fileutils.h" #include "webrtc/typedefs.h" diff --git a/webrtc/modules/audio_processing/transient/wpd_tree_unittest.cc b/webrtc/modules/audio_processing/transient/wpd_tree_unittest.cc index 7c99f4f161..e4e9048f88 100644 --- a/webrtc/modules/audio_processing/transient/wpd_tree_unittest.cc +++ b/webrtc/modules/audio_processing/transient/wpd_tree_unittest.cc @@ -19,7 +19,6 @@ #include 
"webrtc/modules/audio_processing/transient/file_utils.h" #include "webrtc/system_wrappers/include/file_wrapper.h" #include "webrtc/test/testsupport/fileutils.h" -#include "webrtc/test/testsupport/gtest_disable.h" namespace webrtc { @@ -69,7 +68,11 @@ TEST(WPDTreeTest, Construction) { // It also writes the results in its own set of files in the out directory. // Matlab and output files contain all the results in double precision (Little // endian) appended. -TEST(WPDTreeTest, DISABLED_ON_IOS(CorrectnessBasedOnMatlabFiles)) { +#if defined(WEBRTC_IOS) +TEST(WPDTreeTest, DISABLED_CorrectnessBasedOnMatlabFiles) { +#else +TEST(WPDTreeTest, CorrectnessBasedOnMatlabFiles) { +#endif // 10 ms at 16000 Hz. const size_t kTestBufferSize = 160; const int kLevels = 3; diff --git a/webrtc/modules/audio_processing/typing_detection.h b/webrtc/modules/audio_processing/typing_detection.h index 5fa6456e9e..40608f885d 100644 --- a/webrtc/modules/audio_processing/typing_detection.h +++ b/webrtc/modules/audio_processing/typing_detection.h @@ -11,7 +11,7 @@ #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TYPING_DETECTION_H_ #define WEBRTC_MODULES_AUDIO_PROCESSING_TYPING_DETECTION_H_ -#include "webrtc/modules/interface/module_common_types.h" +#include "webrtc/modules/include/module_common_types.h" #include "webrtc/typedefs.h" namespace webrtc { diff --git a/webrtc/modules/audio_processing/vad/pitch_based_vad.cc b/webrtc/modules/audio_processing/vad/pitch_based_vad.cc index 39ec37e6ec..fce144de6b 100644 --- a/webrtc/modules/audio_processing/vad/pitch_based_vad.cc +++ b/webrtc/modules/audio_processing/vad/pitch_based_vad.cc @@ -18,7 +18,7 @@ #include "webrtc/modules/audio_processing/vad/common.h" #include "webrtc/modules/audio_processing/vad/noise_gmm_tables.h" #include "webrtc/modules/audio_processing/vad/voice_gmm_tables.h" -#include "webrtc/modules/interface/module_common_types.h" +#include "webrtc/modules/include/module_common_types.h" namespace webrtc { diff --git 
a/webrtc/modules/audio_processing/vad/standalone_vad.cc b/webrtc/modules/audio_processing/vad/standalone_vad.cc index 468b8ff3f0..1209526a92 100644 --- a/webrtc/modules/audio_processing/vad/standalone_vad.cc +++ b/webrtc/modules/audio_processing/vad/standalone_vad.cc @@ -12,8 +12,8 @@ #include <assert.h> -#include "webrtc/modules/interface/module_common_types.h" -#include "webrtc/modules/utility/interface/audio_frame_operations.h" +#include "webrtc/modules/include/module_common_types.h" +#include "webrtc/modules/utility/include/audio_frame_operations.h" #include "webrtc/typedefs.h" namespace webrtc { diff --git a/webrtc/modules/audio_processing/vad/standalone_vad_unittest.cc b/webrtc/modules/audio_processing/vad/standalone_vad_unittest.cc index 942008e733..1d1dcc7066 100644 --- a/webrtc/modules/audio_processing/vad/standalone_vad_unittest.cc +++ b/webrtc/modules/audio_processing/vad/standalone_vad_unittest.cc @@ -14,9 +14,8 @@ #include "testing/gtest/include/gtest/gtest.h" #include "webrtc/base/scoped_ptr.h" -#include "webrtc/modules/interface/module_common_types.h" +#include "webrtc/modules/include/module_common_types.h" #include "webrtc/test/testsupport/fileutils.h" -#include "webrtc/test/testsupport/gtest_disable.h" namespace webrtc { @@ -55,7 +54,11 @@ TEST(StandaloneVadTest, Api) { EXPECT_EQ(kMode, vad->mode()); } -TEST(StandaloneVadTest, DISABLED_ON_IOS(ActivityDetection)) { +#if defined(WEBRTC_IOS) +TEST(StandaloneVadTest, DISABLED_ActivityDetection) { +#else +TEST(StandaloneVadTest, ActivityDetection) { +#endif rtc::scoped_ptr<StandaloneVad> vad(StandaloneVad::Create()); const size_t kDataLength = kLength10Ms; int16_t data[kDataLength] = {0}; diff --git a/webrtc/modules/audio_processing/vad/vad_audio_proc.cc b/webrtc/modules/audio_processing/vad/vad_audio_proc.cc index 8535d1ff57..1a595597b6 100644 --- a/webrtc/modules/audio_processing/vad/vad_audio_proc.cc +++ b/webrtc/modules/audio_processing/vad/vad_audio_proc.cc @@ -23,7 +23,7 @@ extern "C" { #include 
"webrtc/modules/audio_coding/codecs/isac/main/source/pitch_estimator.h" #include "webrtc/modules/audio_coding/codecs/isac/main/source/structs.h" } -#include "webrtc/modules/interface/module_common_types.h" +#include "webrtc/modules/include/module_common_types.h" namespace webrtc { diff --git a/webrtc/modules/audio_processing/vad/vad_audio_proc_unittest.cc b/webrtc/modules/audio_processing/vad/vad_audio_proc_unittest.cc index f509af476f..a8a4ead2e3 100644 --- a/webrtc/modules/audio_processing/vad/vad_audio_proc_unittest.cc +++ b/webrtc/modules/audio_processing/vad/vad_audio_proc_unittest.cc @@ -21,7 +21,7 @@ #include "testing/gtest/include/gtest/gtest.h" #include "webrtc/modules/audio_processing/vad/common.h" -#include "webrtc/modules/interface/module_common_types.h" +#include "webrtc/modules/include/module_common_types.h" #include "webrtc/test/testsupport/fileutils.h" namespace webrtc { diff --git a/webrtc/modules/audio_processing/vad/voice_activity_detector.cc b/webrtc/modules/audio_processing/vad/voice_activity_detector.cc index ef56a3574c..fc9d103918 100644 --- a/webrtc/modules/audio_processing/vad/voice_activity_detector.cc +++ b/webrtc/modules/audio_processing/vad/voice_activity_detector.cc @@ -18,7 +18,7 @@ namespace webrtc { namespace { const size_t kMaxLength = 320; -const int kNumChannels = 1; +const size_t kNumChannels = 1; const double kDefaultVoiceValue = 1.0; const double kNeutralProbability = 0.5; diff --git a/webrtc/modules/audio_processing/voice_detection_impl.cc b/webrtc/modules/audio_processing/voice_detection_impl.cc index 374189e709..22d218c371 100644 --- a/webrtc/modules/audio_processing/voice_detection_impl.cc +++ b/webrtc/modules/audio_processing/voice_detection_impl.cc @@ -10,61 +10,61 @@ #include "webrtc/modules/audio_processing/voice_detection_impl.h" -#include <assert.h> - #include "webrtc/common_audio/vad/include/webrtc_vad.h" #include "webrtc/modules/audio_processing/audio_buffer.h" -#include 
"webrtc/system_wrappers/include/critical_section_wrapper.h" namespace webrtc { - -typedef VadInst Handle; - -namespace { -int MapSetting(VoiceDetection::Likelihood likelihood) { - switch (likelihood) { - case VoiceDetection::kVeryLowLikelihood: - return 3; - case VoiceDetection::kLowLikelihood: - return 2; - case VoiceDetection::kModerateLikelihood: - return 1; - case VoiceDetection::kHighLikelihood: - return 0; +class VoiceDetectionImpl::Vad { + public: + Vad() { + state_ = WebRtcVad_Create(); + RTC_CHECK(state_); + int error = WebRtcVad_Init(state_); + RTC_DCHECK_EQ(0, error); + } + ~Vad() { + WebRtcVad_Free(state_); } - assert(false); - return -1; + VadInst* state() { return state_; } + private: + VadInst* state_ = nullptr; + RTC_DISALLOW_COPY_AND_ASSIGN(Vad); +}; + +VoiceDetectionImpl::VoiceDetectionImpl(rtc::CriticalSection* crit) + : crit_(crit) { + RTC_DCHECK(crit); } -} // namespace - -VoiceDetectionImpl::VoiceDetectionImpl(const AudioProcessing* apm, - CriticalSectionWrapper* crit) - : ProcessingComponent(), - apm_(apm), - crit_(crit), - stream_has_voice_(false), - using_external_vad_(false), - likelihood_(kLowLikelihood), - frame_size_ms_(10), - frame_size_samples_(0) {} VoiceDetectionImpl::~VoiceDetectionImpl() {} -int VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) { - if (!is_component_enabled()) { - return apm_->kNoError; +void VoiceDetectionImpl::Initialize(int sample_rate_hz) { + rtc::CritScope cs(crit_); + sample_rate_hz_ = sample_rate_hz; + rtc::scoped_ptr<Vad> new_vad; + if (enabled_) { + new_vad.reset(new Vad()); } + vad_.swap(new_vad); + using_external_vad_ = false; + frame_size_samples_ = + static_cast<size_t>(frame_size_ms_ * sample_rate_hz_) / 1000; + set_likelihood(likelihood_); +} +void VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) { + rtc::CritScope cs(crit_); + if (!enabled_) { + return; + } if (using_external_vad_) { using_external_vad_ = false; - return apm_->kNoError; + return; } - 
assert(audio->num_frames_per_band() <= 160); + RTC_DCHECK_GE(160u, audio->num_frames_per_band()); // TODO(ajm): concatenate data in frame buffer here. - - int vad_ret = WebRtcVad_Process(static_cast<Handle*>(handle(0)), - apm_->proc_split_sample_rate_hz(), + int vad_ret = WebRtcVad_Process(vad_->state(), sample_rate_hz_, audio->mixed_low_pass_data(), frame_size_samples_); if (vad_ret == 0) { @@ -74,103 +74,81 @@ int VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) { stream_has_voice_ = true; audio->set_activity(AudioFrame::kVadActive); } else { - return apm_->kUnspecifiedError; + RTC_NOTREACHED(); } - - return apm_->kNoError; } int VoiceDetectionImpl::Enable(bool enable) { - CriticalSectionScoped crit_scoped(crit_); - return EnableComponent(enable); + rtc::CritScope cs(crit_); + if (enabled_ != enable) { + enabled_ = enable; + Initialize(sample_rate_hz_); + } + return AudioProcessing::kNoError; } bool VoiceDetectionImpl::is_enabled() const { - return is_component_enabled(); + rtc::CritScope cs(crit_); + return enabled_; } int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) { + rtc::CritScope cs(crit_); using_external_vad_ = true; stream_has_voice_ = has_voice; - return apm_->kNoError; + return AudioProcessing::kNoError; } bool VoiceDetectionImpl::stream_has_voice() const { + rtc::CritScope cs(crit_); // TODO(ajm): enable this assertion? 
//assert(using_external_vad_ || is_component_enabled()); return stream_has_voice_; } int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) { - CriticalSectionScoped crit_scoped(crit_); - if (MapSetting(likelihood) == -1) { - return apm_->kBadParameterError; - } - + rtc::CritScope cs(crit_); likelihood_ = likelihood; - return Configure(); + if (enabled_) { + int mode = 2; + switch (likelihood) { + case VoiceDetection::kVeryLowLikelihood: + mode = 3; + break; + case VoiceDetection::kLowLikelihood: + mode = 2; + break; + case VoiceDetection::kModerateLikelihood: + mode = 1; + break; + case VoiceDetection::kHighLikelihood: + mode = 0; + break; + default: + RTC_NOTREACHED(); + break; + } + int error = WebRtcVad_set_mode(vad_->state(), mode); + RTC_DCHECK_EQ(0, error); + } + return AudioProcessing::kNoError; } VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const { + rtc::CritScope cs(crit_); return likelihood_; } int VoiceDetectionImpl::set_frame_size_ms(int size) { - CriticalSectionScoped crit_scoped(crit_); - assert(size == 10); // TODO(ajm): remove when supported. - if (size != 10 && - size != 20 && - size != 30) { - return apm_->kBadParameterError; - } - + rtc::CritScope cs(crit_); + RTC_DCHECK_EQ(10, size); // TODO(ajm): remove when supported. frame_size_ms_ = size; - - return Initialize(); + Initialize(sample_rate_hz_); + return AudioProcessing::kNoError; } int VoiceDetectionImpl::frame_size_ms() const { + rtc::CritScope cs(crit_); return frame_size_ms_; } - -int VoiceDetectionImpl::Initialize() { - int err = ProcessingComponent::Initialize(); - if (err != apm_->kNoError || !is_component_enabled()) { - return err; - } - - using_external_vad_ = false; - frame_size_samples_ = static_cast<size_t>( - frame_size_ms_ * apm_->proc_split_sample_rate_hz() / 1000); - // TODO(ajm): intialize frame buffer here. 
- - return apm_->kNoError; -} - -void* VoiceDetectionImpl::CreateHandle() const { - return WebRtcVad_Create(); -} - -void VoiceDetectionImpl::DestroyHandle(void* handle) const { - WebRtcVad_Free(static_cast<Handle*>(handle)); -} - -int VoiceDetectionImpl::InitializeHandle(void* handle) const { - return WebRtcVad_Init(static_cast<Handle*>(handle)); -} - -int VoiceDetectionImpl::ConfigureHandle(void* handle) const { - return WebRtcVad_set_mode(static_cast<Handle*>(handle), - MapSetting(likelihood_)); -} - -int VoiceDetectionImpl::num_handles_required() const { - return 1; -} - -int VoiceDetectionImpl::GetHandleError(void* handle) const { - // The VAD has no get_error() function. - assert(handle != NULL); - return apm_->kUnspecifiedError; -} } // namespace webrtc diff --git a/webrtc/modules/audio_processing/voice_detection_impl.h b/webrtc/modules/audio_processing/voice_detection_impl.h index b18808316e..0d6d8cf14a 100644 --- a/webrtc/modules/audio_processing/voice_detection_impl.h +++ b/webrtc/modules/audio_processing/voice_detection_impl.h @@ -11,31 +11,27 @@ #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VOICE_DETECTION_IMPL_H_ #define WEBRTC_MODULES_AUDIO_PROCESSING_VOICE_DETECTION_IMPL_H_ +#include "webrtc/base/constructormagic.h" +#include "webrtc/base/criticalsection.h" +#include "webrtc/base/scoped_ptr.h" #include "webrtc/modules/audio_processing/include/audio_processing.h" -#include "webrtc/modules/audio_processing/processing_component.h" namespace webrtc { class AudioBuffer; -class CriticalSectionWrapper; -class VoiceDetectionImpl : public VoiceDetection, - public ProcessingComponent { +class VoiceDetectionImpl : public VoiceDetection { public: - VoiceDetectionImpl(const AudioProcessing* apm, CriticalSectionWrapper* crit); - virtual ~VoiceDetectionImpl(); + explicit VoiceDetectionImpl(rtc::CriticalSection* crit); + ~VoiceDetectionImpl() override; - int ProcessCaptureAudio(AudioBuffer* audio); + // TODO(peah): Fold into ctor, once public API is removed. 
+ void Initialize(int sample_rate_hz); + void ProcessCaptureAudio(AudioBuffer* audio); // VoiceDetection implementation. - bool is_enabled() const override; - - // ProcessingComponent implementation. - int Initialize() override; - - private: - // VoiceDetection implementation. int Enable(bool enable) override; + bool is_enabled() const override; int set_stream_has_voice(bool has_voice) override; bool stream_has_voice() const override; int set_likelihood(Likelihood likelihood) override; @@ -43,21 +39,18 @@ class VoiceDetectionImpl : public VoiceDetection, int set_frame_size_ms(int size) override; int frame_size_ms() const override; - // ProcessingComponent implementation. - void* CreateHandle() const override; - int InitializeHandle(void* handle) const override; - int ConfigureHandle(void* handle) const override; - void DestroyHandle(void* handle) const override; - int num_handles_required() const override; - int GetHandleError(void* handle) const override; - - const AudioProcessing* apm_; - CriticalSectionWrapper* crit_; - bool stream_has_voice_; - bool using_external_vad_; - Likelihood likelihood_; - int frame_size_ms_; - size_t frame_size_samples_; + private: + class Vad; + rtc::CriticalSection* const crit_; + bool enabled_ GUARDED_BY(crit_) = false; + bool stream_has_voice_ GUARDED_BY(crit_) = false; + bool using_external_vad_ GUARDED_BY(crit_) = false; + Likelihood likelihood_ GUARDED_BY(crit_) = kLowLikelihood; + int frame_size_ms_ GUARDED_BY(crit_) = 10; + size_t frame_size_samples_ GUARDED_BY(crit_) = 0; + int sample_rate_hz_ GUARDED_BY(crit_) = 0; + rtc::scoped_ptr<Vad> vad_ GUARDED_BY(crit_); + RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(VoiceDetectionImpl); }; } // namespace webrtc |