diff options
Diffstat (limited to 'src/modules/audio_processing/agc/digital_agc.c')
-rw-r--r-- | src/modules/audio_processing/agc/digital_agc.c | 798 |
1 files changed, 798 insertions, 0 deletions
diff --git a/src/modules/audio_processing/agc/digital_agc.c b/src/modules/audio_processing/agc/digital_agc.c new file mode 100644 index 0000000000..3b4b39b9cc --- /dev/null +++ b/src/modules/audio_processing/agc/digital_agc.c @@ -0,0 +1,798 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* digital_agc.c + * + */ + +#include "digital_agc.h" + +#include <assert.h> +#include <string.h> +#ifdef AGC_DEBUG +#include <stdio.h> +#endif + +#include "gain_control.h" + +// To generate the gaintable, copy&paste the following lines to a Matlab window: +// MaxGain = 6; MinGain = 0; CompRatio = 3; Knee = 1; +// zeros = 0:31; lvl = 2.^(1-zeros); +// A = -10*log10(lvl) * (CompRatio - 1) / CompRatio; +// B = MaxGain - MinGain; +// gains = round(2^16*10.^(0.05 * (MinGain + B * ( log(exp(-Knee*A)+exp(-Knee*B)) - log(1+exp(-Knee*B)) ) / log(1/(1+exp(Knee*B)))))); +// fprintf(1, '\t%i, %i, %i, %i,\n', gains); +// % Matlab code for plotting the gain and input/output level characteristic (copy/paste the following 3 lines): +// in = 10*log10(lvl); out = 20*log10(gains/65536); +// subplot(121); plot(in, out); axis([-30, 0, -5, 20]); grid on; xlabel('Input (dB)'); ylabel('Gain (dB)'); +// subplot(122); plot(in, in+out); axis([-30, 0, -30, 5]); grid on; xlabel('Input (dB)'); ylabel('Output (dB)'); +// zoom on; + +// Generator table for y=log2(1+e^x) in Q8. +enum { kGenFuncTableSize = 128 }; +static const WebRtc_UWord16 kGenFuncTable[kGenFuncTableSize] = { + 256, 485, 786, 1126, 1484, 1849, 2217, 2586, + 2955, 3324, 3693, 4063, 4432, 4801, 5171, 5540, + 5909, 6279, 6648, 7017, 7387, 7756, 8125, 8495, + 8864, 9233, 9603, 9972, 10341, 10711, 11080, 11449, + 11819, 12188, 12557, 12927, 13296, 13665, 14035, 14404, + 14773, 15143, 15512, 15881, 16251, 16620, 16989, 17359, + 17728, 18097, 18466, 18836, 19205, 19574, 19944, 20313, + 20682, 21052, 21421, 21790, 22160, 22529, 22898, 23268, + 23637, 24006, 24376, 24745, 25114, 25484, 25853, 26222, + 26592, 26961, 27330, 27700, 28069, 28438, 28808, 29177, + 29546, 29916, 30285, 30654, 31024, 31393, 31762, 32132, + 32501, 32870, 33240, 33609, 33978, 34348, 34717, 35086, + 35456, 35825, 36194, 36564, 36933, 37302, 37672, 38041, + 38410, 38780, 39149, 39518, 39888, 40257, 40626, 40996, + 41365, 41734, 42104, 42473, 42842, 43212, 43581, 43950, + 44320, 44689, 45058, 45428, 45797, 46166, 46536, 46905 +}; + +static const WebRtc_Word16 kAvgDecayTime = 250; // frames; < 3000 + +WebRtc_Word32 WebRtcAgc_CalculateGainTable(WebRtc_Word32 *gainTable, // Q16 + WebRtc_Word16 digCompGaindB, // Q0 + WebRtc_Word16 targetLevelDbfs,// Q0 + WebRtc_UWord8 limiterEnable, + WebRtc_Word16 analogTarget) // Q0 +{ + // This function generates the compressor gain table used in the fixed digital part. + WebRtc_UWord32 tmpU32no1, tmpU32no2, absInLevel, logApprox; + WebRtc_Word32 inLevel, limiterLvl; + WebRtc_Word32 tmp32, tmp32no1, tmp32no2, numFIX, den, y32; + const WebRtc_UWord16 kLog10 = 54426; // log2(10) in Q14 + const WebRtc_UWord16 kLog10_2 = 49321; // 10*log10(2) in Q14 + const WebRtc_UWord16 kLogE_1 = 23637; // log2(e) in Q14 + WebRtc_UWord16 constMaxGain; + WebRtc_UWord16 tmpU16, intPart, fracPart; + const WebRtc_Word16 kCompRatio = 3; + const WebRtc_Word16 kSoftLimiterLeft = 1; + WebRtc_Word16 limiterOffset = 0; // Limiter offset + WebRtc_Word16 limiterIdx, limiterLvlX; + WebRtc_Word16 constLinApprox, zeroGainLvl, maxGain, diffGain; + WebRtc_Word16 i, tmp16, tmp16no1; + int zeros, zerosScale; + + // Constants +// kLogE_1 = 23637; // log2(e) in Q14 +// kLog10 = 54426; // log2(10) in Q14 +// kLog10_2 = 49321; // 10*log10(2) in Q14 + + // Calculate maximum digital gain and zero gain level + tmp32no1 = WEBRTC_SPL_MUL_16_16(digCompGaindB - analogTarget, kCompRatio - 1); + tmp16no1 = analogTarget - targetLevelDbfs; + tmp16no1 += WebRtcSpl_DivW32W16ResW16(tmp32no1 + (kCompRatio >> 1), kCompRatio); + maxGain = WEBRTC_SPL_MAX(tmp16no1, (analogTarget - targetLevelDbfs)); + tmp32no1 = WEBRTC_SPL_MUL_16_16(maxGain, kCompRatio); + zeroGainLvl = digCompGaindB; + zeroGainLvl -= WebRtcSpl_DivW32W16ResW16(tmp32no1 + ((kCompRatio - 1) >> 1), + kCompRatio - 1); + if ((digCompGaindB <= analogTarget) && (limiterEnable)) + { + zeroGainLvl += (analogTarget - digCompGaindB + kSoftLimiterLeft); + limiterOffset = 0; + } + + // Calculate the difference between maximum gain and gain at 0dB0v: + // diffGain = maxGain + (compRatio-1)*zeroGainLvl/compRatio + // = (compRatio-1)*digCompGaindB/compRatio + tmp32no1 = WEBRTC_SPL_MUL_16_16(digCompGaindB, kCompRatio - 1); + diffGain = WebRtcSpl_DivW32W16ResW16(tmp32no1 + (kCompRatio >> 1), kCompRatio); + if (diffGain < 0 || diffGain >= kGenFuncTableSize) + { + assert(0); + return -1; + } + + // Calculate the limiter level and index: + // limiterLvlX = analogTarget - limiterOffset + // limiterLvl = targetLevelDbfs + limiterOffset/compRatio + limiterLvlX = analogTarget - limiterOffset; + limiterIdx = 2 + + WebRtcSpl_DivW32W16ResW16(WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)limiterLvlX, 13), + WEBRTC_SPL_RSHIFT_U16(kLog10_2, 1)); + tmp16no1 = WebRtcSpl_DivW32W16ResW16(limiterOffset + (kCompRatio >> 1), kCompRatio); + limiterLvl = targetLevelDbfs + tmp16no1; + + // Calculate (through table lookup): + // constMaxGain = log2(1+2^(log2(e)*diffGain)); (in Q8) + constMaxGain = kGenFuncTable[diffGain]; // in Q8 + + // Calculate a parameter used to approximate the fractional part of 2^x with a + // piecewise linear function in Q14: + // constLinApprox = round(3/2*(4*(3-2*sqrt(2))/(log(2)^2)-0.5)*2^14); + constLinApprox = 22817; // in Q14 + + // Calculate a denominator used in the exponential part to convert from dB to linear scale: + // den = 20*constMaxGain (in Q8) + den = WEBRTC_SPL_MUL_16_U16(20, constMaxGain); // in Q8 + + for (i = 0; i < 32; i++) + { + // Calculate scaled input level (compressor): + // inLevel = fix((-constLog10_2*(compRatio-1)*(1-i)+fix(compRatio/2))/compRatio) + tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16(kCompRatio - 1, i - 1); // Q0 + tmp32 = WEBRTC_SPL_MUL_16_U16(tmp16, kLog10_2) + 1; // Q14 + inLevel = WebRtcSpl_DivW32W16(tmp32, kCompRatio); // Q14 + + // Calculate diffGain-inLevel, to map using the genFuncTable + inLevel = WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)diffGain, 14) - inLevel; // Q14 + + // Make calculations on abs(inLevel) and compensate for the sign afterwards. + absInLevel = (WebRtc_UWord32)WEBRTC_SPL_ABS_W32(inLevel); // Q14 + + // LUT with interpolation + intPart = (WebRtc_UWord16)WEBRTC_SPL_RSHIFT_U32(absInLevel, 14); + fracPart = (WebRtc_UWord16)(absInLevel & 0x00003FFF); // extract the fractional part + tmpU16 = kGenFuncTable[intPart + 1] - kGenFuncTable[intPart]; // Q8 + tmpU32no1 = WEBRTC_SPL_UMUL_16_16(tmpU16, fracPart); // Q22 + tmpU32no1 += WEBRTC_SPL_LSHIFT_U32((WebRtc_UWord32)kGenFuncTable[intPart], 14); // Q22 + logApprox = WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 8); // Q14 + // Compensate for negative exponent using the relation: + // log2(1 + 2^-x) = log2(1 + 2^x) - x + if (inLevel < 0) + { + zeros = WebRtcSpl_NormU32(absInLevel); + zerosScale = 0; + if (zeros < 15) + { + // Not enough space for multiplication + tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(absInLevel, 15 - zeros); // Q(zeros-1) + tmpU32no2 = WEBRTC_SPL_UMUL_32_16(tmpU32no2, kLogE_1); // Q(zeros+13) + if (zeros < 9) + { + tmpU32no1 = WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 9 - zeros); // Q(zeros+13) + zerosScale = 9 - zeros; + } else + { + tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(tmpU32no2, zeros - 9); // Q22 + } + } else + { + tmpU32no2 = WEBRTC_SPL_UMUL_32_16(absInLevel, kLogE_1); // Q28 + tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(tmpU32no2, 6); // Q22 + } + logApprox = 0; + if (tmpU32no2 < tmpU32no1) + { + logApprox = WEBRTC_SPL_RSHIFT_U32(tmpU32no1 - tmpU32no2, 8 - zerosScale); //Q14 + } + } + numFIX = WEBRTC_SPL_LSHIFT_W32(WEBRTC_SPL_MUL_16_U16(maxGain, constMaxGain), 6); // Q14 + numFIX -= WEBRTC_SPL_MUL_32_16((WebRtc_Word32)logApprox, diffGain); // Q14 + + // Calculate ratio + // Shift |numFIX| as much as possible. + // Ensure we avoid wrap-around in |den| as well. + if (numFIX > (den >> 8)) // |den| is Q8. + { + zeros = WebRtcSpl_NormW32(numFIX); + } else + { + zeros = WebRtcSpl_NormW32(den) + 8; + } + numFIX = WEBRTC_SPL_LSHIFT_W32(numFIX, zeros); // Q(14+zeros) + + // Shift den so we end up in Qy1 + tmp32no1 = WEBRTC_SPL_SHIFT_W32(den, zeros - 8); // Q(zeros) + if (numFIX < 0) + { + numFIX -= WEBRTC_SPL_RSHIFT_W32(tmp32no1, 1); + } else + { + numFIX += WEBRTC_SPL_RSHIFT_W32(tmp32no1, 1); + } + y32 = WEBRTC_SPL_DIV(numFIX, tmp32no1); // in Q14 + if (limiterEnable && (i < limiterIdx)) + { + tmp32 = WEBRTC_SPL_MUL_16_U16(i - 1, kLog10_2); // Q14 + tmp32 -= WEBRTC_SPL_LSHIFT_W32(limiterLvl, 14); // Q14 + y32 = WebRtcSpl_DivW32W16(tmp32 + 10, 20); + } + if (y32 > 39000) + { + tmp32 = WEBRTC_SPL_MUL(y32 >> 1, kLog10) + 4096; // in Q27 + tmp32 = WEBRTC_SPL_RSHIFT_W32(tmp32, 13); // in Q14 + } else + { + tmp32 = WEBRTC_SPL_MUL(y32, kLog10) + 8192; // in Q28 + tmp32 = WEBRTC_SPL_RSHIFT_W32(tmp32, 14); // in Q14 + } + tmp32 += WEBRTC_SPL_LSHIFT_W32(16, 14); // in Q14 (Make sure final output is in Q16) + + // Calculate power + if (tmp32 > 0) + { + intPart = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 14); + fracPart = (WebRtc_UWord16)(tmp32 & 0x00003FFF); // in Q14 + if (WEBRTC_SPL_RSHIFT_W32(fracPart, 13)) + { + tmp16 = WEBRTC_SPL_LSHIFT_W16(2, 14) - constLinApprox; + tmp32no2 = WEBRTC_SPL_LSHIFT_W32(1, 14) - fracPart; + tmp32no2 = WEBRTC_SPL_MUL_32_16(tmp32no2, tmp16); + tmp32no2 = WEBRTC_SPL_RSHIFT_W32(tmp32no2, 13); + tmp32no2 = WEBRTC_SPL_LSHIFT_W32(1, 14) - tmp32no2; + } else + { + tmp16 = constLinApprox - WEBRTC_SPL_LSHIFT_W16(1, 14); + tmp32no2 = WEBRTC_SPL_MUL_32_16(fracPart, tmp16); + tmp32no2 = WEBRTC_SPL_RSHIFT_W32(tmp32no2, 13); + } + fracPart = (WebRtc_UWord16)tmp32no2; + gainTable[i] = WEBRTC_SPL_LSHIFT_W32(1, intPart) + + WEBRTC_SPL_SHIFT_W32(fracPart, intPart - 14); + } else + { + gainTable[i] = 0; + } + } + + return 0; +} + +WebRtc_Word32 WebRtcAgc_InitDigital(DigitalAgc_t *stt, WebRtc_Word16 agcMode) +{ + + if (agcMode == kAgcModeFixedDigital) + { + // start at minimum to find correct gain faster + stt->capacitorSlow = 0; + } else + { + // start out with 0 dB gain + stt->capacitorSlow = 134217728; // (WebRtc_Word32)(0.125f * 32768.0f * 32768.0f); + } + stt->capacitorFast = 0; + stt->gain = 65536; + stt->gatePrevious = 0; + stt->agcMode = agcMode; +#ifdef AGC_DEBUG + stt->frameCounter = 0; +#endif + + // initialize VADs + WebRtcAgc_InitVad(&stt->vadNearend); + WebRtcAgc_InitVad(&stt->vadFarend); + + return 0; +} + +WebRtc_Word32 WebRtcAgc_AddFarendToDigital(DigitalAgc_t *stt, const WebRtc_Word16 *in_far, + WebRtc_Word16 nrSamples) +{ + // Check for valid pointer + if (&stt->vadFarend == NULL) + { + return -1; + } + + // VAD for far end + WebRtcAgc_ProcessVad(&stt->vadFarend, in_far, nrSamples); + + return 0; +} + +WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *in_near, + const WebRtc_Word16 *in_near_H, WebRtc_Word16 *out, + WebRtc_Word16 *out_H, WebRtc_UWord32 FS, + WebRtc_Word16 lowlevelSignal) +{ + // array for gains (one value per ms, incl start & end) + WebRtc_Word32 gains[11]; + + WebRtc_Word32 out_tmp, tmp32; + WebRtc_Word32 env[10]; + WebRtc_Word32 nrg, max_nrg; + WebRtc_Word32 cur_level; + WebRtc_Word32 gain32, delta; + WebRtc_Word16 logratio; + WebRtc_Word16 lower_thr, upper_thr; + WebRtc_Word16 zeros, zeros_fast, frac; + WebRtc_Word16 decay; + WebRtc_Word16 gate, gain_adj; + WebRtc_Word16 k, n; + WebRtc_Word16 L, L2; // samples/subframe + + // determine number of samples per ms + if (FS == 8000) + { + L = 8; + L2 = 3; + } else if (FS == 16000) + { + L = 16; + L2 = 4; + } else if (FS == 32000) + { + L = 16; + L2 = 4; + } else + { + return -1; + } + + // TODO(andrew): again, we don't need input and output pointers... + if (in_near != out) + { + // Only needed if they don't already point to the same place. + memcpy(out, in_near, 10 * L * sizeof(WebRtc_Word16)); + } + if (FS == 32000) + { + if (in_near_H != out_H) + { + memcpy(out_H, in_near_H, 10 * L * sizeof(WebRtc_Word16)); + } + } + // VAD for near end + logratio = WebRtcAgc_ProcessVad(&stt->vadNearend, out, L * 10); + + // Account for far end VAD + if (stt->vadFarend.counter > 10) + { + tmp32 = WEBRTC_SPL_MUL_16_16(3, logratio); + logratio = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32 - stt->vadFarend.logRatio, 2); + } + + // Determine decay factor depending on VAD + // upper_thr = 1.0f; + // lower_thr = 0.25f; + upper_thr = 1024; // Q10 + lower_thr = 0; // Q10 + if (logratio > upper_thr) + { + // decay = -2^17 / DecayTime; -> -65 + decay = -65; + } else if (logratio < lower_thr) + { + decay = 0; + } else + { + // decay = (WebRtc_Word16)(((lower_thr - logratio) + // * (2^27/(DecayTime*(upper_thr-lower_thr)))) >> 10); + // SUBSTITUTED: 2^27/(DecayTime*(upper_thr-lower_thr)) -> 65 + tmp32 = WEBRTC_SPL_MUL_16_16((lower_thr - logratio), 65); + decay = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 10); + } + + // adjust decay factor for long silence (detected as low standard deviation) + // This is only done in the adaptive modes + if (stt->agcMode != kAgcModeFixedDigital) + { + if (stt->vadNearend.stdLongTerm < 4000) + { + decay = 0; + } else if (stt->vadNearend.stdLongTerm < 8096) + { + // decay = (WebRtc_Word16)(((stt->vadNearend.stdLongTerm - 4000) * decay) >> 12); + tmp32 = WEBRTC_SPL_MUL_16_16((stt->vadNearend.stdLongTerm - 4000), decay); + decay = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 12); + } + + if (lowlevelSignal != 0) + { + decay = 0; + } + } +#ifdef AGC_DEBUG + stt->frameCounter++; + fprintf(stt->logFile, "%5.2f\t%d\t%d\t%d\t", (float)(stt->frameCounter) / 100, logratio, decay, stt->vadNearend.stdLongTerm); +#endif + // Find max amplitude per sub frame + // iterate over sub frames + for (k = 0; k < 10; k++) + { + // iterate over samples + max_nrg = 0; + for (n = 0; n < L; n++) + { + nrg = WEBRTC_SPL_MUL_16_16(out[k * L + n], out[k * L + n]); + if (nrg > max_nrg) + { + max_nrg = nrg; + } + } + env[k] = max_nrg; + } + + // Calculate gain per sub frame + gains[0] = stt->gain; + for (k = 0; k < 10; k++) + { + // Fast envelope follower + // decay time = -131000 / -1000 = 131 (ms) + stt->capacitorFast = AGC_SCALEDIFF32(-1000, stt->capacitorFast, stt->capacitorFast); + if (env[k] > stt->capacitorFast) + { + stt->capacitorFast = env[k]; + } + // Slow envelope follower + if (env[k] > stt->capacitorSlow) + { + // increase capacitorSlow + stt->capacitorSlow + = AGC_SCALEDIFF32(500, (env[k] - stt->capacitorSlow), stt->capacitorSlow); + } else + { + // decrease capacitorSlow + stt->capacitorSlow + = AGC_SCALEDIFF32(decay, stt->capacitorSlow, stt->capacitorSlow); + } + + // use maximum of both capacitors as current level + if (stt->capacitorFast > stt->capacitorSlow) + { + cur_level = stt->capacitorFast; + } else + { + cur_level = stt->capacitorSlow; + } + // Translate signal level into gain, using a piecewise linear approximation + // find number of leading zeros + zeros = WebRtcSpl_NormU32((WebRtc_UWord32)cur_level); + if (cur_level == 0) + { + zeros = 31; + } + tmp32 = (WEBRTC_SPL_LSHIFT_W32(cur_level, zeros) & 0x7FFFFFFF); + frac = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 19); // Q12 + tmp32 = WEBRTC_SPL_MUL((stt->gainTable[zeros-1] - stt->gainTable[zeros]), frac); + gains[k + 1] = stt->gainTable[zeros] + WEBRTC_SPL_RSHIFT_W32(tmp32, 12); +#ifdef AGC_DEBUG + if (k == 0) + { + fprintf(stt->logFile, "%d\t%d\t%d\t%d\t%d\n", env[0], cur_level, stt->capacitorFast, stt->capacitorSlow, zeros); + } +#endif + } + + // Gate processing (lower gain during absence of speech) + zeros = WEBRTC_SPL_LSHIFT_W16(zeros, 9) - WEBRTC_SPL_RSHIFT_W16(frac, 3); + // find number of leading zeros + zeros_fast = WebRtcSpl_NormU32((WebRtc_UWord32)stt->capacitorFast); + if (stt->capacitorFast == 0) + { + zeros_fast = 31; + } + tmp32 = (WEBRTC_SPL_LSHIFT_W32(stt->capacitorFast, zeros_fast) & 0x7FFFFFFF); + zeros_fast = WEBRTC_SPL_LSHIFT_W16(zeros_fast, 9); + zeros_fast -= (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 22); + + gate = 1000 + zeros_fast - zeros - stt->vadNearend.stdShortTerm; + + if (gate < 0) + { + stt->gatePrevious = 0; + } else + { + tmp32 = WEBRTC_SPL_MUL_16_16(stt->gatePrevious, 7); + gate = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32((WebRtc_Word32)gate + tmp32, 3); + stt->gatePrevious = gate; + } + // gate < 0 -> no gate + // gate > 2500 -> max gate + if (gate > 0) + { + if (gate < 2500) + { + gain_adj = WEBRTC_SPL_RSHIFT_W16(2500 - gate, 5); + } else + { + gain_adj = 0; + } + for (k = 0; k < 10; k++) + { + if ((gains[k + 1] - stt->gainTable[0]) > 8388608) + { + // To prevent wraparound + tmp32 = WEBRTC_SPL_RSHIFT_W32((gains[k+1] - stt->gainTable[0]), 8); + tmp32 = WEBRTC_SPL_MUL(tmp32, (178 + gain_adj)); + } else + { + tmp32 = WEBRTC_SPL_MUL((gains[k+1] - stt->gainTable[0]), (178 + gain_adj)); + tmp32 = WEBRTC_SPL_RSHIFT_W32(tmp32, 8); + } + gains[k + 1] = stt->gainTable[0] + tmp32; + } + } + + // Limit gain to avoid overload distortion + for (k = 0; k < 10; k++) + { + // To prevent wrap around + zeros = 10; + if (gains[k + 1] > 47453132) + { + zeros = 16 - WebRtcSpl_NormW32(gains[k + 1]); + } + gain32 = WEBRTC_SPL_RSHIFT_W32(gains[k+1], zeros) + 1; + gain32 = WEBRTC_SPL_MUL(gain32, gain32); + // check for overflow + while (AGC_MUL32(WEBRTC_SPL_RSHIFT_W32(env[k], 12) + 1, gain32) + > WEBRTC_SPL_SHIFT_W32((WebRtc_Word32)32767, 2 * (1 - zeros + 10))) + { + // multiply by 253/256 ==> -0.1 dB + if (gains[k + 1] > 8388607) + { + // Prevent wrap around + gains[k + 1] = WEBRTC_SPL_MUL(WEBRTC_SPL_RSHIFT_W32(gains[k+1], 8), 253); + } else + { + gains[k + 1] = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL(gains[k+1], 253), 8); + } + gain32 = WEBRTC_SPL_RSHIFT_W32(gains[k+1], zeros) + 1; + gain32 = WEBRTC_SPL_MUL(gain32, gain32); + } + } + // gain reductions should be done 1 ms earlier than gain increases + for (k = 1; k < 10; k++) + { + if (gains[k] > gains[k + 1]) + { + gains[k] = gains[k + 1]; + } + } + // save start gain for next frame + stt->gain = gains[10]; + + // Apply gain + // handle first sub frame separately + delta = WEBRTC_SPL_LSHIFT_W32(gains[1] - gains[0], (4 - L2)); + gain32 = WEBRTC_SPL_LSHIFT_W32(gains[0], 4); + // iterate over samples + for (n = 0; n < L; n++) + { + // For lower band + tmp32 = WEBRTC_SPL_MUL((WebRtc_Word32)out[n], WEBRTC_SPL_RSHIFT_W32(gain32 + 127, 7)); + out_tmp = WEBRTC_SPL_RSHIFT_W32(tmp32 , 16); + if (out_tmp > 4095) + { + out[n] = (WebRtc_Word16)32767; + } else if (out_tmp < -4096) + { + out[n] = (WebRtc_Word16)-32768; + } else + { + tmp32 = WEBRTC_SPL_MUL((WebRtc_Word32)out[n], WEBRTC_SPL_RSHIFT_W32(gain32, 4)); + out[n] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32 , 16); + } + // For higher band + if (FS == 32000) + { + tmp32 = WEBRTC_SPL_MUL((WebRtc_Word32)out_H[n], + WEBRTC_SPL_RSHIFT_W32(gain32 + 127, 7)); + out_tmp = WEBRTC_SPL_RSHIFT_W32(tmp32 , 16); + if (out_tmp > 4095) + { + out_H[n] = (WebRtc_Word16)32767; + } else if (out_tmp < -4096) + { + out_H[n] = (WebRtc_Word16)-32768; + } else + { + tmp32 = WEBRTC_SPL_MUL((WebRtc_Word32)out_H[n], + WEBRTC_SPL_RSHIFT_W32(gain32, 4)); + out_H[n] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32 , 16); + } + } + // + + gain32 += delta; + } + // iterate over subframes + for (k = 1; k < 10; k++) + { + delta = WEBRTC_SPL_LSHIFT_W32(gains[k+1] - gains[k], (4 - L2)); + gain32 = WEBRTC_SPL_LSHIFT_W32(gains[k], 4); + // iterate over samples + for (n = 0; n < L; n++) + { + // For lower band + tmp32 = WEBRTC_SPL_MUL((WebRtc_Word32)out[k * L + n], + WEBRTC_SPL_RSHIFT_W32(gain32, 4)); + out[k * L + n] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32 , 16); + // For higher band + if (FS == 32000) + { + tmp32 = WEBRTC_SPL_MUL((WebRtc_Word32)out_H[k * L + n], + WEBRTC_SPL_RSHIFT_W32(gain32, 4)); + out_H[k * L + n] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32 , 16); + } + gain32 += delta; + } + } + + return 0; +} + +void WebRtcAgc_InitVad(AgcVad_t *state) +{ + WebRtc_Word16 k; + + state->HPstate = 0; // state of high pass filter + state->logRatio = 0; // log( P(active) / P(inactive) ) + // average input level (Q10) + state->meanLongTerm = WEBRTC_SPL_LSHIFT_W16(15, 10); + + // variance of input level (Q8) + state->varianceLongTerm = WEBRTC_SPL_LSHIFT_W32(500, 8); + + state->stdLongTerm = 0; // standard deviation of input level in dB + // short-term average input level (Q10) + state->meanShortTerm = WEBRTC_SPL_LSHIFT_W16(15, 10); + + // short-term variance of input level (Q8) + state->varianceShortTerm = WEBRTC_SPL_LSHIFT_W32(500, 8); + + state->stdShortTerm = 0; // short-term standard deviation of input level in dB + state->counter = 3; // counts updates + for (k = 0; k < 8; k++) + { + // downsampling filter + state->downState[k] = 0; + } +} + +WebRtc_Word16 WebRtcAgc_ProcessVad(AgcVad_t *state, // (i) VAD state + const WebRtc_Word16 *in, // (i) Speech signal + WebRtc_Word16 nrSamples) // (i) number of samples +{ + WebRtc_Word32 out, nrg, tmp32, tmp32b; + WebRtc_UWord16 tmpU16; + WebRtc_Word16 k, subfr, tmp16; + WebRtc_Word16 buf1[8]; + WebRtc_Word16 buf2[4]; + WebRtc_Word16 HPstate; + WebRtc_Word16 zeros, dB; + + // process in 10 sub frames of 1 ms (to save on memory) + nrg = 0; + HPstate = state->HPstate; + for (subfr = 0; subfr < 10; subfr++) + { + // downsample to 4 kHz + if (nrSamples == 160) + { + for (k = 0; k < 8; k++) + { + tmp32 = (WebRtc_Word32)in[2 * k] + (WebRtc_Word32)in[2 * k + 1]; + tmp32 = WEBRTC_SPL_RSHIFT_W32(tmp32, 1); + buf1[k] = (WebRtc_Word16)tmp32; + } + in += 16; + + WebRtcSpl_DownsampleBy2(buf1, 8, buf2, state->downState); + } else + { + WebRtcSpl_DownsampleBy2(in, 8, buf2, state->downState); + in += 8; + } + + // high pass filter and compute energy + for (k = 0; k < 4; k++) + { + out = buf2[k] + HPstate; + tmp32 = WEBRTC_SPL_MUL(600, out); + HPstate = (WebRtc_Word16)(WEBRTC_SPL_RSHIFT_W32(tmp32, 10) - buf2[k]); + tmp32 = WEBRTC_SPL_MUL(out, out); + nrg += WEBRTC_SPL_RSHIFT_W32(tmp32, 6); + } + } + state->HPstate = HPstate; + + // find number of leading zeros + if (!(0xFFFF0000 & nrg)) + { + zeros = 16; + } else + { + zeros = 0; + } + if (!(0xFF000000 & (nrg << zeros))) + { + zeros += 8; + } + if (!(0xF0000000 & (nrg << zeros))) + { + zeros += 4; + } + if (!(0xC0000000 & (nrg << zeros))) + { + zeros += 2; + } + if (!(0x80000000 & (nrg << zeros))) + { + zeros += 1; + } + + // energy level (range {-32..30}) (Q10) + dB = WEBRTC_SPL_LSHIFT_W16(15 - zeros, 11); + + // Update statistics + + if (state->counter < kAvgDecayTime) + { + // decay time = AvgDecTime * 10 ms + state->counter++; + } + + // update short-term estimate of mean energy level (Q10) + tmp32 = (WEBRTC_SPL_MUL_16_16(state->meanShortTerm, 15) + (WebRtc_Word32)dB); + state->meanShortTerm = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 4); + + // update short-term estimate of variance in energy level (Q8) + tmp32 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_16_16(dB, dB), 12); + tmp32 += WEBRTC_SPL_MUL(state->varianceShortTerm, 15); + state->varianceShortTerm = WEBRTC_SPL_RSHIFT_W32(tmp32, 4); + + // update short-term estimate of standard deviation in energy level (Q10) + tmp32 = WEBRTC_SPL_MUL_16_16(state->meanShortTerm, state->meanShortTerm); + tmp32 = WEBRTC_SPL_LSHIFT_W32(state->varianceShortTerm, 12) - tmp32; + state->stdShortTerm = (WebRtc_Word16)WebRtcSpl_Sqrt(tmp32); + + // update long-term estimate of mean energy level (Q10) + tmp32 = WEBRTC_SPL_MUL_16_16(state->meanLongTerm, state->counter) + (WebRtc_Word32)dB; + state->meanLongTerm = WebRtcSpl_DivW32W16ResW16(tmp32, + WEBRTC_SPL_ADD_SAT_W16(state->counter, 1)); + + // update long-term estimate of variance in energy level (Q8) + tmp32 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_16_16(dB, dB), 12); + tmp32 += WEBRTC_SPL_MUL(state->varianceLongTerm, state->counter); + state->varianceLongTerm = WebRtcSpl_DivW32W16(tmp32, + WEBRTC_SPL_ADD_SAT_W16(state->counter, 1)); + + // update long-term estimate of standard deviation in energy level (Q10) + tmp32 = WEBRTC_SPL_MUL_16_16(state->meanLongTerm, state->meanLongTerm); + tmp32 = WEBRTC_SPL_LSHIFT_W32(state->varianceLongTerm, 12) - tmp32; + state->stdLongTerm = (WebRtc_Word16)WebRtcSpl_Sqrt(tmp32); + + // update voice activity measure (Q10) + tmp16 = WEBRTC_SPL_LSHIFT_W16(3, 12); + tmp32 = WEBRTC_SPL_MUL_16_16(tmp16, (dB - state->meanLongTerm)); + tmp32 = WebRtcSpl_DivW32W16(tmp32, state->stdLongTerm); + tmpU16 = WEBRTC_SPL_LSHIFT_U16((WebRtc_UWord16)13, 12); + tmp32b = WEBRTC_SPL_MUL_16_U16(state->logRatio, tmpU16); + tmp32 += WEBRTC_SPL_RSHIFT_W32(tmp32b, 10); + + state->logRatio = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 6); + + // limit + if (state->logRatio > 2048) + { + state->logRatio = 2048; + } + if (state->logRatio < -2048) + { + state->logRatio = -2048; + } + + return state->logRatio; // Q10 +} |