diff options
author | Eric Laurent <elaurent@google.com> | 2011-06-16 21:50:24 -0700 |
---|---|---|
committer | Eric Laurent <elaurent@google.com> | 2011-07-12 19:18:53 -0700 |
commit | e48d5845c8b35de2ab73ea055c18a61fa3a9f0be (patch) | |
tree | 94666ca7cea55ee1772adc6b15f083e57fc20b4f /src/modules/audio_processing | |
parent | 81fb7e291baf261ed747baf4539e97a01a417125 (diff) | |
download | webrtc-e48d5845c8b35de2ab73ea055c18a61fa3a9f0be.tar.gz |
Added webrtc audio processing library
Only the modules necessary for audio processing have been imported:
src/common_audio/
src/modules/audio_processing/
src/modules/interface/
src/system_wrappers/
src/typedefs.h
src/common_types.h
Android.mk
android-webrtc.mk
Android.mk and android-webrtc.mk have been modified to build only the
audio processing modules.
Files for Windows compatibility have been removed from system_wrappers.
fft_ARM9E directory has been removed from
src/common_audio/signal_processing_library/main/source/
Fixed x86 build.
SVN checkout at working revision 180.
Change-Id: If650f61d96557be8247b17eb4f4d32b7a6ba025d
Diffstat (limited to 'src/modules/audio_processing')
102 files changed, 29947 insertions, 0 deletions
diff --git a/src/modules/audio_processing/OWNERS b/src/modules/audio_processing/OWNERS new file mode 100644 index 0000000000..aecf56ed33 --- /dev/null +++ b/src/modules/audio_processing/OWNERS @@ -0,0 +1,2 @@ +ajm@google.com +bjornv@google.com diff --git a/src/modules/audio_processing/aec/main/interface/echo_cancellation.h b/src/modules/audio_processing/aec/main/interface/echo_cancellation.h new file mode 100644 index 0000000000..883357da1e --- /dev/null +++ b/src/modules/audio_processing/aec/main/interface/echo_cancellation.h @@ -0,0 +1,260 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_INTERFACE_ECHO_CANCELLATION_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_INTERFACE_ECHO_CANCELLATION_H_ + +#include "typedefs.h" + +// Errors +#define AEC_UNSPECIFIED_ERROR 12000 +#define AEC_UNSUPPORTED_FUNCTION_ERROR 12001 +#define AEC_UNINITIALIZED_ERROR 12002 +#define AEC_NULL_POINTER_ERROR 12003 +#define AEC_BAD_PARAMETER_ERROR 12004 + +// Warnings +#define AEC_BAD_PARAMETER_WARNING 12050 + +enum { + kAecNlpConservative = 0, + kAecNlpModerate, + kAecNlpAggressive +}; + +enum { + kAecFalse = 0, + kAecTrue +}; + +typedef struct { + WebRtc_Word16 nlpMode; // default kAecNlpModerate + WebRtc_Word16 skewMode; // default kAecFalse + WebRtc_Word16 metricsMode; // default kAecFalse + //float realSkew; +} AecConfig; + +typedef struct { + WebRtc_Word16 instant; + WebRtc_Word16 average; + WebRtc_Word16 max; + WebRtc_Word16 min; +} AecLevel; + +typedef struct { + AecLevel rerl; + AecLevel erl; + AecLevel erle; + AecLevel aNlp; +} AecMetrics; + +#ifdef __cplusplus 
+extern "C" { +#endif + +/* + * Allocates the memory needed by the AEC. The memory needs to be initialized + * separately using the WebRtcAec_Init() function. + * + * Inputs Description + * ------------------------------------------------------------------- + * void **aecInst Pointer to the AEC instance to be created + * and initialized + * + * Outputs Description + * ------------------------------------------------------------------- + * WebRtc_Word32 return 0: OK + * -1: error + */ +WebRtc_Word32 WebRtcAec_Create(void **aecInst); + +/* + * This function releases the memory allocated by WebRtcAec_Create(). + * + * Inputs Description + * ------------------------------------------------------------------- + * void *aecInst Pointer to the AEC instance + * + * Outputs Description + * ------------------------------------------------------------------- + * WebRtc_Word32 return 0: OK + * -1: error + */ +WebRtc_Word32 WebRtcAec_Free(void *aecInst); + +/* + * Initializes an AEC instance. + * + * Inputs Description + * ------------------------------------------------------------------- + * void *aecInst Pointer to the AEC instance + * WebRtc_Word32 sampFreq Sampling frequency of data + * WebRtc_Word32 scSampFreq Soundcard sampling frequency + * + * Outputs Description + * ------------------------------------------------------------------- + * WebRtc_Word32 return 0: OK + * -1: error + */ +WebRtc_Word32 WebRtcAec_Init(void *aecInst, + WebRtc_Word32 sampFreq, + WebRtc_Word32 scSampFreq); + +/* + * Inserts an 80 or 160 sample block of data into the farend buffer. 
+ * + * Inputs Description + * ------------------------------------------------------------------- + * void *aecInst Pointer to the AEC instance + * WebRtc_Word16 *farend In buffer containing one frame of + * farend signal for L band + * WebRtc_Word16 nrOfSamples Number of samples in farend buffer + * + * Outputs Description + * ------------------------------------------------------------------- + * WebRtc_Word32 return 0: OK + * -1: error + */ +WebRtc_Word32 WebRtcAec_BufferFarend(void *aecInst, + const WebRtc_Word16 *farend, + WebRtc_Word16 nrOfSamples); + +/* + * Runs the echo canceller on an 80 or 160 sample block of data. + * + * Inputs Description + * ------------------------------------------------------------------- + * void *aecInst Pointer to the AEC instance + * WebRtc_Word16 *nearend In buffer containing one frame of + * nearend+echo signal for L band + * WebRtc_Word16 *nearendH In buffer containing one frame of + * nearend+echo signal for H band + * WebRtc_Word16 nrOfSamples Number of samples in nearend buffer + * WebRtc_Word16 msInSndCardBuf Delay estimate for sound card and + * system buffers + * WebRtc_Word32 skew Difference between number of samples played + * and recorded at the soundcard (for clock skew + * compensation) + * + * Outputs Description + * ------------------------------------------------------------------- + * WebRtc_Word16 *out Out buffer, one frame of processed nearend + * for L band + * WebRtc_Word16 *outH Out buffer, one frame of processed nearend + * for H band + * WebRtc_Word32 return 0: OK + * -1: error + */ +WebRtc_Word32 WebRtcAec_Process(void *aecInst, + const WebRtc_Word16 *nearend, + const WebRtc_Word16 *nearendH, + WebRtc_Word16 *out, + WebRtc_Word16 *outH, + WebRtc_Word16 nrOfSamples, + WebRtc_Word16 msInSndCardBuf, + WebRtc_Word32 skew); + +/* + * This function enables the user to set certain parameters on-the-fly. 
+ * + * Inputs Description + * ------------------------------------------------------------------- + * void *aecInst Pointer to the AEC instance + * AecConfig config Config instance that contains all + * properties to be set + * + * Outputs Description + * ------------------------------------------------------------------- + * WebRtc_Word32 return 0: OK + * -1: error + */ +WebRtc_Word32 WebRtcAec_set_config(void *aecInst, AecConfig config); + +/* + * Gets the on-the-fly parameters. + * + * Inputs Description + * ------------------------------------------------------------------- + * void *aecInst Pointer to the AEC instance + * + * Outputs Description + * ------------------------------------------------------------------- + * AecConfig *config Pointer to the config instance that + * all properties will be written to + * WebRtc_Word32 return 0: OK + * -1: error + */ +WebRtc_Word32 WebRtcAec_get_config(void *aecInst, AecConfig *config); + +/* + * Gets the current echo status of the nearend signal. + * + * Inputs Description + * ------------------------------------------------------------------- + * void *aecInst Pointer to the AEC instance + * + * Outputs Description + * ------------------------------------------------------------------- + * WebRtc_Word16 *status 0: Almost certainly nearend single-talk + * 1: Might not be nearend single-talk + * WebRtc_Word32 return 0: OK + * -1: error + */ +WebRtc_Word32 WebRtcAec_get_echo_status(void *aecInst, WebRtc_Word16 *status); + +/* + * Gets the current echo metrics for the session. + * + * Inputs Description + * ------------------------------------------------------------------- + * void *aecInst Pointer to the AEC instance + * + * Outputs Description + * ------------------------------------------------------------------- + * AecMetrics *metrics Struct which will be filled out with the + * current echo metrics. 
+ * WebRtc_Word32 return 0: OK + * -1: error + */ +WebRtc_Word32 WebRtcAec_GetMetrics(void *aecInst, AecMetrics *metrics); + +/* + * Gets the last error code. + * + * Inputs Description + * ------------------------------------------------------------------- + * void *aecInst Pointer to the AEC instance + * + * Outputs Description + * ------------------------------------------------------------------- + * WebRtc_Word32 return 12000-12100: error code + */ +WebRtc_Word32 WebRtcAec_get_error_code(void *aecInst); + +/* + * Gets a version string. + * + * Inputs Description + * ------------------------------------------------------------------- + * char *versionStr Pointer to a string array + * WebRtc_Word16 len The maximum length of the string + * + * Outputs Description + * ------------------------------------------------------------------- + * WebRtc_Word8 *versionStr Pointer to a string array + * WebRtc_Word32 return 0: OK + * -1: error + */ +WebRtc_Word32 WebRtcAec_get_version(WebRtc_Word8 *versionStr, WebRtc_Word16 len); + +#ifdef __cplusplus +} +#endif +#endif /* WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_INTERFACE_ECHO_CANCELLATION_H_ */ diff --git a/src/modules/audio_processing/aec/main/matlab/fullaec.m b/src/modules/audio_processing/aec/main/matlab/fullaec.m new file mode 100644 index 0000000000..0f86a8c58d --- /dev/null +++ b/src/modules/audio_processing/aec/main/matlab/fullaec.m @@ -0,0 +1,953 @@ +% Partitioned block frequency domain adaptive filtering NLMS and +% standard time-domain sample-based NLMS +%fid=fopen('aecFar-samsung.pcm', 'rb'); % Load far end +fid=fopen('aecFar.pcm', 'rb'); % Load far end +%fid=fopen(farFile, 'rb'); % Load far end +rrin=fread(fid,inf,'int16'); +fclose(fid); +%rrin=loadsl('data/far_me2.pcm'); % Load far end +%fid=fopen('aecNear-samsung.pcm', 'rb'); % Load near end +fid=fopen('aecNear.pcm', 'rb'); % Load near end +%fid=fopen(nearFile, 'rb'); % Load near end +ssin=fread(fid,inf,'int16'); +%ssin = [zeros(1024,1) ; ssin(1:end-1024)]; + 
+fclose(fid); +rand('state',13); +fs=16000; +mult=fs/8000; +%rrin=rrin(fs*0+1:round(fs*120)); +%ssin=ssin(fs*0+1:round(fs*120)); +if fs == 8000 + cohRange = 2:3; +elseif fs==16000 + cohRange = 2; +end + +% Flags +NLPon=1; % NLP +CNon=1; % Comfort noise +PLTon=1; % Plotting + +M = 16; % Number of partitions +N = 64; % Partition length +L = M*N; % Filter length +if fs == 8000 + mufb = 0.6; +else + mufb = 0.5; +end +%mufb=1; +VADtd=48; +alp = 0.1; % Power estimation factor alc = 0.1; % Coherence estimation factor +beta = 0.9; % Plotting factor +%% Changed a little %% +step = 0.3;%0.1875; % Downward step size +%% +if fs == 8000 + threshold=2e-6; % DTrob threshold +else + %threshold=0.7e-6; + threshold=1.5e-6; end + +if fs == 8000 + echoBandRange = ceil(300*2/fs*N):floor(1800*2/fs*N); + %echoBandRange = ceil(1500*2/fs*N):floor(2500*2/fs*N); +else + echoBandRange = ceil(300*2/fs*N):floor(1800*2/fs*N); + %echoBandRange = ceil(300*2/fs*N):floor(1800*2/fs*N); +end +%echoBandRange = ceil(1600*2/fs*N):floor(1900*2/fs*N); +%echoBandRange = ceil(2000*2/fs*N):floor(4000*2/fs*N); +suppState = 1; +transCtr = 0; + +Nt=1; +vt=1; + +ramp = 1.0003; % Upward ramp +rampd = 0.999; % Downward ramp +cvt = 20; % Subband VAD threshold; +nnthres = 20; % Noise threshold + +shh=logspace(-1.3,-2.2,N+1)'; +sh=[shh;flipud(shh(2:end-1))]; % Suppression profile + +len=length(ssin); +w=zeros(L,1); % Sample-based TD NLMS +WFb=zeros(N+1,M); % Block-based FD NLMS +WFbOld=zeros(N+1,M); % Block-based FD NLMS +YFb=zeros(N+1,M); +erfb=zeros(len,1); +erfb3=zeros(len,1); + +ercn=zeros(len,1); +zm=zeros(N,1); +XFm=zeros(N+1,M); +YFm=zeros(N+1,M); +pn0=10*ones(N+1,1); +pn=zeros(N+1,1); +NN=len; +Nb=floor(NN/N)-M; +erifb=zeros(Nb+1,1)+0.1; +erifb3=zeros(Nb+1,1)+0.1; +ericn=zeros(Nb+1,1)+0.1; +dri=zeros(Nb+1,1)+0.1; +start=1; +xo=zeros(N,1); +do=xo; +eo=xo; + +echoBands=zeros(Nb+1,1); +cohxdAvg=zeros(Nb+1,1); +cohxdSlow=zeros(Nb+1,N+1); +cohedSlow=zeros(Nb+1,N+1); +%overdriveM=zeros(Nb+1,N+1); 
+cohxdFastAvg=zeros(Nb+1,1); +cohxdAvgBad=zeros(Nb+1,1); +cohedAvg=zeros(Nb+1,1); +cohedFastAvg=zeros(Nb+1,1); +hnledAvg=zeros(Nb+1,1); +hnlxdAvg=zeros(Nb+1,1); +ovrdV=zeros(Nb+1,1); +dIdxV=zeros(Nb+1,1); +SLxV=zeros(Nb+1,1); +hnlSortQV=zeros(Nb+1,1); +hnlPrefAvgV=zeros(Nb+1,1); +mutInfAvg=zeros(Nb+1,1); +%overdrive=zeros(Nb+1,1); +hnled = zeros(N+1, 1); +weight=zeros(N+1,1); +hnlMax = zeros(N+1, 1); +hnl = zeros(N+1, 1); +overdrive = ones(1, N+1); +xfwm=zeros(N+1,M); +dfm=zeros(N+1,M); +WFbD=ones(N+1,1); + +fbSupp = 0; +hnlLocalMin = 1; +cohxdLocalMin = 1; +hnlLocalMinV=zeros(Nb+1,1); +cohxdLocalMinV=zeros(Nb+1,1); +hnlMinV=zeros(Nb+1,1); +dkEnV=zeros(Nb+1,1); +ekEnV=zeros(Nb+1,1); +ovrd = 2; +ovrdPos = floor((N+1)/4); +ovrdSm = 2; +hnlMin = 1; +minCtr = 0; +SeMin = 0; +SdMin = 0; +SeLocalAvg = 0; +SeMinSm = 0; +divergeFact = 1; +dIdx = 1; +hnlMinCtr = 0; +hnlNewMin = 0; +divergeState = 0; + +Sy=ones(N+1,1); +Sym=1e7*ones(N+1,1); + +wins=[0;sqrt(hanning(2*N-1))]; +ubufn=zeros(2*N,1); +ebuf=zeros(2*N,1); +ebuf2=zeros(2*N,1); +ebuf4=zeros(2*N,1); +mbuf=zeros(2*N,1); + +cohedFast = zeros(N+1,1); +cohxdFast = zeros(N+1,1); +cohxd = zeros(N+1,1); +Se = zeros(N+1,1); +Sd = zeros(N+1,1); +Sx = zeros(N+1,1); +SxBad = zeros(N+1,1); +Sed = zeros(N+1,1); +Sxd = zeros(N+1,1); +SxdBad = zeros(N+1,1); +hnledp=[]; + +cohxdMax = 0; + +%hh=waitbar(0,'Please wait...'); +progressbar(0); + +%spaces = ' '; +%spaces = repmat(spaces, 50, 1); +%spaces = ['[' ; spaces ; ']']; +%fprintf(1, spaces); +%fprintf(1, '\n'); + +for kk=1:Nb + pos = N * (kk-1) + start; + + % FD block method + % ---------------------- Organize data + xk = rrin(pos:pos+N-1); + dk = ssin(pos:pos+N-1); + + xx = [xo;xk]; + xo = xk; + tmp = fft(xx); + XX = tmp(1:N+1); + + dd = [do;dk]; % Overlap + do = dk; + tmp = fft(dd); % Frequency domain + DD = tmp(1:N+1); + + % ------------------------ Power estimation + pn0 = (1 - alp) * pn0 + alp * real(XX.* conj(XX)); + pn = pn0; + %pn = (1 - alp) * pn + alp * M * pn0; + if 
(CNon) + Yp = real(conj(DD).*DD); % Instantaneous power + Sy = (1 - alp) * Sy + alp * Yp; % Averaged power + + mm = min(Sy,Sym); + diff = Sym - mm; + if (kk>50) + Sym = (mm + step*diff) * ramp; % Estimated background noise power + end + end + + % ---------------------- Filtering + XFm(:,1) = XX; + for mm=0:(M-1) + m=mm+1; + YFb(:,m) = XFm(:,m) .* WFb(:,m); + end + yfk = sum(YFb,2); + tmp = [yfk ; flipud(conj(yfk(2:N)))]; + ykt = real(ifft(tmp)); + ykfb = ykt(end-N+1:end); + + % ---------------------- Error estimation + ekfb = dk - ykfb; + %if sum(abs(ekfb)) < sum(abs(dk)) + %ekfb = dk - ykfb; + % erfb(pos:pos+N-1) = ekfb; + %else + %ekfb = dk; + % erfb(pos:pos+N-1) = dk; + %end + %(kk-1)*(N*2)+1 + erfb(pos:pos+N-1) = ekfb; + tmp = fft([zm;ekfb]); % FD version for cancelling part (overlap-save) + Ek = tmp(1:N+1); + + % ------------------------ Adaptation + Ek2 = Ek ./(M*pn + 0.001); % Normalized error + %Ek2 = Ek ./(pn + 0.001); % Normalized error + %Ek2 = Ek ./(100*pn + 0.001); % Normalized error + + absEf = max(abs(Ek2), threshold); + absEf = ones(N+1,1)*threshold./absEf; + Ek2 = Ek2.*absEf; + + mEk = mufb.*Ek2; + PP = conj(XFm).*(ones(M,1) * mEk')'; + tmp = [PP ; flipud(conj(PP(2:N,:)))]; + IFPP = real(ifft(tmp)); + PH = IFPP(1:N,:); + tmp = fft([PH;zeros(N,M)]); + FPH = tmp(1:N+1,:); + WFb = WFb + FPH; + + if mod(kk, 10*mult) == 0 + WFbEn = sum(real(WFb.*conj(WFb))); + %WFbEn = sum(abs(WFb)); + [tmp, dIdx] = max(WFbEn); + + WFbD = sum(abs(WFb(:, dIdx)),2); + %WFbD = WFbD / (mean(WFbD) + 1e-10); + WFbD = min(max(WFbD, 0.5), 4); + end + dIdxV(kk) = dIdx; + + % NLP + if (NLPon) + + ee = [eo;ekfb]; + eo = ekfb; + window = wins; + if fs == 8000 + %gamma = 0.88; + gamma = 0.9; + else + %gamma = 0.92; + gamma = 0.93; + end + %gamma = 0.9; + + tmp = fft(xx.*window); + xf = tmp(1:N+1); + tmp = fft(dd.*window); + df = tmp(1:N+1); + tmp = fft(ee.*window); + ef = tmp(1:N+1); + + xfwm(:,1) = xf; + xf = xfwm(:,dIdx); + %fprintf(1,'%d: %f\n', kk, xf(4)); + dfm(:,1) = df; + + 
SxOld = Sx; + + Se = gamma*Se + (1-gamma)*real(ef.*conj(ef)); + Sd = gamma*Sd + (1-gamma)*real(df.*conj(df)); + Sx = gamma*Sx + (1 - gamma)*real(xf.*conj(xf)); + + %xRatio = real(xfwm(:,1).*conj(xfwm(:,1))) ./ ... + % (real(xfwm(:,2).*conj(xfwm(:,2))) + 1e-10); + %xRatio = Sx ./ (SxOld + 1e-10); + %SLx = log(1/(N+1)*sum(xRatio)) - 1/(N+1)*sum(log(xRatio)); + %SLxV(kk) = SLx; + + %freqSm = 0.9; + %Sx = filter(freqSm, [1 -(1-freqSm)], Sx); + %Sx(end:1) = filter(freqSm, [1 -(1-freqSm)], Sx(end:1)); + %Se = filter(freqSm, [1 -(1-freqSm)], Se); + %Se(end:1) = filter(freqSm, [1 -(1-freqSm)], Se(end:1)); + %Sd = filter(freqSm, [1 -(1-freqSm)], Sd); + %Sd(end:1) = filter(freqSm, [1 -(1-freqSm)], Sd(end:1)); + + %SeFast = ef.*conj(ef); + %SdFast = df.*conj(df); + %SxFast = xf.*conj(xf); + %cohedFast = 0.9*cohedFast + 0.1*SeFast ./ (SdFast + 1e-10); + %cohedFast(find(cohedFast > 1)) = 1; + %cohedFast(find(cohedFast > 1)) = 1 ./ cohedFast(find(cohedFast>1)); + %cohedFastAvg(kk) = mean(cohedFast(echoBandRange)); + %cohedFastAvg(kk) = min(cohedFast); + + %cohxdFast = 0.8*cohxdFast + 0.2*log(SdFast ./ (SxFast + 1e-10)); + %cohxdFastAvg(kk) = mean(cohxdFast(echoBandRange)); + + % coherence + Sxd = gamma*Sxd + (1 - gamma)*xf.*conj(df); + Sed = gamma*Sed + (1-gamma)*ef.*conj(df); + + %Sxd = filter(freqSm, [1 -(1-freqSm)], Sxd); + %Sxd(end:1) = filter(freqSm, [1 -(1-freqSm)], Sxd(end:1)); + %Sed = filter(freqSm, [1 -(1-freqSm)], Sed); + %Sed(end:1) = filter(freqSm, [1 -(1-freqSm)], Sed(end:1)); + + cohed = real(Sed.*conj(Sed))./(Se.*Sd + 1e-10); + %cohedAvg(kk) = mean(cohed(echoBandRange)); + %cohedAvg(kk) = cohed(6); + %cohedAvg(kk) = min(cohed); + + cohxd = real(Sxd.*conj(Sxd))./(Sx.*Sd + 1e-10); + %freqSm = 0.5; + %cohxd(3:end) = filter(freqSm, [1 -(1-freqSm)], cohxd(3:end)); + %cohxd(end:3) = filter(freqSm, [1 -(1-freqSm)], cohxd(end:3)); + %cohxdAvg(kk) = mean(cohxd(echoBandRange)); + %cohxdAvg(kk) = (cohxd(32)); + %cohxdAvg(kk) = max(cohxd); + + %xf = xfm(:,dIdx); + %SxBad = 
gamma*SxBad + (1 - gamma)*real(xf.*conj(xf)); + %SxdBad = gamma*SxdBad + (1 - gamma)*xf.*conj(df); + %cohxdBad = real(SxdBad.*conj(SxdBad))./(SxBad.*Sd + 0.01); + %cohxdAvgBad(kk) = mean(cohxdBad); + + %for j=1:N+1 + % mutInf(j) = 0.9*mutInf(j) + 0.1*information(abs(xfm(j,:)), abs(dfm(j,:))); + %end + %mutInfAvg(kk) = mean(mutInf); + + %hnled = cohedFast; + %xIdx = find(cohxd > 1 - cohed); + %hnled(xIdx) = 1 - cohxd(xIdx); + %hnled = 1 - max(cohxd, 1-cohedFast); + hnled = min(1 - cohxd, cohed); + %hnled = 1 - cohxd; + %hnled = max(1 - (cohxd + (1-cohedFast)), 0); + %hnled = 1 - max(cohxd, 1-cohed); + + if kk > 1 + cohxdSlow(kk,:) = 0.99*cohxdSlow(kk-1,:) + 0.01*cohxd'; + cohedSlow(kk,:) = 0.99*cohedSlow(kk-1,:) + 0.01*(1-cohed)'; + end + + + if 0 + %if kk > 50 + %idx = find(hnled > 0.3); + hnlMax = hnlMax*0.9999; + %hnlMax(idx) = max(hnlMax(idx), hnled(idx)); + hnlMax = max(hnlMax, hnled); + %overdrive(idx) = max(log(hnlMax(idx))/log(0.99), 1); + avgHnl = mean(hnlMax(echoBandRange)); + if avgHnl > 0.3 + overdrive = max(log(avgHnl)/log(0.99), 1); + end + weight(4:end) = max(hnlMax) - hnlMax(4:end); + end + + + + %[hg, gidx] = max(hnled); + %fnrg = Sx(gidx) / (Sd(gidx) + 1e-10); + + %[tmp, bidx] = find((Sx / Sd + 1e-10) > fnrg); + %hnled(bidx) = hg; + + + %cohed1 = mean(cohed(cohRange)); % range depends on bandwidth + %cohed1 = cohed1^2; + %echoBands(kk) = length(find(cohed(echoBandRange) < 0.25))/length(echoBandRange); + + %if (fbSupp == 0) + % if (echoBands(kk) > 0.8) + % fbSupp = 1; + % end + %else + % if (echoBands(kk) < 0.6) + % fbSupp = 0; + % end + %end + %overdrive(kk) = 7.5*echoBands(kk) + 0.5; + + % Factor by which to weight other bands + %if (cohed1 < 0.1) + % w = 0.8 - cohed1*10*0.4; + %else + % w = 0.4; + %end + + % Weight coherence subbands + %hnled = w*cohed1 + (1 - w)*cohed; + %hnled = (hnled).^2; + %cohed(floor(N/2):end) = cohed(floor(N/2):end).^2; + %if fbSupp == 1 + % cohed = zeros(size(cohed)); + %end + %cohed = cohed.^overdrive(kk); + + %hnled = 
gamma*hnled + (1 - gamma)*cohed; + % Additional hf suppression + %hnledp = [hnledp ; mean(hnled)]; + %hnled(floor(N/2):end) = hnled(floor(N/2):end).^2; + %ef = ef.*((weight*(min(1 - hnled)).^2 + (1 - weight).*(1 - hnled)).^2); + + cohedMean = mean(cohed(echoBandRange)); + %aggrFact = 4*(1-mean(hnled(echoBandRange))) + 1; + %[hnlSort, hnlSortIdx] = sort(hnled(echoBandRange)); + [hnlSort, hnlSortIdx] = sort(1-cohxd(echoBandRange)); + [xSort, xSortIdx] = sort(Sx); + %aggrFact = (1-mean(hnled(echoBandRange))); + %hnlSortQ = hnlSort(qIdx); + hnlSortQ = mean(1 - cohxd(echoBandRange)); + %hnlSortQ = mean(1 - cohxd); + + [hnlSort2, hnlSortIdx2] = sort(hnled(echoBandRange)); + %[hnlSort2, hnlSortIdx2] = sort(hnled); + hnlQuant = 0.75; + hnlQuantLow = 0.5; + qIdx = floor(hnlQuant*length(hnlSort2)); + qIdxLow = floor(hnlQuantLow*length(hnlSort2)); + hnlPrefAvg = hnlSort2(qIdx); + hnlPrefAvgLow = hnlSort2(qIdxLow); + %hnlPrefAvgLow = mean(hnled); + %hnlPrefAvg = max(hnlSort2); + %hnlPrefAvgLow = min(hnlSort2); + + %hnlPref = hnled(echoBandRange); + %hnlPrefAvg = mean(hnlPref(xSortIdx((0.5*length(xSortIdx)):end))); + + %hnlPrefAvg = min(hnlPrefAvg, hnlSortQ); + + %hnlSortQIdx = hnlSortIdx(qIdx); + %SeQ = Se(qIdx + echoBandRange(1) - 1); + %SdQ = Sd(qIdx + echoBandRange(1) - 1); + %SeQ = Se(qIdxLow + echoBandRange(1) - 1); + %SdQ = Sd(qIdxLow + echoBandRange(1) - 1); + %propLow = length(find(hnlSort < 0.1))/length(hnlSort); + %aggrFact = min((1 - hnlSortQ)/2, 0.5); + %aggrTerm = 1/aggrFact; + + %hnlg = mean(hnled(echoBandRange)); + %hnlg = hnlSortQ; + %if suppState == 0 + % if hnlg < 0.05 + % suppState = 2; + % transCtr = 0; + % elseif hnlg < 0.75 + % suppState = 1; + % transCtr = 0; + % end + %elseif suppState == 1 + % if hnlg > 0.8 + % suppState = 0; + % transCtr = 0; + % elseif hnlg < 0.05 + % suppState = 2; + % transCtr = 0; + % end + %else + % if hnlg > 0.8 + % suppState = 0; + % transCtr = 0; + % elseif hnlg > 0.25 + % suppState = 1; + % transCtr = 0; + % end + %end + %if 
kk > 50 + + if cohedMean > 0.98 & hnlSortQ > 0.9 + %if suppState == 1 + % hnled = 0.5*hnled + 0.5*cohed; + % %hnlSortQ = 0.5*hnlSortQ + 0.5*cohedMean; + % hnlPrefAvg = 0.5*hnlPrefAvg + 0.5*cohedMean; + %else + % hnled = cohed; + % %hnlSortQ = cohedMean; + % hnlPrefAvg = cohedMean; + %end + suppState = 0; + elseif cohedMean < 0.95 | hnlSortQ < 0.8 + %if suppState == 0 + % hnled = 0.5*hnled + 0.5*cohed; + % %hnlSortQ = 0.5*hnlSortQ + 0.5*cohedMean; + % hnlPrefAvg = 0.5*hnlPrefAvg + 0.5*cohedMean; + %end + suppState = 1; + end + + if hnlSortQ < cohxdLocalMin & hnlSortQ < 0.75 + cohxdLocalMin = hnlSortQ; + end + + if cohxdLocalMin == 1 + ovrd = 3; + hnled = 1-cohxd; + hnlPrefAvg = hnlSortQ; + hnlPrefAvgLow = hnlSortQ; + end + + if suppState == 0 + hnled = cohed; + hnlPrefAvg = cohedMean; + hnlPrefAvgLow = cohedMean; + end + + %if hnlPrefAvg < hnlLocalMin & hnlPrefAvg < 0.6 + if hnlPrefAvgLow < hnlLocalMin & hnlPrefAvgLow < 0.6 + %hnlLocalMin = hnlPrefAvg; + %hnlMin = hnlPrefAvg; + hnlLocalMin = hnlPrefAvgLow; + hnlMin = hnlPrefAvgLow; + hnlNewMin = 1; + hnlMinCtr = 0; + %if hnlMinCtr == 0 + % hnlMinCtr = hnlMinCtr + 1; + %else + % hnlMinCtr = 0; + % hnlMin = hnlLocalMin; + %SeLocalMin = SeQ; + %SdLocalMin = SdQ; + %SeLocalAvg = 0; + %minCtr = 0; + % ovrd = max(log(0.0001)/log(hnlMin), 2); + %divergeFact = hnlLocalMin; + end + + if hnlNewMin == 1 + hnlMinCtr = hnlMinCtr + 1; + end + if hnlMinCtr == 2 + hnlNewMin = 0; + hnlMinCtr = 0; + %ovrd = max(log(0.0001)/log(hnlMin), 2); + ovrd = max(log(0.00001)/(log(hnlMin + 1e-10) + 1e-10), 3); + %ovrd = max(log(0.00000001)/(log(hnlMin + 1e-10) + 1e-10), 5); + %ovrd = max(log(0.0001)/log(hnlPrefAvg), 2); + %ovrd = max(log(0.001)/log(hnlMin), 2); + end + hnlLocalMin = min(hnlLocalMin + 0.0008/mult, 1); + cohxdLocalMin = min(cohxdLocalMin + 0.0004/mult, 1); + %divergeFact = hnlSortQ; + + + %if minCtr > 0 & hnlLocalMin < 1 + % hnlMin = hnlLocalMin; + % %SeMin = 0.9*SeMin + 0.1*sqrt(SeLocalMin); + % SdMin = sqrt(SdLocalMin); + % 
%SeMin = sqrt(SeLocalMin)*hnlSortQ; + % SeMin = sqrt(SeLocalMin); + % %ovrd = log(100/SeMin)/log(hnlSortQ); + % %ovrd = log(100/SeMin)/log(hnlSortQ); + % ovrd = log(0.01)/log(hnlMin); + % ovrd = max(ovrd, 2); + % ovrdPos = hnlSortQIdx; + % %ovrd = max(ovrd, 1); + % %SeMin = sqrt(SeLocalAvg/5); + % minCtr = 0; + %else + % %SeLocalMin = 0.9*SeLocalMin +0.1*SeQ; + % SeLocalAvg = SeLocalAvg + SeQ; + % minCtr = minCtr + 1; + %end + + if ovrd < ovrdSm + ovrdSm = 0.99*ovrdSm + 0.01*ovrd; + else + ovrdSm = 0.9*ovrdSm + 0.1*ovrd; + end + %end + + %ekEn = sum(real(ekfb.^2)); + %dkEn = sum(real(dk.^2)); + ekEn = sum(Se); + dkEn = sum(Sd); + + if divergeState == 0 + if ekEn > dkEn + ef = df; + divergeState = 1; + %hnlPrefAvg = hnlSortQ; + %hnled = (1 - cohxd); + end + else + %if ekEn*1.1 < dkEn + %if ekEn*1.26 < dkEn + if ekEn*1.05 < dkEn + divergeState = 0; + else + ef = df; + end + end + + if ekEn > dkEn*19.95 + WFb=zeros(N+1,M); % Block-based FD NLMS + end + + ekEnV(kk) = ekEn; + dkEnV(kk) = dkEn; + + hnlLocalMinV(kk) = hnlLocalMin; + cohxdLocalMinV(kk) = cohxdLocalMin; + hnlMinV(kk) = hnlMin; + %cohxdMaxLocal = max(cohxdSlow(kk,:)); + %if kk > 50 + %cohxdMaxLocal = 1-hnlSortQ; + %if cohxdMaxLocal > 0.5 + % %if cohxdMaxLocal > cohxdMax + % odScale = max(log(cohxdMaxLocal)/log(0.95), 1); + % %overdrive(7:end) = max(log(cohxdSlow(kk,7:end))/log(0.9), 1); + % cohxdMax = cohxdMaxLocal; + % end + %end + %end + %cohxdMax = cohxdMax*0.999; + + %overdriveM(kk,:) = max(overdrive, 1); + %aggrFact = 0.25; + aggrFact = 0.3; + %aggrFact = 0.5*propLow; + %if fs == 8000 + % wCurve = [0 ; 0 ; aggrFact*sqrt(linspace(0,1,N-1))' + 0.1]; + %else + % wCurve = [0; 0; 0; aggrFact*sqrt(linspace(0,1,N-2))' + 0.1]; + %end + wCurve = [0; aggrFact*sqrt(linspace(0,1,N))' + 0.1]; + % For sync with C + %if fs == 8000 + % wCurve = wCurve(2:end); + %else + % wCurve = wCurve(1:end-1); + %end + %weight = aggrFact*(sqrt(linspace(0,1,N+1)')); + %weight = aggrFact*wCurve; + weight = wCurve; + %weight = 
aggrFact*ones(N+1,1); + %weight = zeros(N+1,1); + %hnled = weight.*min(hnled) + (1 - weight).*hnled; + %hnled = weight.*min(mean(hnled(echoBandRange)), hnled) + (1 - weight).*hnled; + %hnled = weight.*min(hnlSortQ, hnled) + (1 - weight).*hnled; + + %hnlSortQV(kk) = mean(hnled); + %hnlPrefAvgV(kk) = mean(hnled(echoBandRange)); + + hnled = weight.*min(hnlPrefAvg, hnled) + (1 - weight).*hnled; + + %od = aggrFact*(sqrt(linspace(0,1,N+1)') + aggrTerm); + %od = 4*(sqrt(linspace(0,1,N+1)') + 1/4); + + %ovrdFact = (ovrdSm - 1) / sqrt(ovrdPos/(N+1)); + %ovrdFact = ovrdSm / sqrt(echoBandRange(floor(length(echoBandRange)/2))/(N+1)); + %od = ovrdFact*sqrt(linspace(0,1,N+1))' + 1; + %od = ovrdSm*ones(N+1,1).*abs(WFb(:,dIdx))/(max(abs(WFb(:,dIdx)))+1e-10); + + %od = ovrdSm*ones(N+1,1); + %od = ovrdSm*WFbD.*(sqrt(linspace(0,1,N+1))' + 1); + + od = ovrdSm*(sqrt(linspace(0,1,N+1))' + 1); + %od = 4*(sqrt(linspace(0,1,N+1))' + 1); + + %od = 2*ones(N+1,1); + %od = 2*ones(N+1,1); + %sshift = ((1-hnled)*2-1).^3+1; + sshift = ones(N+1,1); + + hnled = hnled.^(od.*sshift); + + %if hnlg > 0.75 + %if (suppState ~= 0) + % transCtr = 0; + %end + % suppState = 0; + %elseif hnlg < 0.6 & hnlg > 0.2 + % suppState = 1; + %elseif hnlg < 0.1 + %hnled = zeros(N+1, 1); + %if (suppState ~= 2) + % transCtr = 0; + %end + % suppState = 2; + %else + % if (suppState ~= 2) + % transCtr = 0; + % end + % suppState = 2; + %end + %if suppState == 0 + % hnled = ones(N+1, 1); + %elseif suppState == 2 + % hnled = zeros(N+1, 1); + %end + %hnled(find(hnled < 0.1)) = 0; + %hnled = hnled.^2; + %if transCtr < 5 + %hnl = 0.75*hnl + 0.25*hnled; + % transCtr = transCtr + 1; + %else + hnl = hnled; + %end + %hnled(find(hnled < 0.05)) = 0; + ef = ef.*(hnl); + + %ef = ef.*(min(1 - cohxd, cohed).^2); + %ef = ef.*((1-cohxd).^2); + + ovrdV(kk) = ovrdSm; + %ovrdV(kk) = dIdx; + %ovrdV(kk) = divergeFact; + %hnledAvg(kk) = 1-mean(1-cohedFast(echoBandRange)); + hnledAvg(kk) = 1-mean(1-cohed(echoBandRange)); + hnlxdAvg(kk) = 
1-mean(cohxd(echoBandRange)); + %hnlxdAvg(kk) = cohxd(5); + %hnlSortQV(kk) = mean(hnled); + hnlSortQV(kk) = hnlPrefAvgLow; + hnlPrefAvgV(kk) = hnlPrefAvg; + %hnlAvg(kk) = propLow; + %ef(N/2:end) = 0; + %ner = (sum(Sd) ./ (sum(Se.*(hnl.^2)) + 1e-10)); + + % Comfort noise + if (CNon) + snn=sqrt(Sym); + snn(1)=0; % Reject LF noise + Un=snn.*exp(j*2*pi.*[0;rand(N-1,1);0]); + + % Weight comfort noise by suppression + Un = sqrt(1-hnled.^2).*Un; + Fmix = ef + Un; + else + Fmix = ef; + end + + % Overlap and add in time domain for smoothness + tmp = [Fmix ; flipud(conj(Fmix(2:N)))]; + mixw = wins.*real(ifft(tmp)); + mola = mbuf(end-N+1:end) + mixw(1:N); + mbuf = mixw; + ercn(pos:pos+N-1) = mola; + end % NLPon + + % Filter update + %Ek2 = Ek ./(12*pn + 0.001); % Normalized error + %Ek2 = Ek2 * divergeFact; + %Ek2 = Ek ./(pn + 0.001); % Normalized error + %Ek2 = Ek ./(100*pn + 0.001); % Normalized error + + %divergeIdx = find(abs(Ek) > abs(DD)); + %divergeIdx = find(Se > Sd); + %threshMod = threshold*ones(N+1,1); + %if length(divergeIdx) > 0 + %if sum(abs(Ek)) > sum(abs(DD)) + %WFb(divergeIdx,:) = WFb(divergeIdx,:) .* repmat(sqrt(Sd(divergeIdx)./(Se(divergeIdx)+1e-10))),1,M); + %Ek2(divergeIdx) = Ek2(divergeIdx) .* sqrt(Sd(divergeIdx)./(Se(divergeIdx)+1e-10)); + %Ek2(divergeIdx) = Ek2(divergeIdx) .* abs(DD(divergeIdx))./(abs(Ek(divergeIdx))+1e-10); + %WFb(divergeIdx,:) = WFbOld(divergeIdx,:); + %WFb = WFbOld; + %threshMod(divergeIdx) = threshMod(divergeIdx) .* abs(DD(divergeIdx))./(abs(Ek(divergeIdx))+1e-10); + % threshMod(divergeIdx) = threshMod(divergeIdx) .* sqrt(Sd(divergeIdx)./(Se(divergeIdx)+1e-10)); + %end + + %absEf = max(abs(Ek2), threshold); + %absEf = ones(N+1,1)*threshold./absEf; + %absEf = max(abs(Ek2), threshMod); + %absEf = threshMod./absEf; + %Ek2 = Ek2.*absEf; + + %if sum(Se) <= sum(Sd) + + % mEk = mufb.*Ek2; + % PP = conj(XFm).*(ones(M,1) * mEk')'; + % tmp = [PP ; flipud(conj(PP(2:N,:)))]; + % IFPP = real(ifft(tmp)); + % PH = IFPP(1:N,:); + % tmp = 
fft([PH;zeros(N,M)]); + % FPH = tmp(1:N+1,:); + % %WFbOld = WFb; + % WFb = WFb + FPH; + + %else + % WF = WFbOld; + %end + + % Shift old FFTs + %for m=M:-1:2 + % XFm(:,m) = XFm(:,m-1); + % YFm(:,m) = YFm(:,m-1); + %end + XFm(:,2:end) = XFm(:,1:end-1); + YFm(:,2:end) = YFm(:,1:end-1); + xfwm(:,2:end) = xfwm(:,1:end-1); + dfm(:,2:end) = dfm(:,1:end-1); + + %if mod(kk, floor(Nb/50)) == 0 + % fprintf(1, '.'); + %end + + if mod(kk, floor(Nb/100)) == 0 + %if mod(kk, floor(Nb/500)) == 0 + progressbar(kk/Nb); + %figure(5) + %plot(abs(WFb)); + %legend('1','2','3','4','5','6','7','8','9','10','11','12'); + %title(kk*N/fs); + %figure(6) + %plot(WFbD); + %figure(6) + %plot(threshMod) + %if length(divergeIdx) > 0 + % plot(abs(DD)) + % hold on + % plot(abs(Ek), 'r') + % hold off + %plot(min(sqrt(Sd./(Se+1e-10)),1)) + %axis([0 N 0 1]); + %end + %figure(6) + %plot(cohedFast); + %axis([1 N+1 0 1]); + %plot(WFbEn); + + %figure(7) + %plot(weight); + %plot([cohxd 1-cohed]); + %plot([cohxd 1-cohed 1-cohedFast hnled]); + %plot([cohxd cohxdFast/max(cohxdFast)]); + %legend('cohxd', '1-cohed', '1-cohedFast'); + %axis([1 65 0 1]); + %pause(0.5); + %overdrive + end +end +progressbar(1); + +%figure(2); +%plot([feat(:,1) feat(:,2)+1 feat(:,3)+2 mfeat+3]); +%plot([feat(:,1) mfeat+1]); + +%figure(3); +%plot(10*log10([dri erifb erifb3 ericn])); +%legend('Near-end','Error','Post NLP','Final',4); +% Compensate for delay +%ercn=[ercn(N+1:end);zeros(N,1)]; +%ercn_=[ercn_(N+1:end);zeros(N,1)]; + +%figure(11); +%plot(cohxdSlow); + +%figure(12); +%surf(cohxdSlow); +%shading interp; + +%figure(13); +%plot(overdriveM); + +%figure(14); +%surf(overdriveM); +%shading interp; + +figure(10); +t = (0:Nb)*N/fs; +rrinSubSamp = rrin(N*(1:(Nb+1))); +plot(t, rrinSubSamp/max(abs(rrinSubSamp)),'b'); +hold on +plot(t, hnledAvg, 'r'); +plot(t, hnlxdAvg, 'g'); +plot(t, hnlSortQV, 'y'); +plot(t, hnlLocalMinV, 'k'); +plot(t, cohxdLocalMinV, 'c'); +plot(t, hnlPrefAvgV, 'm'); +%plot(t, cohxdAvg, 'r'); +%plot(cohxdFastAvg, 
'r'); +%plot(cohxdAvgBad, 'k'); +%plot(t, cohedAvg, 'k'); +%plot(t, 1-cohedFastAvg, 'k'); +%plot(ssin(N*(1:floor(length(ssin)/N)))/max(abs(ssin))); +%plot(echoBands,'r'); +%plot(overdrive, 'g'); +%plot(erfb(N*(1:floor(length(erfb)/N)))/max(abs(erfb))); +hold off +tightx; + +figure(11) +plot(t, ovrdV); +tightx; +%plot(mfeat,'r'); +%plot(1-cohxyp_,'r'); +%plot(Hnlxydp,'y'); +%plot(hnledp,'k'); +%plot(Hnlxydp, 'c'); +%plot(ccohpd_,'k'); +%plot(supplot_, 'g'); +%plot(ones(length(mfeat),1)*rr1_, 'k'); +%plot(ones(length(mfeat),1)*rr2_, 'k'); +%plot(N*(1:length(feat)), feat); +%plot(Sep_,'r'); +%axis([1 floor(length(erfb)/N) -1 1]) +%hold off +%plot(10*log10([Se_, Sx_, Seu_, real(sf_.*conj(sf_))])); +%legend('Se','Sx','Seu','S'); +%figure(5) +%plot([ercn ercn_]); + +figure(12) +plot(t, dIdxV); +%plot(t, SLxV); +tightx; + +%figure(13) +%plot(t, [ekEnV dkEnV]); +%plot(t, dkEnV./(ekEnV+1e-10)); +%tightx; + +%close(hh); +%spclab(fs,ssin,erfb,ercn,'outxd.pcm'); +%spclab(fs,rrin,ssin,erfb,1.78*ercn,'vqeOut-1.pcm'); +%spclab(fs,erfb,'aecOutLp.pcm'); +%spclab(fs,rrin,ssin,erfb,1.78*ercn,'aecOut25.pcm','vqeOut-1.pcm'); +%spclab(fs,rrin,ssin,erfb,ercn,'aecOut-mba.pcm'); +%spclab(fs,rrin,ssin,erfb,ercn,'aecOut.pcm'); +%spclab(fs, ssin, erfb, ercn, 'out0.pcm'); diff --git a/src/modules/audio_processing/aec/main/source/Android.mk b/src/modules/audio_processing/aec/main/source/Android.mk new file mode 100644 index 0000000000..f16f26b723 --- /dev/null +++ b/src/modules/audio_processing/aec/main/source/Android.mk @@ -0,0 +1,61 @@ +# Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. 
+ +LOCAL_PATH := $(call my-dir) + +include $(CLEAR_VARS) + +LOCAL_MODULE_CLASS := STATIC_LIBRARIES +LOCAL_MODULE := libwebrtc_aec +LOCAL_MODULE_TAGS := optional +LOCAL_GENERATED_SOURCES := +LOCAL_SRC_FILES := \ + echo_cancellation.c \ + resampler.c \ + aec_core.c \ + aec_rdft.c + +# Flags passed to both C and C++ files. +MY_CFLAGS := +MY_CFLAGS_C := +MY_DEFS := '-DNO_TCMALLOC' \ + '-DNO_HEAPCHECKER' \ + '-DWEBRTC_TARGET_PC' \ + '-DWEBRTC_LINUX' \ + '-DWEBRTC_THREAD_RR' +ifeq ($(TARGET_ARCH),arm) +MY_DEFS += \ + '-DWEBRTC_ANDROID' \ + '-DANDROID' +else +LOCAL_SRC_FILES += \ + aec_core_sse2.c \ + aec_rdft_sse2.c +endif +LOCAL_CFLAGS := $(MY_CFLAGS_C) $(MY_CFLAGS) $(MY_DEFS) + +# Include paths placed before CFLAGS/CPPFLAGS +LOCAL_C_INCLUDES := $(LOCAL_PATH)/../../../../.. \ + $(LOCAL_PATH)/../interface \ + $(LOCAL_PATH)/../../../utility \ + $(LOCAL_PATH)/../../../../../common_audio/signal_processing_library/main/interface + +# Flags passed to only C++ (and not C) files. +LOCAL_CPPFLAGS := + +LOCAL_LDFLAGS := + +LOCAL_STATIC_LIBRARIES := + +LOCAL_SHARED_LIBRARIES := libcutils \ + libdl \ + libstlport +LOCAL_ADDITIONAL_DEPENDENCIES := + +include external/stlport/libstlport.mk +include $(BUILD_STATIC_LIBRARY) diff --git a/src/modules/audio_processing/aec/main/source/aec.gyp b/src/modules/audio_processing/aec/main/source/aec.gyp new file mode 100644 index 0000000000..0427e0021d --- /dev/null +++ b/src/modules/audio_processing/aec/main/source/aec.gyp @@ -0,0 +1,49 @@ +# Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. 
+ +{ + 'includes': [ + '../../../../../common_settings.gypi', + ], + 'targets': [ + { + 'target_name': 'aec', + 'type': '<(library)', + 'dependencies': [ + '../../../../../common_audio/signal_processing_library/main/source/spl.gyp:spl', + '../../../utility/util.gyp:apm_util' + ], + 'include_dirs': [ + '../interface', + ], + 'direct_dependent_settings': { + 'include_dirs': [ + '../interface', + ], + }, + 'sources': [ + '../interface/echo_cancellation.h', + 'echo_cancellation.c', + 'aec_core.c', + 'aec_core_sse2.c', + 'aec_rdft.h', + 'aec_rdft.c', + 'aec_rdft_sse2.c', + 'aec_core.h', + 'resampler.c', + 'resampler.h', + ], + }, + ], +} + +# Local Variables: +# tab-width:2 +# indent-tabs-mode:nil +# End: +# vim: set expandtab tabstop=2 shiftwidth=2: diff --git a/src/modules/audio_processing/aec/main/source/aec_core.c b/src/modules/audio_processing/aec/main/source/aec_core.c new file mode 100644 index 0000000000..81197ea328 --- /dev/null +++ b/src/modules/audio_processing/aec/main/source/aec_core.c @@ -0,0 +1,1456 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * The core AEC algorithm, which is presented with time-aligned signals. 
+ */ + +#include <math.h> +#include <stdlib.h> +#include <string.h> + +#include "aec_core.h" +#include "aec_rdft.h" +#include "ring_buffer.h" +#include "system_wrappers/interface/cpu_features_wrapper.h" + +// Noise suppression +static const int converged = 250; + +// Metrics +static const int subCountLen = 4; +static const int countLen = 50; + +// Quantities to control H band scaling for SWB input +static const int flagHbandCn = 1; // flag for adding comfort noise in H band +static const float cnScaleHband = (float)0.4; // scale for comfort noise in H band +// Initial bin for averaging nlp gain in low band +static const int freqAvgIc = PART_LEN / 2; + +/* Matlab code to produce table: +win = sqrt(hanning(63)); win = [0 ; win(1:32)]; +fprintf(1, '\t%.14f, %.14f, %.14f,\n', win); +*/ +/* +static const float sqrtHanning[33] = { + 0.00000000000000, 0.04906767432742, 0.09801714032956, + 0.14673047445536, 0.19509032201613, 0.24298017990326, + 0.29028467725446, 0.33688985339222, 0.38268343236509, + 0.42755509343028, 0.47139673682600, 0.51410274419322, + 0.55557023301960, 0.59569930449243, 0.63439328416365, + 0.67155895484702, 0.70710678118655, 0.74095112535496, + 0.77301045336274, 0.80320753148064, 0.83146961230255, + 0.85772861000027, 0.88192126434835, 0.90398929312344, + 0.92387953251129, 0.94154406518302, 0.95694033573221, + 0.97003125319454, 0.98078528040323, 0.98917650996478, + 0.99518472667220, 0.99879545620517, 1.00000000000000 +}; +*/ + +static const float sqrtHanning[65] = { + 0.00000000000000f, 0.02454122852291f, 0.04906767432742f, + 0.07356456359967f, 0.09801714032956f, 0.12241067519922f, + 0.14673047445536f, 0.17096188876030f, 0.19509032201613f, + 0.21910124015687f, 0.24298017990326f, 0.26671275747490f, + 0.29028467725446f, 0.31368174039889f, 0.33688985339222f, + 0.35989503653499f, 0.38268343236509f, 0.40524131400499f, + 0.42755509343028f, 0.44961132965461f, 0.47139673682600f, + 0.49289819222978f, 0.51410274419322f, 0.53499761988710f, + 0.55557023301960f, 
0.57580819141785f, 0.59569930449243f, + 0.61523159058063f, 0.63439328416365f, 0.65317284295378f, + 0.67155895484702f, 0.68954054473707f, 0.70710678118655f, + 0.72424708295147f, 0.74095112535496f, 0.75720884650648f, + 0.77301045336274f, 0.78834642762661f, 0.80320753148064f, + 0.81758481315158f, 0.83146961230255f, 0.84485356524971f, + 0.85772861000027f, 0.87008699110871f, 0.88192126434835f, + 0.89322430119552f, 0.90398929312344f, 0.91420975570353f, + 0.92387953251129f, 0.93299279883474f, 0.94154406518302f, + 0.94952818059304f, 0.95694033573221f, 0.96377606579544f, + 0.97003125319454f, 0.97570213003853f, 0.98078528040323f, + 0.98527764238894f, 0.98917650996478f, 0.99247953459871f, + 0.99518472667220f, 0.99729045667869f, 0.99879545620517f, + 0.99969881869620f, 1.00000000000000f +}; + +/* Matlab code to produce table: +weightCurve = [0 ; 0.3 * sqrt(linspace(0,1,64))' + 0.1]; +fprintf(1, '\t%.4f, %.4f, %.4f, %.4f, %.4f, %.4f,\n', weightCurve); +*/ +const float WebRtcAec_weightCurve[65] = { + 0.0000f, 0.1000f, 0.1378f, 0.1535f, 0.1655f, 0.1756f, + 0.1845f, 0.1926f, 0.2000f, 0.2069f, 0.2134f, 0.2195f, + 0.2254f, 0.2309f, 0.2363f, 0.2414f, 0.2464f, 0.2512f, + 0.2558f, 0.2604f, 0.2648f, 0.2690f, 0.2732f, 0.2773f, + 0.2813f, 0.2852f, 0.2890f, 0.2927f, 0.2964f, 0.3000f, + 0.3035f, 0.3070f, 0.3104f, 0.3138f, 0.3171f, 0.3204f, + 0.3236f, 0.3268f, 0.3299f, 0.3330f, 0.3360f, 0.3390f, + 0.3420f, 0.3449f, 0.3478f, 0.3507f, 0.3535f, 0.3563f, + 0.3591f, 0.3619f, 0.3646f, 0.3673f, 0.3699f, 0.3726f, + 0.3752f, 0.3777f, 0.3803f, 0.3828f, 0.3854f, 0.3878f, + 0.3903f, 0.3928f, 0.3952f, 0.3976f, 0.4000f +}; + +/* Matlab code to produce table: +overDriveCurve = [sqrt(linspace(0,1,65))' + 1]; +fprintf(1, '\t%.4f, %.4f, %.4f, %.4f, %.4f, %.4f,\n', overDriveCurve); +*/ +const float WebRtcAec_overDriveCurve[65] = { + 1.0000f, 1.1250f, 1.1768f, 1.2165f, 1.2500f, 1.2795f, + 1.3062f, 1.3307f, 1.3536f, 1.3750f, 1.3953f, 1.4146f, + 1.4330f, 1.4507f, 1.4677f, 1.4841f, 1.5000f, 1.5154f, + 1.5303f, 
1.5449f, 1.5590f, 1.5728f, 1.5863f, 1.5995f, + 1.6124f, 1.6250f, 1.6374f, 1.6495f, 1.6614f, 1.6731f, + 1.6847f, 1.6960f, 1.7071f, 1.7181f, 1.7289f, 1.7395f, + 1.7500f, 1.7603f, 1.7706f, 1.7806f, 1.7906f, 1.8004f, + 1.8101f, 1.8197f, 1.8292f, 1.8385f, 1.8478f, 1.8570f, + 1.8660f, 1.8750f, 1.8839f, 1.8927f, 1.9014f, 1.9100f, + 1.9186f, 1.9270f, 1.9354f, 1.9437f, 1.9520f, 1.9601f, + 1.9682f, 1.9763f, 1.9843f, 1.9922f, 2.0000f +}; + +// "Private" function prototypes. +static void ProcessBlock(aec_t *aec, const short *farend, + const short *nearend, const short *nearendH, + short *out, short *outH); + +static void BufferFar(aec_t *aec, const short *farend, int farLen); +static void FetchFar(aec_t *aec, short *farend, int farLen, int knownDelay); + +static void NonLinearProcessing(aec_t *aec, short *output, short *outputH); + +static void GetHighbandGain(const float *lambda, float *nlpGainHband); + +// Comfort_noise also computes noise for H band returned in comfortNoiseHband +static void ComfortNoise(aec_t *aec, float efw[2][PART_LEN1], + complex_t *comfortNoiseHband, + const float *noisePow, const float *lambda); + +static void WebRtcAec_InitLevel(power_level_t *level); +static void WebRtcAec_InitStats(stats_t *stats); +static void UpdateLevel(power_level_t *level, const short *in); +static void UpdateMetrics(aec_t *aec); + +__inline static float MulRe(float aRe, float aIm, float bRe, float bIm) +{ + return aRe * bRe - aIm * bIm; +} + +__inline static float MulIm(float aRe, float aIm, float bRe, float bIm) +{ + return aRe * bIm + aIm * bRe; +} + +static int CmpFloat(const void *a, const void *b) +{ + const float *da = (const float *)a; + const float *db = (const float *)b; + + return (*da > *db) - (*da < *db); +} + +int WebRtcAec_CreateAec(aec_t **aecInst) +{ + aec_t *aec = malloc(sizeof(aec_t)); + *aecInst = aec; + if (aec == NULL) { + return -1; + } + + if (WebRtcApm_CreateBuffer(&aec->farFrBuf, FRAME_LEN + PART_LEN) == -1) { + WebRtcAec_FreeAec(aec); + aec = NULL; + 
return -1; + } + + if (WebRtcApm_CreateBuffer(&aec->nearFrBuf, FRAME_LEN + PART_LEN) == -1) { + WebRtcAec_FreeAec(aec); + aec = NULL; + return -1; + } + + if (WebRtcApm_CreateBuffer(&aec->outFrBuf, FRAME_LEN + PART_LEN) == -1) { + WebRtcAec_FreeAec(aec); + aec = NULL; + return -1; + } + + if (WebRtcApm_CreateBuffer(&aec->nearFrBufH, FRAME_LEN + PART_LEN) == -1) { + WebRtcAec_FreeAec(aec); + aec = NULL; + return -1; + } + + if (WebRtcApm_CreateBuffer(&aec->outFrBufH, FRAME_LEN + PART_LEN) == -1) { + WebRtcAec_FreeAec(aec); + aec = NULL; + return -1; + } + + return 0; +} + +int WebRtcAec_FreeAec(aec_t *aec) +{ + if (aec == NULL) { + return -1; + } + + WebRtcApm_FreeBuffer(aec->farFrBuf); + WebRtcApm_FreeBuffer(aec->nearFrBuf); + WebRtcApm_FreeBuffer(aec->outFrBuf); + + WebRtcApm_FreeBuffer(aec->nearFrBufH); + WebRtcApm_FreeBuffer(aec->outFrBufH); + + free(aec); + return 0; +} + +static void FilterFar(aec_t *aec, float yf[2][PART_LEN1]) +{ + int i; + for (i = 0; i < NR_PART; i++) { + int j; + int xPos = (i + aec->xfBufBlockPos) * PART_LEN1; + int pos = i * PART_LEN1; + // Check for wrap + if (i + aec->xfBufBlockPos >= NR_PART) { + xPos -= NR_PART*(PART_LEN1); + } + + for (j = 0; j < PART_LEN1; j++) { + yf[0][j] += MulRe(aec->xfBuf[0][xPos + j], aec->xfBuf[1][xPos + j], + aec->wfBuf[0][ pos + j], aec->wfBuf[1][ pos + j]); + yf[1][j] += MulIm(aec->xfBuf[0][xPos + j], aec->xfBuf[1][xPos + j], + aec->wfBuf[0][ pos + j], aec->wfBuf[1][ pos + j]); + } + } +} + +static void ScaleErrorSignal(aec_t *aec, float ef[2][PART_LEN1]) +{ + int i; + float absEf; + for (i = 0; i < (PART_LEN1); i++) { + ef[0][i] /= (aec->xPow[i] + 1e-10f); + ef[1][i] /= (aec->xPow[i] + 1e-10f); + absEf = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]); + + if (absEf > aec->errThresh) { + absEf = aec->errThresh / (absEf + 1e-10f); + ef[0][i] *= absEf; + ef[1][i] *= absEf; + } + + // Stepsize factor + ef[0][i] *= aec->mu; + ef[1][i] *= aec->mu; + } +} + +static void FilterAdaptation(aec_t *aec, float 
*fft, float ef[2][PART_LEN1]) { + int i, j; + for (i = 0; i < NR_PART; i++) { + int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1); + int pos; + // Check for wrap + if (i + aec->xfBufBlockPos >= NR_PART) { + xPos -= NR_PART * PART_LEN1; + } + + pos = i * PART_LEN1; + +#ifdef UNCONSTR + for (j = 0; j < PART_LEN1; j++) { + aec->wfBuf[pos + j][0] += MulRe(aec->xfBuf[xPos + j][0], + -aec->xfBuf[xPos + j][1], + ef[j][0], ef[j][1]); + aec->wfBuf[pos + j][1] += MulIm(aec->xfBuf[xPos + j][0], + -aec->xfBuf[xPos + j][1], + ef[j][0], ef[j][1]); + } +#else + for (j = 0; j < PART_LEN; j++) { + + fft[2 * j] = MulRe(aec->xfBuf[0][xPos + j], + -aec->xfBuf[1][xPos + j], + ef[0][j], ef[1][j]); + fft[2 * j + 1] = MulIm(aec->xfBuf[0][xPos + j], + -aec->xfBuf[1][xPos + j], + ef[0][j], ef[1][j]); + } + fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN], + -aec->xfBuf[1][xPos + PART_LEN], + ef[0][PART_LEN], ef[1][PART_LEN]); + + aec_rdft_inverse_128(fft); + memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN); + + // fft scaling + { + float scale = 2.0f / PART_LEN2; + for (j = 0; j < PART_LEN; j++) { + fft[j] *= scale; + } + } + aec_rdft_forward_128(fft); + + aec->wfBuf[0][pos] += fft[0]; + aec->wfBuf[0][pos + PART_LEN] += fft[1]; + + for (j = 1; j < PART_LEN; j++) { + aec->wfBuf[0][pos + j] += fft[2 * j]; + aec->wfBuf[1][pos + j] += fft[2 * j + 1]; + } +#endif // UNCONSTR + } +} + +static void OverdriveAndSuppress(aec_t *aec, float hNl[PART_LEN1], + const float hNlFb, + float efw[2][PART_LEN1]) { + int i; + for (i = 0; i < PART_LEN1; i++) { + // Weight subbands + if (hNl[i] > hNlFb) { + hNl[i] = WebRtcAec_weightCurve[i] * hNlFb + + (1 - WebRtcAec_weightCurve[i]) * hNl[i]; + } + hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]); + + // Suppress error signal + efw[0][i] *= hNl[i]; + efw[1][i] *= hNl[i]; + + // Ooura fft returns incorrect sign on imaginary component. It matters here + // because we are making an additive change with comfort noise. 
+ efw[1][i] *= -1; + } +} + +WebRtcAec_FilterFar_t WebRtcAec_FilterFar; +WebRtcAec_ScaleErrorSignal_t WebRtcAec_ScaleErrorSignal; +WebRtcAec_FilterAdaptation_t WebRtcAec_FilterAdaptation; +WebRtcAec_OverdriveAndSuppress_t WebRtcAec_OverdriveAndSuppress; + +int WebRtcAec_InitAec(aec_t *aec, int sampFreq) +{ + int i; + + aec->sampFreq = sampFreq; + + if (sampFreq == 8000) { + aec->mu = 0.6f; + aec->errThresh = 2e-6f; + } + else { + aec->mu = 0.5f; + aec->errThresh = 1.5e-6f; + } + + if (WebRtcApm_InitBuffer(aec->farFrBuf) == -1) { + return -1; + } + + if (WebRtcApm_InitBuffer(aec->nearFrBuf) == -1) { + return -1; + } + + if (WebRtcApm_InitBuffer(aec->outFrBuf) == -1) { + return -1; + } + + if (WebRtcApm_InitBuffer(aec->nearFrBufH) == -1) { + return -1; + } + + if (WebRtcApm_InitBuffer(aec->outFrBufH) == -1) { + return -1; + } + + // Default target suppression level + aec->targetSupp = -11.5; + aec->minOverDrive = 2.0; + + // Sampling frequency multiplier + // SWB is processed as 160 frame size + if (aec->sampFreq == 32000) { + aec->mult = (short)aec->sampFreq / 16000; + } + else { + aec->mult = (short)aec->sampFreq / 8000; + } + + aec->farBufWritePos = 0; + aec->farBufReadPos = 0; + + aec->inSamples = 0; + aec->outSamples = 0; + aec->knownDelay = 0; + + // Initialize buffers + memset(aec->farBuf, 0, sizeof(aec->farBuf)); + memset(aec->xBuf, 0, sizeof(aec->xBuf)); + memset(aec->dBuf, 0, sizeof(aec->dBuf)); + memset(aec->eBuf, 0, sizeof(aec->eBuf)); + // For H band + memset(aec->dBufH, 0, sizeof(aec->dBufH)); + + memset(aec->xPow, 0, sizeof(aec->xPow)); + memset(aec->dPow, 0, sizeof(aec->dPow)); + memset(aec->dInitMinPow, 0, sizeof(aec->dInitMinPow)); + aec->noisePow = aec->dInitMinPow; + aec->noiseEstCtr = 0; + + // Initial comfort noise power + for (i = 0; i < PART_LEN1; i++) { + aec->dMinPow[i] = 1.0e6f; + } + + // Holds the last block written to + aec->xfBufBlockPos = 0; + // TODO: Investigate need for these initializations. 
Deleting them doesn't + // change the output at all and yields 0.4% overall speedup. + memset(aec->xfBuf, 0, sizeof(complex_t) * NR_PART * PART_LEN1); + memset(aec->wfBuf, 0, sizeof(complex_t) * NR_PART * PART_LEN1); + memset(aec->sde, 0, sizeof(complex_t) * PART_LEN1); + memset(aec->sxd, 0, sizeof(complex_t) * PART_LEN1); + memset(aec->xfwBuf, 0, sizeof(complex_t) * NR_PART * PART_LEN1); + memset(aec->se, 0, sizeof(float) * PART_LEN1); + + // To prevent numerical instability in the first block. + for (i = 0; i < PART_LEN1; i++) { + aec->sd[i] = 1; + } + for (i = 0; i < PART_LEN1; i++) { + aec->sx[i] = 1; + } + + memset(aec->hNs, 0, sizeof(aec->hNs)); + memset(aec->outBuf, 0, sizeof(float) * PART_LEN); + + aec->hNlFbMin = 1; + aec->hNlFbLocalMin = 1; + aec->hNlXdAvgMin = 1; + aec->hNlNewMin = 0; + aec->hNlMinCtr = 0; + aec->overDrive = 2; + aec->overDriveSm = 2; + aec->delayIdx = 0; + aec->stNearState = 0; + aec->echoState = 0; + aec->divergeState = 0; + + aec->seed = 777; + aec->delayEstCtr = 0; + + // Features on by default (G.167) +#ifdef G167 + aec->adaptToggle = 1; + aec->nlpToggle = 1; + aec->cnToggle = 1; +#endif + + // Metrics disabled by default + aec->metricsMode = 0; + WebRtcAec_InitMetrics(aec); + + // Assembly optimization + WebRtcAec_FilterFar = FilterFar; + WebRtcAec_ScaleErrorSignal = ScaleErrorSignal; + WebRtcAec_FilterAdaptation = FilterAdaptation; + WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppress; + if (WebRtc_GetCPUInfo(kSSE2)) { +#if defined(__SSE2__) + WebRtcAec_InitAec_SSE2(); +#endif + } + aec_rdft_init(); + + return 0; +} + +void WebRtcAec_InitMetrics(aec_t *aec) +{ + aec->stateCounter = 0; + WebRtcAec_InitLevel(&aec->farlevel); + WebRtcAec_InitLevel(&aec->nearlevel); + WebRtcAec_InitLevel(&aec->linoutlevel); + WebRtcAec_InitLevel(&aec->nlpoutlevel); + + WebRtcAec_InitStats(&aec->erl); + WebRtcAec_InitStats(&aec->erle); + WebRtcAec_InitStats(&aec->aNlp); + WebRtcAec_InitStats(&aec->rerl); +} + + +void WebRtcAec_ProcessFrame(aec_t 
*aec, const short *farend, + const short *nearend, const short *nearendH, + short *out, short *outH, + int knownDelay) +{ + short farBl[PART_LEN], nearBl[PART_LEN], outBl[PART_LEN]; + short farFr[FRAME_LEN]; + // For H band + short nearBlH[PART_LEN], outBlH[PART_LEN]; + + int size = 0; + + // initialize: only used for SWB + memset(nearBlH, 0, sizeof(nearBlH)); + memset(outBlH, 0, sizeof(outBlH)); + + // Buffer the current frame. + // Fetch an older one corresponding to the delay. + BufferFar(aec, farend, FRAME_LEN); + FetchFar(aec, farFr, FRAME_LEN, knownDelay); + + // Buffer the synchronized far and near frames, + // to pass the smaller blocks individually. + WebRtcApm_WriteBuffer(aec->farFrBuf, farFr, FRAME_LEN); + WebRtcApm_WriteBuffer(aec->nearFrBuf, nearend, FRAME_LEN); + // For H band + if (aec->sampFreq == 32000) { + WebRtcApm_WriteBuffer(aec->nearFrBufH, nearendH, FRAME_LEN); + } + + // Process as many blocks as possible. + while (WebRtcApm_get_buffer_size(aec->farFrBuf) >= PART_LEN) { + + WebRtcApm_ReadBuffer(aec->farFrBuf, farBl, PART_LEN); + WebRtcApm_ReadBuffer(aec->nearFrBuf, nearBl, PART_LEN); + + // For H band + if (aec->sampFreq == 32000) { + WebRtcApm_ReadBuffer(aec->nearFrBufH, nearBlH, PART_LEN); + } + + ProcessBlock(aec, farBl, nearBl, nearBlH, outBl, outBlH); + + WebRtcApm_WriteBuffer(aec->outFrBuf, outBl, PART_LEN); + // For H band + if (aec->sampFreq == 32000) { + WebRtcApm_WriteBuffer(aec->outFrBufH, outBlH, PART_LEN); + } + } + + // Stuff the out buffer if we have less than a frame to output. + // This should only happen for the first frame. + size = WebRtcApm_get_buffer_size(aec->outFrBuf); + if (size < FRAME_LEN) { + WebRtcApm_StuffBuffer(aec->outFrBuf, FRAME_LEN - size); + if (aec->sampFreq == 32000) { + WebRtcApm_StuffBuffer(aec->outFrBufH, FRAME_LEN - size); + } + } + + // Obtain an output frame. 
+ WebRtcApm_ReadBuffer(aec->outFrBuf, out, FRAME_LEN); + // For H band + if (aec->sampFreq == 32000) { + WebRtcApm_ReadBuffer(aec->outFrBufH, outH, FRAME_LEN); + } +} + +static void ProcessBlock(aec_t *aec, const short *farend, + const short *nearend, const short *nearendH, + short *output, short *outputH) +{ + int i; + float d[PART_LEN], y[PART_LEN], e[PART_LEN], dH[PART_LEN]; + short eInt16[PART_LEN]; + float scale; + + float fft[PART_LEN2]; + float xf[2][PART_LEN1], yf[2][PART_LEN1], ef[2][PART_LEN1]; + complex_t df[PART_LEN1]; + + const float gPow[2] = {0.9f, 0.1f}; + + // Noise estimate constants. + const int noiseInitBlocks = 500 * aec->mult; + const float step = 0.1f; + const float ramp = 1.0002f; + const float gInitNoise[2] = {0.999f, 0.001f}; + +#ifdef AEC_DEBUG + fwrite(farend, sizeof(short), PART_LEN, aec->farFile); + fwrite(nearend, sizeof(short), PART_LEN, aec->nearFile); +#endif + + memset(dH, 0, sizeof(dH)); + + // ---------- Ooura fft ---------- + // Concatenate old and new farend blocks. 
+ for (i = 0; i < PART_LEN; i++) { + aec->xBuf[i + PART_LEN] = (float)farend[i]; + d[i] = (float)nearend[i]; + } + + if (aec->sampFreq == 32000) { + for (i = 0; i < PART_LEN; i++) { + dH[i] = (float)nearendH[i]; + } + } + + + memcpy(fft, aec->xBuf, sizeof(float) * PART_LEN2); + memcpy(aec->dBuf + PART_LEN, d, sizeof(float) * PART_LEN); + // For H band + if (aec->sampFreq == 32000) { + memcpy(aec->dBufH + PART_LEN, dH, sizeof(float) * PART_LEN); + } + + aec_rdft_forward_128(fft); + + // Far fft + xf[1][0] = 0; + xf[1][PART_LEN] = 0; + xf[0][0] = fft[0]; + xf[0][PART_LEN] = fft[1]; + + for (i = 1; i < PART_LEN; i++) { + xf[0][i] = fft[2 * i]; + xf[1][i] = fft[2 * i + 1]; + } + + // Near fft + memcpy(fft, aec->dBuf, sizeof(float) * PART_LEN2); + aec_rdft_forward_128(fft); + df[0][1] = 0; + df[PART_LEN][1] = 0; + df[0][0] = fft[0]; + df[PART_LEN][0] = fft[1]; + + for (i = 1; i < PART_LEN; i++) { + df[i][0] = fft[2 * i]; + df[i][1] = fft[2 * i + 1]; + } + + // Power smoothing + for (i = 0; i < PART_LEN1; i++) { + aec->xPow[i] = gPow[0] * aec->xPow[i] + gPow[1] * NR_PART * + (xf[0][i] * xf[0][i] + xf[1][i] * xf[1][i]); + aec->dPow[i] = gPow[0] * aec->dPow[i] + gPow[1] * + (df[i][0] * df[i][0] + df[i][1] * df[i][1]); + } + + // Estimate noise power. Wait until dPow is more stable. + if (aec->noiseEstCtr > 50) { + for (i = 0; i < PART_LEN1; i++) { + if (aec->dPow[i] < aec->dMinPow[i]) { + aec->dMinPow[i] = (aec->dPow[i] + step * (aec->dMinPow[i] - + aec->dPow[i])) * ramp; + } + else { + aec->dMinPow[i] *= ramp; + } + } + } + + // Smooth increasing noise power from zero at the start, + // to avoid a sudden burst of comfort noise. 
+ if (aec->noiseEstCtr < noiseInitBlocks) { + aec->noiseEstCtr++; + for (i = 0; i < PART_LEN1; i++) { + if (aec->dMinPow[i] > aec->dInitMinPow[i]) { + aec->dInitMinPow[i] = gInitNoise[0] * aec->dInitMinPow[i] + + gInitNoise[1] * aec->dMinPow[i]; + } + else { + aec->dInitMinPow[i] = aec->dMinPow[i]; + } + } + aec->noisePow = aec->dInitMinPow; + } + else { + aec->noisePow = aec->dMinPow; + } + + + // Update the xfBuf block position. + aec->xfBufBlockPos--; + if (aec->xfBufBlockPos == -1) { + aec->xfBufBlockPos = NR_PART - 1; + } + + // Buffer xf + memcpy(aec->xfBuf[0] + aec->xfBufBlockPos * PART_LEN1, xf[0], + sizeof(float) * PART_LEN1); + memcpy(aec->xfBuf[1] + aec->xfBufBlockPos * PART_LEN1, xf[1], + sizeof(float) * PART_LEN1); + + memset(yf[0], 0, sizeof(float) * (PART_LEN1 * 2)); + + // Filter far + WebRtcAec_FilterFar(aec, yf); + + // Inverse fft to obtain echo estimate and error. + fft[0] = yf[0][0]; + fft[1] = yf[0][PART_LEN]; + for (i = 1; i < PART_LEN; i++) { + fft[2 * i] = yf[0][i]; + fft[2 * i + 1] = yf[1][i]; + } + aec_rdft_inverse_128(fft); + + scale = 2.0f / PART_LEN2; + for (i = 0; i < PART_LEN; i++) { + y[i] = fft[PART_LEN + i] * scale; // fft scaling + } + + for (i = 0; i < PART_LEN; i++) { + e[i] = d[i] - y[i]; + } + + // Error fft + memcpy(aec->eBuf + PART_LEN, e, sizeof(float) * PART_LEN); + memset(fft, 0, sizeof(float) * PART_LEN); + memcpy(fft + PART_LEN, e, sizeof(float) * PART_LEN); + aec_rdft_forward_128(fft); + + ef[1][0] = 0; + ef[1][PART_LEN] = 0; + ef[0][0] = fft[0]; + ef[0][PART_LEN] = fft[1]; + for (i = 1; i < PART_LEN; i++) { + ef[0][i] = fft[2 * i]; + ef[1][i] = fft[2 * i + 1]; + } + + // Scale error signal inversely with far power. 
+ WebRtcAec_ScaleErrorSignal(aec, ef); +#ifdef G167 + if (aec->adaptToggle) { +#endif + // Filter adaptation + WebRtcAec_FilterAdaptation(aec, fft, ef); +#ifdef G167 + } +#endif + + NonLinearProcessing(aec, output, outputH); + +#if defined(AEC_DEBUG) || defined(G167) + for (i = 0; i < PART_LEN; i++) { + eInt16[i] = (short)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, e[i], + WEBRTC_SPL_WORD16_MIN); + } +#endif +#ifdef G167 + if (aec->nlpToggle == 0) { + memcpy(output, eInt16, sizeof(eInt16)); + } +#endif + + if (aec->metricsMode == 1) { + for (i = 0; i < PART_LEN; i++) { + eInt16[i] = (short)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, e[i], + WEBRTC_SPL_WORD16_MIN); + } + + // Update power levels and echo metrics + UpdateLevel(&aec->farlevel, farend); + UpdateLevel(&aec->nearlevel, nearend); + UpdateLevel(&aec->linoutlevel, eInt16); + UpdateLevel(&aec->nlpoutlevel, output); + UpdateMetrics(aec); + } + +#ifdef AEC_DEBUG + fwrite(eInt16, sizeof(short), PART_LEN, aec->outLpFile); + fwrite(output, sizeof(short), PART_LEN, aec->outFile); +#endif +} + +static void NonLinearProcessing(aec_t *aec, short *output, short *outputH) +{ + float efw[2][PART_LEN1], dfw[2][PART_LEN1]; + complex_t xfw[PART_LEN1]; + complex_t comfortNoiseHband[PART_LEN1]; + float fft[PART_LEN2]; + float scale, dtmp; + float nlpGainHband; + int i, j, pos; + + // Coherence and non-linear filter + float cohde[PART_LEN1], cohxd[PART_LEN1]; + float hNlDeAvg, hNlXdAvg; + float hNl[PART_LEN1]; + float hNlPref[PREF_BAND_SIZE]; + float hNlFb = 0, hNlFbLow = 0; + const float prefBandQuant = 0.75f, prefBandQuantLow = 0.5f; + const int prefBandSize = PREF_BAND_SIZE / aec->mult; + const int minPrefBand = 4 / aec->mult; + + // Near and error power sums + float sdSum = 0, seSum = 0; + + // Power estimate smoothing coefficients + const float gCoh[2][2] = {{0.9f, 0.1f}, {0.93f, 0.07f}}; + const float *ptrGCoh = gCoh[aec->mult - 1]; + + // Filter energey + float wfEnMax = 0, wfEn = 0; + const int delayEstInterval = 10 * aec->mult; 
+ + aec->delayEstCtr++; + if (aec->delayEstCtr == delayEstInterval) { + aec->delayEstCtr = 0; + } + + // initialize comfort noise for H band + memset(comfortNoiseHband, 0, sizeof(comfortNoiseHband)); + nlpGainHband = (float)0.0; + dtmp = (float)0.0; + + // Measure energy in each filter partition to determine delay. + // TODO: Spread by computing one partition per block? + if (aec->delayEstCtr == 0) { + wfEnMax = 0; + aec->delayIdx = 0; + for (i = 0; i < NR_PART; i++) { + pos = i * PART_LEN1; + wfEn = 0; + for (j = 0; j < PART_LEN1; j++) { + wfEn += aec->wfBuf[0][pos + j] * aec->wfBuf[0][pos + j] + + aec->wfBuf[1][pos + j] * aec->wfBuf[1][pos + j]; + } + + if (wfEn > wfEnMax) { + wfEnMax = wfEn; + aec->delayIdx = i; + } + } + } + + // NLP + // Windowed far fft + for (i = 0; i < PART_LEN; i++) { + fft[i] = aec->xBuf[i] * sqrtHanning[i]; + fft[PART_LEN + i] = aec->xBuf[PART_LEN + i] * sqrtHanning[PART_LEN - i]; + } + aec_rdft_forward_128(fft); + + xfw[0][1] = 0; + xfw[PART_LEN][1] = 0; + xfw[0][0] = fft[0]; + xfw[PART_LEN][0] = fft[1]; + for (i = 1; i < PART_LEN; i++) { + xfw[i][0] = fft[2 * i]; + xfw[i][1] = fft[2 * i + 1]; + } + + // Buffer far. + memcpy(aec->xfwBuf, xfw, sizeof(xfw)); + + // Use delayed far. 
+ memcpy(xfw, aec->xfwBuf + aec->delayIdx * PART_LEN1, sizeof(xfw)); + + // Windowed near fft + for (i = 0; i < PART_LEN; i++) { + fft[i] = aec->dBuf[i] * sqrtHanning[i]; + fft[PART_LEN + i] = aec->dBuf[PART_LEN + i] * sqrtHanning[PART_LEN - i]; + } + aec_rdft_forward_128(fft); + + dfw[1][0] = 0; + dfw[1][PART_LEN] = 0; + dfw[0][0] = fft[0]; + dfw[0][PART_LEN] = fft[1]; + for (i = 1; i < PART_LEN; i++) { + dfw[0][i] = fft[2 * i]; + dfw[1][i] = fft[2 * i + 1]; + } + + // Windowed error fft + for (i = 0; i < PART_LEN; i++) { + fft[i] = aec->eBuf[i] * sqrtHanning[i]; + fft[PART_LEN + i] = aec->eBuf[PART_LEN + i] * sqrtHanning[PART_LEN - i]; + } + aec_rdft_forward_128(fft); + efw[1][0] = 0; + efw[1][PART_LEN] = 0; + efw[0][0] = fft[0]; + efw[0][PART_LEN] = fft[1]; + for (i = 1; i < PART_LEN; i++) { + efw[0][i] = fft[2 * i]; + efw[1][i] = fft[2 * i + 1]; + } + + // Smoothed PSD + for (i = 0; i < PART_LEN1; i++) { + aec->sd[i] = ptrGCoh[0] * aec->sd[i] + ptrGCoh[1] * + (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]); + aec->se[i] = ptrGCoh[0] * aec->se[i] + ptrGCoh[1] * + (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]); + // We threshold here to protect against the ill-effects of a zero farend. + // The threshold is not arbitrarily chosen, but balances protection and + // adverse interaction with the algorithm's tuning. + // TODO: investigate further why this is so sensitive. 
+ aec->sx[i] = ptrGCoh[0] * aec->sx[i] + ptrGCoh[1] * + WEBRTC_SPL_MAX(xfw[i][0] * xfw[i][0] + xfw[i][1] * xfw[i][1], 15); + + aec->sde[i][0] = ptrGCoh[0] * aec->sde[i][0] + ptrGCoh[1] * + (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]); + aec->sde[i][1] = ptrGCoh[0] * aec->sde[i][1] + ptrGCoh[1] * + (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]); + + aec->sxd[i][0] = ptrGCoh[0] * aec->sxd[i][0] + ptrGCoh[1] * + (dfw[0][i] * xfw[i][0] + dfw[1][i] * xfw[i][1]); + aec->sxd[i][1] = ptrGCoh[0] * aec->sxd[i][1] + ptrGCoh[1] * + (dfw[0][i] * xfw[i][1] - dfw[1][i] * xfw[i][0]); + + sdSum += aec->sd[i]; + seSum += aec->se[i]; + } + + // Divergent filter safeguard. + if (aec->divergeState == 0) { + if (seSum > sdSum) { + aec->divergeState = 1; + } + } + else { + if (seSum * 1.05f < sdSum) { + aec->divergeState = 0; + } + } + + if (aec->divergeState == 1) { + memcpy(efw, dfw, sizeof(efw)); + } + + // Reset if error is significantly larger than nearend (13 dB). + if (seSum > (19.95f * sdSum)) { + memset(aec->wfBuf, 0, sizeof(aec->wfBuf)); + } + + // Subband coherence + for (i = 0; i < PART_LEN1; i++) { + cohde[i] = (aec->sde[i][0] * aec->sde[i][0] + aec->sde[i][1] * aec->sde[i][1]) / + (aec->sd[i] * aec->se[i] + 1e-10f); + cohxd[i] = (aec->sxd[i][0] * aec->sxd[i][0] + aec->sxd[i][1] * aec->sxd[i][1]) / + (aec->sx[i] * aec->sd[i] + 1e-10f); + } + + hNlXdAvg = 0; + for (i = minPrefBand; i < prefBandSize + minPrefBand; i++) { + hNlXdAvg += cohxd[i]; + } + hNlXdAvg /= prefBandSize; + hNlXdAvg = 1 - hNlXdAvg; + + hNlDeAvg = 0; + for (i = minPrefBand; i < prefBandSize + minPrefBand; i++) { + hNlDeAvg += cohde[i]; + } + hNlDeAvg /= prefBandSize; + + if (hNlXdAvg < 0.75f && hNlXdAvg < aec->hNlXdAvgMin) { + aec->hNlXdAvgMin = hNlXdAvg; + } + + if (hNlDeAvg > 0.98f && hNlXdAvg > 0.9f) { + aec->stNearState = 1; + } + else if (hNlDeAvg < 0.95f || hNlXdAvg < 0.8f) { + aec->stNearState = 0; + } + + if (aec->hNlXdAvgMin == 1) { + aec->echoState = 0; + aec->overDrive = aec->minOverDrive; + 
+ if (aec->stNearState == 1) { + memcpy(hNl, cohde, sizeof(hNl)); + hNlFb = hNlDeAvg; + hNlFbLow = hNlDeAvg; + } + else { + for (i = 0; i < PART_LEN1; i++) { + hNl[i] = 1 - cohxd[i]; + } + hNlFb = hNlXdAvg; + hNlFbLow = hNlXdAvg; + } + } + else { + + if (aec->stNearState == 1) { + aec->echoState = 0; + memcpy(hNl, cohde, sizeof(hNl)); + hNlFb = hNlDeAvg; + hNlFbLow = hNlDeAvg; + } + else { + aec->echoState = 1; + for (i = 0; i < PART_LEN1; i++) { + hNl[i] = WEBRTC_SPL_MIN(cohde[i], 1 - cohxd[i]); + } + + // Select an order statistic from the preferred bands. + // TODO: Using quicksort now, but a selection algorithm may be preferred. + memcpy(hNlPref, &hNl[minPrefBand], sizeof(float) * prefBandSize); + qsort(hNlPref, prefBandSize, sizeof(float), CmpFloat); + hNlFb = hNlPref[(int)floor(prefBandQuant * (prefBandSize - 1))]; + hNlFbLow = hNlPref[(int)floor(prefBandQuantLow * (prefBandSize - 1))]; + } + } + + // Track the local filter minimum to determine suppression overdrive. + if (hNlFbLow < 0.6f && hNlFbLow < aec->hNlFbLocalMin) { + aec->hNlFbLocalMin = hNlFbLow; + aec->hNlFbMin = hNlFbLow; + aec->hNlNewMin = 1; + aec->hNlMinCtr = 0; + } + aec->hNlFbLocalMin = WEBRTC_SPL_MIN(aec->hNlFbLocalMin + 0.0008f / aec->mult, 1); + aec->hNlXdAvgMin = WEBRTC_SPL_MIN(aec->hNlXdAvgMin + 0.0006f / aec->mult, 1); + + if (aec->hNlNewMin == 1) { + aec->hNlMinCtr++; + } + if (aec->hNlMinCtr == 2) { + aec->hNlNewMin = 0; + aec->hNlMinCtr = 0; + aec->overDrive = WEBRTC_SPL_MAX(aec->targetSupp / + ((float)log(aec->hNlFbMin + 1e-10f) + 1e-10f), aec->minOverDrive); + } + + // Smooth the overdrive. + if (aec->overDrive < aec->overDriveSm) { + aec->overDriveSm = 0.99f * aec->overDriveSm + 0.01f * aec->overDrive; + } + else { + aec->overDriveSm = 0.9f * aec->overDriveSm + 0.1f * aec->overDrive; + } + + WebRtcAec_OverdriveAndSuppress(aec, hNl, hNlFb, efw); + +#ifdef G167 + if (aec->cnToggle) { + ComfortNoise(aec, efw, comfortNoiseHband, aec->noisePow, hNl); + } +#else + // Add comfort noise. 
+ ComfortNoise(aec, efw, comfortNoiseHband, aec->noisePow, hNl); +#endif + + // Inverse error fft. + fft[0] = efw[0][0]; + fft[1] = efw[0][PART_LEN]; + for (i = 1; i < PART_LEN; i++) { + fft[2*i] = efw[0][i]; + // Sign change required by Ooura fft. + fft[2*i + 1] = -efw[1][i]; + } + aec_rdft_inverse_128(fft); + + // Overlap and add to obtain output. + scale = 2.0f / PART_LEN2; + for (i = 0; i < PART_LEN; i++) { + fft[i] *= scale; // fft scaling + fft[i] = fft[i]*sqrtHanning[i] + aec->outBuf[i]; + + // Saturation protection + output[i] = (short)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, fft[i], + WEBRTC_SPL_WORD16_MIN); + + fft[PART_LEN + i] *= scale; // fft scaling + aec->outBuf[i] = fft[PART_LEN + i] * sqrtHanning[PART_LEN - i]; + } + + // For H band + if (aec->sampFreq == 32000) { + + // H band gain + // average nlp over low band: average over second half of freq spectrum + // (4->8khz) + GetHighbandGain(hNl, &nlpGainHband); + + // Inverse comfort_noise + if (flagHbandCn == 1) { + fft[0] = comfortNoiseHband[0][0]; + fft[1] = comfortNoiseHband[PART_LEN][0]; + for (i = 1; i < PART_LEN; i++) { + fft[2*i] = comfortNoiseHband[i][0]; + fft[2*i + 1] = comfortNoiseHband[i][1]; + } + aec_rdft_inverse_128(fft); + scale = 2.0f / PART_LEN2; + } + + // compute gain factor + for (i = 0; i < PART_LEN; i++) { + dtmp = (float)aec->dBufH[i]; + dtmp = (float)dtmp * nlpGainHband; // for variable gain + + // add some comfort noise where Hband is attenuated + if (flagHbandCn == 1) { + fft[i] *= scale; // fft scaling + dtmp += cnScaleHband * fft[i]; + } + + // Saturation protection + outputH[i] = (short)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, dtmp, + WEBRTC_SPL_WORD16_MIN); + } + } + + // Copy the current block to the old position. 
+ memcpy(aec->xBuf, aec->xBuf + PART_LEN, sizeof(float) * PART_LEN); + memcpy(aec->dBuf, aec->dBuf + PART_LEN, sizeof(float) * PART_LEN); + memcpy(aec->eBuf, aec->eBuf + PART_LEN, sizeof(float) * PART_LEN); + + // Copy the current block to the old position for H band + if (aec->sampFreq == 32000) { + memcpy(aec->dBufH, aec->dBufH + PART_LEN, sizeof(float) * PART_LEN); + } + + memmove(aec->xfwBuf + PART_LEN1, aec->xfwBuf, sizeof(aec->xfwBuf) - + sizeof(complex_t) * PART_LEN1); +} + +static void GetHighbandGain(const float *lambda, float *nlpGainHband) +{ + int i; + + nlpGainHband[0] = (float)0.0; + for (i = freqAvgIc; i < PART_LEN1 - 1; i++) { + nlpGainHband[0] += lambda[i]; + } + nlpGainHband[0] /= (float)(PART_LEN1 - 1 - freqAvgIc); +} + +static void ComfortNoise(aec_t *aec, float efw[2][PART_LEN1], + complex_t *comfortNoiseHband, const float *noisePow, const float *lambda) +{ + int i, num; + float rand[PART_LEN]; + float noise, noiseAvg, tmp, tmpAvg; + WebRtc_Word16 randW16[PART_LEN]; + complex_t u[PART_LEN1]; + + const float pi2 = 6.28318530717959f; + + // Generate a uniform random array on [0 1] + WebRtcSpl_RandUArray(randW16, PART_LEN, &aec->seed); + for (i = 0; i < PART_LEN; i++) { + rand[i] = ((float)randW16[i]) / 32768; + } + + // Reject LF noise + u[0][0] = 0; + u[0][1] = 0; + for (i = 1; i < PART_LEN1; i++) { + tmp = pi2 * rand[i - 1]; + + noise = sqrtf(noisePow[i]); + u[i][0] = noise * (float)cos(tmp); + u[i][1] = -noise * (float)sin(tmp); + } + u[PART_LEN][1] = 0; + + for (i = 0; i < PART_LEN1; i++) { + // This is the proper weighting to match the background noise power + tmp = sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0)); + //tmp = 1 - lambda[i]; + efw[0][i] += tmp * u[i][0]; + efw[1][i] += tmp * u[i][1]; + } + + // For H band comfort noise + // TODO: don't compute noise and "tmp" twice. Use the previous results. 
+ noiseAvg = 0.0; + tmpAvg = 0.0; + num = 0; + if (aec->sampFreq == 32000 && flagHbandCn == 1) { + + // average noise scale + // average over second half of freq spectrum (i.e., 4->8khz) + // TODO: we shouldn't need num. We know how many elements we're summing. + for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) { + num++; + noiseAvg += sqrtf(noisePow[i]); + } + noiseAvg /= (float)num; + + // average nlp scale + // average over second half of freq spectrum (i.e., 4->8khz) + // TODO: we shouldn't need num. We know how many elements we're summing. + num = 0; + for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) { + num++; + tmpAvg += sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0)); + } + tmpAvg /= (float)num; + + // Use average noise for H band + // TODO: we should probably have a new random vector here. + // Reject LF noise + u[0][0] = 0; + u[0][1] = 0; + for (i = 1; i < PART_LEN1; i++) { + tmp = pi2 * rand[i - 1]; + + // Use average noise for H band + u[i][0] = noiseAvg * (float)cos(tmp); + u[i][1] = -noiseAvg * (float)sin(tmp); + } + u[PART_LEN][1] = 0; + + for (i = 0; i < PART_LEN1; i++) { + // Use average NLP weight for H band + comfortNoiseHband[i][0] = tmpAvg * u[i][0]; + comfortNoiseHband[i][1] = tmpAvg * u[i][1]; + } + } +} + +// Buffer the farend to account for knownDelay +static void BufferFar(aec_t *aec, const short *farend, int farLen) +{ + int writeLen = farLen, writePos = 0; + + // Check if the write position must be wrapped. + while (aec->farBufWritePos + writeLen > FAR_BUF_LEN) { + + // Write to remaining buffer space before wrapping. 
+ writeLen = FAR_BUF_LEN - aec->farBufWritePos; + memcpy(aec->farBuf + aec->farBufWritePos, farend + writePos, + sizeof(short) * writeLen); + aec->farBufWritePos = 0; + writePos = writeLen; + writeLen = farLen - writeLen; + } + + memcpy(aec->farBuf + aec->farBufWritePos, farend + writePos, + sizeof(short) * writeLen); + aec->farBufWritePos += writeLen; +} + +static void FetchFar(aec_t *aec, short *farend, int farLen, int knownDelay) +{ + int readLen = farLen, readPos = 0, delayChange = knownDelay - aec->knownDelay; + + aec->farBufReadPos -= delayChange; + + // Check if delay forces a read position wrap. + while(aec->farBufReadPos < 0) { + aec->farBufReadPos += FAR_BUF_LEN; + } + while(aec->farBufReadPos > FAR_BUF_LEN - 1) { + aec->farBufReadPos -= FAR_BUF_LEN; + } + + aec->knownDelay = knownDelay; + + // Check if read position must be wrapped. + while (aec->farBufReadPos + readLen > FAR_BUF_LEN) { + + // Read from remaining buffer space before wrapping. + readLen = FAR_BUF_LEN - aec->farBufReadPos; + memcpy(farend + readPos, aec->farBuf + aec->farBufReadPos, + sizeof(short) * readLen); + aec->farBufReadPos = 0; + readPos = readLen; + readLen = farLen - readLen; + } + memcpy(farend + readPos, aec->farBuf + aec->farBufReadPos, + sizeof(short) * readLen); + aec->farBufReadPos += readLen; +} + +static void WebRtcAec_InitLevel(power_level_t *level) +{ + const float bigFloat = 1E17f; + + level->averagelevel = 0; + level->framelevel = 0; + level->minlevel = bigFloat; + level->frsum = 0; + level->sfrsum = 0; + level->frcounter = 0; + level->sfrcounter = 0; +} + +static void WebRtcAec_InitStats(stats_t *stats) +{ + stats->instant = offsetLevel; + stats->average = offsetLevel; + stats->max = offsetLevel; + stats->min = offsetLevel * (-1); + stats->sum = 0; + stats->hisum = 0; + stats->himean = offsetLevel; + stats->counter = 0; + stats->hicounter = 0; +} + +static void UpdateLevel(power_level_t *level, const short *in) +{ + int k; + + for (k = 0; k < PART_LEN; k++) { + 
level->sfrsum += in[k] * in[k]; + } + level->sfrcounter++; + + if (level->sfrcounter > subCountLen) { + level->framelevel = level->sfrsum / (subCountLen * PART_LEN); + level->sfrsum = 0; + level->sfrcounter = 0; + + if (level->framelevel > 0) { + if (level->framelevel < level->minlevel) { + level->minlevel = level->framelevel; // New minimum + } else { + level->minlevel *= (1 + 0.001f); // Small increase + } + } + level->frcounter++; + level->frsum += level->framelevel; + + if (level->frcounter > countLen) { + level->averagelevel = level->frsum / countLen; + level->frsum = 0; + level->frcounter = 0; + } + + } +} + +static void UpdateMetrics(aec_t *aec) +{ + float dtmp, dtmp2, dtmp3; + + const float actThresholdNoisy = 8.0f; + const float actThresholdClean = 40.0f; + const float safety = 0.99995f; + const float noisyPower = 300000.0f; + + float actThreshold; + float echo, suppressedEcho; + + if (aec->echoState) { // Check if echo is likely present + aec->stateCounter++; + } + + if (aec->farlevel.frcounter == countLen) { + + if (aec->farlevel.minlevel < noisyPower) { + actThreshold = actThresholdClean; + } + else { + actThreshold = actThresholdNoisy; + } + + if ((aec->stateCounter > (0.5f * countLen * subCountLen)) + && (aec->farlevel.sfrcounter == 0) + + // Estimate in active far-end segments only + && (aec->farlevel.averagelevel > (actThreshold * aec->farlevel.minlevel)) + ) { + + // Subtract noise power + echo = aec->nearlevel.averagelevel - safety * aec->nearlevel.minlevel; + + // ERL + dtmp = 10 * (float)log10(aec->farlevel.averagelevel / + aec->nearlevel.averagelevel + 1e-10f); + dtmp2 = 10 * (float)log10(aec->farlevel.averagelevel / echo + 1e-10f); + + aec->erl.instant = dtmp; + if (dtmp > aec->erl.max) { + aec->erl.max = dtmp; + } + + if (dtmp < aec->erl.min) { + aec->erl.min = dtmp; + } + + aec->erl.counter++; + aec->erl.sum += dtmp; + aec->erl.average = aec->erl.sum / aec->erl.counter; + + // Upper mean + if (dtmp > aec->erl.average) { + 
aec->erl.hicounter++; + aec->erl.hisum += dtmp; + aec->erl.himean = aec->erl.hisum / aec->erl.hicounter; + } + + // A_NLP + dtmp = 10 * (float)log10(aec->nearlevel.averagelevel / + aec->linoutlevel.averagelevel + 1e-10f); + + // subtract noise power + suppressedEcho = aec->linoutlevel.averagelevel - safety * aec->linoutlevel.minlevel; + + dtmp2 = 10 * (float)log10(echo / suppressedEcho + 1e-10f); + dtmp3 = 10 * (float)log10(aec->nearlevel.averagelevel / suppressedEcho + 1e-10f); + + aec->aNlp.instant = dtmp2; + if (dtmp > aec->aNlp.max) { + aec->aNlp.max = dtmp; + } + + if (dtmp < aec->aNlp.min) { + aec->aNlp.min = dtmp; + } + + aec->aNlp.counter++; + aec->aNlp.sum += dtmp; + aec->aNlp.average = aec->aNlp.sum / aec->aNlp.counter; + + // Upper mean + if (dtmp > aec->aNlp.average) { + aec->aNlp.hicounter++; + aec->aNlp.hisum += dtmp; + aec->aNlp.himean = aec->aNlp.hisum / aec->aNlp.hicounter; + } + + // ERLE + + // subtract noise power + suppressedEcho = aec->nlpoutlevel.averagelevel - safety * aec->nlpoutlevel.minlevel; + + dtmp = 10 * (float)log10(aec->nearlevel.averagelevel / + aec->nlpoutlevel.averagelevel + 1e-10f); + dtmp2 = 10 * (float)log10(echo / suppressedEcho + 1e-10f); + + dtmp = dtmp2; + aec->erle.instant = dtmp; + if (dtmp > aec->erle.max) { + aec->erle.max = dtmp; + } + + if (dtmp < aec->erle.min) { + aec->erle.min = dtmp; + } + + aec->erle.counter++; + aec->erle.sum += dtmp; + aec->erle.average = aec->erle.sum / aec->erle.counter; + + // Upper mean + if (dtmp > aec->erle.average) { + aec->erle.hicounter++; + aec->erle.hisum += dtmp; + aec->erle.himean = aec->erle.hisum / aec->erle.hicounter; + } + } + + aec->stateCounter = 0; + } +} + diff --git a/src/modules/audio_processing/aec/main/source/aec_core.h b/src/modules/audio_processing/aec/main/source/aec_core.h new file mode 100644 index 0000000000..3386b92fca --- /dev/null +++ b/src/modules/audio_processing/aec/main/source/aec_core.h @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2011 The WebRTC project 
authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * Specifies the interface for the AEC core. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_CORE_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_CORE_H_ + +#include <stdio.h> +#include "typedefs.h" +#include "signal_processing_library.h" + +//#define G167 // for running G167 tests +//#define UNCONSTR // time-unconstrained filter +//#define AEC_DEBUG // for recording files + +#define FRAME_LEN 80 +#define PART_LEN 64 // Length of partition +#define PART_LEN1 (PART_LEN + 1) // Unique fft coefficients +#define PART_LEN2 (PART_LEN * 2) // Length of partition * 2 +#define NR_PART 12 // Number of partitions +#define FILT_LEN (PART_LEN * NR_PART) // Filter length +#define FILT_LEN2 (FILT_LEN * 2) // Double filter length +#define FAR_BUF_LEN (FILT_LEN2 * 2) +#define PREF_BAND_SIZE 24 + +#define BLOCKL_MAX FRAME_LEN + +typedef float complex_t[2]; +// For performance reasons, some arrays of complex numbers are replaced by twice +// as long arrays of float, all the real parts followed by all the imaginary +// ones (complex_t[SIZE] -> float[2][SIZE]). This allows SIMD optimizations and +// is better than two arrays (one for the real parts and one for the imaginary +// parts) as this other way would require two pointers instead of one and cause +// extra register spilling. This also allows the offsets to be calculated at +// compile time. 
+ +// Metrics +enum {offsetLevel = -100}; + +typedef struct { + float sfrsum; + int sfrcounter; + float framelevel; + float frsum; + int frcounter; + float minlevel; + float averagelevel; +} power_level_t; + +typedef struct { + float instant; + float average; + float min; + float max; + float sum; + float hisum; + float himean; + int counter; + int hicounter; +} stats_t; + +typedef struct { + int farBufWritePos, farBufReadPos; + + int knownDelay; + int inSamples, outSamples; + int delayEstCtr; + + void *farFrBuf, *nearFrBuf, *outFrBuf; + + void *nearFrBufH; + void *outFrBufH; + + float xBuf[PART_LEN2]; // farend + float dBuf[PART_LEN2]; // nearend + float eBuf[PART_LEN2]; // error + + float dBufH[PART_LEN2]; // nearend + + float xPow[PART_LEN1]; + float dPow[PART_LEN1]; + float dMinPow[PART_LEN1]; + float dInitMinPow[PART_LEN1]; + float *noisePow; +#ifdef FFTW + float fftR[PART_LEN2]; + fftw_complex fftC[PART_LEN2]; + fftw_plan fftPlan, ifftPlan; + + fftw_complex xfBuf[NR_PART * PART_LEN1]; + fftw_complex wfBuf[NR_PART * PART_LEN1]; + fftw_complex sde[PART_LEN1]; +#else + float xfBuf[2][NR_PART * PART_LEN1]; // farend fft buffer + float wfBuf[2][NR_PART * PART_LEN1]; // filter fft + complex_t sde[PART_LEN1]; // cross-psd of nearend and error + complex_t sxd[PART_LEN1]; // cross-psd of farend and nearend + complex_t xfwBuf[NR_PART * PART_LEN1]; // farend windowed fft buffer +#endif + float sx[PART_LEN1], sd[PART_LEN1], se[PART_LEN1]; // far, near and error psd + float hNs[PART_LEN1]; + float hNlFbMin, hNlFbLocalMin; + float hNlXdAvgMin; + int hNlNewMin, hNlMinCtr; + float overDrive, overDriveSm; + float targetSupp, minOverDrive; + float outBuf[PART_LEN]; + int delayIdx; + + short stNearState, echoState; + short divergeState; + + int xfBufBlockPos; + + short farBuf[FILT_LEN2 * 2]; + + short mult; // sampling frequency multiple + int sampFreq; + WebRtc_UWord32 seed; + + float mu; // stepsize + float errThresh; // error threshold + + int noiseEstCtr; + + // Toggles for 
G.167 testing +#ifdef G167 + short adaptToggle; // Filter adaptation + short nlpToggle; // Nonlinear processing + short cnToggle; // Comfort noise +#endif + + power_level_t farlevel; + power_level_t nearlevel; + power_level_t linoutlevel; + power_level_t nlpoutlevel; + + int metricsMode; + int stateCounter; + stats_t erl; + stats_t erle; + stats_t aNlp; + stats_t rerl; + + // Quantities to control H band scaling for SWB input + int freq_avg_ic; //initial bin for averaging nlp gain + int flag_Hband_cn; //for comfort noise + float cn_scale_Hband; //scale for comfort noise in H band + +#ifdef AEC_DEBUG + FILE *farFile; + FILE *nearFile; + FILE *outFile; + FILE *outLpFile; +#endif +} aec_t; + +typedef void (*WebRtcAec_FilterFar_t)(aec_t *aec, float yf[2][PART_LEN1]); +extern WebRtcAec_FilterFar_t WebRtcAec_FilterFar; +typedef void (*WebRtcAec_ScaleErrorSignal_t)(aec_t *aec, float ef[2][PART_LEN1]); +extern WebRtcAec_ScaleErrorSignal_t WebRtcAec_ScaleErrorSignal; +#define IP_LEN PART_LEN // this must be at least ceil(2 + sqrt(PART_LEN)) +#define W_LEN PART_LEN +typedef void (*WebRtcAec_FilterAdaptation_t) + (aec_t *aec, float *fft, float ef[2][PART_LEN1]); +extern WebRtcAec_FilterAdaptation_t WebRtcAec_FilterAdaptation; +typedef void (*WebRtcAec_OverdriveAndSuppress_t) + (aec_t *aec, float hNl[PART_LEN1], const float hNlFb, float efw[2][PART_LEN1]); +extern WebRtcAec_OverdriveAndSuppress_t WebRtcAec_OverdriveAndSuppress; + +int WebRtcAec_CreateAec(aec_t **aec); +int WebRtcAec_FreeAec(aec_t *aec); +int WebRtcAec_InitAec(aec_t *aec, int sampFreq); +void WebRtcAec_InitAec_SSE2(void); + +void WebRtcAec_InitMetrics(aec_t *aec); +void WebRtcAec_ProcessFrame(aec_t *aec, const short *farend, + const short *nearend, const short *nearendH, + short *out, short *outH, + int knownDelay); + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_CORE_H_ + diff --git a/src/modules/audio_processing/aec/main/source/aec_core_sse2.c 
b/src/modules/audio_processing/aec/main/source/aec_core_sse2.c new file mode 100644 index 0000000000..524669fe90 --- /dev/null +++ b/src/modules/audio_processing/aec/main/source/aec_core_sse2.c @@ -0,0 +1,435 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * The core AEC algorithm, SSE2 version of speed-critical functions. + */ + +#if defined(__SSE2__) +#include <emmintrin.h> +#include <math.h> + +#include "aec_core.h" +#include "aec_rdft.h" + +__inline static float MulRe(float aRe, float aIm, float bRe, float bIm) +{ + return aRe * bRe - aIm * bIm; +} + +__inline static float MulIm(float aRe, float aIm, float bRe, float bIm) +{ + return aRe * bIm + aIm * bRe; +} + +static void FilterFarSSE2(aec_t *aec, float yf[2][PART_LEN1]) +{ + int i; + for (i = 0; i < NR_PART; i++) { + int j; + int xPos = (i + aec->xfBufBlockPos) * PART_LEN1; + int pos = i * PART_LEN1; + // Check for wrap + if (i + aec->xfBufBlockPos >= NR_PART) { + xPos -= NR_PART*(PART_LEN1); + } + + // vectorized code (four at once) + for (j = 0; j + 3 < PART_LEN1; j += 4) { + const __m128 xfBuf_re = _mm_loadu_ps(&aec->xfBuf[0][xPos + j]); + const __m128 xfBuf_im = _mm_loadu_ps(&aec->xfBuf[1][xPos + j]); + const __m128 wfBuf_re = _mm_loadu_ps(&aec->wfBuf[0][pos + j]); + const __m128 wfBuf_im = _mm_loadu_ps(&aec->wfBuf[1][pos + j]); + const __m128 yf_re = _mm_loadu_ps(&yf[0][j]); + const __m128 yf_im = _mm_loadu_ps(&yf[1][j]); + const __m128 a = _mm_mul_ps(xfBuf_re, wfBuf_re); + const __m128 b = _mm_mul_ps(xfBuf_im, wfBuf_im); + const __m128 c = _mm_mul_ps(xfBuf_re, wfBuf_im); + const __m128 d = _mm_mul_ps(xfBuf_im, wfBuf_re); + const 
__m128 e = _mm_sub_ps(a, b); + const __m128 f = _mm_add_ps(c, d); + const __m128 g = _mm_add_ps(yf_re, e); + const __m128 h = _mm_add_ps(yf_im, f); + _mm_storeu_ps(&yf[0][j], g); + _mm_storeu_ps(&yf[1][j], h); + } + // scalar code for the remaining items. + for (; j < PART_LEN1; j++) { + yf[0][j] += MulRe(aec->xfBuf[0][xPos + j], aec->xfBuf[1][xPos + j], + aec->wfBuf[0][ pos + j], aec->wfBuf[1][ pos + j]); + yf[1][j] += MulIm(aec->xfBuf[0][xPos + j], aec->xfBuf[1][xPos + j], + aec->wfBuf[0][ pos + j], aec->wfBuf[1][ pos + j]); + } + } +} + +static void ScaleErrorSignalSSE2(aec_t *aec, float ef[2][PART_LEN1]) +{ + const __m128 k1e_10f = _mm_set1_ps(1e-10f); + const __m128 kThresh = _mm_set1_ps(aec->errThresh); + const __m128 kMu = _mm_set1_ps(aec->mu); + + int i; + // vectorized code (four at once) + for (i = 0; i + 3 < PART_LEN1; i += 4) { + const __m128 xPow = _mm_loadu_ps(&aec->xPow[i]); + const __m128 ef_re_base = _mm_loadu_ps(&ef[0][i]); + const __m128 ef_im_base = _mm_loadu_ps(&ef[1][i]); + + const __m128 xPowPlus = _mm_add_ps(xPow, k1e_10f); + __m128 ef_re = _mm_div_ps(ef_re_base, xPowPlus); + __m128 ef_im = _mm_div_ps(ef_im_base, xPowPlus); + const __m128 ef_re2 = _mm_mul_ps(ef_re, ef_re); + const __m128 ef_im2 = _mm_mul_ps(ef_im, ef_im); + const __m128 ef_sum2 = _mm_add_ps(ef_re2, ef_im2); + const __m128 absEf = _mm_sqrt_ps(ef_sum2); + const __m128 bigger = _mm_cmpgt_ps(absEf, kThresh); + __m128 absEfPlus = _mm_add_ps(absEf, k1e_10f); + const __m128 absEfInv = _mm_div_ps(kThresh, absEfPlus); + __m128 ef_re_if = _mm_mul_ps(ef_re, absEfInv); + __m128 ef_im_if = _mm_mul_ps(ef_im, absEfInv); + ef_re_if = _mm_and_ps(bigger, ef_re_if); + ef_im_if = _mm_and_ps(bigger, ef_im_if); + ef_re = _mm_andnot_ps(bigger, ef_re); + ef_im = _mm_andnot_ps(bigger, ef_im); + ef_re = _mm_or_ps(ef_re, ef_re_if); + ef_im = _mm_or_ps(ef_im, ef_im_if); + ef_re = _mm_mul_ps(ef_re, kMu); + ef_im = _mm_mul_ps(ef_im, kMu); + + _mm_storeu_ps(&ef[0][i], ef_re); + _mm_storeu_ps(&ef[1][i], 
ef_im); + } + // scalar code for the remaining items. + for (; i < (PART_LEN1); i++) { + float absEf; + ef[0][i] /= (aec->xPow[i] + 1e-10f); + ef[1][i] /= (aec->xPow[i] + 1e-10f); + absEf = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]); + + if (absEf > aec->errThresh) { + absEf = aec->errThresh / (absEf + 1e-10f); + ef[0][i] *= absEf; + ef[1][i] *= absEf; + } + + // Stepsize factor + ef[0][i] *= aec->mu; + ef[1][i] *= aec->mu; + } +} + +static void FilterAdaptationSSE2(aec_t *aec, float *fft, float ef[2][PART_LEN1]) { + int i, j; + for (i = 0; i < NR_PART; i++) { + int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1); + int pos = i * PART_LEN1; + // Check for wrap + if (i + aec->xfBufBlockPos >= NR_PART) { + xPos -= NR_PART * PART_LEN1; + } + +#ifdef UNCONSTR + for (j = 0; j < PART_LEN1; j++) { + aec->wfBuf[pos + j][0] += MulRe(aec->xfBuf[xPos + j][0], + -aec->xfBuf[xPos + j][1], + ef[j][0], ef[j][1]); + aec->wfBuf[pos + j][1] += MulIm(aec->xfBuf[xPos + j][0], + -aec->xfBuf[xPos + j][1], + ef[j][0], ef[j][1]); + } +#else + // Process the whole array... + for (j = 0; j < PART_LEN; j+= 4) { + // Load xfBuf and ef. + const __m128 xfBuf_re = _mm_loadu_ps(&aec->xfBuf[0][xPos + j]); + const __m128 xfBuf_im = _mm_loadu_ps(&aec->xfBuf[1][xPos + j]); + const __m128 ef_re = _mm_loadu_ps(&ef[0][j]); + const __m128 ef_im = _mm_loadu_ps(&ef[1][j]); + // Calculate the product of conjugate(xfBuf) by ef. + // re(conjugate(a) * b) = aRe * bRe + aIm * bIm + // im(conjugate(a) * b)= aRe * bIm - aIm * bRe + const __m128 a = _mm_mul_ps(xfBuf_re, ef_re); + const __m128 b = _mm_mul_ps(xfBuf_im, ef_im); + const __m128 c = _mm_mul_ps(xfBuf_re, ef_im); + const __m128 d = _mm_mul_ps(xfBuf_im, ef_re); + const __m128 e = _mm_add_ps(a, b); + const __m128 f = _mm_sub_ps(c, d); + // Interleave real and imaginary parts. + const __m128 g = _mm_unpacklo_ps(e, f); + const __m128 h = _mm_unpackhi_ps(e, f); + // Store + _mm_storeu_ps(&fft[2*j + 0], g); + _mm_storeu_ps(&fft[2*j + 4], h); + } + // ... 
and fixup the first imaginary entry. + fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN], + -aec->xfBuf[1][xPos + PART_LEN], + ef[0][PART_LEN], ef[1][PART_LEN]); + + aec_rdft_inverse_128(fft); + memset(fft + PART_LEN, 0, sizeof(float)*PART_LEN); + + // fft scaling + { + float scale = 2.0f / PART_LEN2; + const __m128 scale_ps = _mm_load_ps1(&scale); + for (j = 0; j < PART_LEN; j+=4) { + const __m128 fft_ps = _mm_loadu_ps(&fft[j]); + const __m128 fft_scale = _mm_mul_ps(fft_ps, scale_ps); + _mm_storeu_ps(&fft[j], fft_scale); + } + } + aec_rdft_forward_128(fft); + + { + float wt1 = aec->wfBuf[1][pos]; + aec->wfBuf[0][pos + PART_LEN] += fft[1]; + for (j = 0; j < PART_LEN; j+= 4) { + __m128 wtBuf_re = _mm_loadu_ps(&aec->wfBuf[0][pos + j]); + __m128 wtBuf_im = _mm_loadu_ps(&aec->wfBuf[1][pos + j]); + const __m128 fft0 = _mm_loadu_ps(&fft[2 * j + 0]); + const __m128 fft4 = _mm_loadu_ps(&fft[2 * j + 4]); + const __m128 fft_re = _mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(2, 0, 2 ,0)); + const __m128 fft_im = _mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(3, 1, 3 ,1)); + wtBuf_re = _mm_add_ps(wtBuf_re, fft_re); + wtBuf_im = _mm_add_ps(wtBuf_im, fft_im); + _mm_storeu_ps(&aec->wfBuf[0][pos + j], wtBuf_re); + _mm_storeu_ps(&aec->wfBuf[1][pos + j], wtBuf_im); + } + aec->wfBuf[1][pos] = wt1; + } +#endif // UNCONSTR + } +} + +#ifdef _MSC_VER /* visual c++ */ +# define ALIGN16_BEG __declspec(align(16)) +# define ALIGN16_END +#else /* gcc or icc */ +# define ALIGN16_BEG +# define ALIGN16_END __attribute__((aligned(16))) +#endif + +static __m128 mm_pow_ps(__m128 a, __m128 b) +{ + // a^b = exp2(b * log2(a)) + // exp2(x) and log2(x) are calculated using polynomial approximations. + __m128 log2_a, b_log2_a, a_exp_b; + + // Calculate log2(x), x = a. + { + // To calculate log2(x), we decompose x like this: + // x = y * 2^n + // n is an integer + // y is in the [1.0, 2.0) range + // + // log2(x) = log2(y) + n + // n can be evaluated by playing with float representation. 
+ // log2(y) in a small range can be approximated, this code uses an order + // five polynomial approximation. The coefficients have been + // estimated with the Remez algorithm and the resulting + // polynomial has a maximum relative error of 0.00086%. + + // Compute n. + // This is done by masking the exponent, shifting it into the top bit of + // the mantissa, putting eight into the biased exponent (to shift/ + // compensate for the fact that the exponent has been shifted in the top/ + // fractional part) and finally getting rid of the implicit leading one + // from the mantissa by subtracting it out. + static const ALIGN16_BEG int float_exponent_mask[4] ALIGN16_END = + {0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000}; + static const ALIGN16_BEG int eight_biased_exponent[4] ALIGN16_END = + {0x43800000, 0x43800000, 0x43800000, 0x43800000}; + static const ALIGN16_BEG int implicit_leading_one[4] ALIGN16_END = + {0x43BF8000, 0x43BF8000, 0x43BF8000, 0x43BF8000}; + static const int shift_exponent_into_top_mantissa = 8; + const __m128 two_n = _mm_and_ps(a, *((__m128 *)float_exponent_mask)); + const __m128 n_1 = (__m128)_mm_srli_epi32((__m128i)two_n, + shift_exponent_into_top_mantissa); + const __m128 n_0 = _mm_or_ps( + (__m128)n_1, *((__m128 *)eight_biased_exponent)); + const __m128 n = _mm_sub_ps(n_0, *((__m128 *)implicit_leading_one)); + + // Compute y. + static const ALIGN16_BEG int mantissa_mask[4] ALIGN16_END = + {0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF}; + static const ALIGN16_BEG int zero_biased_exponent_is_one[4] ALIGN16_END = + {0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000}; + const __m128 mantissa = _mm_and_ps(a, *((__m128 *)mantissa_mask)); + const __m128 y = _mm_or_ps( + mantissa, *((__m128 *)zero_biased_exponent_is_one)); + + // Approximate log2(y) ~= (y - 1) * pol5(y). 
+ // pol5(y) = C5 * y^5 + C4 * y^4 + C3 * y^3 + C2 * y^2 + C1 * y + C0 + static const ALIGN16_BEG float ALIGN16_END C5[4] = + {-3.4436006e-2f, -3.4436006e-2f, -3.4436006e-2f, -3.4436006e-2f}; + static const ALIGN16_BEG float ALIGN16_END C4[4] = + {3.1821337e-1f, 3.1821337e-1f, 3.1821337e-1f, 3.1821337e-1f}; + static const ALIGN16_BEG float ALIGN16_END C3[4] = + {-1.2315303f, -1.2315303f, -1.2315303f, -1.2315303f}; + static const ALIGN16_BEG float ALIGN16_END C2[4] = + {2.5988452f, 2.5988452f, 2.5988452f, 2.5988452f}; + static const ALIGN16_BEG float ALIGN16_END C1[4] = + {-3.3241990f, -3.3241990f, -3.3241990f, -3.3241990f}; + static const ALIGN16_BEG float ALIGN16_END C0[4] = + {3.1157899f, 3.1157899f, 3.1157899f, 3.1157899f}; + const __m128 pol5_y_0 = _mm_mul_ps(y, *((__m128 *)C5)); + const __m128 pol5_y_1 = _mm_add_ps(pol5_y_0, *((__m128 *)C4)); + const __m128 pol5_y_2 = _mm_mul_ps(pol5_y_1, y); + const __m128 pol5_y_3 = _mm_add_ps(pol5_y_2, *((__m128 *)C3)); + const __m128 pol5_y_4 = _mm_mul_ps(pol5_y_3, y); + const __m128 pol5_y_5 = _mm_add_ps(pol5_y_4, *((__m128 *)C2)); + const __m128 pol5_y_6 = _mm_mul_ps(pol5_y_5, y); + const __m128 pol5_y_7 = _mm_add_ps(pol5_y_6, *((__m128 *)C1)); + const __m128 pol5_y_8 = _mm_mul_ps(pol5_y_7, y); + const __m128 pol5_y = _mm_add_ps(pol5_y_8, *((__m128 *)C0)); + const __m128 y_minus_one = _mm_sub_ps( + y, *((__m128 *)zero_biased_exponent_is_one)); + const __m128 log2_y = _mm_mul_ps(y_minus_one , pol5_y); + + // Combine parts. + log2_a = _mm_add_ps(n, log2_y); + } + + // b * log2(a) + b_log2_a = _mm_mul_ps(b, log2_a); + + // Calculate exp2(x), x = b * log2(a). + { + // To calculate 2^x, we decompose x like this: + // x = n + y + // n is an integer, the value of x - 0.5 rounded down, therefore + // y is in the [0.5, 1.5) range + // + // 2^x = 2^n * 2^y + // 2^n can be evaluated by playing with float representation. + // 2^y in a small range can be approximated, this code uses an order two + // polynomial approximation. 
The coefficients have been estimated + // with the Remez algorithm and the resulting polynomial has a + // maximum relative error of 0.17%. + + // To avoid over/underflow, we reduce the range of input to ]-127, 129]. + static const ALIGN16_BEG float max_input[4] ALIGN16_END = + {129.f, 129.f, 129.f, 129.f}; + static const ALIGN16_BEG float min_input[4] ALIGN16_END = + {-126.99999f, -126.99999f, -126.99999f, -126.99999f}; + const __m128 x_min = _mm_min_ps(b_log2_a, *((__m128 *)max_input)); + const __m128 x_max = _mm_max_ps(x_min, *((__m128 *)min_input)); + // Compute n. + static const ALIGN16_BEG float half[4] ALIGN16_END = + {0.5f, 0.5f, 0.5f, 0.5f}; + const __m128 x_minus_half = _mm_sub_ps(x_max, *((__m128 *)half)); + const __m128i x_minus_half_floor = _mm_cvtps_epi32(x_minus_half); + // Compute 2^n. + static const ALIGN16_BEG int float_exponent_bias[4] ALIGN16_END = + {127, 127, 127, 127}; + static const int float_exponent_shift = 23; + const __m128i two_n_exponent = _mm_add_epi32( + x_minus_half_floor, *((__m128i *)float_exponent_bias)); + const __m128 two_n = (__m128)_mm_slli_epi32( + two_n_exponent, float_exponent_shift); + // Compute y. + const __m128 y = _mm_sub_ps(x_max, _mm_cvtepi32_ps(x_minus_half_floor)); + // Approximate 2^y ~= C2 * y^2 + C1 * y + C0. + static const ALIGN16_BEG float C2[4] ALIGN16_END = + {3.3718944e-1f, 3.3718944e-1f, 3.3718944e-1f, 3.3718944e-1f}; + static const ALIGN16_BEG float C1[4] ALIGN16_END = + {6.5763628e-1f, 6.5763628e-1f, 6.5763628e-1f, 6.5763628e-1f}; + static const ALIGN16_BEG float C0[4] ALIGN16_END = + {1.0017247f, 1.0017247f, 1.0017247f, 1.0017247f}; + const __m128 exp2_y_0 = _mm_mul_ps(y, *((__m128 *)C2)); + const __m128 exp2_y_1 = _mm_add_ps(exp2_y_0, *((__m128 *)C1)); + const __m128 exp2_y_2 = _mm_mul_ps(exp2_y_1, y); + const __m128 exp2_y = _mm_add_ps(exp2_y_2, *((__m128 *)C0)); + + // Combine parts. 
+    a_exp_b = _mm_mul_ps(exp2_y, two_n);
+  }
+  return a_exp_b;
+}
+
+// Per-bin curves, declared extern here. Both are indexed by the same bin
+// index as hNl[] in the function below.
+extern const float WebRtcAec_weightCurve[65];
+extern const float WebRtcAec_overDriveCurve[65];
+
+// SSE2 implementation of the overdrive-and-suppress step.
+//
+// For every bin i in [0, PART_LEN1):
+//   1. If hNl[i] > hNlFb, hNl[i] is replaced by a blend of hNlFb and hNl[i]
+//      weighted by WebRtcAec_weightCurve[i].
+//   2. hNl[i] is raised to the power
+//      aec->overDriveSm * WebRtcAec_overDriveCurve[i].
+//   3. efw[0][i] and efw[1][i] are multiplied by the resulting hNl[i], and
+//      efw[1][i] is additionally negated.
+//
+// hNl and efw are updated in place. Bins are processed four at a time with
+// SSE intrinsics; whatever is left over is handled by the scalar loop at the
+// end. Note that the vector path uses mm_pow_ps() (a polynomial
+// approximation, see its comments) while the scalar path uses powf().
+static void OverdriveAndSuppressSSE2(aec_t *aec, float hNl[PART_LEN1],
+                                     const float hNlFb,
+                                     float efw[2][PART_LEN1]) {
+  int i;
+  const __m128 vec_hNlFb = _mm_set1_ps(hNlFb);
+  const __m128 vec_one = _mm_set1_ps(1.0f);
+  const __m128 vec_minus_one = _mm_set1_ps(-1.0f);
+  const __m128 vec_overDriveSm = _mm_set1_ps(aec->overDriveSm);
+  // vectorized code (four at once)
+  for (i = 0; i + 3 < PART_LEN1; i+=4) {
+    // Weight subbands
+    __m128 vec_hNl = _mm_loadu_ps(&hNl[i]);
+    const __m128 vec_weightCurve = _mm_loadu_ps(&WebRtcAec_weightCurve[i]);
+    // Per-lane mask: all ones where hNl > hNlFb, all zeros elsewhere.
+    const __m128 bigger = _mm_cmpgt_ps(vec_hNl, vec_hNlFb);
+    const __m128 vec_weightCurve_hNlFb = _mm_mul_ps(
+        vec_weightCurve, vec_hNlFb);
+    const __m128 vec_one_weightCurve = _mm_sub_ps(vec_one, vec_weightCurve);
+    const __m128 vec_one_weightCurve_hNl = _mm_mul_ps(
+        vec_one_weightCurve, vec_hNl);
+    // Branch-free select: keep hNl where the mask is clear, take the blended
+    // value where it is set.
+    const __m128 vec_if0 = _mm_andnot_ps(bigger, vec_hNl);
+    const __m128 vec_if1 = _mm_and_ps(
+        bigger, _mm_add_ps(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl));
+    vec_hNl = _mm_or_ps(vec_if0, vec_if1);
+
+    {
+      const __m128 vec_overDriveCurve = _mm_loadu_ps(
+          &WebRtcAec_overDriveCurve[i]);
+      const __m128 vec_overDriveSm_overDriveCurve = _mm_mul_ps(
+          vec_overDriveSm, vec_overDriveCurve);
+      vec_hNl = mm_pow_ps(vec_hNl, vec_overDriveSm_overDriveCurve);
+      _mm_storeu_ps(&hNl[i], vec_hNl);
+    }
+
+    // Suppress error signal
+    {
+      __m128 vec_efw_re = _mm_loadu_ps(&efw[0][i]);
+      __m128 vec_efw_im = _mm_loadu_ps(&efw[1][i]);
+      vec_efw_re = _mm_mul_ps(vec_efw_re, vec_hNl);
+      vec_efw_im = _mm_mul_ps(vec_efw_im, vec_hNl);
+
+      // Ooura fft returns incorrect sign on imaginary component. It matters
+      // here because we are making an additive change with comfort noise.
+      vec_efw_im = _mm_mul_ps(vec_efw_im, vec_minus_one);
+      _mm_storeu_ps(&efw[0][i], vec_efw_re);
+      _mm_storeu_ps(&efw[1][i], vec_efw_im);
+    }
+  }
+  // scalar code for the remaining items.
+  for (; i < PART_LEN1; i++) {
+    // Weight subbands
+    if (hNl[i] > hNlFb) {
+      hNl[i] = WebRtcAec_weightCurve[i] * hNlFb +
+          (1 - WebRtcAec_weightCurve[i]) * hNl[i];
+    }
+    hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]);
+
+    // Suppress error signal
+    efw[0][i] *= hNl[i];
+    efw[1][i] *= hNl[i];
+
+    // Ooura fft returns incorrect sign on imaginary component. It matters
+    // here because we are making an additive change with comfort noise.
+    efw[1][i] *= -1;
+  }
+}
+
+// Points the four AEC function pointers at the SSE2 implementations defined
+// in this file.
+void WebRtcAec_InitAec_SSE2(void) {
+  WebRtcAec_FilterFar = FilterFarSSE2;
+  WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2;
+  WebRtcAec_FilterAdaptation = FilterAdaptationSSE2;
+  WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2;
+}
+
+#endif //__SSE2__
diff --git a/src/modules/audio_processing/aec/main/source/aec_rdft.c b/src/modules/audio_processing/aec/main/source/aec_rdft.c
new file mode 100644
index 0000000000..072a1c45c1
--- /dev/null
+++ b/src/modules/audio_processing/aec/main/source/aec_rdft.c
@@ -0,0 +1,522 @@
+/*
+ * http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
+ * Copyright Takuya OOURA, 1996-2001
+ *
+ * You may use, copy, modify and distribute this code for any purpose (include
+ * commercial use) and without fee. Please refer to this package when you modify
+ * this code.
+ *
+ * Changes by the WebRTC authors:
+ * - Trivial type modifications.
+ * - Minimal code subset to do rdft of length 128.
+ * - Optimizations because of known length.
+ *
+ * All changes are covered by the WebRTC license and IP grant:
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS.
+ * All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+
+#include "aec_rdft.h"
+#include "system_wrappers/interface/cpu_features_wrapper.h"
+
+// Shared lookup table. makewt_32() fills rdft_w[0..31] and makect_32() fills
+// rdft_w[32..63]; both are run from aec_rdft_init().
+float rdft_w[64];
+// Integer work area. ip[0] and ip[1] are written by makewt_32()/makect_32();
+// ip + 2 is handed to bitrv2_32or128() as its index scratch table.
+static int ip[16];
+
+// Reorders the interleaved (re, im) pairs of |a| in place.
+// |ip| is rebuilt as an index table on every call and then used to pick the
+// pairs that get swapped. NOTE(review): this matches bitrv2() (bit-reversal
+// permutation) from Ooura's FFT package - confirm against the original.
+static void bitrv2_32or128(int n, int *ip, float *a) {
+  // n is 32 or 128
+  int j, j1, k, k1, m, m2;
+  float xr, xi, yr, yi;
+
+  ip[0] = 0;
+  {
+    int l = n;
+    m = 1;
+    while ((m << 3) < l) {
+      l >>= 1;
+      for (j = 0; j < m; j++) {
+        ip[m + j] = ip[j] + l;
+      }
+      m <<= 1;
+    }
+  }
+  m2 = 2 * m;
+  for (k = 0; k < m; k++) {
+    for (j = 0; j < k; j++) {
+      j1 = 2 * j + ip[k];
+      k1 = 2 * k + ip[j];
+      xr = a[j1];
+      xi = a[j1 + 1];
+      yr = a[k1];
+      yi = a[k1 + 1];
+      a[j1] = yr;
+      a[j1 + 1] = yi;
+      a[k1] = xr;
+      a[k1 + 1] = xi;
+      j1 += m2;
+      k1 += 2 * m2;
+      xr = a[j1];
+      xi = a[j1 + 1];
+      yr = a[k1];
+      yi = a[k1 + 1];
+      a[j1] = yr;
+      a[j1 + 1] = yi;
+      a[k1] = xr;
+      a[k1 + 1] = xi;
+      j1 += m2;
+      k1 -= m2;
+      xr = a[j1];
+      xi = a[j1 + 1];
+      yr = a[k1];
+      yi = a[k1 + 1];
+      a[j1] = yr;
+      a[j1 + 1] = yi;
+      a[k1] = xr;
+      a[k1 + 1] = xi;
+      j1 += m2;
+      k1 += 2 * m2;
+      xr = a[j1];
+      xi = a[j1 + 1];
+      yr = a[k1];
+      yi = a[k1 + 1];
+      a[j1] = yr;
+      a[j1 + 1] = yi;
+      a[k1] = xr;
+      a[k1 + 1] = xi;
+    }
+    j1 = 2 * k + m2 + ip[k];
+    k1 = j1 + m2;
+    xr = a[j1];
+    xi = a[j1 + 1];
+    yr = a[k1];
+    yi = a[k1 + 1];
+    a[j1] = yr;
+    a[j1 + 1] = yi;
+    a[k1] = xr;
+    a[k1 + 1] = xi;
+  }
+}
+
+// Fills rdft_w[0..31] with cos/sin values at multiples of
+// delta = atan(1) / 16 and then reorders the table with bitrv2_32or128().
+static void makewt_32(void) {
+  const int nw = 32;
+  int j, nwh;
+  float delta, x, y;
+
+  ip[0] = nw;
+  ip[1] = 1;
+  nwh = nw >> 1;
+  delta = atanf(1.0f) / nwh;
+  rdft_w[0] = 1;
+  rdft_w[1] = 0;
+  rdft_w[nwh] = cosf(delta * nwh);
+  rdft_w[nwh + 1] = rdft_w[nwh];
+  for (j = 2; j < nwh; j += 2) {
+    x = cosf(delta * j);
+    y = sinf(delta * j);
+    rdft_w[j] = x;
+    rdft_w[j + 1] = y;
+    rdft_w[nw - j] = y;
+    rdft_w[nw - j + 1] = x;
+  }
+  bitrv2_32or128(nw, ip + 2, rdft_w);
+}
+
+// Fills rdft_w[32..63] with half-scaled cos/sin values at multiples of
+// delta = atan(1) / 16. This is the table read through |c| by the
+// rftfsub/rftbsub routines.
+static void makect_32(void) {
+  float *c = rdft_w + 32;
+  const int nc = 32;
+  int j, nch;
+  float delta;
+
+  ip[1] = nc;
+  nch = nc >> 1;
+  delta = atanf(1.0f) / nch;
+  c[0] = cosf(delta * nch);
+  c[nch] = 0.5f * c[0];
+  for (j = 1; j < nch; j++) {
+    c[j] = 0.5f * cosf(delta * j);
+    c[nc - j] = 0.5f * sinf(delta * j);
+  }
+}
+
+// First butterfly pass over the 128-float buffer |a|, working on groups of
+// 16 floats with factors read from rdft_w. The first 16 floats are handled
+// separately before the loop.
+static void cft1st_128(float *a) {
+  const int n = 128;
+  int j, k1, k2;
+  float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
+  float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+  x0r = a[0] + a[2];
+  x0i = a[1] + a[3];
+  x1r = a[0] - a[2];
+  x1i = a[1] - a[3];
+  x2r = a[4] + a[6];
+  x2i = a[5] + a[7];
+  x3r = a[4] - a[6];
+  x3i = a[5] - a[7];
+  a[0] = x0r + x2r;
+  a[1] = x0i + x2i;
+  a[4] = x0r - x2r;
+  a[5] = x0i - x2i;
+  a[2] = x1r - x3i;
+  a[3] = x1i + x3r;
+  a[6] = x1r + x3i;
+  a[7] = x1i - x3r;
+  wk1r = rdft_w[2];
+  x0r = a[8] + a[10];
+  x0i = a[9] + a[11];
+  x1r = a[8] - a[10];
+  x1i = a[9] - a[11];
+  x2r = a[12] + a[14];
+  x2i = a[13] + a[15];
+  x3r = a[12] - a[14];
+  x3i = a[13] - a[15];
+  a[8] = x0r + x2r;
+  a[9] = x0i + x2i;
+  a[12] = x2i - x0i;
+  a[13] = x0r - x2r;
+  x0r = x1r - x3i;
+  x0i = x1i + x3r;
+  a[10] = wk1r * (x0r - x0i);
+  a[11] = wk1r * (x0r + x0i);
+  x0r = x3i + x1r;
+  x0i = x3r - x1i;
+  a[14] = wk1r * (x0i - x0r);
+  a[15] = wk1r * (x0i + x0r);
+  k1 = 0;
+  for (j = 16; j < n; j += 16) {
+    k1 += 2;
+    k2 = 2 * k1;
+    wk2r = rdft_w[k1];
+    wk2i = rdft_w[k1 + 1];
+    wk1r = rdft_w[k2];
+    wk1i = rdft_w[k2 + 1];
+    wk3r = wk1r - 2 * wk2i * wk1i;
+    wk3i = 2 * wk2i * wk1r - wk1i;
+    x0r = a[j] + a[j + 2];
+    x0i = a[j + 1] + a[j + 3];
+    x1r = a[j] - a[j + 2];
+    x1i = a[j + 1] - a[j + 3];
+    x2r = a[j + 4] + a[j + 6];
+    x2i = a[j + 5] + a[j + 7];
+    x3r = a[j + 4] - a[j + 6];
+    x3i = a[j + 5] - a[j + 7];
+    a[j] = x0r + x2r;
+    a[j + 1] = x0i + x2i;
+    x0r -= x2r;
+    x0i -= x2i;
+    a[j + 4] = wk2r * x0r - wk2i * x0i;
+    a[j + 5] = wk2r * x0i + wk2i * x0r;
+    x0r = x1r - x3i;
+    x0i = x1i + x3r;
+    a[j + 2] = wk1r * x0r - wk1i * x0i;
+    a[j + 3] = wk1r * x0i + wk1i * x0r;
+    x0r = x1r + x3i;
+    x0i = x1i - x3r;
+    a[j + 6] = wk3r * x0r - wk3i * x0i;
+    a[j + 7] = wk3r * x0i + wk3i * x0r;
+    wk1r = rdft_w[k2 + 2];
+    wk1i = rdft_w[k2 + 3];
+    wk3r = wk1r - 2 * wk2r * wk1i;
+    wk3i = 2 * wk2r * wk1r - wk1i;
+    x0r = a[j + 8] + a[j + 10];
+    x0i = a[j + 9] + a[j + 11];
+    x1r = a[j + 8] - a[j + 10];
+    x1i = a[j + 9] - a[j + 11];
+    x2r = a[j + 12] + a[j + 14];
+    x2i = a[j + 13] + a[j + 15];
+    x3r = a[j + 12] - a[j + 14];
+    x3i = a[j + 13] - a[j + 15];
+    a[j + 8] = x0r + x2r;
+    a[j + 9] = x0i + x2i;
+    x0r -= x2r;
+    x0i -= x2i;
+    a[j + 12] = -wk2i * x0r - wk2r * x0i;
+    a[j + 13] = -wk2i * x0i + wk2r * x0r;
+    x0r = x1r - x3i;
+    x0i = x1i + x3r;
+    a[j + 10] = wk1r * x0r - wk1i * x0i;
+    a[j + 11] = wk1r * x0i + wk1i * x0r;
+    x0r = x1r + x3i;
+    x0i = x1i - x3r;
+    a[j + 14] = wk3r * x0r - wk3i * x0i;
+    a[j + 15] = wk3r * x0i + wk3i * x0r;
+  }
+}
+
+// Butterfly pass over the 128-float buffer |a| with stride |l| between the
+// four inputs of each butterfly. In this file it is only called with l == 8.
+static void cftmdl_128(int l, float *a) {
+  const int n = 128;
+  int j, j1, j2, j3, k, k1, k2, m, m2;
+  float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
+  float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+  m = l << 2;
+  for (j = 0; j < l; j += 2) {
+    j1 = j + l;
+    j2 = j1 + l;
+    j3 = j2 + l;
+    x0r = a[j] + a[j1];
+    x0i = a[j + 1] + a[j1 + 1];
+    x1r = a[j] - a[j1];
+    x1i = a[j + 1] - a[j1 + 1];
+    x2r = a[j2] + a[j3];
+    x2i = a[j2 + 1] + a[j3 + 1];
+    x3r = a[j2] - a[j3];
+    x3i = a[j2 + 1] - a[j3 + 1];
+    a[j] = x0r + x2r;
+    a[j + 1] = x0i + x2i;
+    a[j2] = x0r - x2r;
+    a[j2 + 1] = x0i - x2i;
+    a[j1] = x1r - x3i;
+    a[j1 + 1] = x1i + x3r;
+    a[j3] = x1r + x3i;
+    a[j3 + 1] = x1i - x3r;
+  }
+  wk1r = rdft_w[2];
+  for (j = m; j < l + m; j += 2) {
+    j1 = j + l;
+    j2 = j1 + l;
+    j3 = j2 + l;
+    x0r = a[j] + a[j1];
+    x0i = a[j + 1] + a[j1 + 1];
+    x1r = a[j] - a[j1];
+    x1i = a[j + 1] - a[j1 + 1];
+    x2r = a[j2] + a[j3];
+    x2i = a[j2 + 1] + a[j3 + 1];
+    x3r = a[j2] - a[j3];
+    x3i = a[j2 + 1] - a[j3 + 1];
+    a[j] = x0r + x2r;
+    a[j + 1] = x0i + x2i;
+    a[j2] = x2i - x0i;
+    a[j2 + 1] = x0r - x2r;
+    x0r = x1r - x3i;
+    x0i = x1i + x3r;
+    a[j1] = wk1r * (x0r - x0i);
+    a[j1 + 1] = wk1r * (x0r + x0i);
+    x0r = x3i + x1r;
+    x0i = x3r - x1i;
+    a[j3] = wk1r * (x0i - x0r);
+    a[j3 + 1] = wk1r * (x0i + x0r);
+  }
+  k1 = 0;
+  m2 = 2 * m;
+  for (k = m2; k < n; k += m2) {
+    k1 += 2;
+    k2 = 2 * k1;
+    wk2r = rdft_w[k1];
+    wk2i = rdft_w[k1 + 1];
+    wk1r = rdft_w[k2];
+    wk1i = rdft_w[k2 + 1];
+    wk3r = wk1r - 2 * wk2i * wk1i;
+    wk3i = 2 * wk2i * wk1r - wk1i;
+    for (j = k; j < l + k; j += 2) {
+      j1 = j + l;
+      j2 = j1 + l;
+      j3 = j2 + l;
+      x0r = a[j] + a[j1];
+      x0i = a[j + 1] + a[j1 + 1];
+      x1r = a[j] - a[j1];
+      x1i = a[j + 1] - a[j1 + 1];
+      x2r = a[j2] + a[j3];
+      x2i = a[j2 + 1] + a[j3 + 1];
+      x3r = a[j2] - a[j3];
+      x3i = a[j2 + 1] - a[j3 + 1];
+      a[j] = x0r + x2r;
+      a[j + 1] = x0i + x2i;
+      x0r -= x2r;
+      x0i -= x2i;
+      a[j2] = wk2r * x0r - wk2i * x0i;
+      a[j2 + 1] = wk2r * x0i + wk2i * x0r;
+      x0r = x1r - x3i;
+      x0i = x1i + x3r;
+      a[j1] = wk1r * x0r - wk1i * x0i;
+      a[j1 + 1] = wk1r * x0i + wk1i * x0r;
+      x0r = x1r + x3i;
+      x0i = x1i - x3r;
+      a[j3] = wk3r * x0r - wk3i * x0i;
+      a[j3 + 1] = wk3r * x0i + wk3i * x0r;
+    }
+    wk1r = rdft_w[k2 + 2];
+    wk1i = rdft_w[k2 + 3];
+    wk3r = wk1r - 2 * wk2r * wk1i;
+    wk3i = 2 * wk2r * wk1r - wk1i;
+    for (j = k + m; j < l + (k + m); j += 2) {
+      j1 = j + l;
+      j2 = j1 + l;
+      j3 = j2 + l;
+      x0r = a[j] + a[j1];
+      x0i = a[j + 1] + a[j1 + 1];
+      x1r = a[j] - a[j1];
+      x1i = a[j + 1] - a[j1 + 1];
+      x2r = a[j2] + a[j3];
+      x2i = a[j2 + 1] + a[j3 + 1];
+      x3r = a[j2] - a[j3];
+      x3i = a[j2 + 1] - a[j3 + 1];
+      a[j] = x0r + x2r;
+      a[j + 1] = x0i + x2i;
+      x0r -= x2r;
+      x0i -= x2i;
+      a[j2] = -wk2i * x0r - wk2r * x0i;
+      a[j2 + 1] = -wk2i * x0i + wk2r * x0r;
+      x0r = x1r - x3i;
+      x0i = x1i + x3r;
+      a[j1] = wk1r * x0r - wk1i * x0i;
+      a[j1 + 1] = wk1r * x0i + wk1i * x0r;
+      x0r = x1r + x3i;
+      x0i = x1i - x3r;
+      a[j3] = wk3r * x0r - wk3i * x0i;
+      a[j3 + 1] = wk3r * x0i + wk3i * x0r;
+    }
+  }
+}
+
+// Forward complex transform core: runs cft1st_128(), cftmdl_128(8, ...) and
+// a final stride-32 butterfly pass over |a| in place.
+static void cftfsub_128(float *a) {
+  int j, j1, j2, j3, l;
+  float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+  cft1st_128(a);
+  cftmdl_128(8, a);
+  l = 32;
+  for (j = 0; j < l; j += 2) {
+    j1 = j + l;
+    j2 = j1 + l;
+    j3 = j2 + l;
+    x0r = a[j] + a[j1];
+    x0i = a[j + 1] + a[j1 + 1];
+    x1r = a[j] - a[j1];
+    x1i = a[j + 1] - a[j1 + 1];
+    x2r = a[j2] + a[j3];
+    x2i = a[j2 + 1] + a[j3 + 1];
+    x3r = a[j2] - a[j3];
+    x3i = a[j2 + 1] - a[j3 + 1];
+    a[j] = x0r + x2r;
+    a[j + 1] = x0i + x2i;
+    a[j2] = x0r - x2r;
+    a[j2 + 1] = x0i - x2i;
+    a[j1] = x1r - x3i;
+    a[j1 + 1] = x1i + x3r;
+    a[j3] = x1r + x3i;
+    a[j3 + 1] = x1i - x3r;
+  }
+}
+
+// Backward counterpart of cftfsub_128(): same two leading passes, but the
+// final stride-32 pass uses the sign pattern below (imaginary parts of the
+// first two inputs are negated on load).
+static void cftbsub_128(float *a) {
+  int j, j1, j2, j3, l;
+  float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
+
+  cft1st_128(a);
+  cftmdl_128(8, a);
+  l = 32;
+
+  for (j = 0; j < l; j += 2) {
+    j1 = j + l;
+    j2 = j1 + l;
+    j3 = j2 + l;
+    x0r = a[j] + a[j1];
+    x0i = -a[j + 1] - a[j1 + 1];
+    x1r = a[j] - a[j1];
+    x1i = -a[j + 1] + a[j1 + 1];
+    x2r = a[j2] + a[j3];
+    x2i = a[j2 + 1] + a[j3 + 1];
+    x3r = a[j2] - a[j3];
+    x3i = a[j2 + 1] - a[j3 + 1];
+    a[j] = x0r + x2r;
+    a[j + 1] = x0i - x2i;
+    a[j2] = x0r - x2r;
+    a[j2 + 1] = x0i + x2i;
+    a[j1] = x1r - x3i;
+    a[j1 + 1] = x1i - x3r;
+    a[j3] = x1r + x3i;
+    a[j3 + 1] = x1i + x3r;
+  }
+}
+
+// Plain C version of the real-FFT step used by the forward transform.
+// Combines each pair (a[j2], a[j2 + 1]) with its mirror
+// (a[128 - j2], a[129 - j2]) using factors from the second half of rdft_w.
+static void rftfsub_128_C(float *a) {
+  const float *c = rdft_w + 32;
+  int j1, j2, k1, k2;
+  float wkr, wki, xr, xi, yr, yi;
+
+  for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) {
+    k2 = 128 - j2;
+    k1 = 32 - j1;
+    wkr = 0.5f - c[k1];
+    wki = c[j1];
+    xr = a[j2 + 0] - a[k2 + 0];
+    xi = a[j2 + 1] + a[k2 + 1];
+    yr = wkr * xr - wki * xi;
+    yi = wkr * xi + wki * xr;
+    a[j2 + 0] -= yr;
+    a[j2 + 1] -= yi;
+    a[k2 + 0] += yr;
+    a[k2 + 1] -= yi;
+  }
+}
+
+// Plain C version of the real-FFT step used by the inverse transform.
+// Same pairing as rftfsub_128_C() with the sign pattern below; a[1] and
+// a[65] are negated before and after the loop respectively.
+static void rftbsub_128_C(float *a) {
+  const float *c = rdft_w + 32;
+  int j1, j2, k1, k2;
+  float wkr, wki, xr, xi, yr, yi;
+
+  a[1] = -a[1];
+  for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) {
+    k2 = 128 - j2;
+    k1 = 32 - j1;
+    wkr = 0.5f - c[k1];
+    wki = c[j1];
+    xr = a[j2 + 0] - a[k2 + 0];
+    xi = a[j2 + 1] + a[k2 + 1];
+    yr = wkr * xr + wki * xi;
+    yi = wkr * xi - wki * xr;
+    a[j2 + 0] = a[j2 + 0] - yr;
+    a[j2 + 1] = yi - a[j2 + 1];
+    a[k2 + 0] = yr + a[k2 + 0];
+    a[k2 + 1] = yi - a[k2 + 1];
+  }
+  a[65] = -a[65];
+}
+
+// In-place forward transform of the 128 floats in |a|.
+// aec_rdft_init() must have been called first: this function reads the
+// rdft_w table and calls through the rftfsub_128 function pointer, both of
+// which are only set up there.
+void aec_rdft_forward_128(float *a) {
+  const int n = 128;
+  float xi;
+
+  bitrv2_32or128(n, ip + 2, a);
+  cftfsub_128(a);
+  rftfsub_128(a);
+  xi = a[0] - a[1];
+  a[0] += a[1];
+  a[1] = xi;
+}
+
+// In-place inverse transform of the 128 floats in |a|.
+// Same precondition as aec_rdft_forward_128(): aec_rdft_init() must have
+// been called. No output scaling is applied in this function.
+void aec_rdft_inverse_128(float *a) {
+  const int n = 128;
+
+  a[1] = 0.5f * (a[0] - a[1]);
+  a[0] -= a[1];
+  rftbsub_128(a);
+  bitrv2_32or128(n, ip + 2, a);
+  cftbsub_128(a);
+}
+
+// code path selection
+rft_sub_128_t rftfsub_128;
+rft_sub_128_t rftbsub_128;
+
+// One-time setup: selects the C implementations, lets the SSE2 build
+// override them when the CPU reports SSE2 support, and fills both halves of
+// rdft_w.
+void aec_rdft_init(void) {
+  rftfsub_128 = rftfsub_128_C;
+  rftbsub_128 = rftbsub_128_C;
+  if (WebRtc_GetCPUInfo(kSSE2)) {
+#if defined(__SSE2__)
+    aec_rdft_init_sse2();
+#endif
+  }
+  // init library constants.
+  makewt_32();
+  makect_32();
+}
diff --git a/src/modules/audio_processing/aec/main/source/aec_rdft.h b/src/modules/audio_processing/aec/main/source/aec_rdft.h
new file mode 100644
index 0000000000..cf908822a6
--- /dev/null
+++ b/src/modules/audio_processing/aec/main/source/aec_rdft.h
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS.  All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+// constants shared by all paths (C, SSE2).
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_RDFT_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_RDFT_H_
+
+// Lookup table shared by the C and SSE2 code paths.
+extern float rdft_w[64];
+
+// code path selection function pointers
+typedef void (*rft_sub_128_t)(float *a);
+extern rft_sub_128_t rftfsub_128;
+extern rft_sub_128_t rftbsub_128;
+
+// entry points
+void aec_rdft_init(void);
+void aec_rdft_init_sse2(void);
+void aec_rdft_forward_128(float *a);
+void aec_rdft_inverse_128(float *a);
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_RDFT_H_
diff --git a/src/modules/audio_processing/aec/main/source/aec_rdft_sse2.c b/src/modules/audio_processing/aec/main/source/aec_rdft_sse2.c
new file mode 100644
index 0000000000..901a1b1462
--- /dev/null
+++ b/src/modules/audio_processing/aec/main/source/aec_rdft_sse2.c
@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS.  All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <emmintrin.h>
+
+#include "aec_rdft.h"
+
+#ifdef _MSC_VER /* visual c++ */
+# define ALIGN16_BEG __declspec(align(16))
+# define ALIGN16_END
+#else /* gcc or icc */
+# define ALIGN16_BEG
+# define ALIGN16_END __attribute__((aligned(16)))
+#endif
+
+// SSE2 version of the forward real-FFT step on the 128 floats in |a|.
+// The vector loop handles four (j2, k2) pairs per iteration, where
+// k2 = 128 - j2; the pairs that do not fill a whole vector are handled by
+// the scalar loop at the end, which spells out the per-pair arithmetic.
+static void rftfsub_128_SSE2(float *a) {
+  const float *c = rdft_w + 32;
+  int j1, j2, k1, k2;
+  float wkr, wki, xr, xi, yr, yi;
+
+  static const ALIGN16_BEG float ALIGN16_END k_half[4] =
+    {0.5f, 0.5f, 0.5f, 0.5f};
+  const __m128 mm_half = _mm_load_ps(k_half);
+
+  // Vectorized code (four at once).
+  //    Note: commented number are indexes for the first iteration of the loop.
+  for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) {
+    // Load 'wk'.
+    const __m128 c_j1 = _mm_loadu_ps(&c[     j1]);         //  1,  2,  3,  4,
+    const __m128 c_k1 = _mm_loadu_ps(&c[29 - j1]);         // 28, 29, 30, 31,
+    const __m128 wkrt = _mm_sub_ps(mm_half, c_k1);         // 28, 29, 30, 31,
+    const __m128 wkr_ =
+      _mm_shuffle_ps(wkrt, wkrt, _MM_SHUFFLE(0, 1, 2, 3)); // 31, 30, 29, 28,
+    const __m128 wki_ = c_j1;                              //  1,  2,  3,  4,
+    // Load and shuffle 'a'.
+    const __m128 a_j2_0 = _mm_loadu_ps(&a[0   + j2]);  //   2,   3,   4,   5,
+    const __m128 a_j2_4 = _mm_loadu_ps(&a[4   + j2]);  //   6,   7,   8,   9,
+    const __m128 a_k2_0 = _mm_loadu_ps(&a[122 - j2]);  // 120, 121, 122, 123,
+    const __m128 a_k2_4 = _mm_loadu_ps(&a[126 - j2]);  // 124, 125, 126, 127,
+    const __m128 a_j2_p0 = _mm_shuffle_ps(a_j2_0, a_j2_4,
+                            _MM_SHUFFLE(2, 0, 2 ,0));  //   2,   4,   6,   8,
+    const __m128 a_j2_p1 = _mm_shuffle_ps(a_j2_0, a_j2_4,
+                            _MM_SHUFFLE(3, 1, 3 ,1));  //   3,   5,   7,   9,
+    const __m128 a_k2_p0 = _mm_shuffle_ps(a_k2_4, a_k2_0,
+                            _MM_SHUFFLE(0, 2, 0 ,2));  // 126, 124, 122, 120,
+    const __m128 a_k2_p1 = _mm_shuffle_ps(a_k2_4, a_k2_0,
+                            _MM_SHUFFLE(1, 3, 1 ,3));  // 127, 125, 123, 121,
+    // Calculate 'x'.
+    const __m128 xr_ = _mm_sub_ps(a_j2_p0, a_k2_p0);
+                                               // 2-126, 4-124, 6-122, 8-120,
+    const __m128 xi_ = _mm_add_ps(a_j2_p1, a_k2_p1);
+                                               // 3+127, 5+125, 7+123, 9+121,
+    // Calculate product into 'y'.
+    //    yr = wkr * xr - wki * xi;
+    //    yi = wkr * xi + wki * xr;
+    const __m128 a_ = _mm_mul_ps(wkr_, xr_);
+    const __m128 b_ = _mm_mul_ps(wki_, xi_);
+    const __m128 c_ = _mm_mul_ps(wkr_, xi_);
+    const __m128 d_ = _mm_mul_ps(wki_, xr_);
+    const __m128 yr_ = _mm_sub_ps(a_, b_);     // 2-126, 4-124, 6-122, 8-120,
+    const __m128 yi_ = _mm_add_ps(c_, d_);     // 3-127, 5-125, 7-123, 9-121,
+    // Update 'a'.
+    //    a[j2 + 0] -= yr;
+    //    a[j2 + 1] -= yi;
+    //    a[k2 + 0] += yr;
+    //    a[k2 + 1] -= yi;
+    const __m128 a_j2_p0n = _mm_sub_ps(a_j2_p0, yr_);  //   2,   4,   6,   8,
+    const __m128 a_j2_p1n = _mm_sub_ps(a_j2_p1, yi_);  //   3,   5,   7,   9,
+    const __m128 a_k2_p0n = _mm_add_ps(a_k2_p0, yr_);  // 126, 124, 122, 120,
+    const __m128 a_k2_p1n = _mm_sub_ps(a_k2_p1, yi_);  // 127, 125, 123, 121,
+    // Shuffle in right order and store.
+    const __m128 a_j2_0n = _mm_unpacklo_ps(a_j2_p0n, a_j2_p1n);
+                                                       //   2,   3,   4,   5,
+    const __m128 a_j2_4n = _mm_unpackhi_ps(a_j2_p0n, a_j2_p1n);
+                                                       //   6,   7,   8,   9,
+    const __m128 a_k2_0nt = _mm_unpackhi_ps(a_k2_p0n, a_k2_p1n);
+                                                       // 122, 123, 120, 121,
+    const __m128 a_k2_4nt = _mm_unpacklo_ps(a_k2_p0n, a_k2_p1n);
+                                                       // 126, 127, 124, 125,
+    const __m128 a_k2_0n = _mm_shuffle_ps(a_k2_0nt, a_k2_0nt,
+                            _MM_SHUFFLE(1, 0, 3 ,2));  // 120, 121, 122, 123,
+    const __m128 a_k2_4n = _mm_shuffle_ps(a_k2_4nt, a_k2_4nt,
+                            _MM_SHUFFLE(1, 0, 3 ,2));  // 124, 125, 126, 127,
+    _mm_storeu_ps(&a[0   + j2], a_j2_0n);
+    _mm_storeu_ps(&a[4   + j2], a_j2_4n);
+    _mm_storeu_ps(&a[122 - j2], a_k2_0n);
+    _mm_storeu_ps(&a[126 - j2], a_k2_4n);
+  }
+  // Scalar code for the remaining items.
+  for (; j2 < 64; j1 += 1, j2 += 2) {
+    k2 = 128 - j2;
+    k1 =  32 - j1;
+    wkr = 0.5f - c[k1];
+    wki = c[j1];
+    xr = a[j2 + 0] - a[k2 + 0];
+    xi = a[j2 + 1] + a[k2 + 1];
+    yr = wkr * xr - wki * xi;
+    yi = wkr * xi + wki * xr;
+    a[j2 + 0] -= yr;
+    a[j2 + 1] -= yi;
+    a[k2 + 0] += yr;
+    a[k2 + 1] -= yi;
+  }
+}
+
+// SSE2 version of the inverse real-FFT step on the 128 floats in |a|.
+// Structured like rftfsub_128_SSE2() but with the sign pattern shown in the
+// "Calculate product" / "Update 'a'" comments below; a[1] and a[65] are
+// negated before and after the loops respectively.
+static void rftbsub_128_SSE2(float *a) {
+  const float *c = rdft_w + 32;
+  int j1, j2, k1, k2;
+  float wkr, wki, xr, xi, yr, yi;
+
+  static const ALIGN16_BEG float ALIGN16_END k_half[4] =
+    {0.5f, 0.5f, 0.5f, 0.5f};
+  const __m128 mm_half = _mm_load_ps(k_half);
+
+  a[1] = -a[1];
+  // Vectorized code (four at once).
+  //    Note: commented number are indexes for the first iteration of the loop.
+  for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) {
+    // Load 'wk'.
+    const __m128 c_j1 = _mm_loadu_ps(&c[     j1]);         //  1,  2,  3,  4,
+    const __m128 c_k1 = _mm_loadu_ps(&c[29 - j1]);         // 28, 29, 30, 31,
+    const __m128 wkrt = _mm_sub_ps(mm_half, c_k1);         // 28, 29, 30, 31,
+    const __m128 wkr_ =
+      _mm_shuffle_ps(wkrt, wkrt, _MM_SHUFFLE(0, 1, 2, 3)); // 31, 30, 29, 28,
+    const __m128 wki_ = c_j1;                              //  1,  2,  3,  4,
+    // Load and shuffle 'a'.
+    const __m128 a_j2_0 = _mm_loadu_ps(&a[0   + j2]);  //   2,   3,   4,   5,
+    const __m128 a_j2_4 = _mm_loadu_ps(&a[4   + j2]);  //   6,   7,   8,   9,
+    const __m128 a_k2_0 = _mm_loadu_ps(&a[122 - j2]);  // 120, 121, 122, 123,
+    const __m128 a_k2_4 = _mm_loadu_ps(&a[126 - j2]);  // 124, 125, 126, 127,
+    const __m128 a_j2_p0 = _mm_shuffle_ps(a_j2_0, a_j2_4,
+                            _MM_SHUFFLE(2, 0, 2 ,0));  //   2,   4,   6,   8,
+    const __m128 a_j2_p1 = _mm_shuffle_ps(a_j2_0, a_j2_4,
+                            _MM_SHUFFLE(3, 1, 3 ,1));  //   3,   5,   7,   9,
+    const __m128 a_k2_p0 = _mm_shuffle_ps(a_k2_4, a_k2_0,
+                            _MM_SHUFFLE(0, 2, 0 ,2));  // 126, 124, 122, 120,
+    const __m128 a_k2_p1 = _mm_shuffle_ps(a_k2_4, a_k2_0,
+                            _MM_SHUFFLE(1, 3, 1 ,3));  // 127, 125, 123, 121,
+    // Calculate 'x'.
+    const __m128 xr_ = _mm_sub_ps(a_j2_p0, a_k2_p0);
+                                               // 2-126, 4-124, 6-122, 8-120,
+    const __m128 xi_ = _mm_add_ps(a_j2_p1, a_k2_p1);
+                                               // 3+127, 5+125, 7+123, 9+121,
+    // Calculate product into 'y'.
+    //    yr = wkr * xr + wki * xi;
+    //    yi = wkr * xi - wki * xr;
+    const __m128 a_ = _mm_mul_ps(wkr_, xr_);
+    const __m128 b_ = _mm_mul_ps(wki_, xi_);
+    const __m128 c_ = _mm_mul_ps(wkr_, xi_);
+    const __m128 d_ = _mm_mul_ps(wki_, xr_);
+    const __m128 yr_ = _mm_add_ps(a_, b_);     // 2-126, 4-124, 6-122, 8-120,
+    const __m128 yi_ = _mm_sub_ps(c_, d_);     // 3-127, 5-125, 7-123, 9-121,
+    // Update 'a'.
+    //    a[j2 + 0] = a[j2 + 0] - yr;
+    //    a[j2 + 1] = yi - a[j2 + 1];
+    //    a[k2 + 0] = yr + a[k2 + 0];
+    //    a[k2 + 1] = yi - a[k2 + 1];
+    const __m128 a_j2_p0n = _mm_sub_ps(a_j2_p0, yr_);  //   2,   4,   6,   8,
+    const __m128 a_j2_p1n = _mm_sub_ps(yi_, a_j2_p1);  //   3,   5,   7,   9,
+    const __m128 a_k2_p0n = _mm_add_ps(a_k2_p0, yr_);  // 126, 124, 122, 120,
+    const __m128 a_k2_p1n = _mm_sub_ps(yi_, a_k2_p1);  // 127, 125, 123, 121,
+    // Shuffle in right order and store.
+    const __m128 a_j2_0n = _mm_unpacklo_ps(a_j2_p0n, a_j2_p1n);
+                                                       //   2,   3,   4,   5,
+    const __m128 a_j2_4n = _mm_unpackhi_ps(a_j2_p0n, a_j2_p1n);
+                                                       //   6,   7,   8,   9,
+    const __m128 a_k2_0nt = _mm_unpackhi_ps(a_k2_p0n, a_k2_p1n);
+                                                       // 122, 123, 120, 121,
+    const __m128 a_k2_4nt = _mm_unpacklo_ps(a_k2_p0n, a_k2_p1n);
+                                                       // 126, 127, 124, 125,
+    const __m128 a_k2_0n = _mm_shuffle_ps(a_k2_0nt, a_k2_0nt,
+                            _MM_SHUFFLE(1, 0, 3 ,2));  // 120, 121, 122, 123,
+    const __m128 a_k2_4n = _mm_shuffle_ps(a_k2_4nt, a_k2_4nt,
+                            _MM_SHUFFLE(1, 0, 3 ,2));  // 124, 125, 126, 127,
+    _mm_storeu_ps(&a[0   + j2], a_j2_0n);
+    _mm_storeu_ps(&a[4   + j2], a_j2_4n);
+    _mm_storeu_ps(&a[122 - j2], a_k2_0n);
+    _mm_storeu_ps(&a[126 - j2], a_k2_4n);
+  }
+  // Scalar code for the remaining items.
+  for (; j2 < 64; j1 += 1, j2 += 2) {
+    k2 = 128 - j2;
+    k1 =  32 - j1;
+    wkr = 0.5f - c[k1];
+    wki = c[j1];
+    xr = a[j2 + 0] - a[k2 + 0];
+    xi = a[j2 + 1] + a[k2 + 1];
+    yr = wkr * xr + wki * xi;
+    yi = wkr * xi - wki * xr;
+    a[j2 + 0] = a[j2 + 0] - yr;
+    a[j2 + 1] = yi - a[j2 + 1];
+    a[k2 + 0] = yr + a[k2 + 0];
+    a[k2 + 1] = yi - a[k2 + 1];
+  }
+  a[65] = -a[65];
+}
+
+// Redirects the rftfsub_128 / rftbsub_128 function pointers to the SSE2
+// implementations above.
+void aec_rdft_init_sse2(void) {
+  rftfsub_128 = rftfsub_128_SSE2;
+  rftbsub_128 = rftbsub_128_SSE2;
+}
diff --git a/src/modules/audio_processing/aec/main/source/echo_cancellation.c b/src/modules/audio_processing/aec/main/source/echo_cancellation.c
new file mode 100644
index 0000000000..1313e358f9
--- /dev/null
+++ b/src/modules/audio_processing/aec/main/source/echo_cancellation.c
@@ -0,0 +1,821 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS.  All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * Contains the API functions for the AEC.
+ */
+#include <stdlib.h>
+#include <string.h>
+
+#include "echo_cancellation.h"
+#include "aec_core.h"
+#include "ring_buffer.h"
+#include "resampler.h"
+#ifdef AEC_DEBUG
+  #include <stdio.h>
+#endif
+
+#define BUF_SIZE_FRAMES 50 // buffer size (frames)
+// Maximum length of resampled signal.
+// Must be an integer multiple of frames
+// (ceil(1/(1 + MIN_SKEW)*2) + 1)*FRAME_LEN
+// The factor of 2 handles wb, and the + 1 is as a safety margin
+#define MAX_RESAMP_LEN (5 * FRAME_LEN)
+
+static const int bufSizeSamp = BUF_SIZE_FRAMES * FRAME_LEN; // buffer size (samples)
+static const int sampMsNb = 8; // samples per ms in nb
+// Target suppression levels for nlp modes
+// log{0.001, 0.00001, 0.00000001}
+static const float targetSupp[3] = {-6.9f, -11.5f, -18.4f};
+static const float minOverDrive[3] = {1.0f, 2.0f, 5.0f};
+// Magic value stored in initFlag once WebRtcAec_Init() has run.
+static const int initCheck = 42;
+
+// Per-instance state of the AEC API layer. Created by WebRtcAec_Create(),
+// reset by WebRtcAec_Init() and released by WebRtcAec_Free().
+typedef struct {
+    int delayCtr;
+    int sampFreq;
+    int splitSampFreq;
+    int scSampFreq;
+    float sampFactor; // scSampRate / sampFreq
+    short nlpMode;
+    short autoOnOff;
+    short activity;
+    short skewMode;
+    short bufSizeStart;
+    //short bufResetCtr;  // counts number of noncausal frames
+    int knownDelay;
+
+    // Stores the last frame added to the farend buffer
+    short farendOld[2][FRAME_LEN];
+    short initFlag; // indicates if AEC has been initialized
+
+    // Variables used for averaging far end buffer size
+    short counter;
+    short sum;
+    short firstVal;
+    short checkBufSizeCtr;
+
+    // Variables used for delay shifts
+    short msInSndCardBuf;
+    short filtDelay;
+    int timeForDelayChange;
+    int ECstartup;
+    int checkBuffSize;
+    int delayChange;
+    short lastDelayDiff;
+
+#ifdef AEC_DEBUG
+    FILE *bufFile;
+    FILE *delayFile;
+    FILE *skewFile;
+    FILE *preCompFile;
+    FILE *postCompFile;
+#endif // AEC_DEBUG
+
+    // Structures
+    void *farendBuf;
+    void *resampler;
+
+    int skewFrCtr;
+    int resample; // if the skew is small enough we don't resample
+    int highSkewCtr;
+    float skew;
+
+    int lastError;
+
+    aec_t *aec;
+} aecpc_t;
+
+// Estimates delay to set the position of the farend buffer read pointer
+// (controlled by knownDelay)
+static int EstBufDelay(aecpc_t *aecInst, short msInSndCardBuf);
+
+// Stuffs the farend buffer if the estimated delay is too large
+static int DelayComp(aecpc_t *aecInst);
+
+// Allocates an AEC instance together with its core, farend buffer and
+// resampler.
+//
+// On success *aecInst receives the new instance and 0 is returned; the
+// instance still has to be set up with WebRtcAec_Init() before use.
+// On failure -1 is returned, everything that was allocated here is released
+// again and *aecInst is set to NULL. A NULL aecInst also yields -1.
+WebRtc_Word32 WebRtcAec_Create(void **aecInst)
+{
+    aecpc_t *aecpc;
+    if (aecInst == NULL) {
+        return -1;
+    }
+
+    aecpc = malloc(sizeof(aecpc_t));
+    *aecInst = aecpc;
+    if (aecpc == NULL) {
+        return -1;
+    }
+
+    // The struct comes from malloc(), so none of its pointer members are
+    // valid until the matching Create call has succeeded. Unwind only what
+    // was actually created instead of handing the half-built struct to
+    // WebRtcAec_Free().
+    // NOTE(review): this assumes a sub-Create that returns -1 has already
+    // released whatever it allocated - confirm against aec_core.c,
+    // ring_buffer.c and resampler.c.
+    if (WebRtcAec_CreateAec(&aecpc->aec) == -1) {
+        goto fail_instance;
+    }
+
+    if (WebRtcApm_CreateBuffer(&aecpc->farendBuf, bufSizeSamp) == -1) {
+        goto fail_aec;
+    }
+
+    if (WebRtcAec_CreateResampler(&aecpc->resampler) == -1) {
+        goto fail_buffer;
+    }
+
+    aecpc->initFlag = 0;
+    aecpc->lastError = 0;
+
+#ifdef AEC_DEBUG
+    aecpc->aec->farFile = fopen("aecFar.pcm","wb");
+    aecpc->aec->nearFile = fopen("aecNear.pcm","wb");
+    aecpc->aec->outFile = fopen("aecOut.pcm","wb");
+    aecpc->aec->outLpFile = fopen("aecOutLp.pcm","wb");
+
+    aecpc->bufFile = fopen("aecBuf.dat", "wb");
+    aecpc->skewFile = fopen("aecSkew.dat", "wb");
+    aecpc->delayFile = fopen("aecDelay.dat", "wb");
+    aecpc->preCompFile = fopen("preComp.pcm", "wb");
+    aecpc->postCompFile = fopen("postComp.pcm", "wb");
+#endif // AEC_DEBUG
+
+    return 0;
+
+fail_buffer:
+    WebRtcApm_FreeBuffer(aecpc->farendBuf);
+fail_aec:
+    WebRtcAec_FreeAec(aecpc->aec);
+fail_instance:
+    free(aecpc);
+    // Do not leave the caller holding a pointer to freed memory.
+    *aecInst = NULL;
+    return -1;
+}
+
+// Releases an instance obtained from a successful WebRtcAec_Create(),
+// including its core, farend buffer and resampler (and, in AEC_DEBUG builds,
+// the debug files). Returns -1 if aecInst is NULL, 0 otherwise.
+WebRtc_Word32 WebRtcAec_Free(void *aecInst)
+{
+    aecpc_t *aecpc = aecInst;
+
+    if (aecpc == NULL) {
+        return -1;
+    }
+
+#ifdef AEC_DEBUG
+    fclose(aecpc->aec->farFile);
+    fclose(aecpc->aec->nearFile);
+    fclose(aecpc->aec->outFile);
+    fclose(aecpc->aec->outLpFile);
+
+    fclose(aecpc->bufFile);
+    fclose(aecpc->skewFile);
+    fclose(aecpc->delayFile);
+    fclose(aecpc->preCompFile);
+    fclose(aecpc->postCompFile);
+#endif // AEC_DEBUG
+
+    WebRtcAec_FreeAec(aecpc->aec);
+    WebRtcApm_FreeBuffer(aecpc->farendBuf);
+    WebRtcAec_FreeResampler(aecpc->resampler);
+    free(aecpc);
+
+    return 0;
+}
+
+// (Re)initializes an instance.
+//
+// sampFreq must be 8000, 16000 or 32000; scSampFreq must be in [1, 96000].
+// Initializes the core, the farend buffer and the resampler, resets all
+// bookkeeping fields and applies the default configuration (moderate NLP,
+// skew and metrics off).
+// Returns 0 on success. Returns -1 on failure; except when aecInst is NULL,
+// the reason is stored in lastError.
+WebRtc_Word32 WebRtcAec_Init(void *aecInst, WebRtc_Word32 sampFreq, WebRtc_Word32 scSampFreq)
+{
+    aecpc_t *aecpc = aecInst;
+    AecConfig aecConfig;
+
+    if (aecpc == NULL) {
+        return -1;
+    }
+
+    if (sampFreq != 8000 && sampFreq != 16000  && sampFreq != 32000) {
+        aecpc->lastError = AEC_BAD_PARAMETER_ERROR;
+        return -1;
+    }
+    aecpc->sampFreq = sampFreq;
+
+    if (scSampFreq < 1 || scSampFreq > 96000) {
+        aecpc->lastError = AEC_BAD_PARAMETER_ERROR;
+        return -1;
+    }
+    aecpc->scSampFreq = scSampFreq;
+
+    // Initialize echo canceller core
+    if (WebRtcAec_InitAec(aecpc->aec, aecpc->sampFreq) == -1) {
+        aecpc->lastError = AEC_UNSPECIFIED_ERROR;
+        return -1;
+    }
+
+    // Initialize farend buffer
+    if (WebRtcApm_InitBuffer(aecpc->farendBuf) == -1) {
+        aecpc->lastError = AEC_UNSPECIFIED_ERROR;
+        return -1;
+    }
+
+    if (WebRtcAec_InitResampler(aecpc->resampler, aecpc->scSampFreq) == -1) {
+        aecpc->lastError = AEC_UNSPECIFIED_ERROR;
+        return -1;
+    }
+
+    aecpc->initFlag = initCheck;  // indicates that initialization has been done
+
+    if (aecpc->sampFreq == 32000) {
+        aecpc->splitSampFreq = 16000;
+    }
+    else {
+        aecpc->splitSampFreq = sampFreq;
+    }
+
+    aecpc->skewFrCtr = 0;
+    aecpc->activity = 0;
+
+    aecpc->delayChange = 1;
+    aecpc->delayCtr = 0;
+
+    aecpc->sum = 0;
+    aecpc->counter = 0;
+    aecpc->checkBuffSize = 1;
+    aecpc->firstVal = 0;
+
+    aecpc->ECstartup = 1;
+    aecpc->bufSizeStart = 0;
+    aecpc->checkBufSizeCtr = 0;
+    aecpc->filtDelay = 0;
+    aecpc->timeForDelayChange =0;
+    aecpc->knownDelay = 0;
+    aecpc->lastDelayDiff = 0;
+
+    aecpc->skew = 0;
+    aecpc->resample = kAecFalse;
+    aecpc->highSkewCtr = 0;
+    aecpc->sampFactor = (aecpc->scSampFreq * 1.0f) / aecpc->splitSampFreq;
+
+    // Clear the whole saved-frame array. The size is taken from the array
+    // itself so that it always matches the declaration of farendOld instead
+    // of relying on a hard-coded byte count.
+    memset(aecpc->farendOld, 0, sizeof(aecpc->farendOld));
+
+    // Default settings.
+    aecConfig.nlpMode = kAecNlpModerate;
+    aecConfig.skewMode = kAecFalse;
+    aecConfig.metricsMode = kAecFalse;
+
+    if (WebRtcAec_set_config(aecpc, aecConfig) == -1) {
+        aecpc->lastError = AEC_UNSPECIFIED_ERROR;
+        return -1;
+    }
+
+    return 0;
+}
+
+// only buffer L band for farend
+//
+// Appends nrOfSamples (80 or 160) farend samples to the farend buffer. When
+// both skewMode and resample are enabled, the samples are first resampled
+// with the current skew estimate and the resampled data is written instead.
+// Returns 0 on success, or -1 with lastError set for a NULL farend pointer,
+// an uninitialized instance or an unsupported nrOfSamples (and -1 without
+// lastError if aecInst itself is NULL).
+WebRtc_Word32 WebRtcAec_BufferFarend(void *aecInst, const WebRtc_Word16 *farend,
+    WebRtc_Word16 nrOfSamples)
+{
+    aecpc_t *aecpc = aecInst;
+    WebRtc_Word32 retVal = 0;
+    short newNrOfSamples;
+    short newFarend[MAX_RESAMP_LEN];
+    float skew;
+
+    if (aecpc == NULL) {
+        return -1;
+    }
+
+    if (farend == NULL) {
+        aecpc->lastError = AEC_NULL_POINTER_ERROR;
+        return -1;
+    }
+
+    if (aecpc->initFlag != initCheck) {
+        aecpc->lastError = AEC_UNINITIALIZED_ERROR;
+        return -1;
+    }
+
+    // number of samples == 160 for SWB input
+    if (nrOfSamples != 80 && nrOfSamples != 160) {
+        aecpc->lastError = AEC_BAD_PARAMETER_ERROR;
+        return -1;
+    }
+
+    skew = aecpc->skew;
+
+    // TODO: Is this really a good idea?
+    if (!aecpc->ECstartup) {
+        DelayComp(aecpc);
+    }
+
+    if (aecpc->skewMode == kAecTrue && aecpc->resample == kAecTrue) {
+        // Resample and get a new number of samples
+        newNrOfSamples = WebRtcAec_ResampleLinear(aecpc->resampler,
+                                                  farend,
+                                                  nrOfSamples,
+                                                  skew,
+                                                  newFarend);
+        WebRtcApm_WriteBuffer(aecpc->farendBuf, newFarend, newNrOfSamples);
+
+#ifdef AEC_DEBUG
+        fwrite(farend, 2, nrOfSamples, aecpc->preCompFile);
+        fwrite(newFarend, 2, newNrOfSamples, aecpc->postCompFile);
+#endif
+    }
+    else {
+        WebRtcApm_WriteBuffer(aecpc->farendBuf, farend, nrOfSamples);
+    }
+
+    return retVal;
+}
+
+WebRtc_Word32 WebRtcAec_Process(void *aecInst, const WebRtc_Word16 *nearend,
+    const WebRtc_Word16 *nearendH, WebRtc_Word16 *out, WebRtc_Word16 *outH,
+    WebRtc_Word16 nrOfSamples, WebRtc_Word16 msInSndCardBuf, WebRtc_Word32 skew)
+{
+    aecpc_t *aecpc = aecInst;
+    WebRtc_Word32 retVal = 0;
+    short i;
+    short farend[FRAME_LEN];
+    short nmbrOfFilledBuffers;
+    short nBlocks10ms;
+    short nFrames;
+#ifdef AEC_DEBUG
+    short
msInAECBuf; +#endif + // Limit resampling to doubling/halving of signal + const float minSkewEst = -0.5f; + const float maxSkewEst = 1.0f; + + if (aecpc == NULL) { + return -1; + } + + if (nearend == NULL) { + aecpc->lastError = AEC_NULL_POINTER_ERROR; + return -1; + } + + if (out == NULL) { + aecpc->lastError = AEC_NULL_POINTER_ERROR; + return -1; + } + + if (aecpc->initFlag != initCheck) { + aecpc->lastError = AEC_UNINITIALIZED_ERROR; + return -1; + } + + // number of samples == 160 for SWB input + if (nrOfSamples != 80 && nrOfSamples != 160) { + aecpc->lastError = AEC_BAD_PARAMETER_ERROR; + return -1; + } + + // Check for valid pointers based on sampling rate + if (aecpc->sampFreq == 32000 && nearendH == NULL) { + aecpc->lastError = AEC_NULL_POINTER_ERROR; + return -1; + } + + if (msInSndCardBuf < 0) { + msInSndCardBuf = 0; + aecpc->lastError = AEC_BAD_PARAMETER_WARNING; + retVal = -1; + } + else if (msInSndCardBuf > 500) { + msInSndCardBuf = 500; + aecpc->lastError = AEC_BAD_PARAMETER_WARNING; + retVal = -1; + } + msInSndCardBuf += 10; + aecpc->msInSndCardBuf = msInSndCardBuf; + + if (aecpc->skewMode == kAecTrue) { + if (aecpc->skewFrCtr < 25) { + aecpc->skewFrCtr++; + } + else { + retVal = WebRtcAec_GetSkew(aecpc->resampler, skew, &aecpc->skew); + if (retVal == -1) { + aecpc->skew = 0; + aecpc->lastError = AEC_BAD_PARAMETER_WARNING; + } + + aecpc->skew /= aecpc->sampFactor*nrOfSamples; + + if (aecpc->skew < 1.0e-3 && aecpc->skew > -1.0e-3) { + aecpc->resample = kAecFalse; + } + else { + aecpc->resample = kAecTrue; + } + + if (aecpc->skew < minSkewEst) { + aecpc->skew = minSkewEst; + } + else if (aecpc->skew > maxSkewEst) { + aecpc->skew = maxSkewEst; + } + +#ifdef AEC_DEBUG + fwrite(&aecpc->skew, sizeof(aecpc->skew), 1, aecpc->skewFile); +#endif + } + } + + nFrames = nrOfSamples / FRAME_LEN; + nBlocks10ms = nFrames / aecpc->aec->mult; + + if (aecpc->ECstartup) { + memcpy(out, nearend, sizeof(short) * nrOfSamples); + nmbrOfFilledBuffers = 
WebRtcApm_get_buffer_size(aecpc->farendBuf) / FRAME_LEN; + + // The AEC is in the start up mode + // AEC is disabled until the soundcard buffer and farend buffers are OK + + // Mechanism to ensure that the soundcard buffer is reasonably stable. + if (aecpc->checkBuffSize) { + + aecpc->checkBufSizeCtr++; + // Before we fill up the far end buffer we require the amount of data on the + // sound card to be stable (+/-8 ms) compared to the first value. This + // comparison is made during the following 4 consecutive frames. If it seems + // to be stable then we start to fill up the far end buffer. + + if (aecpc->counter == 0) { + aecpc->firstVal = aecpc->msInSndCardBuf; + aecpc->sum = 0; + } + + if (abs(aecpc->firstVal - aecpc->msInSndCardBuf) < + WEBRTC_SPL_MAX(0.2 * aecpc->msInSndCardBuf, sampMsNb)) { + aecpc->sum += aecpc->msInSndCardBuf; + aecpc->counter++; + } + else { + aecpc->counter = 0; + } + + if (aecpc->counter*nBlocks10ms >= 6) { + // The farend buffer size is determined in blocks of 80 samples + // Use 75% of the average value of the soundcard buffer + aecpc->bufSizeStart = WEBRTC_SPL_MIN((int) (0.75 * (aecpc->sum * + aecpc->aec->mult) / (aecpc->counter * 10)), BUF_SIZE_FRAMES); + // buffersize has now been determined + aecpc->checkBuffSize = 0; + } + + if (aecpc->checkBufSizeCtr * nBlocks10ms > 50) { + // for really bad sound cards, don't disable echocanceller for more than 0.5 sec + aecpc->bufSizeStart = WEBRTC_SPL_MIN((int) (0.75 * (aecpc->msInSndCardBuf * + aecpc->aec->mult) / 10), BUF_SIZE_FRAMES); + aecpc->checkBuffSize = 0; + } + } + + // if checkBuffSize changed in the if-statement above + if (!aecpc->checkBuffSize) { + // soundcard buffer is now reasonably stable + // When the far end buffer is filled with approximately the same amount of + // data as the amount on the sound card we end the start up phase and start + // to cancel echoes. 
+ + if (nmbrOfFilledBuffers == aecpc->bufSizeStart) { + aecpc->ECstartup = 0; // Enable the AEC + } + else if (nmbrOfFilledBuffers > aecpc->bufSizeStart) { + WebRtcApm_FlushBuffer(aecpc->farendBuf, WebRtcApm_get_buffer_size(aecpc->farendBuf) - + aecpc->bufSizeStart * FRAME_LEN); + aecpc->ECstartup = 0; + } + } + + } + else { + // AEC is enabled + + // Note only 1 block supported for nb and 2 blocks for wb + for (i = 0; i < nFrames; i++) { + nmbrOfFilledBuffers = WebRtcApm_get_buffer_size(aecpc->farendBuf) / FRAME_LEN; + + // Check that there is data in the far end buffer + if (nmbrOfFilledBuffers > 0) { + // Get the next 80 samples from the farend buffer + WebRtcApm_ReadBuffer(aecpc->farendBuf, farend, FRAME_LEN); + + // Always store the last frame for use when we run out of data + memcpy(&(aecpc->farendOld[i][0]), farend, FRAME_LEN * sizeof(short)); + } + else { + // We have no data so we use the last played frame + memcpy(farend, &(aecpc->farendOld[i][0]), FRAME_LEN * sizeof(short)); + } + + // Call buffer delay estimator when all data is extracted, + // i.e. 
i = 0 for NB and i = 1 for WB or SWB + if ((i == 0 && aecpc->splitSampFreq == 8000) || + (i == 1 && (aecpc->splitSampFreq == 16000))) { + EstBufDelay(aecpc, aecpc->msInSndCardBuf); + } + + // Call the AEC + WebRtcAec_ProcessFrame(aecpc->aec, farend, &nearend[FRAME_LEN * i], &nearendH[FRAME_LEN * i], + &out[FRAME_LEN * i], &outH[FRAME_LEN * i], aecpc->knownDelay); + } + } + +#ifdef AEC_DEBUG + msInAECBuf = WebRtcApm_get_buffer_size(aecpc->farendBuf) / (sampMsNb*aecpc->aec->mult); + fwrite(&msInAECBuf, 2, 1, aecpc->bufFile); + fwrite(&(aecpc->knownDelay), sizeof(aecpc->knownDelay), 1, aecpc->delayFile); +#endif + + return retVal; +} + +WebRtc_Word32 WebRtcAec_set_config(void *aecInst, AecConfig config) +{ + aecpc_t *aecpc = aecInst; + + if (aecpc == NULL) { + return -1; + } + + if (aecpc->initFlag != initCheck) { + aecpc->lastError = AEC_UNINITIALIZED_ERROR; + return -1; + } + + if (config.skewMode != kAecFalse && config.skewMode != kAecTrue) { + aecpc->lastError = AEC_BAD_PARAMETER_ERROR; + return -1; + } + aecpc->skewMode = config.skewMode; + + if (config.nlpMode != kAecNlpConservative && config.nlpMode != + kAecNlpModerate && config.nlpMode != kAecNlpAggressive) { + aecpc->lastError = AEC_BAD_PARAMETER_ERROR; + return -1; + } + aecpc->nlpMode = config.nlpMode; + aecpc->aec->targetSupp = targetSupp[aecpc->nlpMode]; + aecpc->aec->minOverDrive = minOverDrive[aecpc->nlpMode]; + + if (config.metricsMode != kAecFalse && config.metricsMode != kAecTrue) { + aecpc->lastError = AEC_BAD_PARAMETER_ERROR; + return -1; + } + aecpc->aec->metricsMode = config.metricsMode; + if (aecpc->aec->metricsMode == kAecTrue) { + WebRtcAec_InitMetrics(aecpc->aec); + } + + return 0; +} + +WebRtc_Word32 WebRtcAec_get_config(void *aecInst, AecConfig *config) +{ + aecpc_t *aecpc = aecInst; + + if (aecpc == NULL) { + return -1; + } + + if (config == NULL) { + aecpc->lastError = AEC_NULL_POINTER_ERROR; + return -1; + } + + if (aecpc->initFlag != initCheck) { + aecpc->lastError = 
AEC_UNINITIALIZED_ERROR; + return -1; + } + + config->nlpMode = aecpc->nlpMode; + config->skewMode = aecpc->skewMode; + config->metricsMode = aecpc->aec->metricsMode; + + return 0; +} + +WebRtc_Word32 WebRtcAec_get_echo_status(void *aecInst, WebRtc_Word16 *status) +{ + aecpc_t *aecpc = aecInst; + + if (aecpc == NULL) { + return -1; + } + + if (status == NULL) { + aecpc->lastError = AEC_NULL_POINTER_ERROR; + return -1; + } + + if (aecpc->initFlag != initCheck) { + aecpc->lastError = AEC_UNINITIALIZED_ERROR; + return -1; + } + + *status = aecpc->aec->echoState; + + return 0; +} + +WebRtc_Word32 WebRtcAec_GetMetrics(void *aecInst, AecMetrics *metrics) +{ + const float upweight = 0.7f; + float dtmp; + short stmp; + aecpc_t *aecpc = aecInst; + + if (aecpc == NULL) { + return -1; + } + + if (metrics == NULL) { + aecpc->lastError = AEC_NULL_POINTER_ERROR; + return -1; + } + + if (aecpc->initFlag != initCheck) { + aecpc->lastError = AEC_UNINITIALIZED_ERROR; + return -1; + } + + // ERL + metrics->erl.instant = (short) aecpc->aec->erl.instant; + + if ((aecpc->aec->erl.himean > offsetLevel) && (aecpc->aec->erl.average > offsetLevel)) { + // Use a mix between regular average and upper part average + dtmp = upweight * aecpc->aec->erl.himean + (1 - upweight) * aecpc->aec->erl.average; + metrics->erl.average = (short) dtmp; + } + else { + metrics->erl.average = offsetLevel; + } + + metrics->erl.max = (short) aecpc->aec->erl.max; + + if (aecpc->aec->erl.min < (offsetLevel * (-1))) { + metrics->erl.min = (short) aecpc->aec->erl.min; + } + else { + metrics->erl.min = offsetLevel; + } + + // ERLE + metrics->erle.instant = (short) aecpc->aec->erle.instant; + + if ((aecpc->aec->erle.himean > offsetLevel) && (aecpc->aec->erle.average > offsetLevel)) { + // Use a mix between regular average and upper part average + dtmp = upweight * aecpc->aec->erle.himean + (1 - upweight) * aecpc->aec->erle.average; + metrics->erle.average = (short) dtmp; + } + else { + metrics->erle.average = 
offsetLevel; + } + + metrics->erle.max = (short) aecpc->aec->erle.max; + + if (aecpc->aec->erle.min < (offsetLevel * (-1))) { + metrics->erle.min = (short) aecpc->aec->erle.min; + } else { + metrics->erle.min = offsetLevel; + } + + // RERL + if ((metrics->erl.average > offsetLevel) && (metrics->erle.average > offsetLevel)) { + stmp = metrics->erl.average + metrics->erle.average; + } + else { + stmp = offsetLevel; + } + metrics->rerl.average = stmp; + + // No other statistics needed, but returned for completeness + metrics->rerl.instant = stmp; + metrics->rerl.max = stmp; + metrics->rerl.min = stmp; + + // A_NLP + metrics->aNlp.instant = (short) aecpc->aec->aNlp.instant; + + if ((aecpc->aec->aNlp.himean > offsetLevel) && (aecpc->aec->aNlp.average > offsetLevel)) { + // Use a mix between regular average and upper part average + dtmp = upweight * aecpc->aec->aNlp.himean + (1 - upweight) * aecpc->aec->aNlp.average; + metrics->aNlp.average = (short) dtmp; + } + else { + metrics->aNlp.average = offsetLevel; + } + + metrics->aNlp.max = (short) aecpc->aec->aNlp.max; + + if (aecpc->aec->aNlp.min < (offsetLevel * (-1))) { + metrics->aNlp.min = (short) aecpc->aec->aNlp.min; + } + else { + metrics->aNlp.min = offsetLevel; + } + + return 0; +} + +WebRtc_Word32 WebRtcAec_get_version(WebRtc_Word8 *versionStr, WebRtc_Word16 len) +{ + const char version[] = "AEC 2.5.0"; + const short versionLen = (short)strlen(version) + 1; // +1 for null-termination + + if (versionStr == NULL) { + return -1; + } + + if (versionLen > len) { + return -1; + } + + strncpy(versionStr, version, versionLen); + return 0; +} + +WebRtc_Word32 WebRtcAec_get_error_code(void *aecInst) +{ + aecpc_t *aecpc = aecInst; + + if (aecpc == NULL) { + return -1; + } + + return aecpc->lastError; +} + +static int EstBufDelay(aecpc_t *aecpc, short msInSndCardBuf) +{ + short delayNew, nSampFar, nSampSndCard; + short diff; + + nSampFar = WebRtcApm_get_buffer_size(aecpc->farendBuf); + nSampSndCard = msInSndCardBuf * sampMsNb 
* aecpc->aec->mult; + + delayNew = nSampSndCard - nSampFar; + + // Account for resampling frame delay + if (aecpc->skewMode == kAecTrue && aecpc->resample == kAecTrue) { + delayNew -= kResamplingDelay; + } + + if (delayNew < FRAME_LEN) { + WebRtcApm_FlushBuffer(aecpc->farendBuf, FRAME_LEN); + delayNew += FRAME_LEN; + } + + aecpc->filtDelay = WEBRTC_SPL_MAX(0, (short)(0.8*aecpc->filtDelay + 0.2*delayNew)); + + diff = aecpc->filtDelay - aecpc->knownDelay; + if (diff > 224) { + if (aecpc->lastDelayDiff < 96) { + aecpc->timeForDelayChange = 0; + } + else { + aecpc->timeForDelayChange++; + } + } + else if (diff < 96 && aecpc->knownDelay > 0) { + if (aecpc->lastDelayDiff > 224) { + aecpc->timeForDelayChange = 0; + } + else { + aecpc->timeForDelayChange++; + } + } + else { + aecpc->timeForDelayChange = 0; + } + aecpc->lastDelayDiff = diff; + + if (aecpc->timeForDelayChange > 25) { + aecpc->knownDelay = WEBRTC_SPL_MAX((int)aecpc->filtDelay - 160, 0); + } + return 0; +} + +static int DelayComp(aecpc_t *aecpc) +{ + int nSampFar, nSampSndCard, delayNew, nSampAdd; + const int maxStuffSamp = 10 * FRAME_LEN; + + nSampFar = WebRtcApm_get_buffer_size(aecpc->farendBuf); + nSampSndCard = aecpc->msInSndCardBuf * sampMsNb * aecpc->aec->mult; + delayNew = nSampSndCard - nSampFar; + + // Account for resampling frame delay + if (aecpc->skewMode == kAecTrue && aecpc->resample == kAecTrue) { + delayNew -= kResamplingDelay; + } + + if (delayNew > FAR_BUF_LEN - FRAME_LEN*aecpc->aec->mult) { + // The difference of the buffersizes is larger than the maximum + // allowed known delay. Compensate by stuffing the buffer. 
+ nSampAdd = (int)(WEBRTC_SPL_MAX((int)(0.5 * nSampSndCard - nSampFar), + FRAME_LEN)); + nSampAdd = WEBRTC_SPL_MIN(nSampAdd, maxStuffSamp); + + WebRtcApm_StuffBuffer(aecpc->farendBuf, nSampAdd); + aecpc->delayChange = 1; // the delay needs to be updated + } + + return 0; +} diff --git a/src/modules/audio_processing/aec/main/source/resampler.c b/src/modules/audio_processing/aec/main/source/resampler.c new file mode 100644 index 0000000000..4caa6f4c87 --- /dev/null +++ b/src/modules/audio_processing/aec/main/source/resampler.c @@ -0,0 +1,235 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* Resamples a signal to an arbitrary rate. Used by the AEC to compensate for clock + * skew by resampling the farend signal. 
+ */ + +#include <assert.h> +#include <stdlib.h> +#include <string.h> +#include <math.h> + +#include "resampler.h" +#include "aec_core.h" + +enum { kFrameBufferSize = FRAME_LEN * 4 }; +enum { kEstimateLengthFrames = 400 }; + +typedef struct { + short buffer[kFrameBufferSize]; + float position; + + int deviceSampleRateHz; + int skewData[kEstimateLengthFrames]; + int skewDataIndex; + float skewEstimate; +} resampler_t; + +static int EstimateSkew(const int* rawSkew, + int size, + int absLimit, + float *skewEst); + +int WebRtcAec_CreateResampler(void **resampInst) +{ + resampler_t *obj = malloc(sizeof(resampler_t)); + *resampInst = obj; + if (obj == NULL) { + return -1; + } + + return 0; +} + +int WebRtcAec_InitResampler(void *resampInst, int deviceSampleRateHz) +{ + resampler_t *obj = (resampler_t*) resampInst; + memset(obj->buffer, 0, sizeof(obj->buffer)); + obj->position = 0.0; + + obj->deviceSampleRateHz = deviceSampleRateHz; + memset(obj->skewData, 0, sizeof(obj->skewData)); + obj->skewDataIndex = 0; + obj->skewEstimate = 0.0; + + return 0; +} + +int WebRtcAec_FreeResampler(void *resampInst) +{ + resampler_t *obj = (resampler_t*) resampInst; + free(obj); + + return 0; +} + +int WebRtcAec_ResampleLinear(void *resampInst, + const short *inspeech, + int size, + float skew, + short *outspeech) +{ + resampler_t *obj = (resampler_t*) resampInst; + + short *y; + float be, tnew, interp; + int tn, outsize, mm; + + if (size < 0 || size > 2 * FRAME_LEN) { + return -1; + } + + // Add new frame data in lookahead + memcpy(&obj->buffer[FRAME_LEN + kResamplingDelay], + inspeech, + size * sizeof(short)); + + // Sample rate ratio + be = 1 + skew; + + // Loop over input frame + mm = 0; + y = &obj->buffer[FRAME_LEN]; // Point at current frame + + tnew = be * mm + obj->position; + tn = (int) tnew; + + while (tn < size) { + + // Interpolation + interp = y[tn] + (tnew - tn) * (y[tn+1] - y[tn]); + + if (interp > 32767) { + interp = 32767; + } + else if (interp < -32768) { + interp = 
-32768; + } + + outspeech[mm] = (short) interp; + mm++; + + tnew = be * mm + obj->position; + tn = (int) tnew; + } + + outsize = mm; + obj->position += outsize * be - size; + + // Shift buffer + memmove(obj->buffer, + &obj->buffer[size], + (kFrameBufferSize - size) * sizeof(short)); + + return outsize; +} + +int WebRtcAec_GetSkew(void *resampInst, int rawSkew, float *skewEst) +{ + resampler_t *obj = (resampler_t*)resampInst; + int err = 0; + + if (obj->skewDataIndex < kEstimateLengthFrames) { + obj->skewData[obj->skewDataIndex] = rawSkew; + obj->skewDataIndex++; + } + else if (obj->skewDataIndex == kEstimateLengthFrames) { + err = EstimateSkew(obj->skewData, + kEstimateLengthFrames, + obj->deviceSampleRateHz, + skewEst); + obj->skewEstimate = *skewEst; + obj->skewDataIndex++; + } + else { + *skewEst = obj->skewEstimate; + } + + return err; +} + +int EstimateSkew(const int* rawSkew, + const int size, + const int deviceSampleRateHz, + float *skewEst) +{ + const int absLimitOuter = (int)(0.04f * deviceSampleRateHz); + const int absLimitInner = (int)(0.0025f * deviceSampleRateHz); + int i = 0; + int n = 0; + float rawAvg = 0; + float err = 0; + float rawAbsDev = 0; + int upperLimit = 0; + int lowerLimit = 0; + float cumSum = 0; + float x = 0; + float x2 = 0; + float y = 0; + float xy = 0; + float xAvg = 0; + float yAvg = 0; + float denom = 0; + float skew = 0; + + *skewEst = 0; // Set in case of error below. + for (i = 0; i < size; i++) { + if ((rawSkew[i] < absLimitOuter && rawSkew[i] > -absLimitOuter)) { + n++; + rawAvg += rawSkew[i]; + } + } + + if (n == 0) { + return -1; + } + assert(n > 0); + rawAvg /= n; + + for (i = 0; i < size; i++) { + if ((rawSkew[i] < absLimitOuter && rawSkew[i] > -absLimitOuter)) { + err = rawSkew[i] - rawAvg; + rawAbsDev += err >= 0 ? err : -err; + } + } + assert(n > 0); + rawAbsDev /= n; + upperLimit = (int)(rawAvg + 5 * rawAbsDev + 1); // +1 for ceiling. + lowerLimit = (int)(rawAvg - 5 * rawAbsDev - 1); // -1 for floor. 
+ + n = 0; + for (i = 0; i < size; i++) { + if ((rawSkew[i] < absLimitInner && rawSkew[i] > -absLimitInner) || + (rawSkew[i] < upperLimit && rawSkew[i] > lowerLimit)) { + n++; + cumSum += rawSkew[i]; + x += n; + x2 += n*n; + y += cumSum; + xy += n * cumSum; + } + } + + if (n == 0) { + return -1; + } + assert(n > 0); + xAvg = x / n; + yAvg = y / n; + denom = x2 - xAvg*x; + + if (denom != 0) { + skew = (xy - xAvg*y) / denom; + } + + *skewEst = skew; + return 0; +} diff --git a/src/modules/audio_processing/aec/main/source/resampler.h b/src/modules/audio_processing/aec/main/source/resampler.h new file mode 100644 index 0000000000..9cb2837293 --- /dev/null +++ b/src/modules/audio_processing/aec/main/source/resampler.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_RESAMPLER_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_RESAMPLER_H_ + +enum { kResamplingDelay = 1 }; + +// Unless otherwise specified, functions return 0 on success and -1 on error +int WebRtcAec_CreateResampler(void **resampInst); +int WebRtcAec_InitResampler(void *resampInst, int deviceSampleRateHz); +int WebRtcAec_FreeResampler(void *resampInst); + +// Estimates skew from raw measurement. +int WebRtcAec_GetSkew(void *resampInst, int rawSkew, float *skewEst); + +// Resamples input using linear interpolation. +// Returns size of resampled array. 
+int WebRtcAec_ResampleLinear(void *resampInst, + const short *inspeech, + int size, + float skew, + short *outspeech); + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_RESAMPLER_H_ diff --git a/src/modules/audio_processing/aecm/main/interface/echo_control_mobile.h b/src/modules/audio_processing/aecm/main/interface/echo_control_mobile.h new file mode 100644 index 0000000000..26b1172726 --- /dev/null +++ b/src/modules/audio_processing/aecm/main/interface/echo_control_mobile.h @@ -0,0 +1,206 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AECM_MAIN_INTERFACE_ECHO_CONTROL_MOBILE_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_AECM_MAIN_INTERFACE_ECHO_CONTROL_MOBILE_H_ + +#include "typedefs.h" + +enum { + AecmFalse = 0, + AecmTrue +}; + +// Errors +#define AECM_UNSPECIFIED_ERROR 12000 +#define AECM_UNSUPPORTED_FUNCTION_ERROR 12001 +#define AECM_UNINITIALIZED_ERROR 12002 +#define AECM_NULL_POINTER_ERROR 12003 +#define AECM_BAD_PARAMETER_ERROR 12004 + +// Warnings +#define AECM_BAD_PARAMETER_WARNING 12100 + +typedef struct { + WebRtc_Word16 cngMode; // AECM_FALSE, AECM_TRUE (default) + WebRtc_Word16 echoMode; // 0, 1, 2, 3 (default), 4 +} AecmConfig; + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Allocates the memory needed by the AECM. The memory needs to be + * initialized separately using the WebRtcAecm_Init() function. 
+ * + * Inputs Description + * ------------------------------------------------------------------- + * void **aecmInst Pointer to the AECM instance to be + * created and initialized + * + * Outputs Description + * ------------------------------------------------------------------- + * WebRtc_Word32 return 0: OK + * -1: error + */ +WebRtc_Word32 WebRtcAecm_Create(void **aecmInst); + +/* + * This function releases the memory allocated by WebRtcAecm_Create() + * + * Inputs Description + * ------------------------------------------------------------------- + * void *aecmInst Pointer to the AECM instance + * + * Outputs Description + * ------------------------------------------------------------------- + * WebRtc_Word32 return 0: OK + * -1: error + */ +WebRtc_Word32 WebRtcAecm_Free(void *aecmInst); + +/* + * Initializes an AECM instance. + * + * Inputs Description + * ------------------------------------------------------------------- + * void *aecmInst Pointer to the AECM instance + * WebRtc_Word32 sampFreq Sampling frequency of data + * WebRtc_Word32 scSampFreq Soundcard sampling frequency + * + * Outputs Description + * ------------------------------------------------------------------- + * WebRtc_Word32 return 0: OK + * -1: error + */ +WebRtc_Word32 WebRtcAecm_Init(void* aecmInst, + WebRtc_Word32 sampFreq, + WebRtc_Word32 scSampFreq); + +/* + * Inserts an 80 or 160 sample block of data into the farend buffer. 
+ * + * Inputs Description + * ------------------------------------------------------------------- + * void *aecmInst Pointer to the AECM instance + * WebRtc_Word16 *farend In buffer containing one frame of + * farend signal + * WebRtc_Word16 nrOfSamples Number of samples in farend buffer + * + * Outputs Description + * ------------------------------------------------------------------- + * WebRtc_Word32 return 0: OK + * -1: error + */ +WebRtc_Word32 WebRtcAecm_BufferFarend(void* aecmInst, + const WebRtc_Word16* farend, + WebRtc_Word16 nrOfSamples); + +/* + * Runs the AECM on an 80 or 160 sample blocks of data. + * + * Inputs Description + * ------------------------------------------------------------------- + * void *aecmInst Pointer to the AECM instance + * WebRtc_Word16 *nearendNoisy In buffer containing one frame of + * reference nearend+echo signal. If + * noise reduction is active, provide + * the noisy signal here. + * WebRtc_Word16 *nearendClean In buffer containing one frame of + * nearend+echo signal. If noise + * reduction is active, provide the + * clean signal here. Otherwise pass a + * NULL pointer. 
+ * WebRtc_Word16 nrOfSamples Number of samples in nearend buffer + * WebRtc_Word16 msInSndCardBuf Delay estimate for sound card and + * system buffers + * + * Outputs Description + * ------------------------------------------------------------------- + * WebRtc_Word16 *out Out buffer, one frame of processed nearend + * WebRtc_Word32 return 0: OK + * -1: error + */ +WebRtc_Word32 WebRtcAecm_Process(void* aecmInst, + const WebRtc_Word16* nearendNoisy, + const WebRtc_Word16* nearendClean, + WebRtc_Word16* out, + WebRtc_Word16 nrOfSamples, + WebRtc_Word16 msInSndCardBuf); + +/* + * This function enables the user to set certain parameters on-the-fly + * + * Inputs Description + * ------------------------------------------------------------------- + * void *aecmInst Pointer to the AECM instance + * AecmConfig config Config instance that contains all + * properties to be set + * + * Outputs Description + * ------------------------------------------------------------------- + * WebRtc_Word32 return 0: OK + * -1: error + */ +WebRtc_Word32 WebRtcAecm_set_config(void* aecmInst, + AecmConfig config); + +/* + * This function reads back the configuration currently in use + * + * Inputs Description + * ------------------------------------------------------------------- + * void *aecmInst Pointer to the AECM instance + * + * Outputs Description + * ------------------------------------------------------------------- + * AecmConfig *config Pointer to the config instance that + * all properties will be written to + * WebRtc_Word32 return 0: OK + * -1: error + */ +WebRtc_Word32 WebRtcAecm_get_config(void *aecmInst, + AecmConfig *config); + +/* + * Gets the last error code. 
+ * + * Inputs Description + * ------------------------------------------------------------------- + * void *aecmInst Pointer to the AECM instance + * + * Outputs Description + * ------------------------------------------------------------------- + * WebRtc_Word32 return 12000-12100: error code + */ +WebRtc_Word32 WebRtcAecm_get_error_code(void *aecmInst); + +/* + * Gets a version string + * + * Inputs Description + * ------------------------------------------------------------------- + * char *versionStr Pointer to a string array + * WebRtc_Word16 len The maximum length of the string + * + * Outputs Description + * ------------------------------------------------------------------- + * WebRtc_Word8 *versionStr Pointer to a string array + * WebRtc_Word32 return 0: OK + * -1: error + */ +WebRtc_Word32 WebRtcAecm_get_version(WebRtc_Word8 *versionStr, + WebRtc_Word16 len); + +#ifdef __cplusplus +} +#endif +#endif /* WEBRTC_MODULES_AUDIO_PROCESSING_AECM_MAIN_INTERFACE_ECHO_CONTROL_MOBILE_H_ */ diff --git a/src/modules/audio_processing/aecm/main/matlab/compsup.m b/src/modules/audio_processing/aecm/main/matlab/compsup.m new file mode 100644 index 0000000000..9575ec40fc --- /dev/null +++ b/src/modules/audio_processing/aecm/main/matlab/compsup.m @@ -0,0 +1,447 @@ +function [emicrophone,aaa]=compsup(microphone,TheFarEnd,avtime,samplingfreq); +% microphone = microphone signal +% aaa = nonlinearity input variable +% TheFarEnd = far end signal +% avtime = interval to compute suppression from (seconds) +% samplingfreq = sampling frequency + +%if(nargin==6) +% fprintf(1,'suppress has received a delay sequence\n'); +%end + + +Ap500=[ 1.00, -4.95, 9.801, -9.70299, 4.80298005, -0.9509900499]; +Bp500=[ 0.662743088639636, -2.5841655608125, 3.77668102146288, -2.45182477425154, 0.596566274575251, 0.0]; + + +Ap200=[ 1.00, -4.875, 9.50625, -9.26859375, 4.518439453125, -0.881095693359375]; +Bp200=[ 0.862545460994275, -3.2832804496114, 4.67892032308828, -2.95798023879133, 
0.699796870041299, 0.0]; + +maxDelay=0.4; %[s] +histLen=1; %[s] + + +% CONSTANTS THAT YOU CAN EXPERIMENT WITH +A_GAIN=10.0; % for the suppress case +oversampling = 2; % must be power of 2; minimum is 2; 4 works +% fine for support=64, but for support=128, +% 8 gives better results. +support=64; %512 % fft support (frequency resolution; at low +% settings you can hear more distortion +% (e.g. pitch that is left-over from far-end)) +% 128 works well, 64 is ok) + +lowlevel = mean(abs(microphone))*0.0001; + +G_ol = 0; % Use overlapping sets of estimates + +% ECHO SUPPRESSION SPECIFIC PARAMETERS +suppress_overdrive=1.0; % overdrive factor for suppression 1.4 is good +gamma_echo=1.0; % same as suppress_overdrive but at different place +de_echo_bound=0.0; +mLim=10; % rank of matrix G +%limBW = 1; % use bandwidth-limited response for G +if mLim > (support/2+1) + error('mLim in suppress.m too large\n'); +end + + +dynrange=1.0000e-004; + +% other, constants +hsupport = support/2; +hsupport1 = hsupport+1; +factor = 2 / oversampling; +updatel = support/oversampling; +win=sqrt(designwindow(0,support)); +estLen = round(avtime * samplingfreq/updatel) + +runningfmean =0.0; + +mLim = floor(hsupport1/2); +V = sqrt(2/hsupport1)*cos(pi/hsupport1*(repmat((0:hsupport1-1) + 0.5, mLim, 1).* ... 
+ repmat((0:mLim-1)' + 0.5, 1, hsupport1))); + +fprintf(1,'updatel is %5.3f s\n', updatel/samplingfreq); + + + +bandfirst=8; bandlast=25; +dosmooth=0; % to get rid of wavy bin counts (can be worse or better) + +% compute some constants +blockLen = support/oversampling; +maxDelayb = floor(samplingfreq*maxDelay/updatel); % in blocks +histLenb = floor(samplingfreq*histLen/updatel); % in blocks + +x0=TheFarEnd; +y0=microphone; + + +%input +tlength=min([length(microphone),length(TheFarEnd)]); +updateno=floor(tlength/updatel); +tlength=updatel*updateno; +updateno = updateno - oversampling + 1; + +TheFarEnd =TheFarEnd(1:tlength); +microphone =microphone(1:tlength); + +TheFarEnd =[zeros(hsupport,1);TheFarEnd(1:tlength)]; +microphone =[zeros(hsupport,1);microphone(1:tlength)]; + + +% signal length +n = min([floor(length(x0)/support)*support,floor(length(y0)/support)*support]); +nb = n/blockLen - oversampling + 1; % in blocks + +% initialize space +win = sqrt([0 ; hanning(support-1)]); +sxAll2 = zeros(hsupport1,nb); +syAll2 = zeros(hsupport1,nb); + +z500=zeros(5,maxDelayb+1); +z200=zeros(5,hsupport1); + +bxspectrum=uint32(zeros(nb,1)); +bxhist=uint32(zeros(maxDelayb+1,1)); +byspectrum=uint32(zeros(nb,1)); +bcount=zeros(1+maxDelayb,nb); +fcount=zeros(1+maxDelayb,nb); +fout=zeros(1+maxDelayb,nb); +delay=zeros(nb,1); +tdelay=zeros(nb,1); +nlgains=zeros(nb,1); + +% create space (mainly for debugging) +emicrophone=zeros(tlength,1); +femicrophone=complex(zeros(hsupport1,updateno)); +thefilter=zeros(hsupport1,updateno); +thelimiter=ones(hsupport1,updateno); +fTheFarEnd=complex(zeros(hsupport1,updateno)); +afTheFarEnd=zeros(hsupport1,updateno); +fmicrophone=complex(zeros(hsupport1,updateno)); +afmicrophone=zeros(hsupport1,updateno); + +G = zeros(hsupport1, hsupport1); +zerovec = zeros(hsupport1,1); +zeromat = zeros(hsupport1); + +% Reset sums +mmxs_a = zerovec; +mmys_a = zerovec; +s2xs_a = zerovec; +s2ys_a = zerovec; +Rxxs_a = zeromat; +Ryxs_a = zeromat; +count_a = 1; + +mmxs_b = 
zerovec; +mmys_b = zerovec; +s2xs_b = zerovec; +s2ys_b = zerovec; +Rxxs_b = zeromat; +Ryxs_b = zeromat; +count_b = 1; + +nog=0; + +aaa=zeros(size(TheFarEnd)); + +% loop over signal blocks +fprintf(1,'.. Suppression; averaging G over %5.1f seconds; file length %5.1f seconds ..\n',avtime, length(microphone)/samplingfreq); +fprintf(1,'.. SUPPRESSING ONLY AFTER %5.1f SECONDS! ..\n',avtime); +fprintf(1,'.. 20 seconds is good ..\n'); +hh = waitbar_j(0,'Please wait...'); + + +for i=1:updateno + + sb = (i-1)*updatel + 1; + se=sb+support-1; + + % analysis FFTs + temp=fft(win .* TheFarEnd(sb:se)); + fTheFarEnd(:,i)=temp(1:hsupport1); + xf=fTheFarEnd(:,i); + afTheFarEnd(:,i)= abs(fTheFarEnd(:,i)); + + temp=win .* microphone(sb:se); + + temp=fft(win .* microphone(sb:se)); + fmicrophone(:,i)=temp(1:hsupport1); + yf=fmicrophone(:,i); + afmicrophone(:,i)= abs(fmicrophone(:,i)); + + + ener_orig = afmicrophone(:,i)'*afmicrophone(:,i); + if( ener_orig == 0) + afmicrophone(:,i)=lowlevel*ones(size(afmicrophone(:,i))); + end + + + % use log domain (showed improved performance) +xxf= sqrt(real(xf.*conj(xf))+1e-20); +yyf= sqrt(real(yf.*conj(yf))+1e-20); + sxAll2(:,i) = 20*log10(xxf); + syAll2(:,i) = 20*log10(yyf); + + mD=min(i-1,maxDelayb); + xthreshold = sum(sxAll2(:,i-mD:i),2)/(maxDelayb+1); + + [yout, z200] = filter(Bp200,Ap200,syAll2(:,i),z200,2); + yout=yout/(maxDelayb+1); + ythreshold = mean(syAll2(:,i-mD:i),2); + + + bxspectrum(i)=getBspectrum(sxAll2(:,i),xthreshold,bandfirst,bandlast); + byspectrum(i)=getBspectrum(syAll2(:,i),yout,bandfirst,bandlast); + + bxhist(end-mD:end)=bxspectrum(i-mD:i); + + bcount(:,i)=hisser2( ... 
+ byspectrum(i),flipud(bxhist),bandfirst,bandlast); + + + [fout(:,i), z500] = filter(Bp500,Ap500,bcount(:,i),z500,2); + fcount(:,i)=sum(bcount(:,max(1,i-histLenb+1):i),2); % using the history range + fout(:,i)=round(fout(:,i)); + [value,delay(i)]=min(fout(:,i),[],1); + tdelay(i)=(delay(i)-1)*support/(samplingfreq*oversampling); + + % compensate + + idel = max(i - delay(i) + 1,1); + + + % echo suppression + + noisyspec = afmicrophone(:,i); + + % Estimate G using covariance matrices + + % Cumulative estimates + xx = afTheFarEnd(:,idel); + yy = afmicrophone(:,i); + + % Means + mmxs_a = mmxs_a + xx; + mmys_a = mmys_a + yy; + if (G_ol) + mmxs_b = mmxs_b + xx; + mmys_b = mmys_b + yy; + mmy = mean([mmys_a/count_a mmys_b/count_b],2); + mmx = mean([mmxs_a/count_a mmxs_b/count_b],2); + else + mmx = mmxs_a/count_a; + mmy = mmys_a/count_a; + end + count_a = count_a + 1; + count_b = count_b + 1; + + % Mean removal + xxm = xx - mmx; + yym = yy - mmy; + + % Variances + s2xs_a = s2xs_a + xxm .* xxm; + s2ys_a = s2ys_a + yym .* yym; + s2xs_b = s2xs_b + xxm .* xxm; + s2ys_b = s2ys_b + yym .* yym; + + % Correlation matrices + Rxxs_a = Rxxs_a + xxm * xxm'; + Ryxs_a = Ryxs_a + yym * xxm'; + Rxxs_b = Rxxs_b + xxm * xxm'; + Ryxs_b = Ryxs_b + yym * xxm'; + + + % Gain matrix A + + if mod(i, estLen) == 0 + + + % Cumulative based estimates + Rxxf = Rxxs_a / (estLen - 1); + Ryxf = Ryxs_a / (estLen - 1); + + % Variance normalization + s2x2 = s2xs_a / (estLen - 1); + s2x2 = sqrt(s2x2); + % Sx = diag(max(s2x2,dynrange*max(s2x2))); + Sx = diag(s2x2); + if (sum(s2x2) > 0) + iSx = inv(Sx); + else + iSx= Sx + 0.01; + end + + s2y2 = s2ys_a / (estLen - 1); + s2y2 = sqrt(s2y2); + % Sy = diag(max(s2y2,dynrange*max(s2y2))); + Sy = diag(s2y2); + iSy = inv(Sy); + rx = iSx * Rxxf * iSx; + ryx = iSy * Ryxf * iSx; + + + + dbd= 7; % Us less than the full matrix + + % k x m + % Bandlimited structure on G + LSEon = 0; % Default is using MMSE + if (LSEon) + ryx = ryx*rx; + rx = rx*rx; + end + p = dbd-1; + gaj = 
min(min(hsupport1,2*p+1),min([p+(1:hsupport1); hsupport1+p+1-(1:hsupport1)])); + cgaj = [0 cumsum(gaj)]; + + G3 = zeros(hsupport1); + for kk=1:hsupport1 + ki = max(0,kk-p-1); + if (sum(sum(rx(ki+1:ki+gaj(kk),ki+1:ki+gaj(kk))))>0) + G3(kk,ki+1:ki+gaj(kk)) = ryx(kk,ki+1:ki+gaj(kk))/rx(ki+1:ki+gaj(kk),ki+1:ki+gaj(kk)); + else + G3(kk,ki+1:ki+gaj(kk)) = ryx(kk,ki+1:ki+gaj(kk)); + end + end + % End Bandlimited structure + + G = G3; + G(abs(G)<0.01)=0; + G = suppress_overdrive * Sy * G * iSx; + + if 1 + figure(32); mi=2; + surf(max(min(G,mi),-mi)); view(2) + title('Unscaled Masked Limited-bandwidth G'); + end + pause(0.05); + + % Reset sums + mmxs_a = zerovec; + mmys_a = zerovec; + s2xs_a = zerovec; + s2ys_a = zerovec; + Rxxs_a = zeromat; + Ryxs_a = zeromat; + count_a = 1; + + end + + if (G_ol) + % Gain matrix B + + if ((mod((i-estLen/2), estLen) == 0) & i>estLen) + + + % Cumulative based estimates + Rxxf = Rxxs_b / (estLen - 1); + Ryxf = Ryxs_b / (estLen - 1); + + % Variance normalization + s2x2 = s2xs_b / (estLen - 1); + s2x2 = sqrt(s2x2); + Sx = diag(max(s2x2,dynrange*max(s2x2))); + iSx = inv(Sx); + s2y2 = s2ys_b / (estLen - 1); + s2y2 = sqrt(s2y2); + Sy = diag(max(s2y2,dynrange*max(s2y2))); + iSy = inv(Sy); + rx = iSx * Rxxf * iSx; + ryx = iSy * Ryxf * iSx; + + + % Bandlimited structure on G + LSEon = 0; % Default is using MMSE + if (LSEon) + ryx = ryx*rx; + rx = rx*rx; + end + p = dbd-1; + gaj = min(min(hsupport1,2*p+1),min([p+(1:hsupport1); hsupport1+p+1-(1:hsupport1)])); + cgaj = [0 cumsum(gaj)]; + + G3 = zeros(hsupport1); + for kk=1:hsupport1 + ki = max(0,kk-p-1); + G3(kk,ki+1:ki+gaj(kk)) = ryx(kk,ki+1:ki+gaj(kk))/rx(ki+1:ki+gaj(kk),ki+1:ki+gaj(kk)); + end + % End Bandlimited structure + + G = G3; + G(abs(G)<0.01)=0; + G = suppress_overdrive * Sy * G * iSx; + + if 1 + figure(32); mi=2; + surf(max(min(G,mi),-mi)); view(2) + title('Unscaled Masked Limited-bandwidth G'); + end + pause(0.05); + + + % Reset sums + mmxs_b = zerovec; + mmys_b = zerovec; + s2xs_b = 
zerovec; + s2ys_b = zerovec; + Rxxs_b = zeromat; + Ryxs_b = zeromat; + count_b = 1; + + end + + end + + FECestimate2 = G*afTheFarEnd(:,idel); + + % compute Wiener filter and suppressor function + thefilter(:,i) = (noisyspec - gamma_echo*FECestimate2) ./ noisyspec; + ix0 = find(thefilter(:,i)<de_echo_bound); % bounding trick 1 + thefilter(ix0,i) = de_echo_bound; % bounding trick 2 + ix0 = find(thefilter(:,i)>1); % bounding in reasonable range + thefilter(ix0,i) = 1; + + % NONLINEARITY + nl_alpha=0.8; % memory; seems not very critical + nlSeverity=0.3; % nonlinearity severity: 0 does nothing; 1 suppresses all + thefmean=mean(thefilter(8:16,i)); + if (thefmean<1) + disp(''); + end + runningfmean = nl_alpha*runningfmean + (1-nl_alpha)*thefmean; + aaa(sb+20+1:sb+20+updatel)=10000*runningfmean* ones(updatel,1); % debug + slope0=1.0/(1.0-nlSeverity); % + thegain = max(0.0,min(1.0,slope0*(runningfmean-nlSeverity))); + % END NONLINEARITY + thefilter(:,i) = thegain*thefilter(:,i); + + + % Wiener filtering + femicrophone(:,i) = fmicrophone(:,i) .* thefilter(:,i); + thelimiter(:,i) = (noisyspec - A_GAIN*FECestimate2) ./ noisyspec; + index = find(thelimiter(:,i)>1.0); + thelimiter(index,i) = 1.0; + index = find(thelimiter(:,i)<0.0); + thelimiter(index,i) = 0.0; + + if (rem(i,floor(updateno/20))==0) + fprintf(1,'.'); + end + if mod(i,50)==0 + waitbar_j(i/updateno,hh); + end + + + % reconstruction; first make spectrum odd + temp=[femicrophone(:,i);flipud(conj(femicrophone(2:hsupport,i)))]; + emicrophone(sb:se) = emicrophone(sb:se) + factor * win .* real(ifft(temp)); + +end +fprintf(1,'\n'); + +close(hh);
\ No newline at end of file
diff --git a/src/modules/audio_processing/aecm/main/matlab/getBspectrum.m b/src/modules/audio_processing/aecm/main/matlab/getBspectrum.m
new file mode 100644
index 0000000000..a4a533d600
--- /dev/null
+++ b/src/modules/audio_processing/aecm/main/matlab/getBspectrum.m
@@ -0,0 +1,24 @@
+function bspectrum=getBspectrum(ps,threshold,bandfirst,bandlast)
+% function bspectrum=getBspectrum(ps,threshold,bandfirst,bandlast)
+% compute binary spectrum using threshold spectrum as pivot:
+% bit i of the result is set when ps(i) > threshold(i)
+% bspectrum = binary spectrum (uint32, only bits bandfirst..bandlast are set)
+% ps=current power spectrum (float)
+% threshold=threshold spectrum (float), same length as ps
+% bandfirst = first band considered (>= 1)
+% bandlast = last band considered (<= 32, the number of bits in a uint32)
+
+% initialization stuff
+if( bandfirst<1 || length(ps)<bandlast || bandlast>32 || length(ps)~=length(threshold))
+    error('BinDelayEst:spectrum:invalid','Dimensionality error');
+end
+
+% get current binary spectrum
+% (the local is not named "diff" so that the built-in diff() stays visible)
+excess = ps - threshold;
+bspectrum=uint32(0);
+for i=bandfirst:bandlast
+    if( excess(i)>0 )
+        bspectrum = bitset(bspectrum,i);
+    end
+end
diff --git a/src/modules/audio_processing/aecm/main/matlab/hisser2.m b/src/modules/audio_processing/aecm/main/matlab/hisser2.m
new file mode 100644
index 0000000000..5a414f9da8
--- /dev/null
+++ b/src/modules/audio_processing/aecm/main/matlab/hisser2.m
@@ -0,0 +1,21 @@
+function bcount=hisser2(bs,bsr,bandfirst,bandlast)
+% function bcount=hisser2(bs,bsr,bandfirst,bandlast)
+% histogram for the binary spectra: for every entry of bsr, count the bands
+% (bits bandfirst..bandlast) in which bs and that entry differ
+% bcount= array of bit counts, length(bsr) x 1
+% bs=binary spectrum (a single unsigned integer, e.g. from getBspectrum)
+% bsr=reference binary spectra (one unsigned integer per candidate delay)
+% bandfirst = first band considered
+% bandlast = last band considered
+
+% weight all delays equally
+maxDelay = length(bsr);
+
+% compute counts (two methods; the first works better and is operational)
+bcount=zeros(maxDelay,1);
+for i=1:maxDelay
+    % the delay should have low count for low-near&high-far and high-near&low-far
+    bcount(i)= sum(bitget(bitxor(bs,bsr(i)),bandfirst:bandlast));
+    % the delay should have low count for low-near&high-far (works less well)
+% bcount(i)= sum(bitget(bitand(bsr(i),bitxor(bs,bsr(i))),bandfirst:bandlast));
+end
diff --git a/src/modules/audio_processing/aecm/main/matlab/main2.m b/src/modules/audio_processing/aecm/main/matlab/main2.m
new file mode 100644
index 0000000000..7e24c69ccf
--- /dev/null
+++ b/src/modules/audio_processing/aecm/main/matlab/main2.m
@@ -0,0 +1,25 @@
+
+% far end and near end recordings, read as 16-bit samples
+fid=fopen('aecfar.pcm');
+if (fid < 0), error('main2:fileOpen','Cannot open aecfar.pcm'); end
+far=fread(fid,'short'); fclose(fid);
+fid=fopen('aecnear.pcm');
+if (fid < 0), error('main2:fileOpen','Cannot open aecnear.pcm'); end
+mic=fread(fid,'short'); fclose(fid);
+
+%fid=fopen('QA1far.pcm'); far=fread(fid,'short'); fclose(fid);
+%fid=fopen('QA1near.pcm'); mic=fread(fid,'short'); fclose(fid);
+
+% use samples 1 .. 30*8000, or fewer when a recording is shorter than that
+start=0 * 8000+1;
+stop= min([30 * 8000, length(mic), length(far)]);
+microphone=mic(start:stop);
+TheFarEnd=far(start:stop);
+avtime=1;
+
+% 16000 to make it compatible with the C-version
+[emicrophone,tdel]=compsup(microphone,TheFarEnd,avtime,16000);
+
+spclab(8000,TheFarEnd,microphone,emicrophone);
+
+
diff --git a/src/modules/audio_processing/aecm/main/matlab/matlab/AECMobile.m b/src/modules/audio_processing/aecm/main/matlab/matlab/AECMobile.m
new file mode 100644
index 0000000000..2d3e6867df
--- /dev/null
+++ b/src/modules/audio_processing/aecm/main/matlab/matlab/AECMobile.m
@@ -0,0 +1,269 @@
+function [femicrophone, aecmStructNew, enerNear, enerFar] = AECMobile(fmicrophone, afTheFarEnd, setupStruct, aecmStruct)
+global NEARENDFFT;
+global F;
+
+aecmStructNew = aecmStruct;
+
+% Magnitude spectrum of near end signal
+afmicrophone = abs(fmicrophone);
+%afmicrophone = NEARENDFFT(setupStruct.currentBlock,:)'/2^F(setupStruct.currentBlock,end);
+% Near end energy level
+ener_orig = afmicrophone'*afmicrophone;
+if( ener_orig == 0)
+    lowlevel = 0.01;
+    afmicrophone = lowlevel*ones(size(afmicrophone));
+end
+%adiff = max(abs(afmicrophone - afTheFarEnd));
+%if (adiff > 0)
+% disp([setupStruct.currentBlock adiff])
+%end
+
+% Store the near end energy
+%aecmStructNew.enerNear(setupStruct.currentBlock) = log(afmicrophone'*afmicrophone); +aecmStructNew.enerNear(setupStruct.currentBlock) = log(sum(afmicrophone)); +% Store the far end energy +%aecmStructNew.enerFar(setupStruct.currentBlock) = log(afTheFarEnd'*afTheFarEnd); +aecmStructNew.enerFar(setupStruct.currentBlock) = log(sum(afTheFarEnd)); + +% Update subbands (We currently use all frequency bins, hence .useSubBand is turned off) +if aecmStructNew.useSubBand + internalIndex = 1; + for kk=1:setupStruct.subBandLength+1 + ySubBand(kk) = mean(afmicrophone(internalIndex:internalIndex+setupStruct.numInBand(kk)-1).^aecmStructNew.bandFactor); + xSubBand(kk) = mean(afTheFarEnd(internalIndex:internalIndex+setupStruct.numInBand(kk)-1).^aecmStructNew.bandFactor); + internalIndex = internalIndex + setupStruct.numInBand(kk); + end +else + ySubBand = afmicrophone.^aecmStructNew.bandFactor; + xSubBand = afTheFarEnd.^aecmStructNew.bandFactor; +end + +% Estimated echo energy +if (aecmStructNew.bandFactor == 1) + %aecmStructNew.enerEcho(setupStruct.currentBlock) = log((aecmStructNew.H.*xSubBand)'*(aecmStructNew.H.*xSubBand)); + %aecmStructNew.enerEchoStored(setupStruct.currentBlock) = log((aecmStructNew.HStored.*xSubBand)'*(aecmStructNew.HStored.*xSubBand)); + aecmStructNew.enerEcho(setupStruct.currentBlock) = log(sum(aecmStructNew.H.*xSubBand)); + aecmStructNew.enerEchoStored(setupStruct.currentBlock) = log(sum(aecmStructNew.HStored.*xSubBand)); +elseif (aecmStructNew.bandFactor == 2) + aecmStructNew.enerEcho(setupStruct.currentBlock) = log(aecmStructNew.H'*xSubBand); + aecmStructNew.enerEchoStored(setupStruct.currentBlock) = log(aecmStructNew.HStored'*xSubBand); +end + +% Last 100 blocks of data, used for plotting +n100 = max(1,setupStruct.currentBlock-99):setupStruct.currentBlock; +enerError = aecmStructNew.enerNear(n100)-aecmStructNew.enerEcho(n100); +enerErrorStored = aecmStructNew.enerNear(n100)-aecmStructNew.enerEchoStored(n100); + +% Store the far end sub band. 
This is needed if we use LSE instead of NLMS +aecmStructNew.X = [xSubBand aecmStructNew.X(:,1:end-1)]; + +% Update energy levels, which control the VAD +if ((aecmStructNew.enerFar(setupStruct.currentBlock) < aecmStructNew.energyMin) & (aecmStructNew.enerFar(setupStruct.currentBlock) >= aecmStruct.FAR_ENERGY_MIN)) + aecmStructNew.energyMin = aecmStructNew.enerFar(setupStruct.currentBlock); + %aecmStructNew.energyMin = max(aecmStructNew.energyMin,12); + aecmStructNew.energyMin = max(aecmStructNew.energyMin,aecmStruct.FAR_ENERGY_MIN); + aecmStructNew.energyLevel = (aecmStructNew.energyMax-aecmStructNew.energyMin)*aecmStructNew.energyThres+aecmStructNew.energyMin; + aecmStructNew.energyLevelMSE = (aecmStructNew.energyMax-aecmStructNew.energyMin)*aecmStructNew.energyThresMSE+aecmStructNew.energyMin; +end +if (aecmStructNew.enerFar(setupStruct.currentBlock) > aecmStructNew.energyMax) + aecmStructNew.energyMax = aecmStructNew.enerFar(setupStruct.currentBlock); + aecmStructNew.energyLevel = (aecmStructNew.energyMax-aecmStructNew.energyMin)*aecmStructNew.energyThres+aecmStructNew.energyMin; + aecmStructNew.energyLevelMSE = (aecmStructNew.energyMax-aecmStructNew.energyMin)*aecmStructNew.energyThresMSE+aecmStructNew.energyMin; +end + +% Calculate current energy error in near end (estimated echo vs. 
near end) +dE = aecmStructNew.enerNear(setupStruct.currentBlock)-aecmStructNew.enerEcho(setupStruct.currentBlock); + +%%%%%%%% +% Calculate step size used in LMS algorithm, based on current far end energy and near end energy error (dE) +%%%%%%%% +if setupStruct.stepSize_flag + [mu, aecmStructNew] = calcStepSize(aecmStructNew.enerFar(setupStruct.currentBlock), dE, aecmStructNew, setupStruct.currentBlock, 1); +else + mu = 0.25; +end +aecmStructNew.muLog(setupStruct.currentBlock) = mu; % Store the step size + +% Estimate Echo Spectral Shape +[U, aecmStructNew.H] = fallerEstimator(ySubBand,aecmStructNew.X,aecmStructNew.H,mu); + +%%%%% +% Determine if we should store or restore the channel +%%%%% +if ((setupStruct.currentBlock <= aecmStructNew.convLength) | (~setupStruct.channelUpdate_flag)) + aecmStructNew.HStored = aecmStructNew.H; % Store what you have after startup +elseif ((setupStruct.currentBlock > aecmStructNew.convLength) & (setupStruct.channelUpdate_flag)) + if ((aecmStructNew.enerFar(setupStruct.currentBlock) < aecmStructNew.energyLevelMSE) & (aecmStructNew.enerFar(setupStruct.currentBlock-1) >= aecmStructNew.energyLevelMSE)) + xxx = aecmStructNew.countMseH; + if (xxx > 20) + mseStored = mean(abs(aecmStructNew.enerEchoStored(setupStruct.currentBlock-xxx:setupStruct.currentBlock-1)-aecmStructNew.enerNear(setupStruct.currentBlock-xxx:setupStruct.currentBlock-1))); + mseLatest = mean(abs(aecmStructNew.enerEcho(setupStruct.currentBlock-xxx:setupStruct.currentBlock-1)-aecmStructNew.enerNear(setupStruct.currentBlock-xxx:setupStruct.currentBlock-1))); + %fprintf('Stored: %4f Latest: %4f\n', mseStored, mseLatest) % Uncomment if you want to display the MSE values + if ((mseStored < 0.8*mseLatest) & (aecmStructNew.mseHStoredOld < 0.8*aecmStructNew.mseHLatestOld)) + aecmStructNew.H = aecmStructNew.HStored; + fprintf('Restored H at block %d\n',setupStruct.currentBlock) + elseif (((0.8*mseStored > mseLatest) & (mseLatest < aecmStructNew.mseHThreshold) & 
(aecmStructNew.mseHLatestOld < aecmStructNew.mseHThreshold)) | (mseStored == Inf)) + aecmStructNew.HStored = aecmStructNew.H; + fprintf('Stored new H at block %d\n',setupStruct.currentBlock) + end + aecmStructNew.mseHStoredOld = mseStored; + aecmStructNew.mseHLatestOld = mseLatest; + end + elseif ((aecmStructNew.enerFar(setupStruct.currentBlock) >= aecmStructNew.energyLevelMSE) & (aecmStructNew.enerFar(setupStruct.currentBlock-1) < aecmStructNew.energyLevelMSE)) + aecmStructNew.countMseH = 1; + elseif (aecmStructNew.enerFar(setupStruct.currentBlock) >= aecmStructNew.energyLevelMSE) + aecmStructNew.countMseH = aecmStructNew.countMseH + 1; + end +end + +%%%%% +% Check delay (calculate the delay offset (if we can)) +% The algorithm is not tuned and should be used with care. It runs separately from Bastiaan's algorithm. +%%%%% +yyy = 31; % Correlation buffer length (currently unfortunately hard coded) +dxxx = 25; % Maximum offset (currently unfortunately hard coded) +if (setupStruct.currentBlock > aecmStructNew.convLength) + if (aecmStructNew.enerFar(setupStruct.currentBlock-(yyy+2*dxxx-1):setupStruct.currentBlock) > aecmStructNew.energyLevelMSE) + for xxx = -dxxx:dxxx + aecmStructNew.delayLatestS(xxx+dxxx+1) = sum(sign(aecmStructNew.enerEcho(setupStruct.currentBlock-(yyy+dxxx-xxx)+1:setupStruct.currentBlock+xxx-dxxx)-mean(aecmStructNew.enerEcho(setupStruct.currentBlock-(yyy++dxxx-xxx)+1:setupStruct.currentBlock+xxx-dxxx))).*sign(aecmStructNew.enerNear(setupStruct.currentBlock-yyy-dxxx+1:setupStruct.currentBlock-dxxx)-mean(aecmStructNew.enerNear(setupStruct.currentBlock-yyy-dxxx+1:setupStruct.currentBlock-dxxx)))); + end + aecmStructNew.newDelayCurve = 1; + end +end +if ((setupStruct.currentBlock > 2*aecmStructNew.convLength) & ~rem(setupStruct.currentBlock,yyy*2) & aecmStructNew.newDelayCurve) + [maxV,maxP] = max(aecmStructNew.delayLatestS); + if ((maxP > 2) & (maxP < 2*dxxx)) + maxVLeft = aecmStructNew.delayLatestS(max(1,maxP-4)); + maxVRight = 
aecmStructNew.delayLatestS(min(2*dxxx+1,maxP+4)); + %fprintf('Max %d, Left %d, Right %d\n',maxV,maxVLeft,maxVRight) % Uncomment if you want to see max value + if ((maxV > 24) & (maxVLeft < maxV - 10) & (maxVRight < maxV - 10)) + aecmStructNew.feedbackDelay = maxP-dxxx-1; + aecmStructNew.newDelayCurve = 0; + aecmStructNew.feedbackDelayUpdate = 1; + fprintf('Feedback Update at block %d\n',setupStruct.currentBlock) + end + end +end +% End of "Check delay" +%%%%%%%% + +%%%%% +% Calculate suppression gain, based on far end energy and near end energy error (dE) +if (setupStruct.supGain_flag) + [gamma_echo, aecmStructNew.cntIn, aecmStructNew.cntOut] = calcFilterGain(aecmStructNew.enerFar(setupStruct.currentBlock), dE, aecmStructNew, setupStruct.currentBlock, aecmStructNew.convLength, aecmStructNew.cntIn, aecmStructNew.cntOut); +else + gamma_echo = 1; +end +aecmStructNew.gammaLog(setupStruct.currentBlock) = gamma_echo; % Store the gain +gamma_use = gamma_echo; + +% Use the stored channel +U = aecmStructNew.HStored.*xSubBand; + +% compute Wiener filter and suppressor function +Iy = find(ySubBand); +subBandFilter = zeros(size(ySubBand)); +if (aecmStructNew.bandFactor == 2) + subBandFilter(Iy) = (1 - gamma_use*sqrt(U(Iy)./ySubBand(Iy))); % For Faller +else + subBandFilter(Iy) = (1 - gamma_use*(U(Iy)./ySubBand(Iy))); % For COV +end +ix0 = find(subBandFilter < 0); % bounding trick 1 +subBandFilter(ix0) = 0; +ix0 = find(subBandFilter > 1); % bounding trick 1 +subBandFilter(ix0) = 1; + +% Interpolate back to normal frequency bins if we use sub bands +if aecmStructNew.useSubBand + thefilter = interp1(setupStruct.centerFreq,subBandFilter,linspace(0,setupStruct.samplingfreq/2,setupStruct.hsupport1)','nearest'); + testfilter = interp1(setupStruct.centerFreq,subBandFilter,linspace(0,setupStruct.samplingfreq/2,1000),'nearest'); + thefilter(end) = subBandFilter(end); + + internalIndex = 1; + for kk=1:setupStruct.subBandLength+1 + internalIndex:internalIndex+setupStruct.numInBand(kk)-1; 
+ thefilter(internalIndex:internalIndex+setupStruct.numInBand(kk)-1) = subBandFilter(kk); + internalIndex = internalIndex + setupStruct.numInBand(kk); + end +else + thefilter = subBandFilter; + testfilter = subBandFilter; +end + +% Bound the filter +ix0 = find(thefilter < setupStruct.de_echo_bound); % bounding trick 1 +thefilter(ix0) = setupStruct.de_echo_bound; % bounding trick 2 +ix0 = find(thefilter > 1); % bounding in reasonable range +thefilter(ix0) = 1; + +%%%% +% NLP +%%%% +thefmean = mean(thefilter(8:16)); +if (thefmean < 1) + disp(''); +end +aecmStructNew.runningfmean = setupStruct.nl_alpha*aecmStructNew.runningfmean + (1-setupStruct.nl_alpha)*thefmean; +slope0 = 1.0/(1.0 - setupStruct.nlSeverity); % +thegain = max(0.0, min(1.0, slope0*(aecmStructNew.runningfmean - setupStruct.nlSeverity))); +if ~setupStruct.nlp_flag + thegain = 1; +end +% END NONLINEARITY +thefilter = thegain*thefilter; + +%%%% +% The suppression +%%%% +femicrophone = fmicrophone .* thefilter; +% Store the output energy (used for plotting) +%aecmStructNew.enerOut(setupStruct.currentBlock) = log(abs(femicrophone)'*abs(femicrophone)); +aecmStructNew.enerOut(setupStruct.currentBlock) = log(sum(abs(femicrophone))); + +if aecmStructNew.plotIt + figure(13) + subplot(311) + %plot(n100,enerFar(n100),'b-',n100,enerNear(n100),'k--',n100,enerEcho(n100),'r-',[n100(1) n100(end)],[1 1]*vadThNew,'b:',[n100(1) n100(end)],[1 1]*((energyMax-energyMin)/4+energyMin),'r-.',[n100(1) n100(end)],[1 1]*vadNearThNew,'g:',[n100(1) n100(end)],[1 1]*energyMax,'r-.',[n100(1) n100(end)],[1 1]*energyMin,'r-.','LineWidth',2) + plot(n100,aecmStructNew.enerFar(n100),'b-',n100,aecmStructNew.enerNear(n100),'k--',n100,aecmStructNew.enerOut(n100),'r-.',n100,aecmStructNew.enerEcho(n100),'r-',n100,aecmStructNew.enerEchoStored(n100),'c-',[n100(1) n100(end)],[1 1]*((aecmStructNew.energyMax-aecmStructNew.energyMin)/4+aecmStructNew.energyMin),'g-.',[n100(1) n100(end)],[1 1]*aecmStructNew.energyMax,'g-.',[n100(1) n100(end)],[1 
1]*aecmStructNew.energyMin,'g-.','LineWidth',2) + %title(['Frame ',int2str(i),' av ',int2str(setupStruct.updateno),' State = ',int2str(speechState),' \mu = ',num2str(mu)]) + title(['\gamma = ',num2str(gamma_echo),' \mu = ',num2str(mu)]) + subplot(312) + %plot(n100,enerError,'b-',[n100(1) n100(end)],[1 1]*vadNearTh,'r:',[n100(1) n100(end)],[-1.5 -1.5]*vadNearTh,'r:','LineWidth',2) + %plot(n100,enerError,'b-',[n100(1) n100(end)],[1 1],'r:',[n100(1) n100(end)],[-2 -2],'r:','LineWidth',2) + plot(n100,enerError,'b-',n100,enerErrorStored,'c-',[n100(1) n100(end)],[1 1]*aecmStructNew.varMean,'k--',[n100(1) n100(end)],[1 1],'r:',[n100(1) n100(end)],[-2 -2],'r:','LineWidth',2) + % Plot mu + %plot(n100,log2(aecmStructNew.muLog(n100)),'b-','LineWidth',2) + %plot(n100,log2(aecmStructNew.HGain(n100)),'b-',[n100(1) n100(end)],[1 1]*log2(sum(aecmStructNew.HStored)),'r:','LineWidth',2) + title(['Block ',int2str(setupStruct.currentBlock),' av ',int2str(setupStruct.updateno)]) + subplot(313) + %plot(n100,enerVar(n100),'b-',[n100(1) n100(end)],[1 1],'r:',[n100(1) n100(end)],[-2 -2],'r:','LineWidth',2) + %plot(n100,enerVar(n100),'b-','LineWidth',2) + % Plot correlation curve + + %plot(-25:25,aecmStructNew.delayStored/max(aecmStructNew.delayStored),'c-',-25:25,aecmStructNew.delayLatest/max(aecmStructNew.delayLatest),'r-',-25:25,(max(aecmStructNew.delayStoredS)-aecmStructNew.delayStoredS)/(max(aecmStructNew.delayStoredS)-min(aecmStructNew.delayStoredS)),'c:',-25:25,(max(aecmStructNew.delayLatestS)-aecmStructNew.delayLatestS)/(max(aecmStructNew.delayLatestS)-min(aecmStructNew.delayLatestS)),'r:','LineWidth',2) + %plot(-25:25,aecmStructNew.delayStored,'c-',-25:25,aecmStructNew.delayLatest,'r-',-25:25,(max(aecmStructNew.delayStoredS)-aecmStructNew.delayStoredS)/(max(aecmStructNew.delayStoredS)-min(aecmStructNew.delayStoredS)),'c:',-25:25,(max(aecmStructNew.delayLatestS)-aecmStructNew.delayLatestS)/(max(aecmStructNew.delayLatestS)-min(aecmStructNew.delayLatestS)),'r:','LineWidth',2) + 
%plot(-25:25,aecmStructNew.delayLatest,'r-',-25:25,(50-aecmStructNew.delayLatestS)/100,'r:','LineWidth',2) + plot(-25:25,aecmStructNew.delayLatestS,'r:','LineWidth',2) + %plot(-25:25,aecmStructNew.delayStored,'c-',-25:25,aecmStructNew.delayLatest,'r-','LineWidth',2) + plot(0:32,aecmStruct.HStored,'bo-','LineWidth',2) + %title(['\gamma | In = ',int2str(aecmStructNew.muStruct.countInInterval),' | Out High = ',int2str(aecmStructNew.muStruct.countOutHighInterval),' | Out Low = ',int2str(aecmStructNew.muStruct.countOutLowInterval)]) + pause(1) + %if ((setupStruct.currentBlock == 860) | (setupStruct.currentBlock == 420) | (setupStruct.currentBlock == 960)) + if 0%(setupStruct.currentBlock == 960) + figure(60) + plot(n100,aecmStructNew.enerNear(n100),'k--',n100,aecmStructNew.enerEcho(n100),'k:','LineWidth',2) + legend('Near End','Estimated Echo') + title('Signal Energy witH offset compensation') + figure(61) + subplot(211) + stem(sign(aecmStructNew.enerNear(n100)-mean(aecmStructNew.enerNear(n100)))) + title('Near End Energy Pattern (around mean value)') + subplot(212) + stem(sign(aecmStructNew.enerEcho(n100)-mean(aecmStructNew.enerEcho(n100)))) + title('Estimated Echo Energy Pattern (around mean value)') + pause + end + drawnow%,pause +elseif ~rem(setupStruct.currentBlock,100) + fprintf('Block %d of %d\n',setupStruct.currentBlock,setupStruct.updateno) +end diff --git a/src/modules/audio_processing/aecm/main/matlab/matlab/align.m b/src/modules/audio_processing/aecm/main/matlab/matlab/align.m new file mode 100644 index 0000000000..9b9c0baf3b --- /dev/null +++ b/src/modules/audio_processing/aecm/main/matlab/matlab/align.m @@ -0,0 +1,98 @@ +function [delayStructNew] = align(xf, yf, delayStruct, i, trueDelay); + +%%%%%%% +% Bastiaan's algorithm copied +%%%%%%% +Ap500 = [1.00, -4.95, 9.801, -9.70299, 4.80298005, -0.9509900499]; +Bp500 = [0.662743088639636, -2.5841655608125, 3.77668102146288, -2.45182477425154, 0.596566274575251, 0.0]; +Ap200 = [1.00, -4.875, 9.50625, 
-9.26859375, 4.518439453125, -0.881095693359375]; +Bp200 = [0.862545460994275, -3.2832804496114, 4.67892032308828, -2.95798023879133, 0.699796870041299, 0.0]; + +oldMethod = 1; % Turn on or off the old method. The new one is Bastiaan's August 2008 updates +THReSHoLD = 2.0; % ADJUSTABLE threshold factor; 4.0 seems good +%%%%%%%%%%%%%%%%%%% +% use log domain (showed improved performance) +xxf = sqrt(real(xf.*conj(xf))+1e-20); +yyf = sqrt(real(yf.*conj(yf))+1e-20); +delayStruct.sxAll2(:,i) = 20*log10(xxf); +delayStruct.syAll2(:,i) = 20*log10(yyf); + +mD = min(i-1,delayStruct.maxDelayb); +if oldMethod + factor = 1.0; + histLenb = 250; + xthreshold = factor*median(delayStruct.sxAll2(:,i-mD:i),2); + ythreshold = factor*median(delayStruct.syAll2(:,i-mD:i),2); +else + xthreshold = sum(delayStruct.sxAll2(:,i-mD:i),2)/(delayStruct.maxDelayb+1); + + [yout, delayStruct.z200] = filter(Bp200, Ap200, delayStruct.syAll2(:,i), delayStruct.z200, 2); + yout = yout/(delayStruct.maxDelayb+1); + ythreshold = mean(delayStruct.syAll2(:,i-mD:i),2); + ythreshold = yout; +end + +delayStruct.bxspectrum(i) = getBspectrum(delayStruct.sxAll2(:,i), xthreshold, delayStruct.bandfirst, delayStruct.bandlast); +delayStruct.byspectrum(i) = getBspectrum(delayStruct.syAll2(:,i), ythreshold, delayStruct.bandfirst, delayStruct.bandlast); + +delayStruct.bxhist(end-mD:end) = delayStruct.bxspectrum(i-mD:i); + +delayStruct.bcount(:,i) = hisser2(delayStruct.byspectrum(i), flipud(delayStruct.bxhist), delayStruct.bandfirst, delayStruct.bandlast); +[delayStruct.fout(:,i), delayStruct.z500] = filter(Bp500, Ap500, delayStruct.bcount(:,i), delayStruct.z500, 2); +if oldMethod + %delayStruct.new(:,i) = sum(delayStruct.bcount(:,max(1,i-histLenb+1):i),2); % using the history range + tmpVec = [delayStruct.fout(1,i)*ones(2,1); delayStruct.fout(:,i); delayStruct.fout(end,i)*ones(2,1)]; % using the history range + tmpVec = filter(ones(1,5), 1, tmpVec); + delayStruct.new(:,i) = tmpVec(5:end); + %delayStruct.new(:,i) = 
delayStruct.fout(:,i); % using the history range +else + [delayStruct.fout(:,i), delayStruct.z500] = filter(Bp500, Ap500, delayStruct.bcount(:,i), delayStruct.z500, 2); + % NEW CODE + delayStruct.new(:,i) = filter([-1,-2,1,4,1,-2,-1], 1, delayStruct.fout(:,i)); %remv smth component + delayStruct.new(1:end-3,i) = delayStruct.new(1+3:end,i); + delayStruct.new(1:6,i) = 0.0; + delayStruct.new(end-6:end,i) = 0.0; % ends are no good +end +[valuen, tempdelay] = min(delayStruct.new(:,i)); % find minimum +if oldMethod + threshold = valuen + (max(delayStruct.new(:,i)) - valuen)/4; + thIndex = find(delayStruct.new(:,i) <= threshold); + if (i > 1) + delayDiff = abs(delayStruct.delay(i-1)-tempdelay+1); + if (delayStruct.oneGoodEstimate & (max(diff(thIndex)) > 1) & (delayDiff < 10)) + % We consider this minimum to be significant, hence update the delay + delayStruct.delay(i) = tempdelay; + elseif (~delayStruct.oneGoodEstimate & (max(diff(thIndex)) > 1)) + delayStruct.delay(i) = tempdelay; + if (i > histLenb) + delayStruct.oneGoodEstimate = 1; + end + else + delayStruct.delay(i) = delayStruct.delay(i-1); + end + else + delayStruct.delay(i) = tempdelay; + end +else + threshold = THReSHoLD*std(delayStruct.new(:,i)); % set updata threshold + if ((-valuen > threshold) | (i < delayStruct.smlength)) % see if you want to update delay + delayStruct.delay(i) = tempdelay; + else + delayStruct.delay(i) = delayStruct.delay(i-1); + end + % END NEW CODE +end +delayStructNew = delayStruct; + +% administrative and plotting stuff +if( 0) + figure(10); + plot([1:length(delayStructNew.new(:,i))],delayStructNew.new(:,i),trueDelay*[1 1],[min(delayStructNew.new(:,i)),max(delayStructNew.new(:,i))],'r',[1 length(delayStructNew.new(:,i))],threshold*[1 1],'r:', 'LineWidth',2); + %plot([1:length(delayStructNew.bcount(:,i))],delayStructNew.bcount(:,i),trueDelay*[1 1],[min(delayStructNew.bcount(:,i)),max(delayStructNew.bcount(:,i))],'r','LineWidth',2); + 
%plot([thedelay,thedelay],[min(fcount(:,i)),max(fcount(:,i))],'r');
+    %title(sprintf('bin count and known delay at time %5.1f s\n',(i-1)*(support/(fs*oversampling))));
+    title(delayStructNew.oneGoodEstimate)
+    xlabel('delay in frames');
+    %hold off;
+    drawnow
+end
diff --git a/src/modules/audio_processing/aecm/main/matlab/matlab/calcFilterGain.m b/src/modules/audio_processing/aecm/main/matlab/matlab/calcFilterGain.m
new file mode 100644
index 0000000000..a09a7f2225
--- /dev/null
+++ b/src/modules/audio_processing/aecm/main/matlab/matlab/calcFilterGain.m
@@ -0,0 +1,54 @@
+function [gam, cntIn2, cntOut2] = calcFilterGain(energy, dE, aecmStruct, t, T, cntIn, cntOut)
+% function [gam, cntIn2, cntOut2] = calcFilterGain(energy, dE, aecmStruct, t, T, cntIn, cntOut)
+% compute the suppression gain from the energy and the energy error dE
+% gam = suppression gain (1 while t < T, 0 when energy < aecmStruct.energyLevel)
+% cntIn2 = updated cntIn
+% cntOut2 = updated cntOut
+% energy = current energy, compared with aecmStruct.energyLevel/energyMax
+% dE = energy error; "inside" below means abs(dE-dEOffset) < dEWidth
+% aecmStruct = only the fields energyLevel and energyMax are read
+% t = current block index
+% T = gam is fixed to 1 and the counters are left untouched while t < T
+% cntIn = count of blocks with dE inside; set to 0 once cntOut exceeds 2
+% cntOut = count of blocks with dE outside; set to 0 once cntIn exceeds 2
+
+cntIn2 = cntIn;
+cntOut2 = cntOut;
+if (t < T)
+    gam = 1;
+    return;
+end
+if (energy < aecmStruct.energyLevel)
+    gam = 0;
+    return;
+end
+
+% energy dependent part of the gain
+ener_A = 1;
+ener_B = 0.8;
+ener_C = aecmStruct.energyLevel + (aecmStruct.energyMax-aecmStruct.energyLevel)/5;
+gam2 = max(0, min(ener_B*(energy-aecmStruct.energyLevel)/(ener_C-aecmStruct.energyLevel), ener_B+(ener_A-ener_B)*(energy-ener_C)/(aecmStruct.energyMax-ener_C)));
+
+% energy error dependent part of the gain
+dEOffset = -0.5;
+dEWidth = 1.5;
+dE_A = 4; % gain factor when dE is inside
+dE_D = 1; % gain factor when dE is outside
+if (abs(dE-dEOffset) < dEWidth)
+    cntIn2 = cntIn2 + 1;
+    if (cntIn2 > 2)
+        cntOut2 = 0;
+    end
+    gam3 = dE_A;
+else
+    cntOut2 = cntOut2 + 1;
+    if (cntOut2 > 2)
+        cntIn2 = 0;
+    end
+    gam3 = dE_D;
+end
+
+% cross fade from unit gain to gam2*gam3 while cntIn2 grows to timeFactorLength
+timeFactorLength = 10;
+timeFactor = min(1, cntIn2/timeFactorLength);
+gam = (1-timeFactor) + timeFactor*gam2*gam3;
diff --git a/src/modules/audio_processing/aecm/main/matlab/matlab/calcStepSize.m b/src/modules/audio_processing/aecm/main/matlab/matlab/calcStepSize.m
new file mode 100644
index 0000000000..ae1365fa48
--- /dev/null
+++ b/src/modules/audio_processing/aecm/main/matlab/matlab/calcStepSize.m
@@ -0,0 +1,105 @@
+function [mu, aecmStructNew] = calcStepSize(energy, dE, aecmStruct, t, logscale)
+
+if (nargin < 4)
+    t = 1;
+    logscale = 1;
+elseif (nargin == 4)
+    logscale = 1;
+end
+T = aecmStruct.convLength;
+
+if logscale
+    currentMuMax = aecmStruct.MU_MIN + (aecmStruct.MU_MAX-aecmStruct.MU_MIN)*min(t,T)/T;
+    if (aecmStruct.energyMin >= aecmStruct.energyMax)
+        mu = aecmStruct.MU_MIN;
+    else
+        mu = (energy - aecmStruct.energyMin)/(aecmStruct.energyMax - aecmStruct.energyMin)*(currentMuMax-aecmStruct.MU_MIN) + aecmStruct.MU_MIN;
+    end
+    mu = 2^mu;
+    if (energy < aecmStruct.energyLevel)
+        mu = 0;
+    end
+else
+    muMin = 0;
+    muMax = 0.5;
+    currentMuMax = muMin + (muMax-muMin)*min(t,T)/T;
+    if (aecmStruct.energyMin >= aecmStruct.energyMax)
+        mu = muMin;
+    else
+        mu = (energy - aecmStruct.energyMin)/(aecmStruct.energyMax - aecmStruct.energyMin)*(currentMuMax-muMin) + muMin;
+    end
+end
+dE2 = 1;
+dEOffset = -0.5; +offBoost = 5; +if (mu > 0) + if (abs(dE-aecmStruct.ENERGY_DEV_OFFSET) > aecmStruct.ENERGY_DEV_TOL) + aecmStruct.muStruct.countInInterval = 0; + else + aecmStruct.muStruct.countInInterval = aecmStruct.muStruct.countInInterval + 1; + end + if (dE < aecmStruct.ENERGY_DEV_OFFSET - aecmStruct.ENERGY_DEV_TOL) + aecmStruct.muStruct.countOutLowInterval = aecmStruct.muStruct.countOutLowInterval + 1; + else + aecmStruct.muStruct.countOutLowInterval = 0; + end + if (dE > aecmStruct.ENERGY_DEV_OFFSET + aecmStruct.ENERGY_DEV_TOL) + aecmStruct.muStruct.countOutHighInterval = aecmStruct.muStruct.countOutHighInterval + 1; + else + aecmStruct.muStruct.countOutHighInterval = 0; + end +end +muVar = 2^min(-3,5/50*aecmStruct.muStruct.countInInterval-3); +muOff = 2^max(offBoost,min(0,offBoost*(aecmStruct.muStruct.countOutLowInterval-aecmStruct.muStruct.minOutLowInterval)/(aecmStruct.muStruct.maxOutLowInterval-aecmStruct.muStruct.minOutLowInterval))); + +muLow = 1/64; +muVar = 1; +if (t < 2*T) + muDT = 1; + muVar = 1; + mdEVec = 0; + numCross = 0; +else + muDT = min(1,max(muLow,1-(1-muLow)*(dE-aecmStruct.ENERGY_DEV_OFFSET)/aecmStruct.ENERGY_DEV_TOL)); + dEVec = aecmStruct.enerNear(t-63:t)-aecmStruct.enerEcho(t-63:t); + %dEVec = aecmStruct.enerNear(t-20:t)-aecmStruct.enerEcho(t-20:t); + numCross = 0; + currentState = 0; + for ii=1:64 + if (currentState == 0) + currentState = (dEVec(ii) > dE2) - (dEVec(ii) < -2); + elseif ((currentState == 1) & (dEVec(ii) < -2)) + numCross = numCross + 1; + currentState = -1; + elseif ((currentState == -1) & (dEVec(ii) > dE2)) + numCross = numCross + 1; + currentState = 1; + end + end + + %logicDEVec = (dEVec > dE2) - (dEVec < -2); + %numCross = sum(abs(diff(logicDEVec))); + %mdEVec = mean(abs(dEVec-dEOffset)); + %mdEVec = mean(abs(dEVec-mean(dEVec))); + %mdEVec = max(dEVec)-min(dEVec); + %if (mdEVec > 4)%1.5) + % muVar = 0; + %end + muVar = 2^(-floor(numCross/2)); + muVar = 2^(-numCross); +end +%muVar = 1; + + +% if (eStd > 
(dE2-dEOffset)) +% muVar = 1/8; +% else +% muVar = 1; +% end + +%mu = mu*muDT*muVar*muOff; +mu = mu*muDT*muVar; +mu = min(mu,0.25); +aecmStructNew = aecmStruct; +%aecmStructNew.varMean = mdEVec; +aecmStructNew.varMean = numCross; diff --git a/src/modules/audio_processing/aecm/main/matlab/matlab/fallerEstimator.m b/src/modules/audio_processing/aecm/main/matlab/matlab/fallerEstimator.m new file mode 100644 index 0000000000..d038b519c0 --- /dev/null +++ b/src/modules/audio_processing/aecm/main/matlab/matlab/fallerEstimator.m @@ -0,0 +1,42 @@ +function [U, Hnew] = fallerEstimator(Y, X, H, mu) + +% Near end signal is stacked frame by frame columnwise in matrix Y and far end in X +% +% Possible estimation procedures are +% 1) LSE +% 2) NLMS +% 3) Separated numerator and denominator filters +regParam = 1; +[numFreqs, numFrames] = size(Y); +[numFreqs, Q] = size(X); +U = zeros(numFreqs, 1); + +if ((nargin == 3) | (nargin == 5)) + dtd = 0; +end +if (nargin == 4) + dtd = H; +end +Emax = 7; +dEH = Emax-sum(sum(H)); +nu = 2*mu; +% if (nargin < 5) +% H = zeros(numFreqs, Q); +% for kk = 1:numFreqs +% Xmatrix = hankel(X(kk,1:Q),X(kk,Q:end)); +% y = Y(kk,1:end-Q+1)'; +% H(kk,:) = (y'*Xmatrix')*inv(Xmatrix*Xmatrix'+regParam); +% U(kk,1) = H(kk,:)*Xmatrix(:,1); +% end +% else + for kk = 1:numFreqs + x = X(kk,1:Q)'; + y = Y(kk,1); + Htmp = mu*(y-H(kk,:)*x)/(x'*x+regParam)*x; + %Htmp = (mu*(y-H(kk,:)*x)/(x'*x+regParam) - nu/dEH)*x; + H(kk,:) = H(kk,:) + Htmp'; + U(kk,1) = H(kk,:)*x; + end +% end + +Hnew = H; diff --git a/src/modules/audio_processing/aecm/main/matlab/matlab/getBspectrum.m b/src/modules/audio_processing/aecm/main/matlab/matlab/getBspectrum.m new file mode 100644 index 0000000000..a4a533d600 --- /dev/null +++ b/src/modules/audio_processing/aecm/main/matlab/matlab/getBspectrum.m @@ -0,0 +1,22 @@ +function bspectrum=getBspectrum(ps,threshold,bandfirst,bandlast) +% function bspectrum=getBspectrum(ps,threshold,bandfirst,bandlast) +% compute binary spectrum using threshold 
spectrum as pivot +% bspectrum = binary spectrum (binary) +% ps=current power spectrum (float) +% threshold=threshold spectrum (float) +% bandfirst = first band considered +% bandlast = last band considered + +% initialization stuff + if( length(ps)<bandlast | bandlast>32 | length(ps)~=length(threshold)) + error('BinDelayEst:spectrum:invalid','Dimensionality error'); +end + +% get current binary spectrum +diff = ps - threshold; +bspectrum=uint32(0); +for(i=bandfirst:bandlast) + if( diff(i)>0 ) + bspectrum = bitset(bspectrum,i); + end +end diff --git a/src/modules/audio_processing/aecm/main/matlab/matlab/hisser2.m b/src/modules/audio_processing/aecm/main/matlab/matlab/hisser2.m new file mode 100644 index 0000000000..5a414f9da8 --- /dev/null +++ b/src/modules/audio_processing/aecm/main/matlab/matlab/hisser2.m @@ -0,0 +1,21 @@ +function bcount=hisser2(bs,bsr,bandfirst,bandlast) +% function bcount=hisser2(bs,bsr,bandfirst,bandlast) +% histogram for the binary spectra +% bcount= array of bit counts +% bs=binary spectrum (one int32 number each) +% bsr=reference binary spectra (one int32 number each) +% blockSize = histogram over blocksize blocks +% bandfirst = first band considered +% bandlast = last band considered + +% weight all delays equally +maxDelay = length(bsr); + +% compute counts (two methods; the first works better and is operational) +bcount=zeros(maxDelay,1); +for(i=1:maxDelay) + % the delay should have low count for low-near&high-far and high-near&low-far + bcount(i)= sum(bitget(bitxor(bs,bsr(i)),bandfirst:bandlast)); + % the delay should have low count for low-near&high-far (works less well) +% bcount(i)= sum(bitget(bitand(bsr(i),bitxor(bs,bsr(i))),bandfirst:bandlast)); +end diff --git a/src/modules/audio_processing/aecm/main/matlab/matlab/mainProgram.m b/src/modules/audio_processing/aecm/main/matlab/matlab/mainProgram.m new file mode 100644 index 0000000000..eeb2aaa79c --- /dev/null +++ 
b/src/modules/audio_processing/aecm/main/matlab/matlab/mainProgram.m @@ -0,0 +1,283 @@ +useHTC = 1; % Set this if you want to run a single file and set file names below. Otherwise use simEnvironment to run from several scenarios in a row +delayCompensation_flag = 0; % Set this flag to one if you want to turn on the delay compensation/enhancement +global FARENDFFT; +global NEARENDFFT; +global F; + +if useHTC +% fid=fopen('./htcTouchHd/nb/aecFar.pcm'); xFar=fread(fid,'short'); fclose(fid); +% fid=fopen('./htcTouchHd/nb/aecNear.pcm'); yNear=fread(fid,'short'); fclose(fid); +% fid=fopen('./samsungBlackjack/nb/aecFar.pcm'); xFar=fread(fid,'short'); fclose(fid); +% fid=fopen('./samsungBlackjack/nb/aecNear.pcm'); yNear=fread(fid,'short'); fclose(fid); +% fid=fopen('aecFarPoor.pcm'); xFar=fread(fid,'short'); fclose(fid); +% fid=fopen('aecNearPoor.pcm'); yNear=fread(fid,'short'); fclose(fid); +% fid=fopen('out_aes.pcm'); outAES=fread(fid,'short'); fclose(fid); + fid=fopen('aecFar4.pcm'); xFar=fread(fid,'short'); fclose(fid); + fid=fopen('aecNear4.pcm'); yNear=fread(fid,'short'); fclose(fid); + yNearSpeech = zeros(size(xFar)); + fs = 8000; + frameSize = 64; +% frameSize = 128; + fs = 16000; +% frameSize = 256; +%F = load('fftValues.txt'); +%FARENDFFT = F(:,1:33); +%NEARENDFFT = F(:,34:66); + +else + loadFileFar = [speakerType, '_s_',scenario,'_far_b.wav']; + [xFar,fs,nbits] = wavread(loadFileFar); + xFar = xFar*2^(nbits-1); + loadFileNear = [speakerType, '_s_',scenario,'_near_b.wav']; + [yNear,fs,nbits] = wavread(loadFileNear); + yNear = yNear*2^(nbits-1); + loadFileNearSpeech = [speakerType, '_s_',scenario,'_nearSpeech_b.wav']; + [yNearSpeech,fs,nbits] = wavread(loadFileNearSpeech); + yNearSpeech = yNearSpeech*2^(nbits-1); + frameSize = 256; +end + +dtRegions = []; + +% General settings for the AECM +setupStruct = struct(... + 'stepSize_flag', 1,... % This flag turns on the step size calculation. If turned off, mu = 0.25. + 'supGain_flag', 0,... 
% This flag turns on the suppression gain calculation. If turned off, gam = 1. + 'channelUpdate_flag', 0,... % This flag turns on the channel update. If turned off, H is updated for convLength and then kept constant. + 'nlp_flag', 0,... % Turn on/off NLP + 'withVAD_flag', 0,... % Turn on/off VAD + 'useSubBand', 0,... % Set to 1 to use subBands + 'useDelayEstimation', 1,... % Set to 1 to use delay estimation + 'support', frameSize,... % # of samples per frame + 'samplingfreq',fs,... % Sampling frequency + 'oversampling', 2,... % Overlap between blocks/frames + 'updatel', 0,... % # of samples between blocks + 'hsupport1', 0,... % # of bins in frequency domain + 'factor', 0,... % synthesis window amplification + 'tlength', 0,... % # of samples of entire file + 'updateno', 0,... % # of updates + 'nb', 1,... % # of blocks + 'currentBlock', 0,... % + 'win', zeros(frameSize,1),...% Window to apply for fft and synthesis + 'avtime', 1,... % Time (in sec.) to perform averaging + 'estLen', 0,... % Averaging in # of blocks + 'A_GAIN', 10.0,... % + 'suppress_overdrive', 1.0,... % overdrive factor for suppression 1.4 is good + 'gamma_echo', 1.0,... % same as suppress_overdrive but at different place + 'de_echo_bound', 0.0,... % + 'nl_alpha', 0.4,... % memory; seems not very critical + 'nlSeverity', 0.2,... % nonlinearity severity: 0 does nothing; 1 suppresses all + 'numInBand', [],... % # of frequency bins in resp. subBand + 'centerFreq', [],... % Center frequency of resp. subBand + 'dtRegions', dtRegions,... % Regions where we have DT + 'subBandLength', frameSize/2);%All bins + %'subBandLength', 11); %Something's wrong when subBandLength even + %'nl_alpha', 0.8,... % memory; seems not very critical + +delayStruct = struct(... + 'bandfirst', 8,... + 'bandlast', 25,... + 'smlength', 600,... + 'maxDelay', 0.4,... + 'oneGoodEstimate', 0,... + 'delayAdjust', 0,... 
+ 'maxDelayb', 0); +% More parameters in delayStruct are constructed in "updateSettings" below + +% Make struct settings +[setupStruct, delayStruct] = updateSettings(yNear, xFar, setupStruct, delayStruct); +setupStruct.numInBand = ones(setupStruct.hsupport1,1); + +Q = 1; % Time diversity in channel +% General settings for the step size calculation +muStruct = struct(... + 'countInInterval', 0,... + 'countOutHighInterval', 0,... + 'countOutLowInterval', 0,... + 'minInInterval', 50,... + 'minOutHighInterval', 10,... + 'minOutLowInterval', 10,... + 'maxOutLowInterval', 50); +% General settings for the AECM +aecmStruct = struct(... + 'plotIt', 0,... % Set to 0 to turn off plotting + 'useSubBand', 0,... + 'bandFactor', 1,... + 'H', zeros(setupStruct.subBandLength+1,Q),... + 'HStored', zeros(setupStruct.subBandLength+1,Q),... + 'X', zeros(setupStruct.subBandLength+1,Q),... + 'energyThres', 0.28,... + 'energyThresMSE', 0.4,... + 'energyMin', inf,... + 'energyMax', -inf,... + 'energyLevel', 0,... + 'energyLevelMSE', 0,... + 'convLength', 100,... + 'gammaLog', ones(setupStruct.updateno,1),... + 'muLog', ones(setupStruct.updateno,1),... + 'enerFar', zeros(setupStruct.updateno,1),... + 'enerNear', zeros(setupStruct.updateno,1),... + 'enerEcho', zeros(setupStruct.updateno,1),... + 'enerEchoStored', zeros(setupStruct.updateno,1),... + 'enerOut', zeros(setupStruct.updateno,1),... + 'runningfmean', 0,... + 'muStruct', muStruct,... + 'varMean', 0,... + 'countMseH', 0,... + 'mseHThreshold', 1.1,... + 'mseHStoredOld', inf,... + 'mseHLatestOld', inf,... + 'delayLatestS', zeros(1,51),... + 'feedbackDelay', 0,... + 'feedbackDelayUpdate', 0,... + 'cntIn', 0,... + 'cntOut', 0,... + 'FAR_ENERGY_MIN', 1,... + 'ENERGY_DEV_OFFSET', 0.5,... + 'ENERGY_DEV_TOL', 1.5,... + 'MU_MIN', -16,... + 'MU_MAX', -2,... 
+ 'newDelayCurve', 0); + +% Adjust speech signals +xFar = [zeros(setupStruct.hsupport1-1,1);xFar(1:setupStruct.tlength)]; +yNear = [zeros(setupStruct.hsupport1-1,1);yNear(1:setupStruct.tlength)]; +yNearSpeech = [zeros(setupStruct.hsupport1-1,1);yNearSpeech(1:setupStruct.tlength)]; +xFar = xFar(1:setupStruct.tlength); +yNear = yNear(1:setupStruct.tlength); + +% Set figure settings +if aecmStruct.plotIt + figure(13) + set(gcf,'doublebuffer','on') +end +%%%%%%%%%% +% Here starts the algorithm +% Dividing into frames and then estimating the near end speech +%%%%%%%%%% +fTheFarEnd = complex(zeros(setupStruct.hsupport1,1)); +afTheFarEnd = zeros(setupStruct.hsupport1,setupStruct.updateno+1); +fFar = zeros(setupStruct.hsupport1,setupStruct.updateno+1); +fmicrophone = complex(zeros(setupStruct.hsupport1,1)); +afmicrophone = zeros(setupStruct.hsupport1,setupStruct.updateno+1); +fNear = zeros(setupStruct.hsupport1,setupStruct.updateno+1); +femicrophone = complex(zeros(setupStruct.hsupport1,1)); +emicrophone = zeros(setupStruct.tlength,1); + +if (setupStruct.useDelayEstimation == 2) + delSamples = [1641 1895 2032 1895 2311 2000 2350 2222 NaN 2332 2330 2290 2401 2415 NaN 2393 2305 2381 2398]; + delBlocks = round(delSamples/setupStruct.updatel); + delStarts = floor([25138 46844 105991 169901 195739 218536 241803 333905 347703 362660 373753 745135 765887 788078 806257 823835 842443 860139 881869]/setupStruct.updatel); +else + delStarts = []; +end + +for i=1:setupStruct.updateno + setupStruct.currentBlock = i; + + sb = (i-1)*setupStruct.updatel + 1; + se = sb + setupStruct.support - 1; + + %%%%%%% + % Analysis FFTs + %%%%%%% + % Far end signal + temp = fft(setupStruct.win .* xFar(sb:se))/frameSize; + fTheFarEnd = temp(1:setupStruct.hsupport1); + afTheFarEnd(:,i) = abs(fTheFarEnd); + fFar(:,i) = fTheFarEnd; + % Near end signal + temp = fft(setupStruct.win .* yNear(sb:se))/frameSize;%,pause + fmicrophone = temp(1:setupStruct.hsupport1); + afmicrophone(:,i) = abs(fmicrophone); + 
fNear(:,i) = fmicrophone; + %abs(fmicrophone),pause + % The true near end speaker (if we have such info) + temp = fft(setupStruct.win .* yNearSpeech(sb:se)); + aftrueSpeech = abs(temp(1:setupStruct.hsupport1)); + + if(i == 1000) + %break; + end + + % Perform delay estimation + if (setupStruct.useDelayEstimation == 1) + % Delay Estimation + delayStruct = align(fTheFarEnd, fmicrophone, delayStruct, i); + %delayStruct.delay(i) = 39;%19; + idel = max(i - delayStruct.delay(i) + 1,1); + + if delayCompensation_flag + % If we have a new delay estimate from Bastiaan's alg. update the offset + if (delayStruct.delay(i) ~= delayStruct.delay(max(1,i-1))) + delayStruct.delayAdjust = delayStruct.delayAdjust + delayStruct.delay(i) - delayStruct.delay(i-1); + end + % Store the compensated delay + delayStruct.delayNew(i) = delayStruct.delay(i) - delayStruct.delayAdjust; + if (delayStruct.delayNew(i) < 1) + % Something's wrong + pause,break + end + % Compensate with the offset estimate + idel = idel + delayStruct.delayAdjust; + end + if 0%aecmStruct.plotIt + figure(1) + plot(1:i,delayStruct.delay(1:i),'k:',1:i,delayStruct.delayNew(1:i),'k--','LineWidth',2),drawnow + end + elseif (setupStruct.useDelayEstimation == 2) + % Use "manual delay" + delIndex = find(delStarts<i); + if isempty(delIndex) + idel = i; + else + idel = i - delBlocks(max(delIndex)); + if isnan(idel) + idel = i - delBlocks(max(delIndex)-1); + end + end + else + % No delay estimation + %idel = max(i - 18, 1); + idel = max(i - 50, 1); + end + + %%%%%%%% + % This is the AECM algorithm + % + % Output is the new frequency domain signal (hopefully) echo compensated + %%%%%%%% + [femicrophone, aecmStruct] = AECMobile(fmicrophone, afTheFarEnd(:,idel), setupStruct, aecmStruct); + %[femicrophone, aecmStruct] = AECMobile(fmicrophone, FARENDFFT(idel,:)'/2^F(idel,end-1), setupStruct, aecmStruct); + + if aecmStruct.feedbackDelayUpdate + % If the feedback tells us there is a new offset out there update the enhancement + 
delayStruct.delayAdjust = delayStruct.delayAdjust + aecmStruct.feedbackDelay; + aecmStruct.feedbackDelayUpdate = 0; + end + + % reconstruction; first make spectrum odd + temp = [femicrophone; flipud(conj(femicrophone(2:(setupStruct.hsupport1-1))))]; + emicrophone(sb:se) = emicrophone(sb:se) + setupStruct.factor * setupStruct.win .* real(ifft(temp))*frameSize; + if max(isnan(emicrophone(sb:se))) + % Something's wrong with the output at block i + i + break + end +end + + +if useHTC + fid=fopen('aecOutMatlabC.pcm','w');fwrite(fid,int16(emicrophone),'short');fclose(fid); + %fid=fopen('farendFFT.txt','w');fwrite(fid,int16(afTheFarEnd(:)),'short');fclose(fid); + %fid=fopen('farendFFTreal.txt','w');fwrite(fid,int16(imag(fFar(:))),'short');fclose(fid); + %fid=fopen('farendFFTimag.txt','w');fwrite(fid,int16(real(fFar(:))),'short');fclose(fid); + %fid=fopen('nearendFFT.txt','w');fwrite(fid,int16(afmicrophone(:)),'short');fclose(fid); + %fid=fopen('nearendFFTreal.txt','w');fwrite(fid,int16(real(fNear(:))),'short');fclose(fid); + %fid=fopen('nearendFFTimag.txt','w');fwrite(fid,int16(imag(fNear(:))),'short');fclose(fid); +end +if useHTC + %spclab(setupStruct.samplingfreq,xFar,yNear,emicrophone) +else + spclab(setupStruct.samplingfreq,xFar,yNear,emicrophone,yNearSpeech) +end diff --git a/src/modules/audio_processing/aecm/main/matlab/matlab/simEnvironment.m b/src/modules/audio_processing/aecm/main/matlab/matlab/simEnvironment.m new file mode 100644 index 0000000000..3ebe701dfd --- /dev/null +++ b/src/modules/audio_processing/aecm/main/matlab/matlab/simEnvironment.m @@ -0,0 +1,15 @@ +speakerType = 'fm'; +%for k=2:5 +%for k=[2 4 5] +for k=3 + scenario = int2str(k); + fprintf('Current scenario: %d\n',k) + mainProgram + %saveFile = [speakerType, '_s_',scenario,'_delayEst_v2_vad_man.wav']; + %wavwrite(emic,fs,nbits,saveFile); + %saveFile = ['P:\Engineering_share\BjornV\AECM\',speakerType, '_s_',scenario,'_delayEst_v2_vad_man.pcm']; + %saveFile = [speakerType, 
'_s_',scenario,'_adaptMu_adaptGamma_withVar_gammFilt_HSt.pcm']; + saveFile = ['scenario_',scenario,'_090417_backupH_nlp.pcm']; + fid=fopen(saveFile,'w');fwrite(fid,int16(emicrophone),'short');fclose(fid); + %pause +end diff --git a/src/modules/audio_processing/aecm/main/matlab/matlab/updateSettings.m b/src/modules/audio_processing/aecm/main/matlab/matlab/updateSettings.m new file mode 100644 index 0000000000..c805f1d09f --- /dev/null +++ b/src/modules/audio_processing/aecm/main/matlab/matlab/updateSettings.m @@ -0,0 +1,94 @@ +function [setupStructNew, delayStructNew] = updateSettings(microphone, TheFarEnd, setupStruct, delayStruct); + +% other, constants +setupStruct.hsupport1 = setupStruct.support/2 + 1; +setupStruct.factor = 2 / setupStruct.oversampling; +setupStruct.updatel = setupStruct.support/setupStruct.oversampling; +setupStruct.estLen = round(setupStruct.avtime * setupStruct.samplingfreq/setupStruct.updatel); + +% compute some constants +blockLen = setupStruct.support/setupStruct.oversampling; +delayStruct.maxDelayb = floor(setupStruct.samplingfreq*delayStruct.maxDelay/setupStruct.updatel); % in blocks + +%input +tlength = min([length(microphone),length(TheFarEnd)]); +updateno = floor(tlength/setupStruct.updatel); +setupStruct.tlength = setupStruct.updatel*updateno; +setupStruct.updateno = updateno - setupStruct.oversampling + 1; + +% signal length +n = floor(min([length(TheFarEnd), length(microphone)])/setupStruct.support)*setupStruct.support; +setupStruct.nb = n/blockLen - setupStruct.oversampling + 1; % in blocks + +setupStruct.win = sqrt([0 ; hanning(setupStruct.support-1)]); + +% Construct filterbank in Bark-scale + +K = setupStruct.subBandLength; %Something's wrong when K even +erbs = 21.4*log10(0.00437*setupStruct.samplingfreq/2+1); +fe = (10.^((0:K)'*erbs/K/21.4)-1)/0.00437; +setupStruct.centerFreq = fe; +H = diag(ones(1,K-1))+diag(ones(1,K-2),-1); +Hinv = inv(H); +aty = 2*Hinv(end,:)*fe(2:end-1); +boundary = aty - (setupStruct.samplingfreq/2 + 
fe(end-1))/2; +if rem(K,2) + x1 = min([fe(2)/2, -boundary]); +else + x1 = max([0, boundary]); +end +%x1 +g = fe(2:end-1); +g(1) = g(1) - x1/2; +x = 2*Hinv*g; +x = [x1;x]; +%figure(42), clf +xy = zeros((K+1)*4,1); +yy = zeros((K+1)*4,1); +xy(1:4) = [fe(1) fe(1) x(1) x(1)]'; +yy(1:4) = [0 1 1 0]'/x(1); +for kk=2:K + xy((kk-1)*4+(1:4)) = [x(kk-1) x(kk-1) x(kk) x(kk)]'; + yy((kk-1)*4+(1:4)) = [0 1 1 0]'/(x(kk)-x(kk-1)); +end +xy(end-3:end) = [x(K) x(K) fe(end) fe(end)]'; +yy(end-3:end) = [0 1 1 0]'/(fe(end)*2-2*x(K)); +%plot(xy,yy,'LineWidth',2) +%fill(xy,yy,'y') + +x = [0;x]; +xk = x*setupStruct.hsupport1/setupStruct.samplingfreq*2; +%setupStruct.erbBoundaries = xk; +numInBand = zeros(length(xk),1); +xh = (0:setupStruct.hsupport1-1); + +for kk=1:length(xk) + if (kk==length(xk)) + numInBand(kk) = length(find(xh>=xk(kk))); + else + numInBand(kk) = length(intersect(find(xh>=xk(kk)),find(xh<xk(kk+1)))); + end +end +setupStruct.numInBand = numInBand; + +setupStructNew = setupStruct; + +delayStructNew = struct(... + 'sxAll2',zeros(setupStructNew.hsupport1,setupStructNew.nb),... + 'syAll2',zeros(setupStructNew.hsupport1,setupStructNew.nb),... + 'z200',zeros(5,setupStructNew.hsupport1),... + 'z500',zeros(5,delayStruct.maxDelayb+1),... + 'bxspectrum',uint32(zeros(setupStructNew.nb,1)),... + 'byspectrum',uint32(zeros(setupStructNew.nb,1)),... + 'bandfirst',delayStruct.bandfirst,'bandlast',delayStruct.bandlast,... + 'bxhist',uint32(zeros(delayStruct.maxDelayb+1,1)),... + 'bcount',zeros(1+delayStruct.maxDelayb,setupStructNew.nb),... + 'fout',zeros(1+delayStruct.maxDelayb,setupStructNew.nb),... + 'new',zeros(1+delayStruct.maxDelayb,setupStructNew.nb),... + 'smlength',delayStruct.smlength,... + 'maxDelay', delayStruct.maxDelay,... + 'maxDelayb', delayStruct.maxDelayb,... + 'oneGoodEstimate', 0,... + 'delayAdjust', 0,... + 'delayNew',zeros(setupStructNew.nb,1),... 
+ 'delay',zeros(setupStructNew.nb,1)); diff --git a/src/modules/audio_processing/aecm/main/matlab/waitbar_j.m b/src/modules/audio_processing/aecm/main/matlab/waitbar_j.m new file mode 100644 index 0000000000..50b9ccf309 --- /dev/null +++ b/src/modules/audio_processing/aecm/main/matlab/waitbar_j.m @@ -0,0 +1,234 @@ +function fout = waitbar_j(x,whichbar, varargin) +%WAITBAR Display wait bar. +% H = WAITBAR(X,'title', property, value, property, value, ...) +% creates and displays a waitbar of fractional length X. The +% handle to the waitbar figure is returned in H. +% X should be between 0 and 1. Optional arguments property and +% value allow to set corresponding waitbar figure properties. +% Property can also be an action keyword 'CreateCancelBtn', in +% which case a cancel button will be added to the figure, and +% the passed value string will be executed upon clicking on the +% cancel button or the close figure button. +% +% WAITBAR(X) will set the length of the bar in the most recently +% created waitbar window to the fractional length X. +% +% WAITBAR(X,H) will set the length of the bar in waitbar H +% to the fractional length X. +% +% WAITBAR(X,H,'updated title') will update the title text in +% the waitbar figure, in addition to setting the fractional +% length to X. +% +% WAITBAR is typically used inside a FOR loop that performs a +% lengthy computation. A sample usage is shown below: +% +% h = waitbar(0,'Please wait...'); +% for i=1:100, +% % computation here % +% waitbar(i/100,h) +% end +% close(h) + +% Clay M. Thompson 11-9-92 +% Vlad Kolesnikov 06-7-99 +% Copyright 1984-2001 The MathWorks, Inc. 
+% $Revision: 1.22 $ $Date: 2001/04/15 12:03:29 $ + +if nargin>=2 + if ischar(whichbar) + type=2; %we are initializing + name=whichbar; + elseif isnumeric(whichbar) + type=1; %we are updating, given a handle + f=whichbar; + else + error(['Input arguments of type ' class(whichbar) ' not valid.']) + end +elseif nargin==1 + f = findobj(allchild(0),'flat','Tag','TMWWaitbar'); + + if isempty(f) + type=2; + name='Waitbar'; + else + type=1; + f=f(1); + end +else + error('Input arguments not valid.'); +end + +x = max(0,min(100*x,100)); + +switch type + case 1, % waitbar(x) update + p = findobj(f,'Type','patch'); + l = findobj(f,'Type','line'); + if isempty(f) | isempty(p) | isempty(l), + error('Couldn''t find waitbar handles.'); + end + xpatch = get(p,'XData'); + xpatch = [0 x x 0]; + set(p,'XData',xpatch) + xline = get(l,'XData'); + set(l,'XData',xline); + + if nargin>2, + % Update waitbar title: + hAxes = findobj(f,'type','axes'); + hTitle = get(hAxes,'title'); + set(hTitle,'string',varargin{1}); + end + + case 2, % waitbar(x,name) initialize + vertMargin = 0; + if nargin > 2, + % we have optional arguments: property-value pairs + if rem (nargin, 2 ) ~= 0 + error( 'Optional initialization arguments must be passed in pairs' ); + end + end + + oldRootUnits = get(0,'Units'); + + set(0, 'Units', 'points'); + screenSize = get(0,'ScreenSize'); + + axFontSize=get(0,'FactoryAxesFontSize'); + + pointsPerPixel = 72/get(0,'ScreenPixelsPerInch'); + + width = 360 * pointsPerPixel; + height = 75 * pointsPerPixel; + pos = [screenSize(3)/2-width/2 screenSize(4)/2-height/2 width height]; + +%pos= [501.75 589.5 393.75 52.5]; + f = figure(... + 'Units', 'points', ... + 'BusyAction', 'queue', ... + 'Position', pos, ... + 'Resize','on', ... + 'CreateFcn','', ... + 'NumberTitle','off', ... + 'IntegerHandle','off', ... + 'MenuBar', 'none', ... + 'Tag','TMWWaitbar',... + 'Interruptible', 'off', ... 
+ 'Visible','on'); + + %%%%%%%%%%%%%%%%%%%%% + % set figure properties as passed to the fcn + % pay special attention to the 'cancel' request + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + if nargin > 2, + propList = varargin(1:2:end); + valueList = varargin(2:2:end); + cancelBtnCreated = 0; + for ii = 1:length( propList ) + try + if strcmp(lower(propList{ii}), 'createcancelbtn' ) & ~cancelBtnCreated + cancelBtnHeight = 23 * pointsPerPixel; + cancelBtnWidth = 60 * pointsPerPixel; + newPos = pos; + vertMargin = vertMargin + cancelBtnHeight; + newPos(4) = newPos(4)+vertMargin; + callbackFcn = [valueList{ii}]; + set( f, 'Position', newPos, 'CloseRequestFcn', callbackFcn ); + cancelButt = uicontrol('Parent',f, ... + 'Units','points', ... + 'Callback',callbackFcn, ... + 'ButtonDownFcn', callbackFcn, ... + 'Enable','on', ... + 'Interruptible','off', ... + 'Position', [pos(3)-cancelBtnWidth*1.4, 7, ... + cancelBtnWidth, cancelBtnHeight], ... + 'String','Cancel', ... + 'Tag','TMWWaitbarCancelButton'); + cancelBtnCreated = 1; + else + % simply set the prop/value pair of the figure + set( f, propList{ii}, valueList{ii}); + end + catch + disp ( ['Warning: could not set property ''' propList{ii} ''' with value ''' num2str(valueList{ii}) '''' ] ); + end + end + end + + %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + + colormap([]); + + axNorm=[.05 .3 .9 .2]; + % axNorm=[1 1 1 1]; + axPos=axNorm.*[pos(3:4),pos(3:4)] + [0 vertMargin 0 0]; + + h = axes('XLim',[0 100],... + 'YLim',[0 1],... + 'Box','on', ... + 'Units','Points',... + 'FontSize', axFontSize,... + 'Position',axPos,... + 'XTickMode','manual',... + 'YTickMode','manual',... + 'XTick',[],... + 'YTick',[],... + 'XTickLabelMode','manual',... + 'XTickLabel',[],... + 'YTickLabelMode','manual',... + 'YTickLabel',[]); + + tHandle=title(name); + tHandle=get(h,'title'); + oldTitleUnits=get(tHandle,'Units'); + set(tHandle,... + 'Units', 'points',... 
+ 'String', name); + + tExtent=get(tHandle,'Extent'); + set(tHandle,'Units',oldTitleUnits); + + titleHeight=tExtent(4)+axPos(2)+axPos(4)+5; + if titleHeight>pos(4) + pos(4)=titleHeight; + pos(2)=screenSize(4)/2-pos(4)/2; + figPosDirty=logical(1); + else + figPosDirty=logical(0); + end + + if tExtent(3)>pos(3)*1.10; + pos(3)=min(tExtent(3)*1.10,screenSize(3)); + pos(1)=screenSize(3)/2-pos(3)/2; + + axPos([1,3])=axNorm([1,3])*pos(3); + set(h,'Position',axPos); + + figPosDirty=logical(1); + end + + if figPosDirty + set(f,'Position',pos); + end + + xpatch = [0 x x 0]; + ypatch = [0 0 1 1]; + xline = [100 0 0 100 100]; + yline = [0 0 1 1 0]; + + p = patch(xpatch,ypatch,'r','EdgeColor','r','EraseMode','none'); + l = line(xline,yline,'EraseMode','none'); + set(l,'Color',get(gca,'XColor')); + + + set(f,'HandleVisibility','callback','visible','on', 'resize','off'); + + set(0, 'Units', oldRootUnits); +end % case +drawnow; + +if nargout==1, + fout = f; +end diff --git a/src/modules/audio_processing/aecm/main/source/Android.mk b/src/modules/audio_processing/aecm/main/source/Android.mk new file mode 100644 index 0000000000..7ed9f3616a --- /dev/null +++ b/src/modules/audio_processing/aecm/main/source/Android.mk @@ -0,0 +1,55 @@ +# Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +LOCAL_PATH := $(call my-dir) + +include $(CLEAR_VARS) + +LOCAL_ARM_MODE := arm +LOCAL_MODULE_CLASS := STATIC_LIBRARIES +LOCAL_MODULE := libwebrtc_aecm +LOCAL_MODULE_TAGS := optional +LOCAL_GENERATED_SOURCES := +LOCAL_SRC_FILES := echo_control_mobile.c \ + aecm_core.c + +# Flags passed to both C and C++ files. 
+MY_CFLAGS := +MY_CFLAGS_C := +MY_DEFS := '-DNO_TCMALLOC' \ + '-DNO_HEAPCHECKER' \ + '-DWEBRTC_TARGET_PC' \ + '-DWEBRTC_LINUX' \ + '-DWEBRTC_THREAD_RR' +ifeq ($(TARGET_ARCH),arm) +MY_DEFS += \ + '-DWEBRTC_ANDROID' \ + '-DANDROID' +endif +LOCAL_CFLAGS := $(MY_CFLAGS_C) $(MY_CFLAGS) $(MY_DEFS) + +# Include paths placed before CFLAGS/CPPFLAGS +LOCAL_C_INCLUDES := $(LOCAL_PATH)/../../../../.. \ + $(LOCAL_PATH)/../interface \ + $(LOCAL_PATH)/../../../utility \ + $(LOCAL_PATH)/../../../../../common_audio/signal_processing_library/main/interface + +# Flags passed to only C++ (and not C) files. +LOCAL_CPPFLAGS := + +LOCAL_LDFLAGS := + +LOCAL_STATIC_LIBRARIES := + +LOCAL_SHARED_LIBRARIES := libcutils \ + libdl \ + libstlport +LOCAL_ADDITIONAL_DEPENDENCIES := + +include external/stlport/libstlport.mk +include $(BUILD_STATIC_LIBRARY) diff --git a/src/modules/audio_processing/aecm/main/source/aecm.gyp b/src/modules/audio_processing/aecm/main/source/aecm.gyp new file mode 100644 index 0000000000..a535d2b294 --- /dev/null +++ b/src/modules/audio_processing/aecm/main/source/aecm.gyp @@ -0,0 +1,43 @@ +# Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. 
+ +{ + 'includes': [ + '../../../../../common_settings.gypi', + ], + 'targets': [ + { + 'target_name': 'aecm', + 'type': '<(library)', + 'dependencies': [ + '../../../../../common_audio/signal_processing_library/main/source/spl.gyp:spl', + '../../../utility/util.gyp:apm_util' + ], + 'include_dirs': [ + '../interface', + ], + 'direct_dependent_settings': { + 'include_dirs': [ + '../interface', + ], + }, + 'sources': [ + '../interface/echo_control_mobile.h', + 'echo_control_mobile.c', + 'aecm_core.c', + 'aecm_core.h', + ], + }, + ], +} + +# Local Variables: +# tab-width:2 +# indent-tabs-mode:nil +# End: +# vim: set expandtab tabstop=2 shiftwidth=2: diff --git a/src/modules/audio_processing/aecm/main/source/aecm_core.c b/src/modules/audio_processing/aecm/main/source/aecm_core.c new file mode 100644 index 0000000000..f17f1bf237 --- /dev/null +++ b/src/modules/audio_processing/aecm/main/source/aecm_core.c @@ -0,0 +1,2534 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <stdlib.h> + +#include "aecm_core.h" +#include "ring_buffer.h" +#include "echo_control_mobile.h" +#include "typedefs.h" + +// TODO(bjornv): Will be removed in final version. +//#include <stdio.h> + +#ifdef ARM_WINM_LOG +#include <stdio.h> +#include <windows.h> +#endif + +// BANDLAST - BANDFIRST must be < 32 +#define BANDFIRST 12 // Only bit BANDFIRST through bit BANDLAST are processed +#define BANDLAST 43 + +#ifdef ARM_WINM +#define WebRtcSpl_AddSatW32(a,b) _AddSatInt(a,b) +#define WebRtcSpl_SubSatW32(a,b) _SubSatInt(a,b) +#endif +// 16 instructions on most risc machines for 32-bit bitcount ! 
+ +#ifdef AEC_DEBUG +FILE *dfile; +FILE *testfile; +#endif + +#ifdef AECM_SHORT + +// Square root of Hanning window in Q14 +static const WebRtc_Word16 kSqrtHanning[] = +{ + 0, 804, 1606, 2404, 3196, 3981, 4756, 5520, + 6270, 7005, 7723, 8423, 9102, 9760, 10394, 11003, + 11585, 12140, 12665, 13160, 13623, 14053, 14449, 14811, + 15137, 15426, 15679, 15893, 16069, 16207, 16305, 16364, + 16384 +}; + +#else + +// Square root of Hanning window in Q14 +static const WebRtc_Word16 kSqrtHanning[] = {0, 399, 798, 1196, 1594, 1990, 2386, 2780, 3172, + 3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224, 6591, 6954, 7313, 7668, 8019, 8364, + 8705, 9040, 9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514, 11795, 12068, 12335, + 12594, 12845, 13089, 13325, 13553, 13773, 13985, 14189, 14384, 14571, 14749, 14918, + 15079, 15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034, 16111, 16179, 16237, + 16286, 16325, 16354, 16373, 16384}; + +#endif + +//Q15 alpha = 0.99439986968132 const Factor for magnitude approximation +static const WebRtc_UWord16 kAlpha1 = 32584; +//Q15 beta = 0.12967166976970 const Factor for magnitude approximation +static const WebRtc_UWord16 kBeta1 = 4249; +//Q15 alpha = 0.94234827210087 const Factor for magnitude approximation +static const WebRtc_UWord16 kAlpha2 = 30879; +//Q15 beta = 0.33787806009150 const Factor for magnitude approximation +static const WebRtc_UWord16 kBeta2 = 11072; +//Q15 alpha = 0.82247698684306 const Factor for magnitude approximation +static const WebRtc_UWord16 kAlpha3 = 26951; +//Q15 beta = 0.57762063060713 const Factor for magnitude approximation +static const WebRtc_UWord16 kBeta3 = 18927; + +// Initialization table for echo channel in 8 kHz +static const WebRtc_Word16 kChannelStored8kHz[PART_LEN1] = { + 2040, 1815, 1590, 1498, 1405, 1395, 1385, 1418, + 1451, 1506, 1562, 1644, 1726, 1804, 1882, 1918, + 1953, 1982, 2010, 2025, 2040, 2034, 2027, 2021, + 2014, 1997, 1980, 1925, 1869, 1800, 1732, 1683, + 1635, 1604, 1572, 1545, 1517, 
1481, 1444, 1405, + 1367, 1331, 1294, 1270, 1245, 1239, 1233, 1247, + 1260, 1282, 1303, 1338, 1373, 1407, 1441, 1470, + 1499, 1524, 1549, 1565, 1582, 1601, 1621, 1649, + 1676 +}; + +// Initialization table for echo channel in 16 kHz +static const WebRtc_Word16 kChannelStored16kHz[PART_LEN1] = { + 2040, 1590, 1405, 1385, 1451, 1562, 1726, 1882, + 1953, 2010, 2040, 2027, 2014, 1980, 1869, 1732, + 1635, 1572, 1517, 1444, 1367, 1294, 1245, 1233, + 1260, 1303, 1373, 1441, 1499, 1549, 1582, 1621, + 1676, 1741, 1802, 1861, 1921, 1983, 2040, 2102, + 2170, 2265, 2375, 2515, 2651, 2781, 2922, 3075, + 3253, 3471, 3738, 3976, 4151, 4258, 4308, 4288, + 4270, 4253, 4237, 4179, 4086, 3947, 3757, 3484, + 3153 +}; + +#ifdef ARM_WINM_LOG +HANDLE logFile = NULL; +#endif + +static void WebRtcAecm_ComfortNoise(AecmCore_t* const aecm, const WebRtc_UWord16 * const dfa, + WebRtc_Word16 * const outReal, + WebRtc_Word16 * const outImag, + const WebRtc_Word16 * const lambda); + +static __inline WebRtc_UWord32 WebRtcAecm_SetBit(WebRtc_UWord32 in, WebRtc_Word32 pos) +{ + WebRtc_UWord32 mask, out; + + mask = WEBRTC_SPL_SHIFT_W32(1, pos); + out = (in | mask); + + return out; +} + +// WebRtcAecm_Hisser(...) +// +// This function compares the binary vector specvec with all rows of the binary matrix specmat +// and counts per row the number of times they have the same value. 
+// Input: +// - specvec : binary "vector" that is stored in a long +// - specmat : binary "matrix" that is stored as a vector of long +// Output: +// - bcount : "Vector" stored as a long, containing for each row the number of times +// the matrix row and the input vector have the same value +// +// +void WebRtcAecm_Hisser(const WebRtc_UWord32 specvec, const WebRtc_UWord32 * const specmat, + WebRtc_UWord32 * const bcount) +{ + int n; + WebRtc_UWord32 a, b; + register WebRtc_UWord32 tmp; + + a = specvec; + // compare binary vector specvec with all rows of the binary matrix specmat + for (n = 0; n < MAX_DELAY; n++) + { + b = specmat[n]; + a = (specvec ^ b); + // Returns bit counts in tmp + tmp = a - ((a >> 1) & 033333333333) - ((a >> 2) & 011111111111); + tmp = ((tmp + (tmp >> 3)) & 030707070707); + tmp = (tmp + (tmp >> 6)); + tmp = (tmp + (tmp >> 12) + (tmp >> 24)) & 077; + + bcount[n] = tmp; + } +} + +// WebRtcAecm_BSpectrum(...) +// +// Computes the binary spectrum by comparing the input spectrum with a threshold spectrum. +// +// Input: +// - spectrum : Spectrum of which the binary spectrum should be calculated. +// - thresvec : Threshold spectrum with which the input spectrum is compared. +// Return: +// - out : Binary spectrum +// +WebRtc_UWord32 WebRtcAecm_BSpectrum(const WebRtc_UWord16 * const spectrum, + const WebRtc_UWord16 * const thresvec) +{ + int k; + WebRtc_UWord32 out; + + out = 0; + for (k = BANDFIRST; k <= BANDLAST; k++) + { + if (spectrum[k] > thresvec[k]) + { + out = WebRtcAecm_SetBit(out, k - BANDFIRST); + } + } + + return out; +} + +// WebRtcAecm_MedianEstimator(...) +// +// Calculates the median recursively. 
+// +// Input: +// - newVal : new additional value +// - medianVec : vector with current medians +// - factor : factor for smoothing +// +// Output: +// - medianVec : vector with updated median +// +int WebRtcAecm_MedianEstimator(const WebRtc_UWord16 newVal, WebRtc_UWord16 * const medianVec, + const int factor) +{ + WebRtc_Word32 median; + WebRtc_Word32 diff; + + median = (WebRtc_Word32)medianVec[0]; + + //median = median + ((newVal-median)>>factor); + diff = (WebRtc_Word32)newVal - median; + diff = WEBRTC_SPL_SHIFT_W32(diff, -factor); + median = median + diff; + + medianVec[0] = (WebRtc_UWord16)median; + + return 0; +} + +int WebRtcAecm_CreateCore(AecmCore_t **aecmInst) +{ + AecmCore_t *aecm = malloc(sizeof(AecmCore_t)); + *aecmInst = aecm; + if (aecm == NULL) + { + return -1; + } + + if (WebRtcApm_CreateBuffer(&aecm->farFrameBuf, FRAME_LEN + PART_LEN) == -1) + { + WebRtcAecm_FreeCore(aecm); + aecm = NULL; + return -1; + } + + if (WebRtcApm_CreateBuffer(&aecm->nearNoisyFrameBuf, FRAME_LEN + PART_LEN) == -1) + { + WebRtcAecm_FreeCore(aecm); + aecm = NULL; + return -1; + } + + if (WebRtcApm_CreateBuffer(&aecm->nearCleanFrameBuf, FRAME_LEN + PART_LEN) == -1) + { + WebRtcAecm_FreeCore(aecm); + aecm = NULL; + return -1; + } + + if (WebRtcApm_CreateBuffer(&aecm->outFrameBuf, FRAME_LEN + PART_LEN) == -1) + { + WebRtcAecm_FreeCore(aecm); + aecm = NULL; + return -1; + } + + return 0; +} + +// WebRtcAecm_InitCore(...) +// +// This function initializes the AECM instant created with WebRtcAecm_CreateCore(...) 
+// Input: +// - aecm : Pointer to the Echo Suppression instance +// - samplingFreq : Sampling Frequency +// +// Output: +// - aecm : Initialized instance +// +// Return value : 0 - Ok +// -1 - Error +// +int WebRtcAecm_InitCore(AecmCore_t * const aecm, int samplingFreq) +{ + int retVal = 0; + WebRtc_Word16 i; + WebRtc_Word16 tmp16; + + if (samplingFreq != 8000 && samplingFreq != 16000) + { + samplingFreq = 8000; + retVal = -1; + } + // sanity check of sampling frequency + aecm->mult = (WebRtc_Word16)samplingFreq / 8000; + + aecm->farBufWritePos = 0; + aecm->farBufReadPos = 0; + aecm->knownDelay = 0; + aecm->lastKnownDelay = 0; + + WebRtcApm_InitBuffer(aecm->farFrameBuf); + WebRtcApm_InitBuffer(aecm->nearNoisyFrameBuf); + WebRtcApm_InitBuffer(aecm->nearCleanFrameBuf); + WebRtcApm_InitBuffer(aecm->outFrameBuf); + + memset(aecm->xBuf, 0, sizeof(aecm->xBuf)); + memset(aecm->dBufClean, 0, sizeof(aecm->dBufClean)); + memset(aecm->dBufNoisy, 0, sizeof(aecm->dBufNoisy)); + memset(aecm->outBuf, 0, sizeof(WebRtc_Word16) * PART_LEN); + + aecm->seed = 666; + aecm->totCount = 0; + + memset(aecm->xfaHistory, 0, sizeof(WebRtc_UWord16) * (PART_LEN1) * MAX_DELAY); + + aecm->delHistoryPos = MAX_DELAY; + + memset(aecm->medianYlogspec, 0, sizeof(WebRtc_UWord16) * PART_LEN1); + memset(aecm->medianXlogspec, 0, sizeof(WebRtc_UWord16) * PART_LEN1); + memset(aecm->medianBCount, 0, sizeof(WebRtc_UWord16) * MAX_DELAY); + memset(aecm->bxHistory, 0, sizeof(aecm->bxHistory)); + + // Initialize to reasonable values + aecm->currentDelay = 8; + aecm->previousDelay = 8; + aecm->delayAdjust = 0; + + aecm->nlpFlag = 1; + aecm->fixedDelay = -1; + + memset(aecm->xfaQDomainBuf, 0, sizeof(WebRtc_Word16) * MAX_DELAY); + aecm->dfaCleanQDomain = 0; + aecm->dfaCleanQDomainOld = 0; + aecm->dfaNoisyQDomain = 0; + aecm->dfaNoisyQDomainOld = 0; + + memset(aecm->nearLogEnergy, 0, sizeof(WebRtc_Word16) * MAX_BUF_LEN); + memset(aecm->farLogEnergy, 0, sizeof(WebRtc_Word16) * MAX_BUF_LEN); + 
memset(aecm->echoAdaptLogEnergy, 0, sizeof(WebRtc_Word16) * MAX_BUF_LEN); + memset(aecm->echoStoredLogEnergy, 0, sizeof(WebRtc_Word16) * MAX_BUF_LEN); + + // Initialize the echo channels with a stored shape. + if (samplingFreq == 8000) + { + memcpy(aecm->channelAdapt16, kChannelStored8kHz, sizeof(WebRtc_Word16) * PART_LEN1); + } + else + { + memcpy(aecm->channelAdapt16, kChannelStored16kHz, sizeof(WebRtc_Word16) * PART_LEN1); + } + memcpy(aecm->channelStored, aecm->channelAdapt16, sizeof(WebRtc_Word16) * PART_LEN1); + for (i = 0; i < PART_LEN1; i++) + { + aecm->channelAdapt32[i] = WEBRTC_SPL_LSHIFT_W32( + (WebRtc_Word32)(aecm->channelAdapt16[i]), 16); + } + + memset(aecm->echoFilt, 0, sizeof(WebRtc_Word32) * PART_LEN1); + memset(aecm->nearFilt, 0, sizeof(WebRtc_Word16) * PART_LEN1); + aecm->noiseEstCtr = 0; + + aecm->cngMode = AecmTrue; + + // Increase the noise Q domain with increasing frequency, to correspond to the + // expected energy levels. + // Also shape the initial noise level with this consideration. 
+#if (!defined ARM_WINM) && (!defined ARM9E_GCC) && (!defined ANDROID_AECOPT) + for (i = 0; i < PART_LEN1; i++) + { + if (i < PART_LEN1 >> 2) + { + aecm->noiseEstQDomain[i] = 10; + tmp16 = PART_LEN1 - i; + aecm->noiseEst[i] = (tmp16 * tmp16) << 4; + } else if (i < PART_LEN1 >> 1) + { + aecm->noiseEstQDomain[i] = 11; + tmp16 = PART_LEN1 - i; + aecm->noiseEst[i] = ((tmp16 * tmp16) << 4) << 1; + } else + { + aecm->noiseEstQDomain[i] = 12; + aecm->noiseEst[i] = aecm->noiseEst[(PART_LEN1 >> 1) - 1] << 1; + } + } +#else + for (i = 0; i < PART_LEN1 >> 2; i++) + { + aecm->noiseEstQDomain[i] = 10; + tmp16 = PART_LEN1 - i; + aecm->noiseEst[i] = (tmp16 * tmp16) << 4; + } + for (; i < PART_LEN1 >> 1; i++) + { + aecm->noiseEstQDomain[i] = 11; + tmp16 = PART_LEN1 - i; + aecm->noiseEst[i] = ((tmp16 * tmp16) << 4) << 1; + } + for (; i < PART_LEN1; i++) + { + aecm->noiseEstQDomain[i] = 12; + aecm->noiseEst[i] = aecm->noiseEst[(PART_LEN1 >> 1) - 1] << 1; + } +#endif + + aecm->mseAdaptOld = 1000; + aecm->mseStoredOld = 1000; + aecm->mseThreshold = WEBRTC_SPL_WORD32_MAX; + + aecm->farEnergyMin = WEBRTC_SPL_WORD16_MAX; + aecm->farEnergyMax = WEBRTC_SPL_WORD16_MIN; + aecm->farEnergyMaxMin = 0; + aecm->farEnergyVAD = FAR_ENERGY_MIN; // This prevents false speech detection at the + // beginning. 
+ aecm->farEnergyMSE = 0; + aecm->currentVADValue = 0; + aecm->vadUpdateCount = 0; + aecm->firstVAD = 1; + + aecm->delayCount = 0; + aecm->newDelayCorrData = 0; + aecm->lastDelayUpdateCount = 0; + memset(aecm->delayCorrelation, 0, sizeof(WebRtc_Word16) * ((CORR_MAX << 1) + 1)); + + aecm->startupState = 0; + aecm->mseChannelCount = 0; + aecm->supGain = SUPGAIN_DEFAULT; + aecm->supGainOld = SUPGAIN_DEFAULT; + aecm->delayOffsetFlag = 0; + + memset(aecm->delayHistogram, 0, sizeof(aecm->delayHistogram)); + aecm->delayVadCount = 0; + aecm->maxDelayHistIdx = 0; + aecm->lastMinPos = 0; + + aecm->supGainErrParamA = SUPGAIN_ERROR_PARAM_A; + aecm->supGainErrParamD = SUPGAIN_ERROR_PARAM_D; + aecm->supGainErrParamDiffAB = SUPGAIN_ERROR_PARAM_A - SUPGAIN_ERROR_PARAM_B; + aecm->supGainErrParamDiffBD = SUPGAIN_ERROR_PARAM_B - SUPGAIN_ERROR_PARAM_D; + + return 0; +} + +int WebRtcAecm_Control(AecmCore_t *aecm, int delay, int nlpFlag, int delayOffsetFlag) +{ + aecm->nlpFlag = nlpFlag; + aecm->fixedDelay = delay; + aecm->delayOffsetFlag = delayOffsetFlag; + + return 0; +} + +// WebRtcAecm_GetNewDelPos(...) +// +// Moves the pointer to the next entry. Returns to zero if max position reached. +// +// Input: +// - aecm : Pointer to the AECM instance +// Return: +// - pos : New position in the history. +// +// +WebRtc_Word16 WebRtcAecm_GetNewDelPos(AecmCore_t * const aecm) +{ + WebRtc_Word16 pos; + + pos = aecm->delHistoryPos; + pos++; + if (pos >= MAX_DELAY) + { + pos = 0; + } + aecm->delHistoryPos = pos; + + return pos; +} + +// WebRtcAecm_EstimateDelay(...) +// +// Estimate the delay of the echo signal. 
+// +// Inputs: +// - aecm : Pointer to the AECM instance +// - farSpec : Delayed farend magnitude spectrum +// - nearSpec : Nearend magnitude spectrum +// - stages : Q-domain of xxFIX and yyFIX (without dynamic Q-domain) +// - xfaQ : normalization factor, i.e., Q-domain before FFT +// Return: +// - delay : Estimated delay +// +WebRtc_Word16 WebRtcAecm_EstimateDelay(AecmCore_t * const aecm, + const WebRtc_UWord16 * const farSpec, + const WebRtc_UWord16 * const nearSpec, + const WebRtc_Word16 xfaQ) +{ + WebRtc_UWord32 bxspectrum, byspectrum; + WebRtc_UWord32 bcount[MAX_DELAY]; + + int i, res; + + WebRtc_UWord16 xmean[PART_LEN1], ymean[PART_LEN1]; + WebRtc_UWord16 dtmp1; + WebRtc_Word16 fcount[MAX_DELAY]; + + //WebRtc_Word16 res; + WebRtc_Word16 histpos; + WebRtc_Word16 maxHistLvl; + WebRtc_UWord16 *state; + WebRtc_Word16 minpos = -1; + + enum + { + kVadCountThreshold = 25 + }; + enum + { + kMaxHistogram = 600 + }; + + histpos = WebRtcAecm_GetNewDelPos(aecm); + + for (i = 0; i < PART_LEN1; i++) + { + aecm->xfaHistory[i][histpos] = farSpec[i]; + + state = &(aecm->medianXlogspec[i]); + res = WebRtcAecm_MedianEstimator(farSpec[i], state, 6); + + state = &(aecm->medianYlogspec[i]); + res = WebRtcAecm_MedianEstimator(nearSpec[i], state, 6); + + // Mean: + // FLOAT: + // ymean = dtmp2/MAX_DELAY + // + // FIX: + // input: dtmp2FIX in Q0 + // output: ymeanFIX in Q8 + // 20 = 1/MAX_DELAY in Q13 = 1/MAX_DELAY * 2^13 + xmean[i] = (aecm->medianXlogspec[i]); + ymean[i] = (aecm->medianYlogspec[i]); + + } + // Update Q-domain buffer + aecm->xfaQDomainBuf[histpos] = xfaQ; + + // Get binary spectra + // FLOAT: + // bxspectrum = bspectrum(xlogspec, xmean); + // + // FIX: + // input: xlogspecFIX,ylogspecFIX in Q8 + // xmeanFIX, ymeanFIX in Q8 + // output: unsigned long bxspectrum, byspectrum in Q0 + bxspectrum = WebRtcAecm_BSpectrum(farSpec, xmean); + byspectrum = WebRtcAecm_BSpectrum(nearSpec, ymean); + + // Shift binary spectrum history + memmove(&(aecm->bxHistory[1]), 
&(aecm->bxHistory[0]), + (MAX_DELAY - 1) * sizeof(WebRtc_UWord32)); + + aecm->bxHistory[0] = bxspectrum; + + // Compare with delayed spectra + WebRtcAecm_Hisser(byspectrum, aecm->bxHistory, bcount); + + for (i = 0; i < MAX_DELAY; i++) + { + // Update sum + // bcount is constrained to [0, 32], meaning we can smooth with a factor up to 2^11. + dtmp1 = (WebRtc_UWord16)bcount[i]; + dtmp1 = WEBRTC_SPL_LSHIFT_W16(dtmp1, 9); + state = &(aecm->medianBCount[i]); + res = WebRtcAecm_MedianEstimator(dtmp1, state, 9); + fcount[i] = (aecm->medianBCount[i]); + } + + // Find minimum + minpos = WebRtcSpl_MinIndexW16(fcount, MAX_DELAY); + + // If the farend has been active sufficiently long, begin accumulating a histogram + // of the minimum positions. Search for the maximum bin to determine the delay. + if (aecm->currentVADValue == 1) + { + if (aecm->delayVadCount >= kVadCountThreshold) + { + // Increment the histogram at the current minimum position. + if (aecm->delayHistogram[minpos] < kMaxHistogram) + { + aecm->delayHistogram[minpos] += 3; + } + +#if (!defined ARM_WINM) && (!defined ARM9E_GCC) && (!defined ANDROID_AECOPT) + // Decrement the entire histogram. + for (i = 0; i < MAX_DELAY; i++) + { + if (aecm->delayHistogram[i] > 0) + { + aecm->delayHistogram[i]--; + } + } + + // Select the histogram index corresponding to the maximum bin as the delay. + maxHistLvl = 0; + aecm->maxDelayHistIdx = 0; + for (i = 0; i < MAX_DELAY; i++) + { + if (aecm->delayHistogram[i] > maxHistLvl) + { + maxHistLvl = aecm->delayHistogram[i]; + aecm->maxDelayHistIdx = i; + } + } +#else + maxHistLvl = 0; + aecm->maxDelayHistIdx = 0; + + for (i = 0; i < MAX_DELAY; i++) + { + WebRtc_Word16 tempVar = aecm->delayHistogram[i]; + + // Decrement the entire histogram. + if (tempVar > 0) + { + tempVar--; + aecm->delayHistogram[i] = tempVar; + + // Select the histogram index corresponding to the maximum bin as the delay. 
+ if (tempVar > maxHistLvl) + { + maxHistLvl = tempVar; + aecm->maxDelayHistIdx = i; + } + } + } +#endif + } else + { + aecm->delayVadCount++; + } + } else + { + aecm->delayVadCount = 0; + } + + return aecm->maxDelayHistIdx; +} + +int WebRtcAecm_FreeCore(AecmCore_t *aecm) +{ + if (aecm == NULL) + { + return -1; + } + + WebRtcApm_FreeBuffer(aecm->farFrameBuf); + WebRtcApm_FreeBuffer(aecm->nearNoisyFrameBuf); + WebRtcApm_FreeBuffer(aecm->nearCleanFrameBuf); + WebRtcApm_FreeBuffer(aecm->outFrameBuf); + + free(aecm); + + return 0; +} + +void WebRtcAecm_ProcessFrame(AecmCore_t * const aecm, const WebRtc_Word16 * const farend, + const WebRtc_Word16 * const nearendNoisy, + const WebRtc_Word16 * const nearendClean, + WebRtc_Word16 * const out) +{ + WebRtc_Word16 farBlock[PART_LEN]; + WebRtc_Word16 nearNoisyBlock[PART_LEN]; + WebRtc_Word16 nearCleanBlock[PART_LEN]; + WebRtc_Word16 outBlock[PART_LEN]; + WebRtc_Word16 farFrame[FRAME_LEN]; + int size = 0; + + // Buffer the current frame. + // Fetch an older one corresponding to the delay. + WebRtcAecm_BufferFarFrame(aecm, farend, FRAME_LEN); + WebRtcAecm_FetchFarFrame(aecm, farFrame, FRAME_LEN, aecm->knownDelay); + + // Buffer the synchronized far and near frames, + // to pass the smaller blocks individually. + WebRtcApm_WriteBuffer(aecm->farFrameBuf, farFrame, FRAME_LEN); + WebRtcApm_WriteBuffer(aecm->nearNoisyFrameBuf, nearendNoisy, FRAME_LEN); + if (nearendClean != NULL) + { + WebRtcApm_WriteBuffer(aecm->nearCleanFrameBuf, nearendClean, FRAME_LEN); + } + + // Process as many blocks as possible. 
+ while (WebRtcApm_get_buffer_size(aecm->farFrameBuf) >= PART_LEN) + { + WebRtcApm_ReadBuffer(aecm->farFrameBuf, farBlock, PART_LEN); + WebRtcApm_ReadBuffer(aecm->nearNoisyFrameBuf, nearNoisyBlock, PART_LEN); + if (nearendClean != NULL) + { + WebRtcApm_ReadBuffer(aecm->nearCleanFrameBuf, nearCleanBlock, PART_LEN); + WebRtcAecm_ProcessBlock(aecm, farBlock, nearNoisyBlock, nearCleanBlock, outBlock); + } else + { + WebRtcAecm_ProcessBlock(aecm, farBlock, nearNoisyBlock, NULL, outBlock); + } + + WebRtcApm_WriteBuffer(aecm->outFrameBuf, outBlock, PART_LEN); + } + + // Stuff the out buffer if we have less than a frame to output. + // This should only happen for the first frame. + size = WebRtcApm_get_buffer_size(aecm->outFrameBuf); + if (size < FRAME_LEN) + { + WebRtcApm_StuffBuffer(aecm->outFrameBuf, FRAME_LEN - size); + } + + // Obtain an output frame. + WebRtcApm_ReadBuffer(aecm->outFrameBuf, out, FRAME_LEN); +} + +// WebRtcAecm_AsymFilt(...) +// +// Performs asymmetric filtering. +// +// Inputs: +// - filtOld : Previous filtered value. +// - inVal : New input value. +// - stepSizePos : Step size when we have a positive contribution. +// - stepSizeNeg : Step size when we have a negative contribution. +// +// Output: +// +// Return: - Filtered value. +// +WebRtc_Word16 WebRtcAecm_AsymFilt(const WebRtc_Word16 filtOld, const WebRtc_Word16 inVal, + const WebRtc_Word16 stepSizePos, + const WebRtc_Word16 stepSizeNeg) +{ + WebRtc_Word16 retVal; + + if ((filtOld == WEBRTC_SPL_WORD16_MAX) | (filtOld == WEBRTC_SPL_WORD16_MIN)) + { + return inVal; + } + retVal = filtOld; + if (filtOld > inVal) + { + retVal -= WEBRTC_SPL_RSHIFT_W16(filtOld - inVal, stepSizeNeg); + } else + { + retVal += WEBRTC_SPL_RSHIFT_W16(inVal - filtOld, stepSizePos); + } + + return retVal; +} + +// WebRtcAecm_CalcEnergies(...) +// +// This function calculates the log of energies for nearend, farend and estimated +// echoes. There is also an update of energy decision levels, i.e. internl VAD. 
+// +// +// @param aecm [i/o] Handle of the AECM instance. +// @param delayDiff [in] Delay position in farend buffer. +// @param nearEner [in] Near end energy for current block (Q[aecm->dfaQDomain]). +// @param echoEst [i/o] Estimated echo +// (Q[aecm->xfaQDomain[delayDiff]+RESOLUTION_CHANNEL16]). +// +void WebRtcAecm_CalcEnergies(AecmCore_t * const aecm, const WebRtc_Word16 delayDiff, + const WebRtc_UWord32 nearEner, WebRtc_Word32 * const echoEst) +{ + // Local variables + WebRtc_UWord32 tmpAdapt, tmpStored, tmpFar; + + int i; + + WebRtc_Word16 zeros, frac; + WebRtc_Word16 tmp16; + WebRtc_Word16 increase_max_shifts = 4; + WebRtc_Word16 decrease_max_shifts = 11; + WebRtc_Word16 increase_min_shifts = 11; + WebRtc_Word16 decrease_min_shifts = 3; + + // Get log of near end energy and store in buffer + + // Shift buffer + memmove(aecm->nearLogEnergy + 1, aecm->nearLogEnergy, + sizeof(WebRtc_Word16) * (MAX_BUF_LEN - 1)); + + // Logarithm of integrated magnitude spectrum (nearEner) + if (nearEner) + { + zeros = WebRtcSpl_NormU32(nearEner); + frac = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_U32( + (WEBRTC_SPL_LSHIFT_U32(nearEner, zeros) & 0x7FFFFFFF), + 23); + // log2 in Q8 + aecm->nearLogEnergy[0] = WEBRTC_SPL_LSHIFT_W16((31 - zeros), 8) + frac; + aecm->nearLogEnergy[0] -= WEBRTC_SPL_LSHIFT_W16(aecm->dfaNoisyQDomain, 8); + } else + { + aecm->nearLogEnergy[0] = 0; + } + aecm->nearLogEnergy[0] += WEBRTC_SPL_LSHIFT_W16(PART_LEN_SHIFT, 7); + // END: Get log of near end energy + + // Get energy for the delayed far end signal and estimated + // echo using both stored and adapted channels. 
+ tmpAdapt = 0; + tmpStored = 0; + tmpFar = 0; + + for (i = 0; i < PART_LEN1; i++) + { + // Get estimated echo energies for adaptive channel and stored channel + echoEst[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], + aecm->xfaHistory[i][delayDiff]); + tmpFar += (WebRtc_UWord32)(aecm->xfaHistory[i][delayDiff]); + tmpAdapt += WEBRTC_SPL_UMUL_16_16(aecm->channelAdapt16[i], + aecm->xfaHistory[i][delayDiff]); + tmpStored += (WebRtc_UWord32)echoEst[i]; + } + // Shift buffers + memmove(aecm->farLogEnergy + 1, aecm->farLogEnergy, + sizeof(WebRtc_Word16) * (MAX_BUF_LEN - 1)); + memmove(aecm->echoAdaptLogEnergy + 1, aecm->echoAdaptLogEnergy, + sizeof(WebRtc_Word16) * (MAX_BUF_LEN - 1)); + memmove(aecm->echoStoredLogEnergy + 1, aecm->echoStoredLogEnergy, + sizeof(WebRtc_Word16) * (MAX_BUF_LEN - 1)); + + // Logarithm of delayed far end energy + if (tmpFar) + { + zeros = WebRtcSpl_NormU32(tmpFar); + frac = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_U32((WEBRTC_SPL_LSHIFT_U32(tmpFar, zeros) + & 0x7FFFFFFF), 23); + // log2 in Q8 + aecm->farLogEnergy[0] = WEBRTC_SPL_LSHIFT_W16((31 - zeros), 8) + frac; + aecm->farLogEnergy[0] -= WEBRTC_SPL_LSHIFT_W16(aecm->xfaQDomainBuf[delayDiff], 8); + } else + { + aecm->farLogEnergy[0] = 0; + } + aecm->farLogEnergy[0] += WEBRTC_SPL_LSHIFT_W16(PART_LEN_SHIFT, 7); + + // Logarithm of estimated echo energy through adapted channel + if (tmpAdapt) + { + zeros = WebRtcSpl_NormU32(tmpAdapt); + frac = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_U32((WEBRTC_SPL_LSHIFT_U32(tmpAdapt, zeros) + & 0x7FFFFFFF), 23); + //log2 in Q8 + aecm->echoAdaptLogEnergy[0] = WEBRTC_SPL_LSHIFT_W16((31 - zeros), 8) + frac; + aecm->echoAdaptLogEnergy[0] + -= WEBRTC_SPL_LSHIFT_W16(RESOLUTION_CHANNEL16 + aecm->xfaQDomainBuf[delayDiff], 8); + } else + { + aecm->echoAdaptLogEnergy[0] = 0; + } + aecm->echoAdaptLogEnergy[0] += WEBRTC_SPL_LSHIFT_W16(PART_LEN_SHIFT, 7); + + // Logarithm of estimated echo energy through stored channel + if (tmpStored) + { + zeros = WebRtcSpl_NormU32(tmpStored); + 
frac = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_U32((WEBRTC_SPL_LSHIFT_U32(tmpStored, zeros) + & 0x7FFFFFFF), 23); + //log2 in Q8 + aecm->echoStoredLogEnergy[0] = WEBRTC_SPL_LSHIFT_W16((31 - zeros), 8) + frac; + aecm->echoStoredLogEnergy[0] + -= WEBRTC_SPL_LSHIFT_W16(RESOLUTION_CHANNEL16 + aecm->xfaQDomainBuf[delayDiff], 8); + } else + { + aecm->echoStoredLogEnergy[0] = 0; + } + aecm->echoStoredLogEnergy[0] += WEBRTC_SPL_LSHIFT_W16(PART_LEN_SHIFT, 7); + + // Update farend energy levels (min, max, vad, mse) + if (aecm->farLogEnergy[0] > FAR_ENERGY_MIN) + { + if (aecm->startupState == 0) + { + increase_max_shifts = 2; + decrease_min_shifts = 2; + increase_min_shifts = 8; + } + + aecm->farEnergyMin = WebRtcAecm_AsymFilt(aecm->farEnergyMin, aecm->farLogEnergy[0], + increase_min_shifts, decrease_min_shifts); + aecm->farEnergyMax = WebRtcAecm_AsymFilt(aecm->farEnergyMax, aecm->farLogEnergy[0], + increase_max_shifts, decrease_max_shifts); + aecm->farEnergyMaxMin = (aecm->farEnergyMax - aecm->farEnergyMin); + + // Dynamic VAD region size + tmp16 = 2560 - aecm->farEnergyMin; + if (tmp16 > 0) + { + tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16, FAR_ENERGY_VAD_REGION, 9); + } else + { + tmp16 = 0; + } + tmp16 += FAR_ENERGY_VAD_REGION; + + if ((aecm->startupState == 0) | (aecm->vadUpdateCount > 1024)) + { + // In startup phase or VAD update halted + aecm->farEnergyVAD = aecm->farEnergyMin + tmp16; + } else + { + if (aecm->farEnergyVAD > aecm->farLogEnergy[0]) + { + aecm->farEnergyVAD += WEBRTC_SPL_RSHIFT_W16(aecm->farLogEnergy[0] + tmp16 + - aecm->farEnergyVAD, 6); + aecm->vadUpdateCount = 0; + } else + { + aecm->vadUpdateCount++; + } + } + // Put MSE threshold higher than VAD + aecm->farEnergyMSE = aecm->farEnergyVAD + (1 << 8); + } + + // Update VAD variables + if (aecm->farLogEnergy[0] > aecm->farEnergyVAD) + { + if ((aecm->startupState == 0) | (aecm->farEnergyMaxMin > FAR_ENERGY_DIFF)) + { + // We are in startup or have significant dynamics in input speech level + 
aecm->currentVADValue = 1; + } + } else + { + aecm->currentVADValue = 0; + } + if ((aecm->currentVADValue) && (aecm->firstVAD)) + { + aecm->firstVAD = 0; + if (aecm->echoAdaptLogEnergy[0] > aecm->nearLogEnergy[0]) + { + // The estimated echo has higher energy than the near end signal. This means that + // the initialization was too aggressive. Scale down by a factor 8 + for (i = 0; i < PART_LEN1; i++) + { + aecm->channelAdapt16[i] >>= 3; + } + // Compensate the adapted echo energy level accordingly. + aecm->echoAdaptLogEnergy[0] -= (3 << 8); + aecm->firstVAD = 1; + } + } + // END: Energies of delayed far, echo estimates + // TODO(bjornv): Will be removed in final version. +#ifdef VAD_DATA + fwrite(&(aecm->currentVADValue), sizeof(WebRtc_Word16), 1, aecm->vad_file); + fwrite(&(aecm->currentDelay), sizeof(WebRtc_Word16), 1, aecm->delay_file); + fwrite(&(aecm->farLogEnergy[0]), sizeof(WebRtc_Word16), 1, aecm->far_cur_file); + fwrite(&(aecm->farEnergyMin), sizeof(WebRtc_Word16), 1, aecm->far_min_file); + fwrite(&(aecm->farEnergyMax), sizeof(WebRtc_Word16), 1, aecm->far_max_file); + fwrite(&(aecm->farEnergyVAD), sizeof(WebRtc_Word16), 1, aecm->far_vad_file); +#endif +} + +// WebRtcAecm_CalcStepSize(...) +// +// This function calculates the step size used in channel estimation +// +// +// @param aecm [in] Handle of the AECM instance. +// @param mu [out] (Return value) Stepsize in log2(), i.e. number of shifts. 
+// +// +WebRtc_Word16 WebRtcAecm_CalcStepSize(AecmCore_t * const aecm) +{ + + WebRtc_Word32 tmp32; + WebRtc_Word16 tmp16; + WebRtc_Word16 mu; + + // Here we calculate the step size mu used in the + // following NLMS based Channel estimation algorithm + mu = MU_MAX; + if (!aecm->currentVADValue) + { + // Far end energy level too low, no channel update + mu = 0; + } else if (aecm->startupState > 0) + { + if (aecm->farEnergyMin >= aecm->farEnergyMax) + { + mu = MU_MIN; + } else + { + tmp16 = (aecm->farLogEnergy[0] - aecm->farEnergyMin); + tmp32 = WEBRTC_SPL_MUL_16_16(tmp16, MU_DIFF); + tmp32 = WebRtcSpl_DivW32W16(tmp32, aecm->farEnergyMaxMin); + mu = MU_MIN - 1 - (WebRtc_Word16)(tmp32); + // The -1 is an alternative to rounding. This way we get a larger + // stepsize, so we in some sense compensate for truncation in NLMS + } + if (mu < MU_MAX) + { + mu = MU_MAX; // Equivalent with maximum step size of 2^-MU_MAX + } + } + // END: Update step size + + return mu; +} + +// WebRtcAecm_UpdateChannel(...) +// +// This function performs channel estimation. NLMS and decision on channel storage. +// +// +// @param aecm [i/o] Handle of the AECM instance. +// @param dfa [in] Absolute value of the nearend signal (Q[aecm->dfaQDomain]) +// @param delayDiff [in] Delay position in farend buffer. +// @param mu [in] NLMS step size. +// @param echoEst [i/o] Estimated echo +// (Q[aecm->xfaQDomain[delayDiff]+RESOLUTION_CHANNEL16]). +// +void WebRtcAecm_UpdateChannel(AecmCore_t * const aecm, const WebRtc_UWord16 * const dfa, + const WebRtc_Word16 delayDiff, const WebRtc_Word16 mu, + WebRtc_Word32 * const echoEst) +{ + + WebRtc_UWord32 tmpU32no1, tmpU32no2; + WebRtc_Word32 tmp32no1, tmp32no2; + WebRtc_Word32 mseStored; + WebRtc_Word32 mseAdapt; + + int i; + + WebRtc_Word16 zerosFar, zerosNum, zerosCh, zerosDfa; + WebRtc_Word16 shiftChFar, shiftNum, shift2ResChan; + WebRtc_Word16 tmp16no1; + WebRtc_Word16 xfaQ, dfaQ; + + // This is the channel estimation algorithm. 
It is base on NLMS but has a variable step + // length, which was calculated above. + if (mu) + { + for (i = 0; i < PART_LEN1; i++) + { + // Determine norm of channel and farend to make sure we don't get overflow in + // multiplication + zerosCh = WebRtcSpl_NormU32(aecm->channelAdapt32[i]); + zerosFar = WebRtcSpl_NormU32((WebRtc_UWord32)aecm->xfaHistory[i][delayDiff]); + if (zerosCh + zerosFar > 31) + { + // Multiplication is safe + tmpU32no1 = WEBRTC_SPL_UMUL_32_16(aecm->channelAdapt32[i], + aecm->xfaHistory[i][delayDiff]); + shiftChFar = 0; + } else + { + // We need to shift down before multiplication + shiftChFar = 32 - zerosCh - zerosFar; + tmpU32no1 + = WEBRTC_SPL_UMUL_32_16(WEBRTC_SPL_RSHIFT_W32(aecm->channelAdapt32[i], + shiftChFar), + aecm->xfaHistory[i][delayDiff]); + } + // Determine Q-domain of numerator + zerosNum = WebRtcSpl_NormU32(tmpU32no1); + if (dfa[i]) + { + zerosDfa = WebRtcSpl_NormU32((WebRtc_UWord32)dfa[i]); + } else + { + zerosDfa = 32; + } + tmp16no1 = zerosDfa - 2 + aecm->dfaNoisyQDomain - RESOLUTION_CHANNEL32 + - aecm->xfaQDomainBuf[delayDiff] + shiftChFar; + if (zerosNum > tmp16no1 + 1) + { + xfaQ = tmp16no1; + dfaQ = zerosDfa - 2; + } else + { + xfaQ = zerosNum - 2; + dfaQ = RESOLUTION_CHANNEL32 + aecm->xfaQDomainBuf[delayDiff] + - aecm->dfaNoisyQDomain - shiftChFar + xfaQ; + } + // Add in the same Q-domain + tmpU32no1 = WEBRTC_SPL_SHIFT_W32(tmpU32no1, xfaQ); + tmpU32no2 = WEBRTC_SPL_SHIFT_W32((WebRtc_UWord32)dfa[i], dfaQ); + tmp32no1 = (WebRtc_Word32)tmpU32no2 - (WebRtc_Word32)tmpU32no1; + zerosNum = WebRtcSpl_NormW32(tmp32no1); + if ((tmp32no1) && (aecm->xfaHistory[i][delayDiff] > (CHANNEL_VAD + << aecm->xfaQDomainBuf[delayDiff]))) + { + // + // Update is needed + // + // This is what we would like to compute + // + // tmp32no1 = dfa[i] - (aecm->channelAdapt[i] * aecm->xfaHistory[i][delayDiff]) + // tmp32norm = (i + 1) + // aecm->channelAdapt[i] += (2^mu) * tmp32no1 + // / (tmp32norm * aecm->xfaHistory[i][delayDiff]) + // + + // Make 
sure we don't get overflow in multiplication. + if (zerosNum + zerosFar > 31) + { + if (tmp32no1 > 0) + { + tmp32no2 = (WebRtc_Word32)WEBRTC_SPL_UMUL_32_16(tmp32no1, + aecm->xfaHistory[i][delayDiff]); + } else + { + tmp32no2 = -(WebRtc_Word32)WEBRTC_SPL_UMUL_32_16(-tmp32no1, + aecm->xfaHistory[i][delayDiff]); + } + shiftNum = 0; + } else + { + shiftNum = 32 - (zerosNum + zerosFar); + if (tmp32no1 > 0) + { + tmp32no2 = (WebRtc_Word32)WEBRTC_SPL_UMUL_32_16( + WEBRTC_SPL_RSHIFT_W32(tmp32no1, shiftNum), + aecm->xfaHistory[i][delayDiff]); + } else + { + tmp32no2 = -(WebRtc_Word32)WEBRTC_SPL_UMUL_32_16( + WEBRTC_SPL_RSHIFT_W32(-tmp32no1, shiftNum), + aecm->xfaHistory[i][delayDiff]); + } + } + // Normalize with respect to frequency bin + tmp32no2 = WebRtcSpl_DivW32W16(tmp32no2, i + 1); + // Make sure we are in the right Q-domain + shift2ResChan = shiftNum + shiftChFar - xfaQ - mu - ((30 - zerosFar) << 1); + if (WebRtcSpl_NormW32(tmp32no2) < shift2ResChan) + { + tmp32no2 = WEBRTC_SPL_WORD32_MAX; + } else + { + tmp32no2 = WEBRTC_SPL_SHIFT_W32(tmp32no2, shift2ResChan); + } + aecm->channelAdapt32[i] = WEBRTC_SPL_ADD_SAT_W32(aecm->channelAdapt32[i], + tmp32no2); + if (aecm->channelAdapt32[i] < 0) + { + // We can never have negative channel gain + aecm->channelAdapt32[i] = 0; + } + aecm->channelAdapt16[i] + = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(aecm->channelAdapt32[i], 16); + } + } + } + // END: Adaptive channel update + + // Determine if we should store or restore the channel + if ((aecm->startupState == 0) & (aecm->currentVADValue)) + { + // During startup we store the channel every block. + memcpy(aecm->channelStored, aecm->channelAdapt16, sizeof(WebRtc_Word16) * PART_LEN1); + // TODO(bjornv): Will be removed in final version. 
+#ifdef STORE_CHANNEL_DATA + fwrite(aecm->channelStored, sizeof(WebRtc_Word16), PART_LEN1, aecm->channel_file_init); +#endif + // Recalculate echo estimate +#if (!defined ARM_WINM) && (!defined ARM9E_GCC) && (!defined ANDROID_AECOPT) + for (i = 0; i < PART_LEN1; i++) + { + echoEst[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], + aecm->xfaHistory[i][delayDiff]); + } +#else + for (i = 0; i < PART_LEN; ) //assume PART_LEN is 4's multiples + + { + echoEst[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], + aecm->xfaHistory[i][delayDiff]); + i++; + echoEst[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], + aecm->xfaHistory[i][delayDiff]); + i++; + echoEst[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], + aecm->xfaHistory[i][delayDiff]); + i++; + echoEst[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], + aecm->xfaHistory[i][delayDiff]); + i++; + } + echoEst[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], + aecm->xfaHistory[i][delayDiff]); +#endif + } else + { + if (aecm->farLogEnergy[0] < aecm->farEnergyMSE) + { + aecm->mseChannelCount = 0; + aecm->delayCount = 0; + } else + { + aecm->mseChannelCount++; + aecm->delayCount++; + } + // Enough data for validation. Store channel if we can. + if (aecm->mseChannelCount >= (MIN_MSE_COUNT + 10)) + { + // We have enough data. + // Calculate MSE of "Adapt" and "Stored" versions. + // It is actually not MSE, but average absolute error. 
+ mseStored = 0; + mseAdapt = 0; + for (i = 0; i < MIN_MSE_COUNT; i++) + { + tmp32no1 = ((WebRtc_Word32)aecm->echoStoredLogEnergy[i] + - (WebRtc_Word32)aecm->nearLogEnergy[i]); + tmp32no2 = WEBRTC_SPL_ABS_W32(tmp32no1); + mseStored += tmp32no2; + + tmp32no1 = ((WebRtc_Word32)aecm->echoAdaptLogEnergy[i] + - (WebRtc_Word32)aecm->nearLogEnergy[i]); + tmp32no2 = WEBRTC_SPL_ABS_W32(tmp32no1); + mseAdapt += tmp32no2; + } + if (((mseStored << MSE_RESOLUTION) < (MIN_MSE_DIFF * mseAdapt)) + & ((aecm->mseStoredOld << MSE_RESOLUTION) < (MIN_MSE_DIFF + * aecm->mseAdaptOld))) + { + // The stored channel has a significantly lower MSE than the adaptive one for + // two consecutive calculations. Reset the adaptive channel. + memcpy(aecm->channelAdapt16, aecm->channelStored, + sizeof(WebRtc_Word16) * PART_LEN1); + // Restore the W32 channel +#if (!defined ARM_WINM) && (!defined ARM9E_GCC) && (!defined ANDROID_AECOPT) + for (i = 0; i < PART_LEN1; i++) + { + aecm->channelAdapt32[i] + = WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)aecm->channelStored[i], 16); + } +#else + for (i = 0; i < PART_LEN; ) //assume PART_LEN is 4's multiples + + { + aecm->channelAdapt32[i] = WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)aecm->channelStored[i], 16); + i++; + aecm->channelAdapt32[i] = WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)aecm->channelStored[i], 16); + i++; + aecm->channelAdapt32[i] = WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)aecm->channelStored[i], 16); + i++; + aecm->channelAdapt32[i] = WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)aecm->channelStored[i], 16); + i++; + } + aecm->channelAdapt32[i] = WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)aecm->channelStored[i], 16); +#endif + + } else if (((MIN_MSE_DIFF * mseStored) > (mseAdapt << MSE_RESOLUTION)) & (mseAdapt + < aecm->mseThreshold) & (aecm->mseAdaptOld < aecm->mseThreshold)) + { + // The adaptive channel has a significantly lower MSE than the stored one. + // The MSE for the adaptive channel has also been low for two consecutive + // calculations. Store the adaptive channel. 
+ memcpy(aecm->channelStored, aecm->channelAdapt16, + sizeof(WebRtc_Word16) * PART_LEN1); + // TODO(bjornv): Will be removed in final version. +#ifdef STORE_CHANNEL_DATA + fwrite(aecm->channelStored, sizeof(WebRtc_Word16), PART_LEN1, + aecm->channel_file); +#endif +// Recalculate echo estimate +#if (!defined ARM_WINM) && (!defined ARM9E_GCC) && (!defined ANDROID_AECOPT) + for (i = 0; i < PART_LEN1; i++) + { + echoEst[i] + = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], aecm->xfaHistory[i][delayDiff]); + } +#else + for (i = 0; i < PART_LEN; ) //assume PART_LEN is 4's multiples + + { + echoEst[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], aecm->xfaHistory[i][delayDiff]); + i++; + echoEst[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], aecm->xfaHistory[i][delayDiff]); + i++; + echoEst[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], aecm->xfaHistory[i][delayDiff]); + i++; + echoEst[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], aecm->xfaHistory[i][delayDiff]); + i++; + } + echoEst[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], aecm->xfaHistory[i][delayDiff]); +#endif + // Update threshold + if (aecm->mseThreshold == WEBRTC_SPL_WORD32_MAX) + { + aecm->mseThreshold = (mseAdapt + aecm->mseAdaptOld); + } else + { + aecm->mseThreshold += WEBRTC_SPL_MUL_16_16_RSFT(mseAdapt + - WEBRTC_SPL_MUL_16_16_RSFT(aecm->mseThreshold, 5, 3), 205, 8); + } + + } + + // Reset counter + aecm->mseChannelCount = 0; + + // Store the MSE values. + aecm->mseStoredOld = mseStored; + aecm->mseAdaptOld = mseAdapt; + } + } + // END: Determine if we should store or reset channel estimate. +} + +// WebRtcAecm_CalcSuppressionGain(...) +// +// This function calculates the suppression gain that is used in the Wiener filter. +// +// +// @param aecm [i/n] Handle of the AECM instance. +// @param supGain [out] (Return value) Suppression gain with which to scale the noise +// level (Q14). 
+// +// +WebRtc_Word16 WebRtcAecm_CalcSuppressionGain(AecmCore_t * const aecm) +{ + WebRtc_Word32 tmp32no1; + + WebRtc_Word16 supGain; + WebRtc_Word16 tmp16no1; + WebRtc_Word16 dE = 0; + + // Determine suppression gain used in the Wiener filter. The gain is based on a mix of far + // end energy and echo estimation error. + supGain = SUPGAIN_DEFAULT; + // Adjust for the far end signal level. A low signal level indicates no far end signal, + // hence we set the suppression gain to 0 + if (!aecm->currentVADValue) + { + supGain = 0; + } else + { + // Adjust for possible double talk. If we have large variations in estimation error we + // likely have double talk (or poor channel). + tmp16no1 = (aecm->nearLogEnergy[0] - aecm->echoStoredLogEnergy[0] - ENERGY_DEV_OFFSET); + dE = WEBRTC_SPL_ABS_W16(tmp16no1); + + if (dE < ENERGY_DEV_TOL) + { + // Likely no double talk. The better estimation, the more we can suppress signal. + // Update counters + if (dE < SUPGAIN_EPC_DT) + { + tmp32no1 = WEBRTC_SPL_MUL_16_16(aecm->supGainErrParamDiffAB, dE); + tmp32no1 += (SUPGAIN_EPC_DT >> 1); + tmp16no1 = (WebRtc_Word16)WebRtcSpl_DivW32W16(tmp32no1, SUPGAIN_EPC_DT); + supGain = aecm->supGainErrParamA - tmp16no1; + } else + { + tmp32no1 = WEBRTC_SPL_MUL_16_16(aecm->supGainErrParamDiffBD, + (ENERGY_DEV_TOL - dE)); + tmp32no1 += ((ENERGY_DEV_TOL - SUPGAIN_EPC_DT) >> 1); + tmp16no1 = (WebRtc_Word16)WebRtcSpl_DivW32W16(tmp32no1, (ENERGY_DEV_TOL + - SUPGAIN_EPC_DT)); + supGain = aecm->supGainErrParamD + tmp16no1; + } + } else + { + // Likely in double talk. 
Use default value + supGain = aecm->supGainErrParamD; + } + } + + if (supGain > aecm->supGainOld) + { + tmp16no1 = supGain; + } else + { + tmp16no1 = aecm->supGainOld; + } + aecm->supGainOld = supGain; + if (tmp16no1 < aecm->supGain) + { + aecm->supGain += (WebRtc_Word16)((tmp16no1 - aecm->supGain) >> 4); + } else + { + aecm->supGain += (WebRtc_Word16)((tmp16no1 - aecm->supGain) >> 4); + } + + // END: Update suppression gain + + return aecm->supGain; +} + +// WebRtcAecm_DelayCompensation(...) +// +// Secondary delay estimation that can be used as a backup or for validation. This function is +// still under construction and not activated in current version. +// +// +// @param aecm [i/o] Handle of the AECM instance. +// +// +void WebRtcAecm_DelayCompensation(AecmCore_t * const aecm) +{ + int i, j; + WebRtc_Word32 delayMeanEcho[CORR_BUF_LEN]; + WebRtc_Word32 delayMeanNear[CORR_BUF_LEN]; + WebRtc_Word16 sumBitPattern, bitPatternEcho, bitPatternNear, maxPos, maxValue, + maxValueLeft, maxValueRight; + + // Check delay (calculate the delay offset (if we can)). 
+ if ((aecm->startupState > 0) & (aecm->delayCount >= CORR_MAX_BUF) & aecm->delayOffsetFlag) + { + // Calculate mean values + for (i = 0; i < CORR_BUF_LEN; i++) + { + delayMeanEcho[i] = 0; + delayMeanNear[i] = 0; +#if (!defined ARM_WINM) && (!defined ARM9E_GCC) && (!defined ANDROID_AECOPT) + for (j = 0; j < CORR_WIDTH; j++) + { + delayMeanEcho[i] += (WebRtc_Word32)aecm->echoStoredLogEnergy[i + j]; + delayMeanNear[i] += (WebRtc_Word32)aecm->nearLogEnergy[i + j]; + } +#else + for (j = 0; j < CORR_WIDTH -1; ) + { + delayMeanEcho[i] += (WebRtc_Word32)aecm->echoStoredLogEnergy[i + j]; + delayMeanNear[i] += (WebRtc_Word32)aecm->nearLogEnergy[i + j]; + j++; + delayMeanEcho[i] += (WebRtc_Word32)aecm->echoStoredLogEnergy[i + j]; + delayMeanNear[i] += (WebRtc_Word32)aecm->nearLogEnergy[i + j]; + j++; + } + delayMeanEcho[i] += (WebRtc_Word32)aecm->echoStoredLogEnergy[i + j]; + delayMeanNear[i] += (WebRtc_Word32)aecm->nearLogEnergy[i + j]; +#endif + } + // Calculate correlation values + for (i = 0; i < CORR_BUF_LEN; i++) + { + sumBitPattern = 0; +#if (!defined ARM_WINM) && (!defined ARM9E_GCC) && (!defined ANDROID_AECOPT) + for (j = 0; j < CORR_WIDTH; j++) + { + bitPatternEcho = (WebRtc_Word16)((WebRtc_Word32)aecm->echoStoredLogEnergy[i + + j] * CORR_WIDTH > delayMeanEcho[i]); + bitPatternNear = (WebRtc_Word16)((WebRtc_Word32)aecm->nearLogEnergy[CORR_MAX + + j] * CORR_WIDTH > delayMeanNear[CORR_MAX]); + sumBitPattern += !(bitPatternEcho ^ bitPatternNear); + } +#else + for (j = 0; j < CORR_WIDTH -1; ) + { + bitPatternEcho = (WebRtc_Word16)((WebRtc_Word32)aecm->echoStoredLogEnergy[i + + j] * CORR_WIDTH > delayMeanEcho[i]); + bitPatternNear = (WebRtc_Word16)((WebRtc_Word32)aecm->nearLogEnergy[CORR_MAX + + j] * CORR_WIDTH > delayMeanNear[CORR_MAX]); + sumBitPattern += !(bitPatternEcho ^ bitPatternNear); + j++; + bitPatternEcho = (WebRtc_Word16)((WebRtc_Word32)aecm->echoStoredLogEnergy[i + + j] * CORR_WIDTH > delayMeanEcho[i]); + bitPatternNear = 
(WebRtc_Word16)((WebRtc_Word32)aecm->nearLogEnergy[CORR_MAX + + j] * CORR_WIDTH > delayMeanNear[CORR_MAX]); + sumBitPattern += !(bitPatternEcho ^ bitPatternNear); + j++; + } + bitPatternEcho = (WebRtc_Word16)((WebRtc_Word32)aecm->echoStoredLogEnergy[i + j] + * CORR_WIDTH > delayMeanEcho[i]); + bitPatternNear = (WebRtc_Word16)((WebRtc_Word32)aecm->nearLogEnergy[CORR_MAX + j] + * CORR_WIDTH > delayMeanNear[CORR_MAX]); + sumBitPattern += !(bitPatternEcho ^ bitPatternNear); +#endif + aecm->delayCorrelation[i] = sumBitPattern; + } + aecm->newDelayCorrData = 1; // Indicate we have new correlation data to evaluate + } + if ((aecm->startupState == 2) & (aecm->lastDelayUpdateCount > (CORR_WIDTH << 1)) + & aecm->newDelayCorrData) + { + // Find maximum value and maximum position as well as values on the sides. + maxPos = 0; + maxValue = aecm->delayCorrelation[0]; + maxValueLeft = maxValue; + maxValueRight = aecm->delayCorrelation[CORR_DEV]; + for (i = 1; i < CORR_BUF_LEN; i++) + { + if (aecm->delayCorrelation[i] > maxValue) + { + maxValue = aecm->delayCorrelation[i]; + maxPos = i; + if (maxPos < CORR_DEV) + { + maxValueLeft = aecm->delayCorrelation[0]; + maxValueRight = aecm->delayCorrelation[i + CORR_DEV]; + } else if (maxPos > (CORR_MAX << 1) - CORR_DEV) + { + maxValueLeft = aecm->delayCorrelation[i - CORR_DEV]; + maxValueRight = aecm->delayCorrelation[(CORR_MAX << 1)]; + } else + { + maxValueLeft = aecm->delayCorrelation[i - CORR_DEV]; + maxValueRight = aecm->delayCorrelation[i + CORR_DEV]; + } + } + } + if ((maxPos > 0) & (maxPos < (CORR_MAX << 1))) + { + // Avoid maximum at boundaries. The maximum peak has to be higher than + // CORR_MAX_LEVEL. It also has to be sharp, i.e. the value CORR_DEV bins off should + // be CORR_MAX_LOW lower than the maximum. 
+ if ((maxValue > CORR_MAX_LEVEL) & (maxValueLeft < maxValue - CORR_MAX_LOW) + & (maxValueRight < maxValue - CORR_MAX_LOW)) + { + aecm->delayAdjust += CORR_MAX - maxPos; + aecm->newDelayCorrData = 0; + aecm->lastDelayUpdateCount = 0; + } + } + } + // END: "Check delay" +} + +void WebRtcAecm_ProcessBlock(AecmCore_t * const aecm, const WebRtc_Word16 * const farend, + const WebRtc_Word16 * const nearendNoisy, + const WebRtc_Word16 * const nearendClean, + WebRtc_Word16 * const output) +{ + int i, j; + + WebRtc_UWord32 xfaSum; + WebRtc_UWord32 dfaNoisySum; + WebRtc_UWord32 echoEst32Gained; + WebRtc_UWord32 tmpU32; + + WebRtc_Word32 tmp32no1; + WebRtc_Word32 tmp32no2; + WebRtc_Word32 echoEst32[PART_LEN1]; + + WebRtc_UWord16 xfa[PART_LEN1]; + WebRtc_UWord16 dfaNoisy[PART_LEN1]; + WebRtc_UWord16 dfaClean[PART_LEN1]; + WebRtc_UWord16* ptrDfaClean = dfaClean; + + int outCFFT; + + WebRtc_Word16 fft[PART_LEN4]; +#if (defined ARM_WINM) || (defined ARM9E_GCC) || (defined ANDROID_AECOPT) + WebRtc_Word16 postFft[PART_LEN4]; +#else + WebRtc_Word16 postFft[PART_LEN2]; +#endif + WebRtc_Word16 dfwReal[PART_LEN1]; + WebRtc_Word16 dfwImag[PART_LEN1]; + WebRtc_Word16 xfwReal[PART_LEN1]; + WebRtc_Word16 xfwImag[PART_LEN1]; + WebRtc_Word16 efwReal[PART_LEN1]; + WebRtc_Word16 efwImag[PART_LEN1]; + WebRtc_Word16 hnl[PART_LEN1]; + WebRtc_Word16 numPosCoef; + WebRtc_Word16 nlpGain; + WebRtc_Word16 delay, diff, diffMinusOne; + WebRtc_Word16 tmp16no1; + WebRtc_Word16 tmp16no2; +#ifdef AECM_WITH_ABS_APPROX + WebRtc_Word16 maxValue; + WebRtc_Word16 minValue; +#endif + WebRtc_Word16 mu; + WebRtc_Word16 supGain; + WebRtc_Word16 zeros32, zeros16; + WebRtc_Word16 zerosDBufNoisy, zerosDBufClean, zerosXBuf; + WebRtc_Word16 resolutionDiff, qDomainDiff; + +#ifdef ARM_WINM_LOG_ + DWORD temp; + static int flag0 = 0; + __int64 freq, start, end, diff__; + unsigned int milliseconds; +#endif + +#ifdef AECM_WITH_ABS_APPROX + WebRtc_UWord16 alpha, beta; +#endif + + // Determine startup state. 
There are three states: + // (0) the first CONV_LEN blocks + // (1) another CONV_LEN blocks + // (2) the rest + + if (aecm->startupState < 2) + { + aecm->startupState = (aecm->totCount >= CONV_LEN) + (aecm->totCount >= CONV_LEN2); + } + // END: Determine startup state + + // Buffer near and far end signals + memcpy(aecm->xBuf + PART_LEN, farend, sizeof(WebRtc_Word16) * PART_LEN); + memcpy(aecm->dBufNoisy + PART_LEN, nearendNoisy, sizeof(WebRtc_Word16) * PART_LEN); + if (nearendClean != NULL) + { + memcpy(aecm->dBufClean + PART_LEN, nearendClean, sizeof(WebRtc_Word16) * PART_LEN); + } + // TODO(bjornv): Will be removed in final version. +#ifdef VAD_DATA + fwrite(aecm->xBuf, sizeof(WebRtc_Word16), PART_LEN, aecm->far_file); +#endif + +#ifdef AECM_DYNAMIC_Q + tmp16no1 = WebRtcSpl_MaxAbsValueW16(aecm->dBufNoisy, PART_LEN2); + tmp16no2 = WebRtcSpl_MaxAbsValueW16(aecm->xBuf, PART_LEN2); + zerosDBufNoisy = WebRtcSpl_NormW16(tmp16no1); + zerosXBuf = WebRtcSpl_NormW16(tmp16no2); +#else + zerosDBufNoisy = 0; + zerosXBuf = 0; +#endif + aecm->dfaNoisyQDomainOld = aecm->dfaNoisyQDomain; + aecm->dfaNoisyQDomain = zerosDBufNoisy; + + if (nearendClean != NULL) + { +#ifdef AECM_DYNAMIC_Q + tmp16no1 = WebRtcSpl_MaxAbsValueW16(aecm->dBufClean, PART_LEN2); + zerosDBufClean = WebRtcSpl_NormW16(tmp16no1); +#else + zerosDBufClean = 0; +#endif + aecm->dfaCleanQDomainOld = aecm->dfaCleanQDomain; + aecm->dfaCleanQDomain = zerosDBufClean; + } else + { + zerosDBufClean = zerosDBufNoisy; + aecm->dfaCleanQDomainOld = aecm->dfaNoisyQDomainOld; + aecm->dfaCleanQDomain = aecm->dfaNoisyQDomain; + } + +#ifdef ARM_WINM_LOG_ + // measure tick start + QueryPerformanceFrequency((LARGE_INTEGER*)&freq); + QueryPerformanceCounter((LARGE_INTEGER*)&start); +#endif + + // FFT of noisy near end signal + for (i = 0; i < PART_LEN; i++) + { + j = WEBRTC_SPL_LSHIFT_W32(i, 1); + // Window near end + fft[j] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT((aecm->dBufNoisy[i] + << zerosDBufNoisy), kSqrtHanning[i], 14); + 
fft[PART_LEN2 + j] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT( + (aecm->dBufNoisy[PART_LEN + i] << zerosDBufNoisy), + kSqrtHanning[PART_LEN - i], 14); + // Inserting zeros in imaginary parts + fft[j + 1] = 0; + fft[PART_LEN2 + j + 1] = 0; + } + + // Fourier transformation of near end signal. + // The result is scaled with 1/PART_LEN2, that is, the result is in Q(-6) for PART_LEN = 32 + +#if (defined ARM_WINM) || (defined ARM9E_GCC) || (defined ANDROID_AECOPT) + outCFFT = WebRtcSpl_ComplexFFT2(fft, postFft, PART_LEN_SHIFT, 1); + + // The imaginary part has to switch sign + for(i = 1; i < PART_LEN2-1;) + { + postFft[i] = -postFft[i]; + i += 2; + postFft[i] = -postFft[i]; + i += 2; + } +#else + WebRtcSpl_ComplexBitReverse(fft, PART_LEN_SHIFT); + outCFFT = WebRtcSpl_ComplexFFT(fft, PART_LEN_SHIFT, 1); + + // Take only the first PART_LEN2 samples + for (i = 0; i < PART_LEN2; i++) + { + postFft[i] = fft[i]; + } + // The imaginary part has to switch sign + for (i = 1; i < PART_LEN2;) + { + postFft[i] = -postFft[i]; + i += 2; + } +#endif + + // Extract imaginary and real part, calculate the magnitude for all frequency bins + dfwImag[0] = 0; + dfwImag[PART_LEN] = 0; + dfwReal[0] = postFft[0]; +#if (defined ARM_WINM) || (defined ARM9E_GCC) || (defined ANDROID_AECOPT) + dfwReal[PART_LEN] = postFft[PART_LEN2]; +#else + dfwReal[PART_LEN] = fft[PART_LEN2]; +#endif + dfaNoisy[0] = (WebRtc_UWord16)WEBRTC_SPL_ABS_W16(dfwReal[0]); + dfaNoisy[PART_LEN] = (WebRtc_UWord16)WEBRTC_SPL_ABS_W16(dfwReal[PART_LEN]); + dfaNoisySum = (WebRtc_UWord32)(dfaNoisy[0]); + dfaNoisySum += (WebRtc_UWord32)(dfaNoisy[PART_LEN]); + + for (i = 1; i < PART_LEN; i++) + { + j = WEBRTC_SPL_LSHIFT_W32(i, 1); + dfwReal[i] = postFft[j]; + dfwImag[i] = postFft[j + 1]; + + if (dfwReal[i] == 0 || dfwImag[i] == 0) + { + dfaNoisy[i] = (WebRtc_UWord16)WEBRTC_SPL_ABS_W16(dfwReal[i] + dfwImag[i]); + } else + { + // Approximation for magnitude of complex fft output + // magn = sqrt(real^2 + imag^2) + // magn ~= alpha * 
max(|imag|,|real|) + beta * min(|imag|,|real|) + // + // The parameters alpha and beta are stored in Q15 + + tmp16no1 = WEBRTC_SPL_ABS_W16(postFft[j]); + tmp16no2 = WEBRTC_SPL_ABS_W16(postFft[j + 1]); + +#ifdef AECM_WITH_ABS_APPROX + if(tmp16no1 > tmp16no2) + { + maxValue = tmp16no1; + minValue = tmp16no2; + } else + { + maxValue = tmp16no2; + minValue = tmp16no1; + } + + // Magnitude in Q-6 + if ((maxValue >> 2) > minValue) + { + alpha = kAlpha1; + beta = kBeta1; + } else if ((maxValue >> 1) > minValue) + { + alpha = kAlpha2; + beta = kBeta2; + } else + { + alpha = kAlpha3; + beta = kBeta3; + } + tmp16no1 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(maxValue, alpha, 15); + tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(minValue, beta, 15); + dfaNoisy[i] = (WebRtc_UWord16)tmp16no1 + (WebRtc_UWord16)tmp16no2; +#else + tmp32no1 = WEBRTC_SPL_MUL_16_16(tmp16no1, tmp16no1); + tmp32no2 = WEBRTC_SPL_MUL_16_16(tmp16no2, tmp16no2); + tmp32no2 = WEBRTC_SPL_ADD_SAT_W32(tmp32no1, tmp32no2); + tmp32no1 = WebRtcSpl_Sqrt(tmp32no2); + dfaNoisy[i] = (WebRtc_UWord16)tmp32no1; +#endif + } + dfaNoisySum += (WebRtc_UWord32)dfaNoisy[i]; + } + // END: FFT of noisy near end signal + + if (nearendClean == NULL) + { + ptrDfaClean = dfaNoisy; + } else + { + // FFT of clean near end signal + for (i = 0; i < PART_LEN; i++) + { + j = WEBRTC_SPL_LSHIFT_W32(i, 1); + // Window near end + fft[j] + = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT((aecm->dBufClean[i] << zerosDBufClean), kSqrtHanning[i], 14); + fft[PART_LEN2 + j] + = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT((aecm->dBufClean[PART_LEN + i] << zerosDBufClean), kSqrtHanning[PART_LEN - i], 14); + // Inserting zeros in imaginary parts + fft[j + 1] = 0; + fft[PART_LEN2 + j + 1] = 0; + } + + // Fourier transformation of near end signal. 
+ // The result is scaled with 1/PART_LEN2, that is, in Q(-6) for PART_LEN = 32 + +#if (defined ARM_WINM) || (defined ARM9E_GCC) || (defined ANDROID_AECOPT) + outCFFT = WebRtcSpl_ComplexFFT2(fft, postFft, PART_LEN_SHIFT, 1); + + // The imaginary part has to switch sign + for(i = 1; i < PART_LEN2-1;) + { + postFft[i] = -postFft[i]; + i += 2; + postFft[i] = -postFft[i]; + i += 2; + } +#else + WebRtcSpl_ComplexBitReverse(fft, PART_LEN_SHIFT); + outCFFT = WebRtcSpl_ComplexFFT(fft, PART_LEN_SHIFT, 1); + + // Take only the first PART_LEN2 samples + for (i = 0; i < PART_LEN2; i++) + { + postFft[i] = fft[i]; + } + // The imaginary part has to switch sign + for (i = 1; i < PART_LEN2;) + { + postFft[i] = -postFft[i]; + i += 2; + } +#endif + + // Extract imaginary and real part, calculate the magnitude for all frequency bins + dfwImag[0] = 0; + dfwImag[PART_LEN] = 0; + dfwReal[0] = postFft[0]; +#if (defined ARM_WINM) || (defined ARM9E_GCC) || (defined ANDROID_AECOPT) + dfwReal[PART_LEN] = postFft[PART_LEN2]; +#else + dfwReal[PART_LEN] = fft[PART_LEN2]; +#endif + dfaClean[0] = (WebRtc_UWord16)WEBRTC_SPL_ABS_W16(dfwReal[0]); + dfaClean[PART_LEN] = (WebRtc_UWord16)WEBRTC_SPL_ABS_W16(dfwReal[PART_LEN]); + + for (i = 1; i < PART_LEN; i++) + { + j = WEBRTC_SPL_LSHIFT_W32(i, 1); + dfwReal[i] = postFft[j]; + dfwImag[i] = postFft[j + 1]; + + if (dfwReal[i] == 0 || dfwImag[i] == 0) + { + dfaClean[i] = (WebRtc_UWord16)WEBRTC_SPL_ABS_W16(dfwReal[i] + dfwImag[i]); + } else + { + // Approximation for magnitude of complex fft output + // magn = sqrt(real^2 + imag^2) + // magn ~= alpha * max(|imag|,|real|) + beta * min(|imag|,|real|) + // + // The parameters alpha and beta are stored in Q15 + + tmp16no1 = WEBRTC_SPL_ABS_W16(postFft[j]); + tmp16no2 = WEBRTC_SPL_ABS_W16(postFft[j + 1]); + +#ifdef AECM_WITH_ABS_APPROX + if(tmp16no1 > tmp16no2) + { + maxValue = tmp16no1; + minValue = tmp16no2; + } else + { + maxValue = tmp16no2; + minValue = tmp16no1; + } + + // Magnitude in Q-6 + if ((maxValue 
>> 2) > minValue) + { + alpha = kAlpha1; + beta = kBeta1; + } else if ((maxValue >> 1) > minValue) + { + alpha = kAlpha2; + beta = kBeta2; + } else + { + alpha = kAlpha3; + beta = kBeta3; + } + tmp16no1 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(maxValue, alpha, 15); + tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(minValue, beta, 15); + dfaClean[i] = (WebRtc_UWord16)tmp16no1 + (WebRtc_UWord16)tmp16no2; +#else + tmp32no1 = WEBRTC_SPL_MUL_16_16(tmp16no1, tmp16no1); + tmp32no2 = WEBRTC_SPL_MUL_16_16(tmp16no2, tmp16no2); + tmp32no2 = WEBRTC_SPL_ADD_SAT_W32(tmp32no1, tmp32no2); + tmp32no1 = WebRtcSpl_Sqrt(tmp32no2); + dfaClean[i] = (WebRtc_UWord16)tmp32no1; +#endif + } + } + } + // END: FFT of clean near end signal + + // FFT of far end signal + for (i = 0; i < PART_LEN; i++) + { + j = WEBRTC_SPL_LSHIFT_W32(i, 1); + // Window farend + fft[j] + = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT((aecm->xBuf[i] << zerosXBuf), kSqrtHanning[i], 14); + fft[PART_LEN2 + j] + = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT((aecm->xBuf[PART_LEN + i] << zerosXBuf), kSqrtHanning[PART_LEN - i], 14); + // Inserting zeros in imaginary parts + fft[j + 1] = 0; + fft[PART_LEN2 + j + 1] = 0; + } + // Fourier transformation of far end signal. 
+ // The result is scaled with 1/PART_LEN2, that is the result is in Q(-6) for PART_LEN = 32 +#if (defined ARM_WINM) || (defined ARM9E_GCC) || (defined ANDROID_AECOPT) + outCFFT = WebRtcSpl_ComplexFFT2(fft, postFft, PART_LEN_SHIFT, 1); + + // The imaginary part has to switch sign + for(i = 1; i < PART_LEN2-1;) + { + postFft[i] = -postFft[i]; + i += 2; + postFft[i] = -postFft[i]; + i += 2; + } +#else + WebRtcSpl_ComplexBitReverse(fft, PART_LEN_SHIFT); + outCFFT = WebRtcSpl_ComplexFFT(fft, PART_LEN_SHIFT, 1); + + // Take only the first PART_LEN2 samples + for (i = 0; i < PART_LEN2; i++) + { + postFft[i] = fft[i]; + } + // The imaginary part has to switch sign + for (i = 1; i < PART_LEN2;) + { + postFft[i] = -postFft[i]; + i += 2; + } +#endif + + // Extract imaginary and real part, calculate the magnitude for all frequency bins + xfwImag[0] = 0; + xfwImag[PART_LEN] = 0; + xfwReal[0] = postFft[0]; +#if (defined ARM_WINM) || (defined ARM9E_GCC) || (defined ANDROID_AECOPT) + xfwReal[PART_LEN] = postFft[PART_LEN2]; +#else + xfwReal[PART_LEN] = fft[PART_LEN2]; +#endif + xfa[0] = (WebRtc_UWord16)WEBRTC_SPL_ABS_W16(xfwReal[0]); + xfa[PART_LEN] = (WebRtc_UWord16)WEBRTC_SPL_ABS_W16(xfwReal[PART_LEN]); + xfaSum = (WebRtc_UWord32)(xfa[0]) + (WebRtc_UWord32)(xfa[PART_LEN]); + + for (i = 1; i < PART_LEN; i++) + { + j = WEBRTC_SPL_LSHIFT_W32(i,1); + xfwReal[i] = postFft[j]; + xfwImag[i] = postFft[j + 1]; + + if (xfwReal[i] == 0 || xfwImag[i] == 0) + { + xfa[i] = (WebRtc_UWord16)WEBRTC_SPL_ABS_W16(xfwReal[i] + xfwImag[i]); + } else + { + // Approximation for magnitude of complex fft output + // magn = sqrt(real^2 + imag^2) + // magn ~= alpha * max(|imag|,|real|) + beta * min(|imag|,|real|) + // + // The parameters alpha and beta are stored in Q15 + + tmp16no1 = WEBRTC_SPL_ABS_W16(postFft[j]); + tmp16no2 = WEBRTC_SPL_ABS_W16(postFft[j + 1]); + +#ifdef AECM_WITH_ABS_APPROX + if(tmp16no1 > xfwImag[i]) + { + maxValue = tmp16no1; + minValue = tmp16no2; + } else + { + maxValue = tmp16no2; 
+ minValue = tmp16no1; + } + // Magnitude in Q-6 + if ((maxValue >> 2) > minValue) + { + alpha = kAlpha1; + beta = kBeta1; + } else if ((maxValue >> 1) > minValue) + { + alpha = kAlpha2; + beta = kBeta2; + } else + { + alpha = kAlpha3; + beta = kBeta3; + } + tmp16no1 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(maxValue, alpha, 15); + tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(minValue, beta, 15); + xfa[i] = (WebRtc_UWord16)tmp16no1 + (WebRtc_UWord16)tmp16no2; +#else + tmp32no1 = WEBRTC_SPL_MUL_16_16(tmp16no1, tmp16no1); + tmp32no2 = WEBRTC_SPL_MUL_16_16(tmp16no2, tmp16no2); + tmp32no2 = WEBRTC_SPL_ADD_SAT_W32(tmp32no1, tmp32no2); + tmp32no1 = WebRtcSpl_Sqrt(tmp32no2); + xfa[i] = (WebRtc_UWord16)tmp32no1; +#endif + } + xfaSum += (WebRtc_UWord32)xfa[i]; + } + +#ifdef ARM_WINM_LOG_ + // measure tick end + QueryPerformanceCounter((LARGE_INTEGER*)&end); + diff__ = ((end - start) * 1000) / (freq/1000); + milliseconds = (unsigned int)(diff__ & 0xffffffff); + WriteFile (logFile, &milliseconds, sizeof(unsigned int), &temp, NULL); +#endif + // END: FFT of far end signal + + // Get the delay + + // Fixed delay estimation + // input: dfaFIX, xfaFIX in Q-stages + // output: delay in Q0 + // + // comment on the fixed point accuracy of estimate_delayFIX + // -> due to rounding the fixed point variables xfa and dfa contain a lot more zeros + // than the corresponding floating point variables this results in big differences + // between the floating point and the fixed point logarithmic spectra for small values +#ifdef ARM_WINM_LOG_ + // measure tick start + QueryPerformanceCounter((LARGE_INTEGER*)&start); +#endif + + // Save far-end history and estimate delay + delay = WebRtcAecm_EstimateDelay(aecm, xfa, dfaNoisy, zerosXBuf); + + if (aecm->fixedDelay >= 0) + { + // Use fixed delay + delay = aecm->fixedDelay; + } + + aecm->currentDelay = delay; + + if ((aecm->delayOffsetFlag) & (aecm->startupState > 0)) // If delay compensation is on + { + // If the delay estimate changed 
from previous block, update the offset + if ((aecm->currentDelay != aecm->previousDelay) & !aecm->currentDelay + & !aecm->previousDelay) + { + aecm->delayAdjust += (aecm->currentDelay - aecm->previousDelay); + } + // Compensate with the offset estimate + aecm->currentDelay -= aecm->delayAdjust; + aecm->previousDelay = delay; + } + + diff = aecm->delHistoryPos - aecm->currentDelay; + if (diff < 0) + { + diff = diff + MAX_DELAY; + } + +#ifdef ARM_WINM_LOG_ + // measure tick end + QueryPerformanceCounter((LARGE_INTEGER*)&end); + diff__ = ((end - start) * 1000) / (freq/1000); + milliseconds = (unsigned int)(diff__ & 0xffffffff); + WriteFile (logFile, &milliseconds, sizeof(unsigned int), &temp, NULL); +#endif + + // END: Get the delay + +#ifdef ARM_WINM_LOG_ + // measure tick start + QueryPerformanceCounter((LARGE_INTEGER*)&start); +#endif + // Calculate log(energy) and update energy threshold levels + WebRtcAecm_CalcEnergies(aecm, diff, dfaNoisySum, echoEst32); + + // Calculate stepsize + mu = WebRtcAecm_CalcStepSize(aecm); + + // Update counters + aecm->totCount++; + aecm->lastDelayUpdateCount++; + + // This is the channel estimation algorithm. + // It is base on NLMS but has a variable step length, which was calculated above. 
+ WebRtcAecm_UpdateChannel(aecm, dfaNoisy, diff, mu, echoEst32); + WebRtcAecm_DelayCompensation(aecm); + supGain = WebRtcAecm_CalcSuppressionGain(aecm); + +#ifdef ARM_WINM_LOG_ + // measure tick end + QueryPerformanceCounter((LARGE_INTEGER*)&end); + diff__ = ((end - start) * 1000) / (freq/1000); + milliseconds = (unsigned int)(diff__ & 0xffffffff); + WriteFile (logFile, &milliseconds, sizeof(unsigned int), &temp, NULL); +#endif + +#ifdef ARM_WINM_LOG_ + // measure tick start + QueryPerformanceCounter((LARGE_INTEGER*)&start); +#endif + + // Calculate Wiener filter hnl[] + numPosCoef = 0; + diffMinusOne = diff - 1; + if (diff == 0) + { + diffMinusOne = MAX_DELAY; + } + for (i = 0; i < PART_LEN1; i++) + { + // Far end signal through channel estimate in Q8 + // How much can we shift right to preserve resolution + tmp32no1 = echoEst32[i] - aecm->echoFilt[i]; + aecm->echoFilt[i] += WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_32_16(tmp32no1, 50), 8); + + zeros32 = WebRtcSpl_NormW32(aecm->echoFilt[i]) + 1; + zeros16 = WebRtcSpl_NormW16(supGain) + 1; + if (zeros32 + zeros16 > 16) + { + // Multiplication is safe + // Result in Q(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN+aecm->xfaQDomainBuf[diff]) + echoEst32Gained = WEBRTC_SPL_UMUL_32_16((WebRtc_UWord32)aecm->echoFilt[i], + (WebRtc_UWord16)supGain); + resolutionDiff = 14 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN; + resolutionDiff += (aecm->dfaCleanQDomain - aecm->xfaQDomainBuf[diff]); + } else + { + tmp16no1 = 17 - zeros32 - zeros16; + resolutionDiff = 14 + tmp16no1 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN; + resolutionDiff += (aecm->dfaCleanQDomain - aecm->xfaQDomainBuf[diff]); + if (zeros32 > tmp16no1) + { + echoEst32Gained = WEBRTC_SPL_UMUL_32_16((WebRtc_UWord32)aecm->echoFilt[i], + (WebRtc_UWord16)WEBRTC_SPL_RSHIFT_W16(supGain, + tmp16no1)); // Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN-16) + } else + { + // Result in Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN-16) + echoEst32Gained = WEBRTC_SPL_UMUL_32_16( + 
(WebRtc_UWord32)WEBRTC_SPL_RSHIFT_W32(aecm->echoFilt[i], tmp16no1), + (WebRtc_UWord16)supGain); + } + } + + zeros16 = WebRtcSpl_NormW16(aecm->nearFilt[i]); + if ((zeros16 < (aecm->dfaCleanQDomain - aecm->dfaCleanQDomainOld)) + & (aecm->nearFilt[i])) + { + tmp16no1 = WEBRTC_SPL_SHIFT_W16(aecm->nearFilt[i], zeros16); + qDomainDiff = zeros16 - aecm->dfaCleanQDomain + aecm->dfaCleanQDomainOld; + } else + { + tmp16no1 = WEBRTC_SPL_SHIFT_W16(aecm->nearFilt[i], aecm->dfaCleanQDomain + - aecm->dfaCleanQDomainOld); + qDomainDiff = 0; + } + tmp16no2 = WEBRTC_SPL_SHIFT_W16(ptrDfaClean[i], qDomainDiff); + tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no2 - tmp16no1, 1, 4); + tmp16no2 += tmp16no1; + zeros16 = WebRtcSpl_NormW16(tmp16no2); + if ((tmp16no2) & (-qDomainDiff > zeros16)) + { + aecm->nearFilt[i] = WEBRTC_SPL_WORD16_MAX; + } else + { + aecm->nearFilt[i] = WEBRTC_SPL_SHIFT_W16(tmp16no2, -qDomainDiff); + } + + // Wiener filter coefficients, resulting hnl in Q14 + if (echoEst32Gained == 0) + { + hnl[i] = ONE_Q14; + } else if (aecm->nearFilt[i] == 0) + { + hnl[i] = 0; + } else + { + // Multiply the suppression gain + // Rounding + echoEst32Gained += (WebRtc_UWord32)(aecm->nearFilt[i] >> 1); + tmpU32 = WebRtcSpl_DivU32U16(echoEst32Gained, (WebRtc_UWord16)aecm->nearFilt[i]); + + // Current resolution is + // Q-(RESOLUTION_CHANNEL + RESOLUTION_SUPGAIN - max(0, 17 - zeros16 - zeros32)) + // Make sure we are in Q14 + tmp32no1 = (WebRtc_Word32)WEBRTC_SPL_SHIFT_W32(tmpU32, resolutionDiff); + if (tmp32no1 > ONE_Q14) + { + hnl[i] = 0; + } else if (tmp32no1 < 0) + { + hnl[i] = ONE_Q14; + } else + { + // 1-echoEst/dfa +#if (!defined ARM_WINM) && (!defined ARM9E_GCC) && (!defined ANDROID_AECOPT) + hnl[i] = ONE_Q14 - (WebRtc_Word16)tmp32no1; + if (hnl[i] < 0) + { + hnl[i] = 0; + } +#else + hnl[i] = ((ONE_Q14 - (WebRtc_Word16)tmp32no1) > 0) ? 
(ONE_Q14 - (WebRtc_Word16)tmp32no1) : 0; +#endif + } + } + if (hnl[i]) + { + numPosCoef++; + } + } + +#ifdef ARM_WINM_LOG_ + // measure tick end + QueryPerformanceCounter((LARGE_INTEGER*)&end); + diff__ = ((end - start) * 1000) / (freq/1000); + milliseconds = (unsigned int)(diff__ & 0xffffffff); + WriteFile (logFile, &milliseconds, sizeof(unsigned int), &temp, NULL); +#endif + +#ifdef ARM_WINM_LOG_ + // measure tick start + QueryPerformanceCounter((LARGE_INTEGER*)&start); +#endif + + // Calculate NLP gain, result is in Q14 + for (i = 0; i < PART_LEN1; i++) + { + if (aecm->nlpFlag) + { + // Truncate values close to zero and one. + if (hnl[i] > NLP_COMP_HIGH) + { + hnl[i] = ONE_Q14; + } else if (hnl[i] < NLP_COMP_LOW) + { + hnl[i] = 0; + } + + // Remove outliers + if (numPosCoef < 3) + { + nlpGain = 0; + } else + { + nlpGain = ONE_Q14; + } + // NLP + if ((hnl[i] == ONE_Q14) && (nlpGain == ONE_Q14)) + { + hnl[i] = ONE_Q14; + } else + { + hnl[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(hnl[i], nlpGain, 14); + } + } + + // multiply with Wiener coefficients + efwReal[i] = (WebRtc_Word16)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfwReal[i], hnl[i], + 14)); + efwImag[i] = (WebRtc_Word16)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfwImag[i], hnl[i], + 14)); + } + + if (aecm->cngMode == AecmTrue) + { + WebRtcAecm_ComfortNoise(aecm, ptrDfaClean, efwReal, efwImag, hnl); + } + +#ifdef ARM_WINM_LOG_ + // measure tick end + QueryPerformanceCounter((LARGE_INTEGER*)&end); + diff__ = ((end - start) * 1000) / (freq/1000); + milliseconds = (unsigned int)(diff__ & 0xffffffff); + WriteFile (logFile, &milliseconds, sizeof(unsigned int), &temp, NULL); +#endif + +#ifdef ARM_WINM_LOG_ + // measure tick start + QueryPerformanceCounter((LARGE_INTEGER*)&start); +#endif + + // Synthesis + for (i = 1; i < PART_LEN; i++) + { + j = WEBRTC_SPL_LSHIFT_W32(i, 1); + fft[j] = efwReal[i]; + + // mirrored data, even + fft[PART_LEN4 - j] = efwReal[i]; + fft[j + 1] = -efwImag[i]; + + //mirrored data, odd + 
fft[PART_LEN4 - (j - 1)] = efwImag[i]; + } + fft[0] = efwReal[0]; + fft[1] = -efwImag[0]; + + fft[PART_LEN2] = efwReal[PART_LEN]; + fft[PART_LEN2 + 1] = -efwImag[PART_LEN]; + +#if (!defined ARM_WINM) && (!defined ARM9E_GCC) && (!defined ANDROID_AECOPT) + // inverse FFT, result should be scaled with outCFFT + WebRtcSpl_ComplexBitReverse(fft, PART_LEN_SHIFT); + outCFFT = WebRtcSpl_ComplexIFFT(fft, PART_LEN_SHIFT, 1); + + //take only the real values and scale with outCFFT + for (i = 0; i < PART_LEN2; i++) + { + j = WEBRTC_SPL_LSHIFT_W32(i, 1); + fft[i] = fft[j]; + } +#else + outCFFT = WebRtcSpl_ComplexIFFT2(fft, postFft, PART_LEN_SHIFT, 1); + + //take only the real values and scale with outCFFT + for(i = 0, j = 0; i < PART_LEN2;) + { + fft[i] = postFft[j]; + i += 1; + j += 2; + fft[i] = postFft[j]; + i += 1; + j += 2; + } +#endif + + for (i = 0; i < PART_LEN; i++) + { + fft[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( + fft[i], + kSqrtHanning[i], + 14); + tmp32no1 = WEBRTC_SPL_SHIFT_W32((WebRtc_Word32)fft[i], + outCFFT - aecm->dfaCleanQDomain); + fft[i] = (WebRtc_Word16)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, + tmp32no1 + aecm->outBuf[i], + WEBRTC_SPL_WORD16_MIN); + output[i] = fft[i]; + + tmp32no1 = WEBRTC_SPL_MUL_16_16_RSFT( + fft[PART_LEN + i], + kSqrtHanning[PART_LEN - i], + 14); + tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, + outCFFT - aecm->dfaCleanQDomain); + aecm->outBuf[i] = (WebRtc_Word16)WEBRTC_SPL_SAT( + WEBRTC_SPL_WORD16_MAX, + tmp32no1, + WEBRTC_SPL_WORD16_MIN); + } + +#ifdef ARM_WINM_LOG_ + // measure tick end + QueryPerformanceCounter((LARGE_INTEGER*)&end); + diff__ = ((end - start) * 1000) / (freq/1000); + milliseconds = (unsigned int)(diff__ & 0xffffffff); + WriteFile (logFile, &milliseconds, sizeof(unsigned int), &temp, NULL); +#endif + // Copy the current block to the old position (outBuf is shifted elsewhere) + memcpy(aecm->xBuf, aecm->xBuf + PART_LEN, sizeof(WebRtc_Word16) * PART_LEN); + memcpy(aecm->dBufNoisy, aecm->dBufNoisy + 
PART_LEN, sizeof(WebRtc_Word16) * PART_LEN); + if (nearendClean != NULL) + { + memcpy(aecm->dBufClean, aecm->dBufClean + PART_LEN, sizeof(WebRtc_Word16) * PART_LEN); + } +} + +// Generate comfort noise and add to output signal. +// +// \param[in] aecm Handle of the AECM instance. +// \param[in] dfa Absolute value of the nearend signal (Q[aecm->dfaQDomain]). +// \param[in,out] outReal Real part of the output signal (Q[aecm->dfaQDomain]). +// \param[in,out] outImag Imaginary part of the output signal (Q[aecm->dfaQDomain]). +// \param[in] lambda Suppression gain with which to scale the noise level (Q14). +// +static void WebRtcAecm_ComfortNoise(AecmCore_t * const aecm, const WebRtc_UWord16 * const dfa, + WebRtc_Word16 * const outReal, + WebRtc_Word16 * const outImag, + const WebRtc_Word16 * const lambda) +{ + WebRtc_Word16 i; + WebRtc_Word16 tmp16; + WebRtc_Word32 tmp32; + + WebRtc_Word16 randW16[PART_LEN]; + WebRtc_Word16 uReal[PART_LEN1]; + WebRtc_Word16 uImag[PART_LEN1]; + WebRtc_Word32 outLShift32[PART_LEN1]; + WebRtc_Word16 noiseRShift16[PART_LEN1]; + + WebRtc_Word16 shiftFromNearToNoise[PART_LEN1]; + WebRtc_Word16 minTrackShift; + WebRtc_Word32 upper32; + WebRtc_Word32 lower32; + + if (aecm->noiseEstCtr < 100) + { + // Track the minimum more quickly initially. + aecm->noiseEstCtr++; + minTrackShift = 7; + } else + { + minTrackShift = 9; + } + + // Estimate noise power. + for (i = 0; i < PART_LEN1; i++) + { + shiftFromNearToNoise[i] = aecm->noiseEstQDomain[i] - aecm->dfaCleanQDomain; + + // Shift to the noise domain. + tmp32 = (WebRtc_Word32)dfa[i]; + outLShift32[i] = WEBRTC_SPL_SHIFT_W32(tmp32, shiftFromNearToNoise[i]); + + if (outLShift32[i] < aecm->noiseEst[i]) + { + // Track the minimum. + aecm->noiseEst[i] += ((outLShift32[i] - aecm->noiseEst[i]) >> minTrackShift); + } else + { + // Ramp slowly upwards until we hit the minimum again. + + // Avoid overflow. + if (aecm->noiseEst[i] < 2146435583) + { + // Store the fractional portion. 
+ upper32 = (aecm->noiseEst[i] & 0xffff0000) >> 16; + lower32 = aecm->noiseEst[i] & 0x0000ffff; + upper32 = ((upper32 * 2049) >> 11); + lower32 = ((lower32 * 2049) >> 11); + aecm->noiseEst[i] = WEBRTC_SPL_ADD_SAT_W32(upper32 << 16, lower32); + } + } + } + + for (i = 0; i < PART_LEN1; i++) + { + tmp32 = WEBRTC_SPL_SHIFT_W32(aecm->noiseEst[i], -shiftFromNearToNoise[i]); + if (tmp32 > 32767) + { + tmp32 = 32767; + aecm->noiseEst[i] = WEBRTC_SPL_SHIFT_W32(tmp32, shiftFromNearToNoise[i]); + } + noiseRShift16[i] = (WebRtc_Word16)tmp32; + + tmp16 = ONE_Q14 - lambda[i]; + noiseRShift16[i] + = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16, noiseRShift16[i], 14); + } + + // Generate a uniform random array on [0 2^15-1]. + WebRtcSpl_RandUArray(randW16, PART_LEN, &aecm->seed); + + // Generate noise according to estimated energy. + uReal[0] = 0; // Reject LF noise. + uImag[0] = 0; + for (i = 1; i < PART_LEN1; i++) + { + // Get a random index for the cos and sin tables over [0 359]. + tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(359, randW16[i - 1], 15); + + // Tables are in Q13. 
+ uReal[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(noiseRShift16[i], + WebRtcSpl_kCosTable[tmp16], 13); + uImag[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(-noiseRShift16[i], + WebRtcSpl_kSinTable[tmp16], 13); + } + uImag[PART_LEN] = 0; + +#if (!defined ARM_WINM) && (!defined ARM9E_GCC) && (!defined ANDROID_AECOPT) + for (i = 0; i < PART_LEN1; i++) + { + outReal[i] = WEBRTC_SPL_ADD_SAT_W16(outReal[i], uReal[i]); + outImag[i] = WEBRTC_SPL_ADD_SAT_W16(outImag[i], uImag[i]); + } +#else + for (i = 0; i < PART_LEN1 -1; ) + { + outReal[i] = WEBRTC_SPL_ADD_SAT_W16(outReal[i], uReal[i]); + outImag[i] = WEBRTC_SPL_ADD_SAT_W16(outImag[i], uImag[i]); + i++; + + outReal[i] = WEBRTC_SPL_ADD_SAT_W16(outReal[i], uReal[i]); + outImag[i] = WEBRTC_SPL_ADD_SAT_W16(outImag[i], uImag[i]); + i++; + } + outReal[i] = WEBRTC_SPL_ADD_SAT_W16(outReal[i], uReal[i]); + outImag[i] = WEBRTC_SPL_ADD_SAT_W16(outImag[i], uImag[i]); +#endif +} + +void WebRtcAecm_BufferFarFrame(AecmCore_t * const aecm, const WebRtc_Word16 * const farend, + const int farLen) +{ + int writeLen = farLen, writePos = 0; + + // Check if the write position must be wrapped + while (aecm->farBufWritePos + writeLen > FAR_BUF_LEN) + { + // Write to remaining buffer space before wrapping + writeLen = FAR_BUF_LEN - aecm->farBufWritePos; + memcpy(aecm->farBuf + aecm->farBufWritePos, farend + writePos, + sizeof(WebRtc_Word16) * writeLen); + aecm->farBufWritePos = 0; + writePos = writeLen; + writeLen = farLen - writeLen; + } + + memcpy(aecm->farBuf + aecm->farBufWritePos, farend + writePos, + sizeof(WebRtc_Word16) * writeLen); + aecm->farBufWritePos += writeLen; +} + +void WebRtcAecm_FetchFarFrame(AecmCore_t * const aecm, WebRtc_Word16 * const farend, + const int farLen, const int knownDelay) +{ + int readLen = farLen; + int readPos = 0; + int delayChange = knownDelay - aecm->lastKnownDelay; + + aecm->farBufReadPos -= delayChange; + + // Check if delay forces a read position wrap + while (aecm->farBufReadPos < 0) + { + 
aecm->farBufReadPos += FAR_BUF_LEN; + } + while (aecm->farBufReadPos > FAR_BUF_LEN - 1) + { + aecm->farBufReadPos -= FAR_BUF_LEN; + } + + aecm->lastKnownDelay = knownDelay; + + // Check if read position must be wrapped + while (aecm->farBufReadPos + readLen > FAR_BUF_LEN) + { + + // Read from remaining buffer space before wrapping + readLen = FAR_BUF_LEN - aecm->farBufReadPos; + memcpy(farend + readPos, aecm->farBuf + aecm->farBufReadPos, + sizeof(WebRtc_Word16) * readLen); + aecm->farBufReadPos = 0; + readPos = readLen; + readLen = farLen - readLen; + } + memcpy(farend + readPos, aecm->farBuf + aecm->farBufReadPos, + sizeof(WebRtc_Word16) * readLen); + aecm->farBufReadPos += readLen; +} diff --git a/src/modules/audio_processing/aecm/main/source/aecm_core.h b/src/modules/audio_processing/aecm/main/source/aecm_core.h new file mode 100644 index 0000000000..5defbe46c7 --- /dev/null +++ b/src/modules/audio_processing/aecm/main/source/aecm_core.h @@ -0,0 +1,338 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Performs echo control (suppression) with fft routines in fixed-point + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AECM_MAIN_SOURCE_AECM_CORE_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_AECM_MAIN_SOURCE_AECM_CORE_H_ + +#define AECM_DYNAMIC_Q // turn on/off dynamic Q-domain +//#define AECM_WITH_ABS_APPROX +//#define AECM_SHORT // for 32 sample partition length (otherwise 64) + +// TODO(bjornv): These defines will be removed in final version. 
+//#define STORE_CHANNEL_DATA +//#define VAD_DATA + +#include "typedefs.h" +#include "signal_processing_library.h" +// TODO(bjornv): Will be removed in final version. +//#include <stdio.h> + +// Algorithm parameters + +#define FRAME_LEN 80 // Total frame length, 10 ms +#ifdef AECM_SHORT + +#define PART_LEN 32 // Length of partition +#define PART_LEN_SHIFT 6 // Length of (PART_LEN * 2) in base 2 + +#else + +#define PART_LEN 64 // Length of partition +#define PART_LEN_SHIFT 7 // Length of (PART_LEN * 2) in base 2 + +#endif + +#define PART_LEN1 (PART_LEN + 1) // Unique fft coefficients +#define PART_LEN2 (PART_LEN << 1) // Length of partition * 2 +#define PART_LEN4 (PART_LEN << 2) // Length of partition * 4 +#define FAR_BUF_LEN PART_LEN4 // Length of buffers +#define MAX_DELAY 100 + +// Counter parameters +#ifdef AECM_SHORT + +#define CONV_LEN 1024 // Convergence length used at startup +#else + +#define CONV_LEN 512 // Convergence length used at startup +#endif + +#define CONV_LEN2 (CONV_LEN << 1) // Convergence length * 2 used at startup +// Energy parameters +#define MAX_BUF_LEN 64 // History length of energy signals + +#define FAR_ENERGY_MIN 1025 // Lowest Far energy level: At least 2 in energy +#define FAR_ENERGY_DIFF 929 // Allowed difference between max and min + +#define ENERGY_DEV_OFFSET 0 // The energy error offset in Q8 +#define ENERGY_DEV_TOL 400 // The energy estimation tolerance in Q8 +#define FAR_ENERGY_VAD_REGION 230 // Far VAD tolerance region +// Stepsize parameters +#define MU_MIN 10 // Min stepsize 2^-MU_MIN (far end energy dependent) +#define MU_MAX 1 // Max stepsize 2^-MU_MAX (far end energy dependent) +#define MU_DIFF 9 // MU_MIN - MU_MAX +// Channel parameters +#define MIN_MSE_COUNT 20 // Min number of consecutive blocks with enough far end + // energy to compare channel estimates +#define MIN_MSE_DIFF 29 // The ratio between adapted and stored channel to + // accept a new storage (0.8 in Q-MSE_RESOLUTION) +#define MSE_RESOLUTION 5 // MSE 
// Defines for "check delay estimation"
#define CORR_WIDTH 31 // Number of samples to correlate over.
#define CORR_MAX 16 // Maximum correlation offset
#define CORR_MAX_BUF 63
#define CORR_DEV 4
#define CORR_MAX_LEVEL 20
#define CORR_MAX_LOW 4
// Fully parenthesized so the macro expands as a single value inside any
// larger expression (e.g. 2 * CORR_BUF_LEN).
#define CORR_BUF_LEN ((CORR_MAX << 1) + 1)
// Note that CORR_WIDTH + 2*CORR_MAX <= MAX_BUF_LEN
medianBCount[MAX_DELAY]; + WebRtc_UWord16 xfaHistory[PART_LEN1][MAX_DELAY]; + WebRtc_Word16 delHistoryPos; + WebRtc_UWord32 bxHistory[MAX_DELAY]; + WebRtc_UWord16 currentDelay; + WebRtc_UWord16 previousDelay; + WebRtc_Word16 delayAdjust; + + WebRtc_Word16 nlpFlag; + WebRtc_Word16 fixedDelay; + + WebRtc_UWord32 totCount; + + WebRtc_Word16 xfaQDomainBuf[MAX_DELAY]; + WebRtc_Word16 dfaCleanQDomain; + WebRtc_Word16 dfaCleanQDomainOld; + WebRtc_Word16 dfaNoisyQDomain; + WebRtc_Word16 dfaNoisyQDomainOld; + + WebRtc_Word16 nearLogEnergy[MAX_BUF_LEN]; + WebRtc_Word16 farLogEnergy[MAX_BUF_LEN]; + WebRtc_Word16 echoAdaptLogEnergy[MAX_BUF_LEN]; + WebRtc_Word16 echoStoredLogEnergy[MAX_BUF_LEN]; + + WebRtc_Word16 channelAdapt16[PART_LEN1]; + WebRtc_Word32 channelAdapt32[PART_LEN1]; + WebRtc_Word16 channelStored[PART_LEN1]; + WebRtc_Word32 echoFilt[PART_LEN1]; + WebRtc_Word16 nearFilt[PART_LEN1]; + WebRtc_Word32 noiseEst[PART_LEN1]; + WebRtc_Word16 noiseEstQDomain[PART_LEN1]; + WebRtc_Word16 noiseEstCtr; + WebRtc_Word16 cngMode; + + WebRtc_Word32 mseAdaptOld; + WebRtc_Word32 mseStoredOld; + WebRtc_Word32 mseThreshold; + + WebRtc_Word16 farEnergyMin; + WebRtc_Word16 farEnergyMax; + WebRtc_Word16 farEnergyMaxMin; + WebRtc_Word16 farEnergyVAD; + WebRtc_Word16 farEnergyMSE; + WebRtc_Word16 currentVADValue; + WebRtc_Word16 vadUpdateCount; + + WebRtc_Word16 delayHistogram[MAX_DELAY]; + WebRtc_Word16 delayVadCount; + WebRtc_Word16 maxDelayHistIdx; + WebRtc_Word16 lastMinPos; + + WebRtc_Word16 startupState; + WebRtc_Word16 mseChannelCount; + WebRtc_Word16 delayCount; + WebRtc_Word16 newDelayCorrData; + WebRtc_Word16 lastDelayUpdateCount; + WebRtc_Word16 delayCorrelation[CORR_BUF_LEN]; + WebRtc_Word16 supGain; + WebRtc_Word16 supGainOld; + WebRtc_Word16 delayOffsetFlag; + + WebRtc_Word16 supGainErrParamA; + WebRtc_Word16 supGainErrParamD; + WebRtc_Word16 supGainErrParamDiffAB; + WebRtc_Word16 supGainErrParamDiffBD; + + // TODO(bjornv): Will be removed after final version has been 
committed. +#ifdef VAD_DATA + FILE *vad_file; + FILE *delay_file; + FILE *far_file; + FILE *far_cur_file; + FILE *far_min_file; + FILE *far_max_file; + FILE *far_vad_file; +#endif + + // TODO(bjornv): Will be removed after final version has been committed. +#ifdef STORE_CHANNEL_DATA + FILE *channel_file; + FILE *channel_file_init; +#endif + +#ifdef AEC_DEBUG + FILE *farFile; + FILE *nearFile; + FILE *outFile; +#endif +} AecmCore_t; + +/////////////////////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_CreateCore(...) +// +// Allocates the memory needed by the AECM. The memory needs to be +// initialized separately using the WebRtcAecm_InitCore() function. +// +// Input: +// - aecm : Instance that should be created +// +// Output: +// - aecm : Created instance +// +// Return value : 0 - Ok +// -1 - Error +// +int WebRtcAecm_CreateCore(AecmCore_t **aecm); + +/////////////////////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_InitCore(...) +// +// This function initializes the AECM instant created with WebRtcAecm_CreateCore(...) +// Input: +// - aecm : Pointer to the AECM instance +// - samplingFreq : Sampling Frequency +// +// Output: +// - aecm : Initialized instance +// +// Return value : 0 - Ok +// -1 - Error +// +int WebRtcAecm_InitCore(AecmCore_t * const aecm, int samplingFreq); + +/////////////////////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_FreeCore(...) +// +// This function releases the memory allocated by WebRtcAecm_CreateCore() +// Input: +// - aecm : Pointer to the AECM instance +// +// Return value : 0 - Ok +// -1 - Error +// 11001-11016: Error +// +int WebRtcAecm_FreeCore(AecmCore_t *aecm); + +int WebRtcAecm_Control(AecmCore_t *aecm, int delay, int nlpFlag, int delayOffsetFlag); + +/////////////////////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_ProcessFrame(...) 
+// +// This function processes frames and sends blocks to WebRtcAecm_ProcessBlock(...) +// +// Inputs: +// - aecm : Pointer to the AECM instance +// - farend : In buffer containing one frame of echo signal +// - nearendNoisy : In buffer containing one frame of nearend+echo signal without NS +// - nearendClean : In buffer containing one frame of nearend+echo signal with NS +// +// Output: +// - out : Out buffer, one frame of nearend signal : +// +// +void WebRtcAecm_ProcessFrame(AecmCore_t * const aecm, const WebRtc_Word16 * const farend, + const WebRtc_Word16 * const nearendNoisy, + const WebRtc_Word16 * const nearendClean, + WebRtc_Word16 * const out); + +/////////////////////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_ProcessBlock(...) +// +// This function is called for every block within one frame +// This function is called by WebRtcAecm_ProcessFrame(...) +// +// Inputs: +// - aecm : Pointer to the AECM instance +// - farend : In buffer containing one block of echo signal +// - nearendNoisy : In buffer containing one frame of nearend+echo signal without NS +// - nearendClean : In buffer containing one frame of nearend+echo signal with NS +// +// Output: +// - out : Out buffer, one block of nearend signal : +// +// +void WebRtcAecm_ProcessBlock(AecmCore_t * const aecm, const WebRtc_Word16 * const farend, + const WebRtc_Word16 * const nearendNoisy, + const WebRtc_Word16 * const noisyClean, + WebRtc_Word16 * const out); + +/////////////////////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_BufferFarFrame() +// +// Inserts a frame of data into farend buffer. 
+// +// Inputs: +// - aecm : Pointer to the AECM instance +// - farend : In buffer containing one frame of farend signal +// - farLen : Length of frame +// +void WebRtcAecm_BufferFarFrame(AecmCore_t * const aecm, const WebRtc_Word16 * const farend, + const int farLen); + +/////////////////////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_FetchFarFrame() +// +// Read the farend buffer to account for known delay +// +// Inputs: +// - aecm : Pointer to the AECM instance +// - farend : In buffer containing one frame of farend signal +// - farLen : Length of frame +// - knownDelay : known delay +// +void WebRtcAecm_FetchFarFrame(AecmCore_t * const aecm, WebRtc_Word16 * const farend, + const int farLen, const int knownDelay); + +#endif diff --git a/src/modules/audio_processing/aecm/main/source/echo_control_mobile.c b/src/modules/audio_processing/aecm/main/source/echo_control_mobile.c new file mode 100644 index 0000000000..f9d84f0c4b --- /dev/null +++ b/src/modules/audio_processing/aecm/main/source/echo_control_mobile.c @@ -0,0 +1,733 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <stdlib.h> +//#include <string.h> + +#include "echo_control_mobile.h" +#include "aecm_core.h" +#include "ring_buffer.h" +#ifdef AEC_DEBUG +#include <stdio.h> +#endif +#ifdef MAC_IPHONE_PRINT +#include <time.h> +#include <stdio.h> +#elif defined ARM_WINM_LOG +#include "windows.h" +extern HANDLE logFile; +#endif + +#define BUF_SIZE_FRAMES 50 // buffer size (frames) +// Maximum length of resampled signal. 
Must be an integer multiple of frames +// (ceil(1/(1 + MIN_SKEW)*2) + 1)*FRAME_LEN +// The factor of 2 handles wb, and the + 1 is as a safety margin +#define MAX_RESAMP_LEN (5 * FRAME_LEN) + +static const int kBufSizeSamp = BUF_SIZE_FRAMES * FRAME_LEN; // buffer size (samples) +static const int kSampMsNb = 8; // samples per ms in nb +// Target suppression levels for nlp modes +// log{0.001, 0.00001, 0.00000001} +static const int kInitCheck = 42; + +typedef struct +{ + int sampFreq; + int scSampFreq; + short bufSizeStart; + int knownDelay; + + // Stores the last frame added to the farend buffer + short farendOld[2][FRAME_LEN]; + short initFlag; // indicates if AEC has been initialized + + // Variables used for averaging far end buffer size + short counter; + short sum; + short firstVal; + short checkBufSizeCtr; + + // Variables used for delay shifts + short msInSndCardBuf; + short filtDelay; + int timeForDelayChange; + int ECstartup; + int checkBuffSize; + int delayChange; + short lastDelayDiff; + + WebRtc_Word16 echoMode; + +#ifdef AEC_DEBUG + FILE *bufFile; + FILE *delayFile; + FILE *preCompFile; + FILE *postCompFile; +#endif // AEC_DEBUG + // Structures + void *farendBuf; + + int lastError; + + AecmCore_t *aecmCore; +} aecmob_t; + +// Estimates delay to set the position of the farend buffer read pointer +// (controlled by knownDelay) +static int WebRtcAecm_EstBufDelay(aecmob_t *aecmInst, short msInSndCardBuf); + +// Stuffs the farend buffer if the estimated delay is too large +static int WebRtcAecm_DelayComp(aecmob_t *aecmInst); + +WebRtc_Word32 WebRtcAecm_Create(void **aecmInst) +{ + aecmob_t *aecm; + if (aecmInst == NULL) + { + return -1; + } + + aecm = malloc(sizeof(aecmob_t)); + *aecmInst = aecm; + if (aecm == NULL) + { + return -1; + } + + if (WebRtcAecm_CreateCore(&aecm->aecmCore) == -1) + { + WebRtcAecm_Free(aecm); + aecm = NULL; + return -1; + } + + if (WebRtcApm_CreateBuffer(&aecm->farendBuf, kBufSizeSamp) == -1) + { + WebRtcAecm_Free(aecm); + aecm = 
NULL; + return -1; + } + + aecm->initFlag = 0; + aecm->lastError = 0; + +#ifdef AEC_DEBUG + aecm->aecmCore->farFile = fopen("aecFar.pcm","wb"); + aecm->aecmCore->nearFile = fopen("aecNear.pcm","wb"); + aecm->aecmCore->outFile = fopen("aecOut.pcm","wb"); + //aecm->aecmCore->outLpFile = fopen("aecOutLp.pcm","wb"); + + aecm->bufFile = fopen("aecBuf.dat", "wb"); + aecm->delayFile = fopen("aecDelay.dat", "wb"); + aecm->preCompFile = fopen("preComp.pcm", "wb"); + aecm->postCompFile = fopen("postComp.pcm", "wb"); +#endif // AEC_DEBUG + return 0; +} + +WebRtc_Word32 WebRtcAecm_Free(void *aecmInst) +{ + aecmob_t *aecm = aecmInst; + + if (aecm == NULL) + { + return -1; + } + +#ifdef AEC_DEBUG + fclose(aecm->aecmCore->farFile); + fclose(aecm->aecmCore->nearFile); + fclose(aecm->aecmCore->outFile); + //fclose(aecm->aecmCore->outLpFile); + + fclose(aecm->bufFile); + fclose(aecm->delayFile); + fclose(aecm->preCompFile); + fclose(aecm->postCompFile); +#endif // AEC_DEBUG + WebRtcAecm_FreeCore(aecm->aecmCore); + WebRtcApm_FreeBuffer(aecm->farendBuf); + free(aecm); + + return 0; +} + +WebRtc_Word32 WebRtcAecm_Init(void *aecmInst, WebRtc_Word32 sampFreq, WebRtc_Word32 scSampFreq) +{ + aecmob_t *aecm = aecmInst; + AecmConfig aecConfig; + + if (aecm == NULL) + { + return -1; + } + + if (sampFreq != 8000 && sampFreq != 16000) + { + aecm->lastError = AECM_BAD_PARAMETER_ERROR; + return -1; + } + aecm->sampFreq = sampFreq; + + if (scSampFreq < 1 || scSampFreq > 96000) + { + aecm->lastError = AECM_BAD_PARAMETER_ERROR; + return -1; + } + aecm->scSampFreq = scSampFreq; + + // Initialize AECM core + if (WebRtcAecm_InitCore(aecm->aecmCore, aecm->sampFreq) == -1) + { + aecm->lastError = AECM_UNSPECIFIED_ERROR; + return -1; + } + + // Initialize farend buffer + if (WebRtcApm_InitBuffer(aecm->farendBuf) == -1) + { + aecm->lastError = AECM_UNSPECIFIED_ERROR; + return -1; + } + + aecm->initFlag = kInitCheck; // indicates that initialization has been done + + aecm->delayChange = 1; + + aecm->sum = 
0; + aecm->counter = 0; + aecm->checkBuffSize = 1; + aecm->firstVal = 0; + + aecm->ECstartup = 1; + aecm->bufSizeStart = 0; + aecm->checkBufSizeCtr = 0; + aecm->filtDelay = 0; + aecm->timeForDelayChange = 0; + aecm->knownDelay = 0; + aecm->lastDelayDiff = 0; + + memset(&aecm->farendOld[0][0], 0, 160); + + // Default settings. + aecConfig.cngMode = AecmTrue; + aecConfig.echoMode = 3; + + if (WebRtcAecm_set_config(aecm, aecConfig) == -1) + { + aecm->lastError = AECM_UNSPECIFIED_ERROR; + return -1; + } + + return 0; +} + +WebRtc_Word32 WebRtcAecm_BufferFarend(void *aecmInst, const WebRtc_Word16 *farend, + WebRtc_Word16 nrOfSamples) +{ + aecmob_t *aecm = aecmInst; + WebRtc_Word32 retVal = 0; + + if (aecm == NULL) + { + return -1; + } + + if (farend == NULL) + { + aecm->lastError = AECM_NULL_POINTER_ERROR; + return -1; + } + + if (aecm->initFlag != kInitCheck) + { + aecm->lastError = AECM_UNINITIALIZED_ERROR; + return -1; + } + + if (nrOfSamples != 80 && nrOfSamples != 160) + { + aecm->lastError = AECM_BAD_PARAMETER_ERROR; + return -1; + } + + // TODO: Is this really a good idea? 
+ if (!aecm->ECstartup) + { + WebRtcAecm_DelayComp(aecm); + } + + WebRtcApm_WriteBuffer(aecm->farendBuf, farend, nrOfSamples); + + return retVal; +} + +WebRtc_Word32 WebRtcAecm_Process(void *aecmInst, const WebRtc_Word16 *nearendNoisy, + const WebRtc_Word16 *nearendClean, WebRtc_Word16 *out, + WebRtc_Word16 nrOfSamples, WebRtc_Word16 msInSndCardBuf) +{ + aecmob_t *aecm = aecmInst; + WebRtc_Word32 retVal = 0; + short i; + short farend[FRAME_LEN]; + short nmbrOfFilledBuffers; + short nBlocks10ms; + short nFrames; +#ifdef AEC_DEBUG + short msInAECBuf; +#endif + +#ifdef ARM_WINM_LOG + __int64 freq, start, end, diff; + unsigned int milliseconds; + DWORD temp; +#elif defined MAC_IPHONE_PRINT + // double endtime = 0, starttime = 0; + struct timeval starttime; + struct timeval endtime; + static long int timeused = 0; + static int timecount = 0; +#endif + + if (aecm == NULL) + { + return -1; + } + + if (nearendNoisy == NULL) + { + aecm->lastError = AECM_NULL_POINTER_ERROR; + return -1; + } + + if (out == NULL) + { + aecm->lastError = AECM_NULL_POINTER_ERROR; + return -1; + } + + if (aecm->initFlag != kInitCheck) + { + aecm->lastError = AECM_UNINITIALIZED_ERROR; + return -1; + } + + if (nrOfSamples != 80 && nrOfSamples != 160) + { + aecm->lastError = AECM_BAD_PARAMETER_ERROR; + return -1; + } + + if (msInSndCardBuf < 0) + { + msInSndCardBuf = 0; + aecm->lastError = AECM_BAD_PARAMETER_WARNING; + retVal = -1; + } else if (msInSndCardBuf > 500) + { + msInSndCardBuf = 500; + aecm->lastError = AECM_BAD_PARAMETER_WARNING; + retVal = -1; + } + msInSndCardBuf += 10; + aecm->msInSndCardBuf = msInSndCardBuf; + + nFrames = nrOfSamples / FRAME_LEN; + nBlocks10ms = nFrames / aecm->aecmCore->mult; + + if (aecm->ECstartup) + { + if (nearendClean == NULL) + { + memcpy(out, nearendNoisy, sizeof(short) * nrOfSamples); + } else + { + memcpy(out, nearendClean, sizeof(short) * nrOfSamples); + } + + nmbrOfFilledBuffers = WebRtcApm_get_buffer_size(aecm->farendBuf) / FRAME_LEN; + // The AECM is in 
the start up mode + // AECM is disabled until the soundcard buffer and farend buffers are OK + + // Mechanism to ensure that the soundcard buffer is reasonably stable. + if (aecm->checkBuffSize) + { + aecm->checkBufSizeCtr++; + // Before we fill up the far end buffer we require the amount of data on the + // sound card to be stable (+/-8 ms) compared to the first value. This + // comparison is made during the following 4 consecutive frames. If it seems + // to be stable then we start to fill up the far end buffer. + + if (aecm->counter == 0) + { + aecm->firstVal = aecm->msInSndCardBuf; + aecm->sum = 0; + } + + if (abs(aecm->firstVal - aecm->msInSndCardBuf) + < WEBRTC_SPL_MAX(0.2 * aecm->msInSndCardBuf, kSampMsNb)) + { + aecm->sum += aecm->msInSndCardBuf; + aecm->counter++; + } else + { + aecm->counter = 0; + } + + if (aecm->counter * nBlocks10ms >= 6) + { + // The farend buffer size is determined in blocks of 80 samples + // Use 75% of the average value of the soundcard buffer + aecm->bufSizeStart + = WEBRTC_SPL_MIN((3 * aecm->sum + * aecm->aecmCore->mult) / (aecm->counter * 40), BUF_SIZE_FRAMES); + // buffersize has now been determined + aecm->checkBuffSize = 0; + } + + if (aecm->checkBufSizeCtr * nBlocks10ms > 50) + { + // for really bad sound cards, don't disable echocanceller for more than 0.5 sec + aecm->bufSizeStart = WEBRTC_SPL_MIN((3 * aecm->msInSndCardBuf + * aecm->aecmCore->mult) / 40, BUF_SIZE_FRAMES); + aecm->checkBuffSize = 0; + } + } + + // if checkBuffSize changed in the if-statement above + if (!aecm->checkBuffSize) + { + // soundcard buffer is now reasonably stable + // When the far end buffer is filled with approximately the same amount of + // data as the amount on the sound card we end the start up phase and start + // to cancel echoes. 
+ + if (nmbrOfFilledBuffers == aecm->bufSizeStart) + { + aecm->ECstartup = 0; // Enable the AECM + } else if (nmbrOfFilledBuffers > aecm->bufSizeStart) + { + WebRtcApm_FlushBuffer( + aecm->farendBuf, + WebRtcApm_get_buffer_size(aecm->farendBuf) + - aecm->bufSizeStart * FRAME_LEN); + aecm->ECstartup = 0; + } + } + + } else + { + // AECM is enabled + + // Note only 1 block supported for nb and 2 blocks for wb + for (i = 0; i < nFrames; i++) + { + nmbrOfFilledBuffers = WebRtcApm_get_buffer_size(aecm->farendBuf) / FRAME_LEN; + + // Check that there is data in the far end buffer + if (nmbrOfFilledBuffers > 0) + { + // Get the next 80 samples from the farend buffer + WebRtcApm_ReadBuffer(aecm->farendBuf, farend, FRAME_LEN); + + // Always store the last frame for use when we run out of data + memcpy(&(aecm->farendOld[i][0]), farend, FRAME_LEN * sizeof(short)); + } else + { + // We have no data so we use the last played frame + memcpy(farend, &(aecm->farendOld[i][0]), FRAME_LEN * sizeof(short)); + } + + // Call buffer delay estimator when all data is extracted, + // i,e. 
i = 0 for NB and i = 1 for WB + if ((i == 0 && aecm->sampFreq == 8000) || (i == 1 && aecm->sampFreq == 16000)) + { + WebRtcAecm_EstBufDelay(aecm, aecm->msInSndCardBuf); + } + +#ifdef ARM_WINM_LOG + // measure tick start + QueryPerformanceFrequency((LARGE_INTEGER*)&freq); + QueryPerformanceCounter((LARGE_INTEGER*)&start); +#elif defined MAC_IPHONE_PRINT + // starttime = clock()/(double)CLOCKS_PER_SEC; + gettimeofday(&starttime, NULL); +#endif + // Call the AECM + /*WebRtcAecm_ProcessFrame(aecm->aecmCore, farend, &nearend[FRAME_LEN * i], + &out[FRAME_LEN * i], aecm->knownDelay);*/ + if (nearendClean == NULL) + { + WebRtcAecm_ProcessFrame(aecm->aecmCore, farend, &nearendNoisy[FRAME_LEN * i], + NULL, &out[FRAME_LEN * i]); + } else + { + WebRtcAecm_ProcessFrame(aecm->aecmCore, farend, &nearendNoisy[FRAME_LEN * i], + &nearendClean[FRAME_LEN * i], &out[FRAME_LEN * i]); + } + +#ifdef ARM_WINM_LOG + + // measure tick end + QueryPerformanceCounter((LARGE_INTEGER*)&end); + + if(end > start) + { + diff = ((end - start) * 1000) / (freq/1000); + milliseconds = (unsigned int)(diff & 0xffffffff); + WriteFile (logFile, &milliseconds, sizeof(unsigned int), &temp, NULL); + } +#elif defined MAC_IPHONE_PRINT + // endtime = clock()/(double)CLOCKS_PER_SEC; + // printf("%f\n", endtime - starttime); + + gettimeofday(&endtime, NULL); + + if( endtime.tv_usec > starttime.tv_usec) + { + timeused += endtime.tv_usec - starttime.tv_usec; + } else + { + timeused += endtime.tv_usec + 1000000 - starttime.tv_usec; + } + + if(++timecount == 1000) + { + timecount = 0; + printf("AEC: %ld\n", timeused); + timeused = 0; + } +#endif + + } + } + +#ifdef AEC_DEBUG + msInAECBuf = WebRtcApm_get_buffer_size(aecm->farendBuf) / (kSampMsNb*aecm->aecmCore->mult); + fwrite(&msInAECBuf, 2, 1, aecm->bufFile); + fwrite(&(aecm->knownDelay), sizeof(aecm->knownDelay), 1, aecm->delayFile); +#endif + + return retVal; +} + +WebRtc_Word32 WebRtcAecm_set_config(void *aecmInst, AecmConfig config) +{ + aecmob_t *aecm = aecmInst; 
+ + if (aecm == NULL) + { + return -1; + } + + if (aecm->initFlag != kInitCheck) + { + aecm->lastError = AECM_UNINITIALIZED_ERROR; + return -1; + } + + if (config.cngMode != AecmFalse && config.cngMode != AecmTrue) + { + aecm->lastError = AECM_BAD_PARAMETER_ERROR; + return -1; + } + aecm->aecmCore->cngMode = config.cngMode; + + if (config.echoMode < 0 || config.echoMode > 4) + { + aecm->lastError = AECM_BAD_PARAMETER_ERROR; + return -1; + } + aecm->echoMode = config.echoMode; + + if (aecm->echoMode == 0) + { + aecm->aecmCore->supGain = SUPGAIN_DEFAULT >> 3; + aecm->aecmCore->supGainOld = SUPGAIN_DEFAULT >> 3; + aecm->aecmCore->supGainErrParamA = SUPGAIN_ERROR_PARAM_A >> 3; + aecm->aecmCore->supGainErrParamD = SUPGAIN_ERROR_PARAM_D >> 3; + aecm->aecmCore->supGainErrParamDiffAB = (SUPGAIN_ERROR_PARAM_A >> 3) + - (SUPGAIN_ERROR_PARAM_B >> 3); + aecm->aecmCore->supGainErrParamDiffBD = (SUPGAIN_ERROR_PARAM_B >> 3) + - (SUPGAIN_ERROR_PARAM_D >> 3); + } else if (aecm->echoMode == 1) + { + aecm->aecmCore->supGain = SUPGAIN_DEFAULT >> 2; + aecm->aecmCore->supGainOld = SUPGAIN_DEFAULT >> 2; + aecm->aecmCore->supGainErrParamA = SUPGAIN_ERROR_PARAM_A >> 2; + aecm->aecmCore->supGainErrParamD = SUPGAIN_ERROR_PARAM_D >> 2; + aecm->aecmCore->supGainErrParamDiffAB = (SUPGAIN_ERROR_PARAM_A >> 2) + - (SUPGAIN_ERROR_PARAM_B >> 2); + aecm->aecmCore->supGainErrParamDiffBD = (SUPGAIN_ERROR_PARAM_B >> 2) + - (SUPGAIN_ERROR_PARAM_D >> 2); + } else if (aecm->echoMode == 2) + { + aecm->aecmCore->supGain = SUPGAIN_DEFAULT >> 1; + aecm->aecmCore->supGainOld = SUPGAIN_DEFAULT >> 1; + aecm->aecmCore->supGainErrParamA = SUPGAIN_ERROR_PARAM_A >> 1; + aecm->aecmCore->supGainErrParamD = SUPGAIN_ERROR_PARAM_D >> 1; + aecm->aecmCore->supGainErrParamDiffAB = (SUPGAIN_ERROR_PARAM_A >> 1) + - (SUPGAIN_ERROR_PARAM_B >> 1); + aecm->aecmCore->supGainErrParamDiffBD = (SUPGAIN_ERROR_PARAM_B >> 1) + - (SUPGAIN_ERROR_PARAM_D >> 1); + } else if (aecm->echoMode == 3) + { + aecm->aecmCore->supGain = 
SUPGAIN_DEFAULT; + aecm->aecmCore->supGainOld = SUPGAIN_DEFAULT; + aecm->aecmCore->supGainErrParamA = SUPGAIN_ERROR_PARAM_A; + aecm->aecmCore->supGainErrParamD = SUPGAIN_ERROR_PARAM_D; + aecm->aecmCore->supGainErrParamDiffAB = SUPGAIN_ERROR_PARAM_A - SUPGAIN_ERROR_PARAM_B; + aecm->aecmCore->supGainErrParamDiffBD = SUPGAIN_ERROR_PARAM_B - SUPGAIN_ERROR_PARAM_D; + } else if (aecm->echoMode == 4) + { + aecm->aecmCore->supGain = SUPGAIN_DEFAULT << 1; + aecm->aecmCore->supGainOld = SUPGAIN_DEFAULT << 1; + aecm->aecmCore->supGainErrParamA = SUPGAIN_ERROR_PARAM_A << 1; + aecm->aecmCore->supGainErrParamD = SUPGAIN_ERROR_PARAM_D << 1; + aecm->aecmCore->supGainErrParamDiffAB = (SUPGAIN_ERROR_PARAM_A << 1) + - (SUPGAIN_ERROR_PARAM_B << 1); + aecm->aecmCore->supGainErrParamDiffBD = (SUPGAIN_ERROR_PARAM_B << 1) + - (SUPGAIN_ERROR_PARAM_D << 1); + } + + return 0; +} + +/* + * Reads back the current configuration into *config: cngMode is taken from + * aecm->aecmCore and echoMode from the instance itself. + * + * Returns 0 on success and -1 when aecmInst is NULL, when config is NULL + * (lastError = AECM_NULL_POINTER_ERROR) or when initFlag != kInitCheck + * (lastError = AECM_UNINITIALIZED_ERROR). + */ +WebRtc_Word32 WebRtcAecm_get_config(void *aecmInst, AecmConfig *config) +{ + aecmob_t *aecm = aecmInst; + + if (aecm == NULL) + { + return -1; + } + + if (config == NULL) + { + aecm->lastError = AECM_NULL_POINTER_ERROR; + return -1; + } + + if (aecm->initFlag != kInitCheck) + { + aecm->lastError = AECM_UNINITIALIZED_ERROR; + return -1; + } + + config->cngMode = aecm->aecmCore->cngMode; + config->echoMode = aecm->echoMode; + + return 0; +} + +/* + * Copies the version string, including its terminating NUL, into versionStr. + * + * Returns 0 on success and -1 when versionStr is NULL or when len is smaller + * than the string length plus the terminator. + */ +WebRtc_Word32 WebRtcAecm_get_version(WebRtc_Word8 *versionStr, WebRtc_Word16 len) +{ + const char version[] = "AECM 1.2.0"; + const short versionLen = (short)strlen(version) + 1; // +1 for null-termination + + if (versionStr == NULL) + { + return -1; + } + + if (versionLen > len) + { + return -1; + } + + strncpy(versionStr, version, versionLen); + return 0; +} + +/* + * Returns the lastError value stored in the instance, or -1 when aecmInst + * is NULL. + */ +WebRtc_Word32 WebRtcAecm_get_error_code(void *aecmInst) +{ + aecmob_t *aecm = aecmInst; + + if (aecm == NULL) + { + return -1; + } + + return aecm->lastError; +} + +/* + * Updates the delay estimate from the reported sound card buffer size. + * + * delayNew is msInSndCardBuf converted to samples (kSampMsNb * mult) minus + * the number of samples reported for farendBuf; when it is below FRAME_LEN, + * WebRtcApm_FlushBuffer() is called with FRAME_LEN and delayNew is raised by + * FRAME_LEN. filtDelay is then smoothed towards delayNew and knownDelay is + * moved only after the difference has persisted for more than 25 calls. + * Always returns 0. + */ +static int WebRtcAecm_EstBufDelay(aecmob_t *aecm, short msInSndCardBuf) +{ + short delayNew, nSampFar, nSampSndCard; + short diff; + + nSampFar = 
WebRtcApm_get_buffer_size(aecm->farendBuf); + nSampSndCard = msInSndCardBuf * kSampMsNb * aecm->aecmCore->mult; + + delayNew = nSampSndCard - nSampFar; + + if (delayNew < FRAME_LEN) + { + WebRtcApm_FlushBuffer(aecm->farendBuf, FRAME_LEN); + delayNew += FRAME_LEN; + } + + /* Smoothed delay: (8 * previous + 2 * new) / 10, floored at 0 */ + aecm->filtDelay = WEBRTC_SPL_MAX(0, (8 * aecm->filtDelay + 2 * delayNew) / 10); + + /* timeForDelayChange counts successive calls in which diff stays above 224, + * or below 96 while knownDelay > 0; it restarts when the previous diff was + * on the opposite side and is cleared in every other case. */ + diff = aecm->filtDelay - aecm->knownDelay; + if (diff > 224) + { + if (aecm->lastDelayDiff < 96) + { + aecm->timeForDelayChange = 0; + } else + { + aecm->timeForDelayChange++; + } + } else if (diff < 96 && aecm->knownDelay > 0) + { + if (aecm->lastDelayDiff > 224) + { + aecm->timeForDelayChange = 0; + } else + { + aecm->timeForDelayChange++; + } + } else + { + aecm->timeForDelayChange = 0; + } + aecm->lastDelayDiff = diff; + + /* Once the counter exceeds 25, move knownDelay to filtDelay - 160 (floored at 0) */ + if (aecm->timeForDelayChange > 25) + { + aecm->knownDelay = WEBRTC_SPL_MAX((int)aecm->filtDelay - 160, 0); + } + return 0; +} + +/* + * Stuffs the far-end buffer when the sound card delay exceeds what it can hold. + * + * When (msInSndCardBuf in samples) - (samples in farendBuf) is larger than + * FAR_BUF_LEN - FRAME_LEN * mult, WebRtcApm_StuffBuffer() is called with + * (nSampSndCard >> 1) - nSampFar samples, clamped to the range + * [FRAME_LEN, 10 * FRAME_LEN], and delayChange is set to 1. + * Always returns 0. + */ +static int WebRtcAecm_DelayComp(aecmob_t *aecm) +{ + int nSampFar, nSampSndCard, delayNew, nSampAdd; + const int maxStuffSamp = 10 * FRAME_LEN; + + nSampFar = WebRtcApm_get_buffer_size(aecm->farendBuf); + nSampSndCard = aecm->msInSndCardBuf * kSampMsNb * aecm->aecmCore->mult; + delayNew = nSampSndCard - nSampFar; + + if (delayNew > FAR_BUF_LEN - FRAME_LEN * aecm->aecmCore->mult) + { + // The difference of the buffer sizes is larger than the maximum + // allowed known delay. Compensate by stuffing the buffer.
+ nSampAdd = (int)(WEBRTC_SPL_MAX(((nSampSndCard >> 1) - nSampFar), + FRAME_LEN)); + nSampAdd = WEBRTC_SPL_MIN(nSampAdd, maxStuffSamp); + + WebRtcApm_StuffBuffer(aecm->farendBuf, nSampAdd); + aecm->delayChange = 1; // the delay needs to be updated + } + + return 0; +} diff --git a/src/modules/audio_processing/agc/main/interface/gain_control.h b/src/modules/audio_processing/agc/main/interface/gain_control.h new file mode 100644 index 0000000000..2893331faf --- /dev/null +++ b/src/modules/audio_processing/agc/main/interface/gain_control.h @@ -0,0 +1,273 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MAIN_INTERFACE_GAIN_CONTROL_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MAIN_INTERFACE_GAIN_CONTROL_H_ + +#include "typedefs.h" + +// Errors +#define AGC_UNSPECIFIED_ERROR 18000 +#define AGC_UNSUPPORTED_FUNCTION_ERROR 18001 +#define AGC_UNINITIALIZED_ERROR 18002 +#define AGC_NULL_POINTER_ERROR 18003 +#define AGC_BAD_PARAMETER_ERROR 18004 + +// Warnings +#define AGC_BAD_PARAMETER_WARNING 18050 + +enum +{ + kAgcModeUnchanged, + kAgcModeAdaptiveAnalog, + kAgcModeAdaptiveDigital, + kAgcModeFixedDigital +}; + +enum +{ + kAgcFalse = 0, + kAgcTrue +}; + +typedef struct +{ + WebRtc_Word16 targetLevelDbfs; // default 3 (-3 dBOv) + WebRtc_Word16 compressionGaindB; // default 9 dB + WebRtc_UWord8 limiterEnable; // default kAgcTrue (on) +} WebRtcAgc_config_t; + +#if defined(__cplusplus) +extern "C" +{ +#endif + +/* + * This function processes a 10/20ms frame of far-end speech to determine + * if there is active speech. 
Far-end speech length can be either 10ms or + * 20ms. The length of the input speech vector must be given in samples + * (80/160 when FS=8000, and 160/320 when FS=16000 or FS=32000). + * + * Input: + * - agcInst : AGC instance. + * - inFar : Far-end input speech vector (10 or 20ms) + * - samples : Number of samples in input vector + * + * Return value: + * : 0 - Normal operation. + * : -1 - Error + */ +int WebRtcAgc_AddFarend(void* agcInst, + const WebRtc_Word16* inFar, + WebRtc_Word16 samples); + +/* + * This function processes a 10/20ms frame of microphone speech to determine + * if there is active speech. Microphone speech length can be either 10ms or + * 20ms. The length of the input speech vector must be given in samples + * (80/160 when FS=8000, and 160/320 when FS=16000 or FS=32000). For very low + * input levels, the input signal is increased in level by multiplying and + * overwriting the samples in inMic[]. + * + * This function should be called before any further processing of the + * near-end microphone signal. + * + * Input: + * - agcInst : AGC instance. + * - inMic : Microphone input speech vector (10 or 20 ms) for + * L band + * - inMic_H : Microphone input speech vector (10 or 20 ms) for + * H band + * - samples : Number of samples in input vector + * + * Return value: + * : 0 - Normal operation. + * : -1 - Error + */ +int WebRtcAgc_AddMic(void* agcInst, + WebRtc_Word16* inMic, + WebRtc_Word16* inMic_H, + WebRtc_Word16 samples); + +/* + * This function replaces the analog microphone with a virtual one. + * It is a digital gain applied to the input signal and is used in the + * agcAdaptiveDigital mode where no microphone level is adjustable. + * Microphone speech length can be either 10ms or 20ms. The length of the + * input speech vector must be given in samples (80/160 when FS=8000, and + * 160/320 when FS=16000 or FS=32000). + * + * Input: + * - agcInst : AGC instance. 
+ * - inMic : Microphone input speech vector for (10 or 20 ms) + * L band + * - inMic_H : Microphone input speech vector for (10 or 20 ms) + * H band + * - samples : Number of samples in input vector + * - micLevelIn : Input level of microphone (static) + * + * Output: + * - inMic : Microphone output after processing (L band) + * - inMic_H : Microphone output after processing (H band) + * - micLevelOut : Adjusted microphone level after processing + * + * Return value: + * : 0 - Normal operation. + * : -1 - Error + */ +int WebRtcAgc_VirtualMic(void* agcInst, + WebRtc_Word16* inMic, + WebRtc_Word16* inMic_H, + WebRtc_Word16 samples, + WebRtc_Word32 micLevelIn, + WebRtc_Word32* micLevelOut); + +/* + * This function processes a 10/20ms frame and adjusts (normalizes) the gain + * both analog and digitally. The gain adjustments are done only during + * active periods of speech. The input speech length can be either 10ms or + * 20ms and the output is of the same length. The length of the speech + * vectors must be given in samples (80/160 when FS=8000, and 160/320 when + * FS=16000 or FS=32000). The echo parameter can be used to ensure the AGC will + * not adjust upward in the presence of echo. + * + * This function should be called after processing the near-end microphone + * signal, in any case after any echo cancellation. + * + * Input: + * - agcInst : AGC instance + * - inNear : Near-end input speech vector (10 or 20 ms) for + * L band + * - inNear_H : Near-end input speech vector (10 or 20 ms) for + * H band + * - samples : Number of samples in input/output vector + * - inMicLevel : Current microphone volume level + * - echo : Set to 0 if the signal passed to add_mic is + * almost certainly free of echo; otherwise set + * to 1. If you have no information regarding echo + * set to 0. + * + * Output: + * - outMicLevel : Adjusted microphone volume level + * - out : Gain-adjusted near-end speech vector (L band) + * : May be the same vector as the input. 
+ * - out_H : Gain-adjusted near-end speech vector (H band) + * - saturationWarning : A returned value of 1 indicates a saturation event + * has occurred and the volume cannot be further + * reduced. Otherwise will be set to 0. + * + * Return value: + * : 0 - Normal operation. + * : -1 - Error + */ +int WebRtcAgc_Process(void* agcInst, + const WebRtc_Word16* inNear, + const WebRtc_Word16* inNear_H, + WebRtc_Word16 samples, + WebRtc_Word16* out, + WebRtc_Word16* out_H, + WebRtc_Word32 inMicLevel, + WebRtc_Word32* outMicLevel, + WebRtc_Word16 echo, + WebRtc_UWord8* saturationWarning); + +/* + * This function sets the config parameters (targetLevelDbfs, + * compressionGaindB and limiterEnable). + * + * Input: + * - agcInst : AGC instance + * - config : config struct + * + * Output: + * + * Return value: + * : 0 - Normal operation. + * : -1 - Error + */ +int WebRtcAgc_set_config(void* agcInst, WebRtcAgc_config_t config); + +/* + * This function returns the config parameters (targetLevelDbfs, + * compressionGaindB and limiterEnable). + * + * Input: + * - agcInst : AGC instance + * + * Output: + * - config : config struct + * + * Return value: + * : 0 - Normal operation. + * : -1 - Error + */ +int WebRtcAgc_get_config(void* agcInst, WebRtcAgc_config_t* config); + +/* + * This function creates an AGC instance, which will contain the state + * information for one (duplex) channel. + * + * Return value : AGC instance if successful + * : 0 (i.e., a NULL pointer) if unsuccessful + * + * NOTE(review): the prototype below returns int and takes void **agcInst, + * so the instance is presumably handed back through *agcInst and the int is + * a status code; the wording above does not match that signature - confirm + * against the implementation. + */ +int WebRtcAgc_Create(void **agcInst); + +/* + * This function frees the AGC instance created at the beginning. + * + * Input: + * - agcInst : AGC instance. + * + * Return value : 0 - Ok + * -1 - Error + */ +int WebRtcAgc_Free(void *agcInst); + +/* + * This function initializes an AGC instance. + * + * Input: + * - agcInst : AGC instance.
+ * - minLevel : Minimum possible mic level + * - maxLevel : Maximum possible mic level + * - agcMode : 0 - Unchanged + * : 1 - Adaptive Analog Automatic Gain Control -3dBOv + * : 2 - Adaptive Digital Automatic Gain Control -3dBOv + * : 3 - Fixed Digital Gain 0dB + * - fs : Sampling frequency + * + * Return value : 0 - Ok + * -1 - Error + */ +int WebRtcAgc_Init(void *agcInst, + WebRtc_Word32 minLevel, + WebRtc_Word32 maxLevel, + WebRtc_Word16 agcMode, + WebRtc_UWord32 fs); + +/* + * This function returns a text string containing the version. + * + * Input: + * - length : Length of the char array pointed to by versionStr + * Output: + * - versionStr : Pointer to a char array to which the version + * : string will be copied. + * + * Return value : 0 - OK + * -1 - Error + */ +int WebRtcAgc_Version(WebRtc_Word8 *versionStr, WebRtc_Word16 length); + +#if defined(__cplusplus) +} +#endif + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MAIN_INTERFACE_GAIN_CONTROL_H_ diff --git a/src/modules/audio_processing/agc/main/matlab/getGains.m b/src/modules/audio_processing/agc/main/matlab/getGains.m new file mode 100644 index 0000000000..e0234b8593 --- /dev/null +++ b/src/modules/audio_processing/agc/main/matlab/getGains.m @@ -0,0 +1,32 @@ +% Outputs a file for testing purposes. +% +% Adjust the following parameters to suit. Their purpose becomes more clear on +% viewing the gain plots.
+% MaxGain: Max gain in dB +% MinGain: Min gain at overload (0 dBov) in dB +% CompRatio: Compression ratio, essentially determines the slope of the gain +% function between the max and min gains +% Knee: The smoothness of the transition to max gain (smaller is smoother) +MaxGain = 5; MinGain = 0; CompRatio = 3; Knee = 1; + +% Compute gains +zeros = 0:31; lvl = 2.^(1-zeros); +A = -10*log10(lvl) * (CompRatio - 1) / CompRatio; +B = MaxGain - MinGain; +gains = round(2^16*10.^(0.05 * (MinGain + B * ( log(exp(-Knee*A)+exp(-Knee*B)) - log(1+exp(-Knee*B)) ) / log(1/(1+exp(Knee*B)))))); +fprintf(1, '\t%i, %i, %i, %i,\n', gains); + +% Save gains to file +fid = fopen('gains', 'wb'); +if fid == -1 + % Report the same literal name that was passed to fopen above; the script + % defines no variable holding the file name. + error(sprintf('Unable to open file %s', 'gains')); + return +end +fwrite(fid, gains, 'int32'); +fclose(fid); + +% Plotting +in = 10*log10(lvl); out = 20*log10(gains/65536); +subplot(121); plot(in, out); axis([-60, 0, -5, 30]); grid on; xlabel('Input (dB)'); ylabel('Gain (dB)'); +subplot(122); plot(in, in+out); axis([-60, 0, -60, 10]); grid on; xlabel('Input (dB)'); ylabel('Output (dB)'); +zoom on; diff --git a/src/modules/audio_processing/agc/main/source/Android.mk b/src/modules/audio_processing/agc/main/source/Android.mk new file mode 100644 index 0000000000..e045839147 --- /dev/null +++ b/src/modules/audio_processing/agc/main/source/Android.mk @@ -0,0 +1,49 @@ +# This file is generated by gyp; do not edit. This means you! + +LOCAL_PATH := $(call my-dir) + +include $(CLEAR_VARS) + +LOCAL_ARM_MODE := arm +LOCAL_MODULE_CLASS := STATIC_LIBRARIES +LOCAL_MODULE := libwebrtc_agc +LOCAL_MODULE_TAGS := optional +LOCAL_GENERATED_SOURCES := +LOCAL_SRC_FILES := analog_agc.c \ + digital_agc.c + +# Flags passed to both C and C++ files.
+MY_CFLAGS := +MY_CFLAGS_C := +MY_DEFS := '-DNO_TCMALLOC' \ + '-DNO_HEAPCHECKER' \ + '-DWEBRTC_TARGET_PC' \ + '-DWEBRTC_LINUX' \ + '-DWEBRTC_THREAD_RR' +ifeq ($(TARGET_ARCH),arm) +MY_DEFS += \ + '-DWEBRTC_ANDROID' \ + '-DANDROID' +endif +LOCAL_CFLAGS := $(MY_CFLAGS_C) $(MY_CFLAGS) $(MY_DEFS) + +# Include paths placed before CFLAGS/CPPFLAGS +LOCAL_C_INCLUDES := $(LOCAL_PATH)/../../../../.. \ + $(LOCAL_PATH)/../interface \ + $(LOCAL_PATH)/../../../../../common_audio/signal_processing_library/main/interface + +# Flags passed to only C++ (and not C) files. +LOCAL_CPPFLAGS := +LOCAL_LDFLAGS := + +LOCAL_STATIC_LIBRARIES := +# Duplicate the static libraries to fix circular references +LOCAL_STATIC_LIBRARIES += $(LOCAL_STATIC_LIBRARIES) + +LOCAL_SHARED_LIBRARIES := libcutils \ + libdl \ + libstlport +LOCAL_ADDITIONAL_DEPENDENCIES := + +include external/stlport/libstlport.mk +include $(BUILD_STATIC_LIBRARY) diff --git a/src/modules/audio_processing/agc/main/source/agc.gyp b/src/modules/audio_processing/agc/main/source/agc.gyp new file mode 100644 index 0000000000..e28a4c8c68 --- /dev/null +++ b/src/modules/audio_processing/agc/main/source/agc.gyp @@ -0,0 +1,43 @@ +# Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. 
+ +{ + 'includes': [ + '../../../../../common_settings.gypi', # Common settings + ], + 'targets': [ + { + 'target_name': 'agc', + 'type': '<(library)', + 'dependencies': [ + '../../../../../common_audio/signal_processing_library/main/source/spl.gyp:spl', + ], + 'include_dirs': [ + '../interface', + ], + 'direct_dependent_settings': { + 'include_dirs': [ + '../interface', + ], + }, + 'sources': [ + '../interface/gain_control.h', + 'analog_agc.c', + 'analog_agc.h', + 'digital_agc.c', + 'digital_agc.h', + ], + }, + ], +} + +# Local Variables: +# tab-width:2 +# indent-tabs-mode:nil +# End: +# vim: set expandtab tabstop=2 shiftwidth=2: diff --git a/src/modules/audio_processing/agc/main/source/analog_agc.c b/src/modules/audio_processing/agc/main/source/analog_agc.c new file mode 100644 index 0000000000..dfb7adc621 --- /dev/null +++ b/src/modules/audio_processing/agc/main/source/analog_agc.c @@ -0,0 +1,1700 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* analog_agc.c + * + * Using a feedback system, determines an appropriate analog volume level + * given an input signal and current volume level. Targets a conservative + * signal level and is intended for use with a digital AGC to apply + * additional gain. 
+ * + */ + +#include <assert.h> +#include <stdlib.h> +#ifdef AGC_DEBUG //test log +#include <stdio.h> +#endif +#include "analog_agc.h" + +/* The slope of in Q13*/ +static const WebRtc_Word16 kSlope1[8] = {21793, 12517, 7189, 4129, 2372, 1362, 472, 78}; + +/* The offset in Q14 */ +static const WebRtc_Word16 kOffset1[8] = {25395, 23911, 22206, 20737, 19612, 18805, 17951, + 17367}; + +/* The slope of in Q13*/ +static const WebRtc_Word16 kSlope2[8] = {2063, 1731, 1452, 1218, 1021, 857, 597, 337}; + +/* The offset in Q14 */ +static const WebRtc_Word16 kOffset2[8] = {18432, 18379, 18290, 18177, 18052, 17920, 17670, + 17286}; + +static const WebRtc_Word16 kMuteGuardTimeMs = 8000; +static const WebRtc_Word16 kInitCheck = 42; + +/* Default settings if config is not used */ +#define AGC_DEFAULT_TARGET_LEVEL 3 +#define AGC_DEFAULT_COMP_GAIN 9 +/* This is the target level for the analog part in ENV scale. To convert to RMS scale you + * have to add OFFSET_ENV_TO_RMS. + */ +#define ANALOG_TARGET_LEVEL 11 +#define ANALOG_TARGET_LEVEL_2 5 // ANALOG_TARGET_LEVEL / 2 +/* Offset between RMS scale (analog part) and ENV scale (digital part). This value actually + * varies with the FIXED_ANALOG_TARGET_LEVEL, hence we should in the future replace it with + * a table. + */ +#define OFFSET_ENV_TO_RMS 9 +/* The reference input level at which the digital part gives an output of targetLevelDbfs + * (desired level) if we have no compression gain. This level should be set high enough not + * to compress the peaks due to the dynamics. + */ +#define DIGITAL_REF_AT_0_COMP_GAIN 4 +/* Speed of reference level decrease. 
+ */ +#define DIFF_REF_TO_ANALOG 5 + +#ifdef MIC_LEVEL_FEEDBACK +#define NUM_BLOCKS_IN_SAT_BEFORE_CHANGE_TARGET 7 +#endif +/* Size of analog gain table */ +#define GAIN_TBL_LEN 32 +/* Matlab code: + * fprintf(1, '\t%i, %i, %i, %i,\n', round(10.^(linspace(0,10,32)/20) * 2^12)); + */ +/* Q12 */ +static const WebRtc_UWord16 kGainTableAnalog[GAIN_TBL_LEN] = {4096, 4251, 4412, 4579, 4752, + 4932, 5118, 5312, 5513, 5722, 5938, 6163, 6396, 6638, 6889, 7150, 7420, 7701, 7992, + 8295, 8609, 8934, 9273, 9623, 9987, 10365, 10758, 11165, 11587, 12025, 12480, 12953}; + +/* Gain/Suppression tables for virtual Mic (in Q10) */ +static const WebRtc_UWord16 kGainTableVirtualMic[128] = {1052, 1081, 1110, 1141, 1172, 1204, + 1237, 1271, 1305, 1341, 1378, 1416, 1454, 1494, 1535, 1577, 1620, 1664, 1710, 1757, + 1805, 1854, 1905, 1957, 2010, 2065, 2122, 2180, 2239, 2301, 2364, 2428, 2495, 2563, + 2633, 2705, 2779, 2855, 2933, 3013, 3096, 3180, 3267, 3357, 3449, 3543, 3640, 3739, + 3842, 3947, 4055, 4166, 4280, 4397, 4517, 4640, 4767, 4898, 5032, 5169, 5311, 5456, + 5605, 5758, 5916, 6078, 6244, 6415, 6590, 6770, 6956, 7146, 7341, 7542, 7748, 7960, + 8178, 8402, 8631, 8867, 9110, 9359, 9615, 9878, 10148, 10426, 10711, 11004, 11305, + 11614, 11932, 12258, 12593, 12938, 13292, 13655, 14029, 14412, 14807, 15212, 15628, + 16055, 16494, 16945, 17409, 17885, 18374, 18877, 19393, 19923, 20468, 21028, 21603, + 22194, 22801, 23425, 24065, 24724, 25400, 26095, 26808, 27541, 28295, 29069, 29864, + 30681, 31520, 32382}; +static const WebRtc_UWord16 kSuppressionTableVirtualMic[128] = {1024, 1006, 988, 970, 952, + 935, 918, 902, 886, 870, 854, 839, 824, 809, 794, 780, 766, 752, 739, 726, 713, 700, + 687, 675, 663, 651, 639, 628, 616, 605, 594, 584, 573, 563, 553, 543, 533, 524, 514, + 505, 496, 487, 478, 470, 461, 453, 445, 437, 429, 421, 414, 406, 399, 392, 385, 378, + 371, 364, 358, 351, 345, 339, 333, 327, 321, 315, 309, 304, 298, 293, 288, 283, 278, + 273, 268, 263, 258, 254, 249, 244, 240, 236, 
232, 227, 223, 219, 215, 211, 208, 204, + 200, 197, 193, 190, 186, 183, 180, 176, 173, 170, 167, 164, 161, 158, 155, 153, 150, + 147, 145, 142, 139, 137, 134, 132, 130, 127, 125, 123, 121, 118, 116, 114, 112, 110, + 108, 106, 104, 102}; + +/* Table for target energy levels. Values in Q(-7) + * Matlab code + * targetLevelTable = fprintf('%d,\t%d,\t%d,\t%d,\n', round((32767*10.^(-(0:63)'/20)).^2*16/2^7) */ + +static const WebRtc_Word32 kTargetLevelTable[64] = {134209536, 106606424, 84680493, 67264106, + 53429779, 42440782, 33711911, 26778323, 21270778, 16895980, 13420954, 10660642, + 8468049, 6726411, 5342978, 4244078, 3371191, 2677832, 2127078, 1689598, 1342095, + 1066064, 846805, 672641, 534298, 424408, 337119, 267783, 212708, 168960, 134210, + 106606, 84680, 67264, 53430, 42441, 33712, 26778, 21271, 16896, 13421, 10661, 8468, + 6726, 5343, 4244, 3371, 2678, 2127, 1690, 1342, 1066, 847, 673, 534, 424, 337, 268, + 213, 169, 134, 107, 85, 67}; + +int WebRtcAgc_AddMic(void *state, WebRtc_Word16 *in_mic, WebRtc_Word16 *in_mic_H, + WebRtc_Word16 samples) +{ + WebRtc_Word32 nrg, max_nrg, sample, tmp32; + WebRtc_Word32 *ptr; + WebRtc_UWord16 targetGainIdx, gain; + WebRtc_Word16 i, n, L, M, subFrames, tmp16, tmp_speech[16]; + Agc_t *stt; + stt = (Agc_t *)state; + + //default/initial values corresponding to 10ms for wb and swb + M = 10; + L = 16; + subFrames = 160; + + if (stt->fs == 8000) + { + if (samples == 80) + { + subFrames = 80; + M = 10; + L = 8; + } else if (samples == 160) + { + subFrames = 80; + M = 20; + L = 8; + } else + { +#ifdef AGC_DEBUG //test log + fprintf(stt->fpt, + "AGC->add_mic, frame %d: Invalid number of samples\n\n", + (stt->fcount + 1)); +#endif + return -1; + } + } else if (stt->fs == 16000) + { + if (samples == 160) + { + subFrames = 160; + M = 10; + L = 16; + } else if (samples == 320) + { + subFrames = 160; + M = 20; + L = 16; + } else + { +#ifdef AGC_DEBUG //test log + fprintf(stt->fpt, + "AGC->add_mic, frame %d: Invalid number of 
samples\n\n", + (stt->fcount + 1)); +#endif + return -1; + } + } else if (stt->fs == 32000) + { + /* SWB is processed as 160 sample for L and H bands */ + if (samples == 160) + { + subFrames = 160; + M = 10; + L = 16; + } else + { +#ifdef AGC_DEBUG + fprintf(stt->fpt, + "AGC->add_mic, frame %d: Invalid sample rate\n\n", + (stt->fcount + 1)); +#endif + return -1; + } + } + + /* Check for valid pointers based on sampling rate */ + if ((stt->fs == 32000) && (in_mic_H == NULL)) + { + return -1; + } + /* Check for valid pointer for low band */ + if (in_mic == NULL) + { + return -1; + } + + /* apply slowly varying digital gain */ + if (stt->micVol > stt->maxAnalog) + { + /* Q1 */ + tmp16 = (WebRtc_Word16)(stt->micVol - stt->maxAnalog); + tmp32 = WEBRTC_SPL_MUL_16_16(GAIN_TBL_LEN - 1, tmp16); + tmp16 = (WebRtc_Word16)(stt->maxLevel - stt->maxAnalog); + targetGainIdx = (WebRtc_UWord16)WEBRTC_SPL_DIV(tmp32, tmp16); + assert(targetGainIdx < GAIN_TBL_LEN); + + /* Increment through the table towards the target gain. + * If micVol drops below maxAnalog, we allow the gain + * to be dropped immediately. 
*/ + if (stt->gainTableIdx < targetGainIdx) + { + stt->gainTableIdx++; + } else if (stt->gainTableIdx > targetGainIdx) + { + stt->gainTableIdx--; + } + + /* Q12 */ + gain = kGainTableAnalog[stt->gainTableIdx]; + + for (i = 0; i < samples; i++) + { + // For lower band + tmp32 = WEBRTC_SPL_MUL_16_U16(in_mic[i], gain); + sample = WEBRTC_SPL_RSHIFT_W32(tmp32, 12); + if (sample > 32767) + { + in_mic[i] = 32767; + } else if (sample < -32768) + { + in_mic[i] = -32768; + } else + { + in_mic[i] = (WebRtc_Word16)sample; + } + + // For higher band + if (stt->fs == 32000) + { + tmp32 = WEBRTC_SPL_MUL_16_U16(in_mic_H[i], gain); + sample = WEBRTC_SPL_RSHIFT_W32(tmp32, 12); + if (sample > 32767) + { + in_mic_H[i] = 32767; + } else if (sample < -32768) + { + in_mic_H[i] = -32768; + } else + { + in_mic_H[i] = (WebRtc_Word16)sample; + } + } + } + } else + { + stt->gainTableIdx = 0; + } + + /* compute envelope */ + if ((M == 10) && (stt->inQueue > 0)) + { + ptr = stt->env[1]; + } else + { + ptr = stt->env[0]; + } + + for (i = 0; i < M; i++) + { + /* iterate over samples */ + max_nrg = 0; + for (n = 0; n < L; n++) + { + nrg = WEBRTC_SPL_MUL_16_16(in_mic[i * L + n], in_mic[i * L + n]); + if (nrg > max_nrg) + { + max_nrg = nrg; + } + } + ptr[i] = max_nrg; + } + + /* compute energy */ + if ((M == 10) && (stt->inQueue > 0)) + { + ptr = stt->Rxx16w32_array[1]; + } else + { + ptr = stt->Rxx16w32_array[0]; + } + + for (i = 0; i < WEBRTC_SPL_RSHIFT_W16(M, 1); i++) + { + if (stt->fs == 16000) + { + WebRtcSpl_DownsampleBy2(&in_mic[i * 32], 32, tmp_speech, stt->filterState); + } else + { + memcpy(tmp_speech, &in_mic[i * 16], 16 * sizeof(short)); + } + /* Compute energy in blocks of 16 samples */ + ptr[i] = WebRtcSpl_DotProductWithScale(tmp_speech, tmp_speech, 16, 4); + } + + /* update queue information */ + if ((stt->inQueue == 0) && (M == 10)) + { + stt->inQueue = 1; + } else + { + stt->inQueue = 2; + } + + /* call VAD (use low band only) */ + for (i = 0; i < samples; i += subFrames) + { + 
WebRtcAgc_ProcessVad(&stt->vadMic, &in_mic[i], subFrames); + } + + return 0; +} + +int WebRtcAgc_AddFarend(void *state, const WebRtc_Word16 *in_far, WebRtc_Word16 samples) +{ + WebRtc_Word32 errHandle = 0; + WebRtc_Word16 i, subFrames; + Agc_t *stt; + stt = (Agc_t *)state; + + if (stt == NULL) + { + return -1; + } + + if (stt->fs == 8000) + { + if ((samples != 80) && (samples != 160)) + { +#ifdef AGC_DEBUG //test log + fprintf(stt->fpt, + "AGC->add_far_end, frame %d: Invalid number of samples\n\n", + stt->fcount); +#endif + return -1; + } + subFrames = 80; + } else if (stt->fs == 16000) + { + if ((samples != 160) && (samples != 320)) + { +#ifdef AGC_DEBUG //test log + fprintf(stt->fpt, + "AGC->add_far_end, frame %d: Invalid number of samples\n\n", + stt->fcount); +#endif + return -1; + } + subFrames = 160; + } else if (stt->fs == 32000) + { + if ((samples != 160) && (samples != 320)) + { +#ifdef AGC_DEBUG //test log + fprintf(stt->fpt, + "AGC->add_far_end, frame %d: Invalid number of samples\n\n", + stt->fcount); +#endif + return -1; + } + subFrames = 160; + } else + { +#ifdef AGC_DEBUG //test log + fprintf(stt->fpt, + "AGC->add_far_end, frame %d: Invalid sample rate\n\n", + stt->fcount + 1); +#endif + return -1; + } + + for (i = 0; i < samples; i += subFrames) + { + errHandle += WebRtcAgc_AddFarendToDigital(&stt->digitalAgc, &in_far[i], subFrames); + } + + return errHandle; +} + +int WebRtcAgc_VirtualMic(void *agcInst, WebRtc_Word16 *in_near, WebRtc_Word16 *in_near_H, + WebRtc_Word16 samples, WebRtc_Word32 micLevelIn, + WebRtc_Word32 *micLevelOut) +{ + WebRtc_Word32 tmpFlt, micLevelTmp, gainIdx; + WebRtc_UWord16 gain; + WebRtc_Word16 ii; + Agc_t *stt; + + WebRtc_UWord32 nrg; + WebRtc_Word16 sampleCntr; + WebRtc_UWord32 frameNrg = 0; + WebRtc_UWord32 frameNrgLimit = 5500; + WebRtc_Word16 numZeroCrossing = 0; + const WebRtc_Word16 kZeroCrossingLowLim = 15; + const WebRtc_Word16 kZeroCrossingHighLim = 20; + + stt = (Agc_t *)agcInst; + + /* + * Before applying gain 
decide if this is a low-level signal. + * The idea is that digital AGC will not adapt to low-level + * signals. + */ + if (stt->fs != 8000) + { + frameNrgLimit = frameNrgLimit << 1; + } + + frameNrg = WEBRTC_SPL_MUL_16_16(in_near[0], in_near[0]); + for (sampleCntr = 1; sampleCntr < samples; sampleCntr++) + { + + // increment frame energy if it is less than the limit + // the correct value of the energy is not important + if (frameNrg < frameNrgLimit) + { + nrg = WEBRTC_SPL_MUL_16_16(in_near[sampleCntr], in_near[sampleCntr]); + frameNrg += nrg; + } + + // Count the zero crossings + numZeroCrossing += ((in_near[sampleCntr] ^ in_near[sampleCntr - 1]) < 0); + } + + if ((frameNrg < 500) || (numZeroCrossing <= 5)) + { + stt->lowLevelSignal = 1; + } else if (numZeroCrossing <= kZeroCrossingLowLim) + { + stt->lowLevelSignal = 0; + } else if (frameNrg <= frameNrgLimit) + { + stt->lowLevelSignal = 1; + } else if (numZeroCrossing >= kZeroCrossingHighLim) + { + stt->lowLevelSignal = 1; + } else + { + stt->lowLevelSignal = 0; + } + + micLevelTmp = WEBRTC_SPL_LSHIFT_W32(micLevelIn, stt->scale); + /* Set desired level */ + gainIdx = stt->micVol; + if (stt->micVol > stt->maxAnalog) + { + gainIdx = stt->maxAnalog; + } + if (micLevelTmp != stt->micRef) + { + /* Something has happened with the physical level, restart. */ + stt->micRef = micLevelTmp; + stt->micVol = 127; + *micLevelOut = 127; + stt->micGainIdx = 127; + gainIdx = 127; + } + /* Pre-process the signal to emulate the microphone level. */ + /* Take one step at a time in the gain table. 
*/ + if (gainIdx > 127) + { + gain = kGainTableVirtualMic[gainIdx - 128]; + } else + { + gain = kSuppressionTableVirtualMic[127 - gainIdx]; + } + /* NOTE(review): the lookup above indexes kGainTableVirtualMic with + * gainIdx - 128 for gainIdx > 127, while the two post-clipping lookups in + * the loop below use gainIdx - 127 for gainIdx >= 127. With those offsets + * the first gainIdx-- after a clip selects the same kGainTableVirtualMic + * entry that was already in use - confirm which offset is intended. */ + for (ii = 0; ii < samples; ii++) + { + tmpFlt = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_16_U16(in_near[ii], gain), 10); + if (tmpFlt > 32767) + { + tmpFlt = 32767; + gainIdx--; + if (gainIdx >= 127) + { + gain = kGainTableVirtualMic[gainIdx - 127]; + } else + { + gain = kSuppressionTableVirtualMic[127 - gainIdx]; + } + } + if (tmpFlt < -32768) + { + tmpFlt = -32768; + gainIdx--; + if (gainIdx >= 127) + { + gain = kGainTableVirtualMic[gainIdx - 127]; + } else + { + gain = kSuppressionTableVirtualMic[127 - gainIdx]; + } + } + in_near[ii] = (WebRtc_Word16)tmpFlt; + if (stt->fs == 32000) + { + tmpFlt = WEBRTC_SPL_MUL_16_U16(in_near_H[ii], gain); + tmpFlt = WEBRTC_SPL_RSHIFT_W32(tmpFlt, 10); + if (tmpFlt > 32767) + { + tmpFlt = 32767; + } + if (tmpFlt < -32768) + { + tmpFlt = -32768; + } + in_near_H[ii] = (WebRtc_Word16)tmpFlt; + } + } + /* Set the level we (finally) used */ + stt->micGainIdx = gainIdx; +// *micLevelOut = stt->micGainIdx; + *micLevelOut = WEBRTC_SPL_RSHIFT_W32(stt->micGainIdx, stt->scale); + /* Add to Mic as if it was the output from a true microphone */ + if (WebRtcAgc_AddMic(agcInst, in_near, in_near_H, samples) != 0) + { + return -1; + } + return 0; +} + +void WebRtcAgc_UpdateAgcThresholds(Agc_t *stt) +{ + + WebRtc_Word16 tmp16; +#ifdef MIC_LEVEL_FEEDBACK + int zeros; + + if (stt->micLvlSat) + { + /* Lower the analog target level since we have reached its maximum */ + zeros = WebRtcSpl_NormW32(stt->Rxx160_LPw32); + stt->targetIdxOffset = WEBRTC_SPL_RSHIFT_W16((3 * zeros) - stt->targetIdx - 2, 2); + } +#endif + + /* Set analog target level in envelope dBOv scale */ + tmp16 = (DIFF_REF_TO_ANALOG * stt->compressionGaindB) + ANALOG_TARGET_LEVEL_2; + tmp16 = WebRtcSpl_DivW32W16ResW16((WebRtc_Word32)tmp16, ANALOG_TARGET_LEVEL); + stt->analogTarget = DIGITAL_REF_AT_0_COMP_GAIN + tmp16; + if (stt->analogTarget < 
DIGITAL_REF_AT_0_COMP_GAIN) + { + stt->analogTarget = DIGITAL_REF_AT_0_COMP_GAIN; + } + if (stt->agcMode == kAgcModeFixedDigital) + { + /* Adjust for different parameter interpretation in FixedDigital mode */ + stt->analogTarget = stt->compressionGaindB; + } +#ifdef MIC_LEVEL_FEEDBACK + stt->analogTarget += stt->targetIdxOffset; +#endif + /* Since the offset between RMS and ENV is not constant, we should make this into a + * table, but for now, we'll stick with a constant, tuned for the chosen analog + * target level. + */ + stt->targetIdx = ANALOG_TARGET_LEVEL + OFFSET_ENV_TO_RMS; +#ifdef MIC_LEVEL_FEEDBACK + stt->targetIdx += stt->targetIdxOffset; +#endif + /* Analog adaptation limits */ + /* analogTargetLevel = round((32767*10^(-targetIdx/20))^2*16/2^7) */ + stt->analogTargetLevel = RXX_BUFFER_LEN * kTargetLevelTable[stt->targetIdx]; /* ex. -20 dBov */ + stt->startUpperLimit = RXX_BUFFER_LEN * kTargetLevelTable[stt->targetIdx - 1];/* -19 dBov */ + stt->startLowerLimit = RXX_BUFFER_LEN * kTargetLevelTable[stt->targetIdx + 1];/* -21 dBov */ + stt->upperPrimaryLimit = RXX_BUFFER_LEN * kTargetLevelTable[stt->targetIdx - 2];/* -18 dBov */ + stt->lowerPrimaryLimit = RXX_BUFFER_LEN * kTargetLevelTable[stt->targetIdx + 2];/* -22 dBov */ + stt->upperSecondaryLimit = RXX_BUFFER_LEN * kTargetLevelTable[stt->targetIdx - 5];/* -15 dBov */ + stt->lowerSecondaryLimit = RXX_BUFFER_LEN * kTargetLevelTable[stt->targetIdx + 5];/* -25 dBov */ + stt->upperLimit = stt->startUpperLimit; + stt->lowerLimit = stt->startLowerLimit; +} + +void WebRtcAgc_SaturationCtrl(Agc_t *stt, WebRtc_UWord8 *saturated, WebRtc_Word32 *env) +{ + WebRtc_Word16 i, tmpW16; + + /* Check if the signal is saturated */ + for (i = 0; i < 10; i++) + { + tmpW16 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(env[i], 20); + if (tmpW16 > 875) + { + stt->envSum += tmpW16; + } + } + + if (stt->envSum > 25000) + { + *saturated = 1; + stt->envSum = 0; + } + + /* stt->envSum *= 0.99; */ + stt->envSum = 
(WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(stt->envSum, + (WebRtc_Word16)32440, 15); +} + +void WebRtcAgc_ZeroCtrl(Agc_t *stt, WebRtc_Word32 *inMicLevel, WebRtc_Word32 *env) +{ + WebRtc_Word16 i; + WebRtc_Word32 tmp32 = 0; + WebRtc_Word32 midVal; + + /* Is the input signal zero? */ + for (i = 0; i < 10; i++) + { + tmp32 += env[i]; + } + + /* Each block is allowed to have a few non-zero + * samples. + */ + if (tmp32 < 500) + { + stt->msZero += 10; + } else + { + stt->msZero = 0; + } + + if (stt->muteGuardMs > 0) + { + stt->muteGuardMs -= 10; + } + + if (stt->msZero > 500) + { + stt->msZero = 0; + + /* Increase microphone level only if it's less than 50% */ + midVal = WEBRTC_SPL_RSHIFT_W32(stt->maxAnalog + stt->minLevel + 1, 1); + if (*inMicLevel < midVal) + { + /* *inMicLevel *= 1.1; */ + tmp32 = WEBRTC_SPL_MUL(1126, *inMicLevel); + *inMicLevel = WEBRTC_SPL_RSHIFT_W32(tmp32, 10); + /* Reduces risk of a muted mic repeatedly triggering excessive levels due + * to zero signal detection. */ + *inMicLevel = WEBRTC_SPL_MIN(*inMicLevel, stt->zeroCtrlMax); + stt->micVol = *inMicLevel; + } + +#ifdef AGC_DEBUG //test log + fprintf(stt->fpt, + "\t\tAGC->zeroCntrl, frame %d: 500 ms under threshold, micVol:\n", + stt->fcount, stt->micVol); +#endif + + stt->activeSpeech = 0; + stt->Rxx16_LPw32Max = 0; + + /* The AGC has a tendency (due to problems with the VAD parameters), to + * vastly increase the volume after a muting event. This timer prevents + * upwards adaptation for a short period. */ + stt->muteGuardMs = kMuteGuardTimeMs; + } +} + +void WebRtcAgc_SpeakerInactiveCtrl(Agc_t *stt) +{ + /* Check if the near end speaker is inactive. + * If that is the case the VAD threshold is + * increased since the VAD speech model gets + * more sensitive to any sound after a long + * silence. 
+ */ + + WebRtc_Word32 tmp32; + WebRtc_Word16 vadThresh; + + if (stt->vadMic.stdLongTerm < 2500) + { + stt->vadThreshold = 1500; + } else + { + vadThresh = kNormalVadThreshold; + if (stt->vadMic.stdLongTerm < 4500) + { + /* Scale between min and max threshold */ + vadThresh += WEBRTC_SPL_RSHIFT_W16(4500 - stt->vadMic.stdLongTerm, 1); + } + + /* stt->vadThreshold = (31 * stt->vadThreshold + vadThresh) / 32; */ + tmp32 = (WebRtc_Word32)vadThresh; + tmp32 += WEBRTC_SPL_MUL_16_16((WebRtc_Word16)31, stt->vadThreshold); + stt->vadThreshold = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 5); + } +} + +void WebRtcAgc_ExpCurve(WebRtc_Word16 volume, WebRtc_Word16 *index) +{ + // volume in Q14 + // index in [0-7] + /* 8 different curves */ + if (volume > 5243) + { + if (volume > 7864) + { + if (volume > 12124) + { + *index = 7; + } else + { + *index = 6; + } + } else + { + if (volume > 6554) + { + *index = 5; + } else + { + *index = 4; + } + } + } else + { + if (volume > 2621) + { + if (volume > 3932) + { + *index = 3; + } else + { + *index = 2; + } + } else + { + if (volume > 1311) + { + *index = 1; + } else + { + *index = 0; + } + } + } +} + +WebRtc_Word32 WebRtcAgc_ProcessAnalog(void *state, WebRtc_Word32 inMicLevel, + WebRtc_Word32 *outMicLevel, + WebRtc_Word16 vadLogRatio, + WebRtc_Word16 echo, WebRtc_UWord8 *saturationWarning) +{ + WebRtc_UWord32 tmpU32; + WebRtc_Word32 Rxx16w32, tmp32; + WebRtc_Word32 inMicLevelTmp, lastMicVol; + WebRtc_Word16 i; + WebRtc_UWord8 saturated = 0; + Agc_t *stt; + + stt = (Agc_t *)state; + inMicLevelTmp = WEBRTC_SPL_LSHIFT_W32(inMicLevel, stt->scale); + + if (inMicLevelTmp > stt->maxAnalog) + { +#ifdef AGC_DEBUG //test log + fprintf(stt->fpt, "\tAGC->ProcessAnalog, frame %d: micLvl > maxAnalog\n", stt->fcount); +#endif + return -1; + } else if (inMicLevelTmp < stt->minLevel) + { +#ifdef AGC_DEBUG //test log + fprintf(stt->fpt, "\tAGC->ProcessAnalog, frame %d: micLvl < minLevel\n", stt->fcount); +#endif + return -1; + } + + if (stt->firstCall 
== 0) + { + WebRtc_Word32 tmpVol; + stt->firstCall = 1; + tmp32 = WEBRTC_SPL_RSHIFT_W32((stt->maxLevel - stt->minLevel) * (WebRtc_Word32)51, 9); + tmpVol = (stt->minLevel + tmp32); + + /* If the mic level is very low at start, increase it! */ + if ((inMicLevelTmp < tmpVol) && (stt->agcMode == kAgcModeAdaptiveAnalog)) + { + inMicLevelTmp = tmpVol; + } + stt->micVol = inMicLevelTmp; + } + + /* Set the mic level to the previous output value if there is digital input gain */ + if ((inMicLevelTmp == stt->maxAnalog) && (stt->micVol > stt->maxAnalog)) + { + inMicLevelTmp = stt->micVol; + } + + /* If the mic level was manually changed to a very low value raise it! */ + if ((inMicLevelTmp != stt->micVol) && (inMicLevelTmp < stt->minOutput)) + { + tmp32 = WEBRTC_SPL_RSHIFT_W32((stt->maxLevel - stt->minLevel) * (WebRtc_Word32)51, 9); + inMicLevelTmp = (stt->minLevel + tmp32); + stt->micVol = inMicLevelTmp; +#ifdef MIC_LEVEL_FEEDBACK + //stt->numBlocksMicLvlSat = 0; +#endif +#ifdef AGC_DEBUG //test log + fprintf(stt->fpt, + "\tAGC->ProcessAnalog, frame %d: micLvl < minLevel by manual decrease, raise vol\n", + stt->fcount); +#endif + } + + if (inMicLevelTmp != stt->micVol) + { + // Incoming level mismatch; update our level. + // This could be the case if the volume is changed manually, or if the + // sound device has a low volume resolution. + stt->micVol = inMicLevelTmp; + } + + if (inMicLevelTmp > stt->maxLevel) + { + // Always allow the user to raise the volume above the maxLevel. + stt->maxLevel = inMicLevelTmp; + } + + // Store last value here, after we've taken care of manual updates etc. + lastMicVol = stt->micVol; + + /* Checks if the signal is saturated. Also a check if individual samples + * are larger than 12000 is done. 
If they are the counter for increasing + * the volume level is set to -100ms + */ + WebRtcAgc_SaturationCtrl(stt, &saturated, stt->env[0]); + + /* The AGC is always allowed to lower the level if the signal is saturated */ + if (saturated == 1) + { + /* Lower the recording level + * Rxx160_LP is adjusted down because it is so slow it could + * cause the AGC to make wrong decisions. */ + /* stt->Rxx160_LPw32 *= 0.875; */ + stt->Rxx160_LPw32 = WEBRTC_SPL_MUL(WEBRTC_SPL_RSHIFT_W32(stt->Rxx160_LPw32, 3), 7); + + stt->zeroCtrlMax = stt->micVol; + + /* stt->micVol *= 0.903; */ + tmp32 = inMicLevelTmp - stt->minLevel; + tmpU32 = WEBRTC_SPL_UMUL(29591, (WebRtc_UWord32)(tmp32)); + stt->micVol = (WebRtc_Word32)WEBRTC_SPL_RSHIFT_U32(tmpU32, 15) + stt->minLevel; + if (stt->micVol > lastMicVol - 2) + { + stt->micVol = lastMicVol - 2; + } + inMicLevelTmp = stt->micVol; + +#ifdef AGC_DEBUG //test log + fprintf(stt->fpt, + "\tAGC->ProcessAnalog, frame %d: saturated, micVol = %d\n", + stt->fcount, stt->micVol); +#endif + + if (stt->micVol < stt->minOutput) + { + *saturationWarning = 1; + } + + /* Reset counter for decrease of volume level to avoid + * decreasing too much. The saturation control can still + * lower the level if needed. */ + stt->msTooHigh = -100; + + /* Enable the control mechanism to ensure that our measure, + * Rxx160_LP, is in the correct range. This must be done since + * the measure is very slow. */ + stt->activeSpeech = 0; + stt->Rxx16_LPw32Max = 0; + + /* Reset to initial values */ + stt->msecSpeechInnerChange = kMsecSpeechInner; + stt->msecSpeechOuterChange = kMsecSpeechOuter; + stt->changeToSlowMode = 0; + + stt->muteGuardMs = 0; + + stt->upperLimit = stt->startUpperLimit; + stt->lowerLimit = stt->startLowerLimit; +#ifdef MIC_LEVEL_FEEDBACK + //stt->numBlocksMicLvlSat = 0; +#endif + } + + /* Check if the input speech is zero. If so the mic volume + * is increased. 
On some computers the input is zero up as high + * level as 17% */ + WebRtcAgc_ZeroCtrl(stt, &inMicLevelTmp, stt->env[0]); + + /* Check if the near end speaker is inactive. + * If that is the case the VAD threshold is + * increased since the VAD speech model gets + * more sensitive to any sound after a long + * silence. + */ + WebRtcAgc_SpeakerInactiveCtrl(stt); + + for (i = 0; i < 5; i++) + { + /* Computed on blocks of 16 samples */ + + Rxx16w32 = stt->Rxx16w32_array[0][i]; + + /* Rxx160w32 in Q(-7) */ + tmp32 = WEBRTC_SPL_RSHIFT_W32(Rxx16w32 - stt->Rxx16_vectorw32[stt->Rxx16pos], 3); + stt->Rxx160w32 = stt->Rxx160w32 + tmp32; + stt->Rxx16_vectorw32[stt->Rxx16pos] = Rxx16w32; + + /* Circular buffer */ + stt->Rxx16pos = stt->Rxx16pos++; + if (stt->Rxx16pos == RXX_BUFFER_LEN) + { + stt->Rxx16pos = 0; + } + + /* Rxx16_LPw32 in Q(-4) */ + tmp32 = WEBRTC_SPL_RSHIFT_W32(Rxx16w32 - stt->Rxx16_LPw32, kAlphaShortTerm); + stt->Rxx16_LPw32 = (stt->Rxx16_LPw32) + tmp32; + + if (vadLogRatio > stt->vadThreshold) + { + /* Speech detected! */ + + /* Check if Rxx160_LP is in the correct range. If + * it is too high/low then we set it to the maximum of + * Rxx16_LPw32 during the first 200ms of speech. 
+ */ + if (stt->activeSpeech < 250) + { + stt->activeSpeech += 2; + + if (stt->Rxx16_LPw32 > stt->Rxx16_LPw32Max) + { + stt->Rxx16_LPw32Max = stt->Rxx16_LPw32; + } + } else if (stt->activeSpeech == 250) + { + stt->activeSpeech += 2; + tmp32 = WEBRTC_SPL_RSHIFT_W32(stt->Rxx16_LPw32Max, 3); + stt->Rxx160_LPw32 = WEBRTC_SPL_MUL(tmp32, RXX_BUFFER_LEN); + } + + tmp32 = WEBRTC_SPL_RSHIFT_W32(stt->Rxx160w32 - stt->Rxx160_LPw32, kAlphaLongTerm); + stt->Rxx160_LPw32 = stt->Rxx160_LPw32 + tmp32; + + if (stt->Rxx160_LPw32 > stt->upperSecondaryLimit) + { + stt->msTooHigh += 2; + stt->msTooLow = 0; + stt->changeToSlowMode = 0; + + if (stt->msTooHigh > stt->msecSpeechOuterChange) + { + stt->msTooHigh = 0; + + /* Lower the recording level */ + /* Multiply by 0.828125 which corresponds to decreasing ~0.8dB */ + tmp32 = WEBRTC_SPL_RSHIFT_W32(stt->Rxx160_LPw32, 6); + stt->Rxx160_LPw32 = WEBRTC_SPL_MUL(tmp32, 53); + + /* Reduce the max gain to avoid excessive oscillation + * (but never drop below the maximum analog level). + * stt->maxLevel = (15 * stt->maxLevel + stt->micVol) / 16; + */ + tmp32 = (15 * stt->maxLevel) + stt->micVol; + stt->maxLevel = WEBRTC_SPL_RSHIFT_W32(tmp32, 4); + stt->maxLevel = WEBRTC_SPL_MAX(stt->maxLevel, stt->maxAnalog); + + stt->zeroCtrlMax = stt->micVol; + + /* 0.95 in Q15 */ + tmp32 = inMicLevelTmp - stt->minLevel; + tmpU32 = WEBRTC_SPL_UMUL(31130, (WebRtc_UWord32)(tmp32)); + stt->micVol = (WebRtc_Word32)WEBRTC_SPL_RSHIFT_U32(tmpU32, 15) + stt->minLevel; + if (stt->micVol > lastMicVol - 1) + { + stt->micVol = lastMicVol - 1; + } + inMicLevelTmp = stt->micVol; + + /* Enable the control mechanism to ensure that our measure, + * Rxx160_LP, is in the correct range. 
+ */ + stt->activeSpeech = 0; + stt->Rxx16_LPw32Max = 0; +#ifdef MIC_LEVEL_FEEDBACK + //stt->numBlocksMicLvlSat = 0; +#endif +#ifdef AGC_DEBUG //test log + fprintf(stt->fpt, + "\tAGC->ProcessAnalog, frame %d: measure > 2ndUpperLim, micVol = %d, maxLevel = %d\n", + stt->fcount, stt->micVol, stt->maxLevel); +#endif + } + } else if (stt->Rxx160_LPw32 > stt->upperLimit) + { + stt->msTooHigh += 2; + stt->msTooLow = 0; + stt->changeToSlowMode = 0; + + if (stt->msTooHigh > stt->msecSpeechInnerChange) + { + /* Lower the recording level */ + stt->msTooHigh = 0; + /* Multiply by 0.828125 which corresponds to decreasing ~0.8dB */ + tmp32 = WEBRTC_SPL_RSHIFT_W32(stt->Rxx160_LPw32, 6); + stt->Rxx160_LPw32 = WEBRTC_SPL_MUL(tmp32, 53); + + /* Reduce the max gain to avoid excessive oscillation + * (but never drop below the maximum analog level). + * stt->maxLevel = (15 * stt->maxLevel + stt->micVol) / 16; + */ + tmp32 = (15 * stt->maxLevel) + stt->micVol; + stt->maxLevel = WEBRTC_SPL_RSHIFT_W32(tmp32, 4); + stt->maxLevel = WEBRTC_SPL_MAX(stt->maxLevel, stt->maxAnalog); + + stt->zeroCtrlMax = stt->micVol; + + /* 0.965 in Q15 */ + tmp32 = inMicLevelTmp - stt->minLevel; + tmpU32 = WEBRTC_SPL_UMUL(31621, (WebRtc_UWord32)(inMicLevelTmp - stt->minLevel)); + stt->micVol = (WebRtc_Word32)WEBRTC_SPL_RSHIFT_U32(tmpU32, 15) + stt->minLevel; + if (stt->micVol > lastMicVol - 1) + { + stt->micVol = lastMicVol - 1; + } + inMicLevelTmp = stt->micVol; + +#ifdef MIC_LEVEL_FEEDBACK + //stt->numBlocksMicLvlSat = 0; +#endif +#ifdef AGC_DEBUG //test log + fprintf(stt->fpt, + "\tAGC->ProcessAnalog, frame %d: measure > UpperLim, micVol = %d, maxLevel = %d\n", + stt->fcount, stt->micVol, stt->maxLevel); +#endif + } + } else if (stt->Rxx160_LPw32 < stt->lowerSecondaryLimit) + { + stt->msTooHigh = 0; + stt->changeToSlowMode = 0; + stt->msTooLow += 2; + + if (stt->msTooLow > stt->msecSpeechOuterChange) + { + /* Raise the recording level */ + WebRtc_Word16 index, weightFIX; + WebRtc_Word16 volNormFIX = 16384; 
// =1 in Q14. + + stt->msTooLow = 0; + + /* Normalize the volume level */ + tmp32 = WEBRTC_SPL_LSHIFT_W32(inMicLevelTmp - stt->minLevel, 14); + if (stt->maxInit != stt->minLevel) + { + volNormFIX = (WebRtc_Word16)WEBRTC_SPL_DIV(tmp32, + (stt->maxInit - stt->minLevel)); + } + + /* Find correct curve */ + WebRtcAgc_ExpCurve(volNormFIX, &index); + + /* Compute weighting factor for the volume increase, 32^(-2*X)/2+1.05 */ + weightFIX = kOffset1[index] + - (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(kSlope1[index], + volNormFIX, 13); + + /* stt->Rxx160_LPw32 *= 1.047 [~0.2 dB]; */ + tmp32 = WEBRTC_SPL_RSHIFT_W32(stt->Rxx160_LPw32, 6); + stt->Rxx160_LPw32 = WEBRTC_SPL_MUL(tmp32, 67); + + tmp32 = inMicLevelTmp - stt->minLevel; + tmpU32 = ((WebRtc_UWord32)weightFIX * (WebRtc_UWord32)(inMicLevelTmp - stt->minLevel)); + stt->micVol = (WebRtc_Word32)WEBRTC_SPL_RSHIFT_U32(tmpU32, 14) + stt->minLevel; + if (stt->micVol < lastMicVol + 2) + { + stt->micVol = lastMicVol + 2; + } + + inMicLevelTmp = stt->micVol; + +#ifdef MIC_LEVEL_FEEDBACK + /* Count ms in level saturation */ + //if (stt->micVol > stt->maxAnalog) { + if (stt->micVol > 150) + { + /* mic level is saturated */ + stt->numBlocksMicLvlSat++; + fprintf(stderr, "Sat mic Level: %d\n", stt->numBlocksMicLvlSat); + } +#endif +#ifdef AGC_DEBUG //test log + fprintf(stt->fpt, + "\tAGC->ProcessAnalog, frame %d: measure < 2ndLowerLim, micVol = %d\n", + stt->fcount, stt->micVol); +#endif + } + } else if (stt->Rxx160_LPw32 < stt->lowerLimit) + { + stt->msTooHigh = 0; + stt->changeToSlowMode = 0; + stt->msTooLow += 2; + + if (stt->msTooLow > stt->msecSpeechInnerChange) + { + /* Raise the recording level */ + WebRtc_Word16 index, weightFIX; + WebRtc_Word16 volNormFIX = 16384; // =1 in Q14. 
+ + stt->msTooLow = 0; + + /* Normalize the volume level */ + tmp32 = WEBRTC_SPL_LSHIFT_W32(inMicLevelTmp - stt->minLevel, 14); + if (stt->maxInit != stt->minLevel) + { + volNormFIX = (WebRtc_Word16)WEBRTC_SPL_DIV(tmp32, + (stt->maxInit - stt->minLevel)); + } + + /* Find correct curve */ + WebRtcAgc_ExpCurve(volNormFIX, &index); + + /* Compute weighting factor for the volume increase, (3.^(-2.*X))/8+1 */ + weightFIX = kOffset2[index] + - (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(kSlope2[index], + volNormFIX, 13); + + /* stt->Rxx160_LPw32 *= 1.047 [~0.2 dB]; */ + tmp32 = WEBRTC_SPL_RSHIFT_W32(stt->Rxx160_LPw32, 6); + stt->Rxx160_LPw32 = WEBRTC_SPL_MUL(tmp32, 67); + + tmp32 = inMicLevelTmp - stt->minLevel; + tmpU32 = ((WebRtc_UWord32)weightFIX * (WebRtc_UWord32)(inMicLevelTmp - stt->minLevel)); + stt->micVol = (WebRtc_Word32)WEBRTC_SPL_RSHIFT_U32(tmpU32, 14) + stt->minLevel; + if (stt->micVol < lastMicVol + 1) + { + stt->micVol = lastMicVol + 1; + } + + inMicLevelTmp = stt->micVol; + +#ifdef MIC_LEVEL_FEEDBACK + /* Count ms in level saturation */ + //if (stt->micVol > stt->maxAnalog) { + if (stt->micVol > 150) + { + /* mic level is saturated */ + stt->numBlocksMicLvlSat++; + fprintf(stderr, "Sat mic Level: %d\n", stt->numBlocksMicLvlSat); + } +#endif +#ifdef AGC_DEBUG //test log + fprintf(stt->fpt, + "\tAGC->ProcessAnalog, frame %d: measure < LowerLim, micVol = %d\n", + stt->fcount, stt->micVol); +#endif + + } + } else + { + /* The signal is inside the desired range which is: + * lowerLimit < Rxx160_LP/640 < upperLimit + */ + if (stt->changeToSlowMode > 4000) + { + stt->msecSpeechInnerChange = 1000; + stt->msecSpeechOuterChange = 500; + stt->upperLimit = stt->upperPrimaryLimit; + stt->lowerLimit = stt->lowerPrimaryLimit; + } else + { + stt->changeToSlowMode += 2; // in milliseconds + } + stt->msTooLow = 0; + stt->msTooHigh = 0; + + stt->micVol = inMicLevelTmp; + + } +#ifdef MIC_LEVEL_FEEDBACK + if (stt->numBlocksMicLvlSat > NUM_BLOCKS_IN_SAT_BEFORE_CHANGE_TARGET) + { 
+ stt->micLvlSat = 1; + fprintf(stderr, "target before = %d (%d)\n", stt->analogTargetLevel, stt->targetIdx); + WebRtcAgc_UpdateAgcThresholds(stt); + WebRtcAgc_CalculateGainTable(&(stt->digitalAgc.gainTable[0]), + stt->compressionGaindB, stt->targetLevelDbfs, stt->limiterEnable, + stt->analogTarget); + stt->numBlocksMicLvlSat = 0; + stt->micLvlSat = 0; + fprintf(stderr, "target offset = %d\n", stt->targetIdxOffset); + fprintf(stderr, "target after = %d (%d)\n", stt->analogTargetLevel, stt->targetIdx); + } +#endif + } + } + + /* Ensure gain is not increased in presence of echo or after a mute event + * (but allow the zeroCtrl() increase on the frame of a mute detection). + */ + if (echo == 1 || (stt->muteGuardMs > 0 && stt->muteGuardMs < kMuteGuardTimeMs)) + { + if (stt->micVol > lastMicVol) + { + stt->micVol = lastMicVol; + } + } + + /* limit the gain */ + if (stt->micVol > stt->maxLevel) + { + stt->micVol = stt->maxLevel; + } else if (stt->micVol < stt->minOutput) + { + stt->micVol = stt->minOutput; + } + + *outMicLevel = WEBRTC_SPL_RSHIFT_W32(stt->micVol, stt->scale); + if (*outMicLevel > WEBRTC_SPL_RSHIFT_W32(stt->maxAnalog, stt->scale)) + { + *outMicLevel = WEBRTC_SPL_RSHIFT_W32(stt->maxAnalog, stt->scale); + } + + return 0; +} + +int WebRtcAgc_Process(void *agcInst, const WebRtc_Word16 *in_near, + const WebRtc_Word16 *in_near_H, WebRtc_Word16 samples, + WebRtc_Word16 *out, WebRtc_Word16 *out_H, WebRtc_Word32 inMicLevel, + WebRtc_Word32 *outMicLevel, WebRtc_Word16 echo, + WebRtc_UWord8 *saturationWarning) +{ + Agc_t *stt; + WebRtc_Word32 inMicLevelTmp; + WebRtc_Word16 subFrames, i; + WebRtc_UWord8 satWarningTmp = 0; + + stt = (Agc_t *)agcInst; + + // + if (stt == NULL) + { + return -1; + } + // + + + if (stt->fs == 8000) + { + if ((samples != 80) && (samples != 160)) + { +#ifdef AGC_DEBUG //test log + fprintf(stt->fpt, + "AGC->Process, frame %d: Invalid number of samples\n\n", stt->fcount); +#endif + return -1; + } + subFrames = 80; + } else if (stt->fs == 
16000) + { + if ((samples != 160) && (samples != 320)) + { +#ifdef AGC_DEBUG //test log + fprintf(stt->fpt, + "AGC->Process, frame %d: Invalid number of samples\n\n", stt->fcount); +#endif + return -1; + } + subFrames = 160; + } else if (stt->fs == 32000) + { + if ((samples != 160) && (samples != 320)) + { +#ifdef AGC_DEBUG //test log + fprintf(stt->fpt, + "AGC->Process, frame %d: Invalid number of samples\n\n", stt->fcount); +#endif + return -1; + } + subFrames = 160; + } else + { +#ifdef AGC_DEBUG// test log + fprintf(stt->fpt, + "AGC->Process, frame %d: Invalid sample rate\n\n", stt->fcount); +#endif + return -1; + } + + /* Check for valid pointers based on sampling rate */ + if (stt->fs == 32000 && in_near_H == NULL) + { + return -1; + } + /* Check for valid pointers for low band */ + if (in_near == NULL) + { + return -1; + } + + *saturationWarning = 0; + //TODO: PUT IN RANGE CHECKING FOR INPUT LEVELS + *outMicLevel = inMicLevel; + inMicLevelTmp = inMicLevel; + + memcpy(out, in_near, samples * sizeof(WebRtc_Word16)); + if (stt->fs == 32000) + { + memcpy(out_H, in_near_H, samples * sizeof(WebRtc_Word16)); + } + +#ifdef AGC_DEBUG//test log + stt->fcount++; +#endif + + for (i = 0; i < samples; i += subFrames) + { + if (WebRtcAgc_ProcessDigital(&stt->digitalAgc, &in_near[i], &in_near_H[i], &out[i], &out_H[i], + stt->fs, stt->lowLevelSignal) == -1) + { +#ifdef AGC_DEBUG//test log + fprintf(stt->fpt, "AGC->Process, frame %d: Error from DigAGC\n\n", stt->fcount); +#endif + return -1; + } + if ((stt->agcMode < kAgcModeFixedDigital) && ((stt->lowLevelSignal == 0) + || (stt->agcMode != kAgcModeAdaptiveDigital))) + { + if (WebRtcAgc_ProcessAnalog(agcInst, inMicLevelTmp, outMicLevel, + stt->vadMic.logRatio, echo, saturationWarning) == -1) + { + return -1; + } + } +#ifdef AGC_DEBUG//test log + fprintf(stt->agcLog, "%5d\t%d\t%d\t%d\n", stt->fcount, inMicLevelTmp, *outMicLevel, stt->maxLevel, stt->micVol); +#endif + + /* update queue */ + if (stt->inQueue > 1) + { + 
memcpy(stt->env[0], stt->env[1], 10 * sizeof(WebRtc_Word32)); + memcpy(stt->Rxx16w32_array[0], stt->Rxx16w32_array[1], 5 * sizeof(WebRtc_Word32)); + } + + if (stt->inQueue > 0) + { + stt->inQueue--; + } + + /* If 20ms frames are used the input mic level must be updated so that + * the analog AGC does not think that there has been a manual volume + * change. */ + inMicLevelTmp = *outMicLevel; + + /* Store a positive saturation warning. */ + if (*saturationWarning == 1) + { + satWarningTmp = 1; + } + } + + /* Trigger the saturation warning if displayed by any of the frames. */ + *saturationWarning = satWarningTmp; + + return 0; +} + +int WebRtcAgc_set_config(void *agcInst, WebRtcAgc_config_t agcConfig) +{ + Agc_t *stt; + stt = (Agc_t *)agcInst; + + if (stt == NULL) + { + return -1; + } + + if (stt->initFlag != kInitCheck) + { + stt->lastError = AGC_UNINITIALIZED_ERROR; + return -1; + } + + if (agcConfig.limiterEnable != kAgcFalse && agcConfig.limiterEnable != kAgcTrue) + { + stt->lastError = AGC_BAD_PARAMETER_ERROR; + return -1; + } + stt->limiterEnable = agcConfig.limiterEnable; + stt->compressionGaindB = agcConfig.compressionGaindB; + if ((agcConfig.targetLevelDbfs < 0) || (agcConfig.targetLevelDbfs > 31)) + { + stt->lastError = AGC_BAD_PARAMETER_ERROR; + return -1; + } + stt->targetLevelDbfs = agcConfig.targetLevelDbfs; + + if (stt->agcMode == kAgcModeFixedDigital) + { + /* Adjust for different parameter interpretation in FixedDigital mode */ + stt->compressionGaindB += agcConfig.targetLevelDbfs; + } + + /* Update threshold levels for analog adaptation */ + WebRtcAgc_UpdateAgcThresholds(stt); + + /* Recalculate gain table */ + if (WebRtcAgc_CalculateGainTable(&(stt->digitalAgc.gainTable[0]), stt->compressionGaindB, + stt->targetLevelDbfs, stt->limiterEnable, stt->analogTarget) == -1) + { +#ifdef AGC_DEBUG//test log + fprintf(stt->fpt, "AGC->set_config, frame %d: Error from calcGainTable\n\n", stt->fcount); +#endif + return -1; + } + /* Store the config in a 
WebRtcAgc_config_t */ + stt->usedConfig.compressionGaindB = agcConfig.compressionGaindB; + stt->usedConfig.limiterEnable = agcConfig.limiterEnable; + stt->usedConfig.targetLevelDbfs = agcConfig.targetLevelDbfs; + + return 0; +} + +int WebRtcAgc_get_config(void *agcInst, WebRtcAgc_config_t *config) +{ + Agc_t *stt; + stt = (Agc_t *)agcInst; + + if (stt == NULL) + { + return -1; + } + + if (config == NULL) + { + stt->lastError = AGC_NULL_POINTER_ERROR; + return -1; + } + + if (stt->initFlag != kInitCheck) + { + stt->lastError = AGC_UNINITIALIZED_ERROR; + return -1; + } + + config->limiterEnable = stt->usedConfig.limiterEnable; + config->targetLevelDbfs = stt->usedConfig.targetLevelDbfs; + config->compressionGaindB = stt->usedConfig.compressionGaindB; + + return 0; +} + +int WebRtcAgc_Create(void **agcInst) +{ + Agc_t *stt; + if (agcInst == NULL) + { + return -1; + } + stt = (Agc_t *)malloc(sizeof(Agc_t)); + + *agcInst = stt; + if (stt == NULL) + { + return -1; + } + +#ifdef AGC_DEBUG + stt->fpt = fopen("./agc_test_log.txt", "wt"); + stt->agcLog = fopen("./agc_debug_log.txt", "wt"); + stt->digitalAgc.logFile = fopen("./agc_log.txt", "wt"); +#endif + + stt->initFlag = 0; + stt->lastError = 0; + + return 0; +} + +int WebRtcAgc_Free(void *state) +{ + Agc_t *stt; + + stt = (Agc_t *)state; +#ifdef AGC_DEBUG + fclose(stt->fpt); + fclose(stt->agcLog); + fclose(stt->digitalAgc.logFile); +#endif + free(stt); + + return 0; +} + +/* minLevel - Minimum volume level + * maxLevel - Maximum volume level + */ +int WebRtcAgc_Init(void *agcInst, WebRtc_Word32 minLevel, WebRtc_Word32 maxLevel, + WebRtc_Word16 agcMode, WebRtc_UWord32 fs) +{ + WebRtc_Word32 max_add, tmp32; + WebRtc_Word16 i; + int tmpNorm; + Agc_t *stt; + + /* typecast state pointer */ + stt = (Agc_t *)agcInst; + + if (WebRtcAgc_InitDigital(&stt->digitalAgc, agcMode) != 0) + { + stt->lastError = AGC_UNINITIALIZED_ERROR; + return -1; + } + + /* Analog AGC variables */ + stt->envSum = 0; + + /* mode = 0 - Only saturation 
protection + * 1 - Analog Automatic Gain Control [-targetLevelDbfs (default -3 dBOv)] + * 2 - Digital Automatic Gain Control [-targetLevelDbfs (default -3 dBOv)] + * 3 - Fixed Digital Gain [compressionGaindB (default 8 dB)] + */ +#ifdef AGC_DEBUG//test log + stt->fcount = 0; + fprintf(stt->fpt, "AGC->Init\n"); +#endif + if (agcMode < kAgcModeUnchanged || agcMode > kAgcModeFixedDigital) + { +#ifdef AGC_DEBUG//test log + fprintf(stt->fpt, "AGC->Init: error, incorrect mode\n\n"); +#endif + return -1; + } + stt->agcMode = agcMode; + stt->fs = fs; + + /* initialize input VAD */ + WebRtcAgc_InitVad(&stt->vadMic); + + /* If the volume range is smaller than 0-256 then + * the levels are shifted up to Q8-domain */ + tmpNorm = WebRtcSpl_NormU32((WebRtc_UWord32)maxLevel); + stt->scale = tmpNorm - 23; + if (stt->scale < 0) + { + stt->scale = 0; + } + // TODO(bjornv): Investigate if we really need to scale up a small range now when we have + // a guard against zero-increments. For now, we do not support scale up (scale = 0). + stt->scale = 0; + maxLevel = WEBRTC_SPL_LSHIFT_W32(maxLevel, stt->scale); + minLevel = WEBRTC_SPL_LSHIFT_W32(minLevel, stt->scale); + + /* Make minLevel and maxLevel static in AdaptiveDigital */ + if (stt->agcMode == kAgcModeAdaptiveDigital) + { + minLevel = 0; + maxLevel = 255; + stt->scale = 0; + } + /* The maximum supplemental volume range is based on a vague idea + * of how much lower the gain will be than the real analog gain. 
*/ + max_add = WEBRTC_SPL_RSHIFT_W32(maxLevel - minLevel, 2); + + /* Minimum/maximum volume level that can be set */ + stt->minLevel = minLevel; + stt->maxAnalog = maxLevel; + stt->maxLevel = maxLevel + max_add; + stt->maxInit = stt->maxLevel; + + stt->zeroCtrlMax = stt->maxAnalog; + + /* Initialize micVol parameter */ + stt->micVol = stt->maxAnalog; + if (stt->agcMode == kAgcModeAdaptiveDigital) + { + stt->micVol = 127; /* Mid-point of mic level */ + } + stt->micRef = stt->micVol; + stt->micGainIdx = 127; +#ifdef MIC_LEVEL_FEEDBACK + stt->numBlocksMicLvlSat = 0; + stt->micLvlSat = 0; +#endif +#ifdef AGC_DEBUG//test log + fprintf(stt->fpt, + "AGC->Init: minLevel = %d, maxAnalog = %d, maxLevel = %d\n", + stt->minLevel, stt->maxAnalog, stt->maxLevel); +#endif + + /* Minimum output volume is 4% higher than the available lowest volume level */ + tmp32 = WEBRTC_SPL_RSHIFT_W32((stt->maxLevel - stt->minLevel) * (WebRtc_Word32)10, 8); + stt->minOutput = (stt->minLevel + tmp32); + + stt->msTooLow = 0; + stt->msTooHigh = 0; + stt->changeToSlowMode = 0; + stt->firstCall = 0; + stt->msZero = 0; + stt->muteGuardMs = 0; + stt->gainTableIdx = 0; + + stt->msecSpeechInnerChange = kMsecSpeechInner; + stt->msecSpeechOuterChange = kMsecSpeechOuter; + + stt->activeSpeech = 0; + stt->Rxx16_LPw32Max = 0; + + stt->vadThreshold = kNormalVadThreshold; + stt->inActive = 0; + + for (i = 0; i < RXX_BUFFER_LEN; i++) + { + stt->Rxx16_vectorw32[i] = (WebRtc_Word32)1000; /* -54dBm0 */ + } + stt->Rxx160w32 = 125 * RXX_BUFFER_LEN; /* (stt->Rxx16_vectorw32[0]>>3) = 125 */ + + stt->Rxx16pos = 0; + stt->Rxx16_LPw32 = (WebRtc_Word32)16284; /* Q(-4) */ + + for (i = 0; i < 5; i++) + { + stt->Rxx16w32_array[0][i] = 0; + } + for (i = 0; i < 20; i++) + { + stt->env[0][i] = 0; + } + stt->inQueue = 0; + +#ifdef MIC_LEVEL_FEEDBACK + stt->targetIdxOffset = 0; +#endif + + WebRtcSpl_MemSetW32(stt->filterState, 0, 8); + + stt->initFlag = kInitCheck; + // Default config settings. 
+ stt->defaultConfig.limiterEnable = kAgcTrue; + stt->defaultConfig.targetLevelDbfs = AGC_DEFAULT_TARGET_LEVEL; + stt->defaultConfig.compressionGaindB = AGC_DEFAULT_COMP_GAIN; + + if (WebRtcAgc_set_config(stt, stt->defaultConfig) == -1) + { + stt->lastError = AGC_UNSPECIFIED_ERROR; + return -1; + } + stt->Rxx160_LPw32 = stt->analogTargetLevel; // Initialize rms value + + stt->lowLevelSignal = 0; + + /* Only positive values are allowed that are not too large */ + if ((minLevel >= maxLevel) || (maxLevel & 0xFC000000)) + { +#ifdef AGC_DEBUG//test log + fprintf(stt->fpt, "minLevel, maxLevel value(s) are invalid\n\n"); +#endif + return -1; + } else + { +#ifdef AGC_DEBUG//test log + fprintf(stt->fpt, "\n"); +#endif + return 0; + } +} + +int WebRtcAgc_Version(WebRtc_Word8 *versionStr, WebRtc_Word16 length) +{ + const WebRtc_Word8 version[] = "AGC 1.7.0"; + const WebRtc_Word16 versionLen = (WebRtc_Word16)strlen(version) + 1; + + if (versionStr == NULL) + { + return -1; + } + + if (versionLen > length) + { + return -1; + } + + strncpy(versionStr, version, versionLen); + return 0; +} diff --git a/src/modules/audio_processing/agc/main/source/analog_agc.h b/src/modules/audio_processing/agc/main/source/analog_agc.h new file mode 100644 index 0000000000..b32ac6581e --- /dev/null +++ b/src/modules/audio_processing/agc/main/source/analog_agc.h @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MAIN_SOURCE_ANALOG_AGC_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MAIN_SOURCE_ANALOG_AGC_H_ + +#include "typedefs.h" +#include "gain_control.h" +#include "digital_agc.h" + +//#define AGC_DEBUG +//#define MIC_LEVEL_FEEDBACK +#ifdef AGC_DEBUG +#include <stdio.h> +#endif + +/* Analog Automatic Gain Control variables: + * Constant declarations (inner limits inside which no changes are done) + * In the beginning the range is narrower to widen as soon as the measure + * 'Rxx160_LP' is inside it. Currently the starting limits are -22.2+/-1dBm0 + * and the final limits -22.2+/-2.5dBm0. These levels makes the speech signal + * go towards -25.4dBm0 (-31.4dBov). Tuned with wbfile-31.4dBov.pcm + * The limits are created by running the AGC with a file having the desired + * signal level and thereafter plotting Rxx160_LP in the dBm0-domain defined + * by out=10*log10(in/260537279.7); Set the target level to the average level + * of our measure Rxx160_LP. Remember that the levels are in blocks of 16 in + * Q(-7). 
(Example matlab code: round(db2pow(-21.2)*16/2^7) ) + */ +#define RXX_BUFFER_LEN 10 + +static const WebRtc_Word16 kMsecSpeechInner = 520; +static const WebRtc_Word16 kMsecSpeechOuter = 340; + +static const WebRtc_Word16 kNormalVadThreshold = 400; + +static const WebRtc_Word16 kAlphaShortTerm = 6; // 2^-6 = 0.0156 +static const WebRtc_Word16 kAlphaLongTerm = 10; // 2^-10 = 0.000977 + +typedef struct +{ + // Configurable parameters/variables + WebRtc_UWord32 fs; // Sampling frequency + WebRtc_Word16 compressionGaindB; // Fixed gain level in dB + WebRtc_Word16 targetLevelDbfs; // Target level in -dBfs of envelope (default -3) + WebRtc_Word16 agcMode; // Hard coded mode (adaptAna/adaptDig/fixedDig) + WebRtc_UWord8 limiterEnable; // Enabling limiter (on/off; defaultConfig sets it to kAgcTrue) + WebRtcAgc_config_t defaultConfig; + WebRtcAgc_config_t usedConfig; + + // General variables + WebRtc_Word16 initFlag; + WebRtc_Word16 lastError; + + // Target level parameters + // Based on the above: analogTargetLevel = round((32767*10^(-22/20))^2*16/2^7) + WebRtc_Word32 analogTargetLevel; // = RXX_BUFFER_LEN * 846805; -22 dBfs + WebRtc_Word32 startUpperLimit; // = RXX_BUFFER_LEN * 1066064; -21 dBfs + WebRtc_Word32 startLowerLimit; // = RXX_BUFFER_LEN * 672641; -23 dBfs + WebRtc_Word32 upperPrimaryLimit; // = RXX_BUFFER_LEN * 1342095; -20 dBfs + WebRtc_Word32 lowerPrimaryLimit; // = RXX_BUFFER_LEN * 534298; -24 dBfs + WebRtc_Word32 upperSecondaryLimit;// = RXX_BUFFER_LEN * 2677832; -17 dBfs + WebRtc_Word32 lowerSecondaryLimit;// = RXX_BUFFER_LEN * 267783; -27 dBfs + WebRtc_UWord16 targetIdx; // Table index for corresponding target level +#ifdef MIC_LEVEL_FEEDBACK + WebRtc_UWord16 targetIdxOffset; // Table index offset for level compensation +#endif + WebRtc_Word16 analogTarget; // Digital reference level in ENV scale + + // Analog AGC specific variables + WebRtc_Word32 filterState[8]; // For downsampling wb to nb + WebRtc_Word32 upperLimit; // Upper limit for mic energy + WebRtc_Word32 
lowerLimit; // Lower limit for mic energy + WebRtc_Word32 Rxx160w32; // Average energy for one frame + WebRtc_Word32 Rxx16_LPw32; // Low pass filtered subframe energies + WebRtc_Word32 Rxx160_LPw32; // Low pass filtered frame energies + WebRtc_Word32 Rxx16_LPw32Max; // Keeps track of largest energy subframe + WebRtc_Word32 Rxx16_vectorw32[RXX_BUFFER_LEN];// Array with subframe energies + WebRtc_Word32 Rxx16w32_array[2][5];// Energy values of microphone signal + WebRtc_Word32 env[2][10]; // Envelope values of subframes + + WebRtc_Word16 Rxx16pos; // Current position in the Rxx16_vectorw32 + WebRtc_Word16 envSum; // Filtered scaled envelope in subframes + WebRtc_Word16 vadThreshold; // Threshold for VAD decision + WebRtc_Word16 inActive; // Inactive time in milliseconds + WebRtc_Word16 msTooLow; // Milliseconds of speech at a too low level + WebRtc_Word16 msTooHigh; // Milliseconds of speech at a too high level + WebRtc_Word16 changeToSlowMode; // Change to slow mode after some time at target + WebRtc_Word16 firstCall; // First call to the process-function + WebRtc_Word16 msZero; // Milliseconds of zero input + WebRtc_Word16 msecSpeechOuterChange;// Min ms of speech between volume changes + WebRtc_Word16 msecSpeechInnerChange;// Min ms of speech between volume changes + WebRtc_Word16 activeSpeech; // Milliseconds of active speech + WebRtc_Word16 muteGuardMs; // Counter to prevent mute action + WebRtc_Word16 inQueue; // 10 ms batch indicator + + // Microphone level variables + WebRtc_Word32 micRef; // Remember ref. 
mic level for virtual mic + WebRtc_UWord16 gainTableIdx; // Current position in virtual gain table + WebRtc_Word32 micGainIdx; // Gain index of mic level to increase slowly + WebRtc_Word32 micVol; // Remember volume between frames + WebRtc_Word32 maxLevel; // Max possible vol level, incl dig gain + WebRtc_Word32 maxAnalog; // Maximum possible analog volume level + WebRtc_Word32 maxInit; // Initial value of "max" + WebRtc_Word32 minLevel; // Minimum possible volume level + WebRtc_Word32 minOutput; // Minimum output volume level + WebRtc_Word32 zeroCtrlMax; // Remember max gain => don't amp low input + + WebRtc_Word16 scale; // Scale factor for internal volume levels +#ifdef MIC_LEVEL_FEEDBACK + WebRtc_Word16 numBlocksMicLvlSat; + WebRtc_UWord8 micLvlSat; +#endif + // Structs for VAD and digital_agc + AgcVad_t vadMic; + DigitalAgc_t digitalAgc; + +#ifdef AGC_DEBUG + FILE* fpt; + FILE* agcLog; + WebRtc_Word32 fcount; +#endif + + WebRtc_Word16 lowLevelSignal; +} Agc_t; + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MAIN_SOURCE_ANALOG_AGC_H_ diff --git a/src/modules/audio_processing/agc/main/source/digital_agc.c b/src/modules/audio_processing/agc/main/source/digital_agc.c new file mode 100644 index 0000000000..2966586e48 --- /dev/null +++ b/src/modules/audio_processing/agc/main/source/digital_agc.c @@ -0,0 +1,780 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +/* digital_agc.c + * + */ + +#include <string.h> +#ifdef AGC_DEBUG +#include <stdio.h> +#endif +#include "digital_agc.h" +#include "gain_control.h" + +// To generate the gaintable, copy&paste the following lines to a Matlab window: +// MaxGain = 6; MinGain = 0; CompRatio = 3; Knee = 1; +// zeros = 0:31; lvl = 2.^(1-zeros); +// A = -10*log10(lvl) * (CompRatio - 1) / CompRatio; +// B = MaxGain - MinGain; +// gains = round(2^16*10.^(0.05 * (MinGain + B * ( log(exp(-Knee*A)+exp(-Knee*B)) - log(1+exp(-Knee*B)) ) / log(1/(1+exp(Knee*B)))))); +// fprintf(1, '\t%i, %i, %i, %i,\n', gains); +// % Matlab code for plotting the gain and input/output level characteristic (copy/paste the following 3 lines): +// in = 10*log10(lvl); out = 20*log10(gains/65536); +// subplot(121); plot(in, out); axis([-30, 0, -5, 20]); grid on; xlabel('Input (dB)'); ylabel('Gain (dB)'); +// subplot(122); plot(in, in+out); axis([-30, 0, -30, 5]); grid on; xlabel('Input (dB)'); ylabel('Output (dB)'); +// zoom on; + +// Generator table for y=log2(1+e^x) in Q8. 
+static const WebRtc_UWord16 kGenFuncTable[128] = { + 256, 485, 786, 1126, 1484, 1849, 2217, 2586, + 2955, 3324, 3693, 4063, 4432, 4801, 5171, 5540, + 5909, 6279, 6648, 7017, 7387, 7756, 8125, 8495, + 8864, 9233, 9603, 9972, 10341, 10711, 11080, 11449, + 11819, 12188, 12557, 12927, 13296, 13665, 14035, 14404, + 14773, 15143, 15512, 15881, 16251, 16620, 16989, 17359, + 17728, 18097, 18466, 18836, 19205, 19574, 19944, 20313, + 20682, 21052, 21421, 21790, 22160, 22529, 22898, 23268, + 23637, 24006, 24376, 24745, 25114, 25484, 25853, 26222, + 26592, 26961, 27330, 27700, 28069, 28438, 28808, 29177, + 29546, 29916, 30285, 30654, 31024, 31393, 31762, 32132, + 32501, 32870, 33240, 33609, 33978, 34348, 34717, 35086, + 35456, 35825, 36194, 36564, 36933, 37302, 37672, 38041, + 38410, 38780, 39149, 39518, 39888, 40257, 40626, 40996, + 41365, 41734, 42104, 42473, 42842, 43212, 43581, 43950, + 44320, 44689, 45058, 45428, 45797, 46166, 46536, 46905 +}; + +static const WebRtc_Word16 kAvgDecayTime = 250; // frames; < 3000 + +WebRtc_Word32 WebRtcAgc_CalculateGainTable(WebRtc_Word32 *gainTable, // Q16 + WebRtc_Word16 digCompGaindB, // Q0 + WebRtc_Word16 targetLevelDbfs,// Q0 + WebRtc_UWord8 limiterEnable, + WebRtc_Word16 analogTarget) // Q0 +{ + // This function generates the compressor gain table used in the fixed digital part. 
+ WebRtc_UWord32 tmpU32no1, tmpU32no2, absInLevel, logApprox; + WebRtc_Word32 inLevel, limiterLvl; + WebRtc_Word32 tmp32, tmp32no1, tmp32no2, numFIX, den, y32; + const WebRtc_UWord16 kLog10 = 54426; // log2(10) in Q14 + const WebRtc_UWord16 kLog10_2 = 49321; // 10*log10(2) in Q14 + const WebRtc_UWord16 kLogE_1 = 23637; // log2(e) in Q14 + WebRtc_UWord16 constMaxGain; + WebRtc_UWord16 tmpU16, intPart, fracPart; + const WebRtc_Word16 kCompRatio = 3; + const WebRtc_Word16 kSoftLimiterLeft = 1; + WebRtc_Word16 limiterOffset = 0; // Limiter offset + WebRtc_Word16 limiterIdx, limiterLvlX; + WebRtc_Word16 constLinApprox, zeroGainLvl, maxGain, diffGain; + WebRtc_Word16 i, tmp16, tmp16no1; + int zeros, zerosScale; + + // Constants +// kLogE_1 = 23637; // log2(e) in Q14 +// kLog10 = 54426; // log2(10) in Q14 +// kLog10_2 = 49321; // 10*log10(2) in Q14 + + // Calculate maximum digital gain and zero gain level + tmp32no1 = WEBRTC_SPL_MUL_16_16(digCompGaindB - analogTarget, kCompRatio - 1); + tmp16no1 = analogTarget - targetLevelDbfs; + tmp16no1 += WebRtcSpl_DivW32W16ResW16(tmp32no1 + (kCompRatio >> 1), kCompRatio); + maxGain = WEBRTC_SPL_MAX(tmp16no1, (analogTarget - targetLevelDbfs)); + tmp32no1 = WEBRTC_SPL_MUL_16_16(maxGain, kCompRatio); + zeroGainLvl = digCompGaindB; + zeroGainLvl -= WebRtcSpl_DivW32W16ResW16(tmp32no1 + ((kCompRatio - 1) >> 1), + kCompRatio - 1); + if ((digCompGaindB <= analogTarget) && (limiterEnable)) + { + zeroGainLvl += (analogTarget - digCompGaindB + kSoftLimiterLeft); + limiterOffset = 0; + } + + // Calculate the difference between maximum gain and gain at 0dB0v: + // diffGain = maxGain + (compRatio-1)*zeroGainLvl/compRatio + // = (compRatio-1)*digCompGaindB/compRatio + tmp32no1 = WEBRTC_SPL_MUL_16_16(digCompGaindB, kCompRatio - 1); + diffGain = WebRtcSpl_DivW32W16ResW16(tmp32no1 + (kCompRatio >> 1), kCompRatio); + // diffGain indexes kGenFuncTable directly below, and the interpolation in the + // loop reads kGenFuncTable[intPart + 1] with intPart as large as diffGain + 2 + // (reached at i == 0). Reject any value that would read outside the table. + if ((diffGain < 0) || + (diffGain + 3 >= (WebRtc_Word16)(sizeof(kGenFuncTable) / sizeof(kGenFuncTable[0])))) + { + return -1; + } + + // Calculate the limiter level and index: + // limiterLvlX = analogTarget - limiterOffset + // 
limiterLvl = targetLevelDbfs + limiterOffset/compRatio + limiterLvlX = analogTarget - limiterOffset; + limiterIdx = 2 + + WebRtcSpl_DivW32W16ResW16(WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)limiterLvlX, 13), + WEBRTC_SPL_RSHIFT_U16(kLog10_2, 1)); + tmp16no1 = WebRtcSpl_DivW32W16ResW16(limiterOffset + (kCompRatio >> 1), kCompRatio); + limiterLvl = targetLevelDbfs + tmp16no1; + + // Calculate (through table lookup): + // constMaxGain = log2(1+2^(log2(e)*diffGain)); (in Q8) + constMaxGain = kGenFuncTable[diffGain]; // in Q8 + + // Calculate a parameter used to approximate the fractional part of 2^x with a + // piecewise linear function in Q14: + // constLinApprox = round(3/2*(4*(3-2*sqrt(2))/(log(2)^2)-0.5)*2^14); + constLinApprox = 22817; // in Q14 + + // Calculate a denominator used in the exponential part to convert from dB to linear scale: + // den = 20*constMaxGain (in Q8) + den = WEBRTC_SPL_MUL_16_U16(20, constMaxGain); // in Q8 + + for (i = 0; i < 32; i++) + { + // Calculate scaled input level (compressor): + // inLevel = fix((-constLog10_2*(compRatio-1)*(1-i)+fix(compRatio/2))/compRatio) + tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16(kCompRatio - 1, i - 1); // Q0 + tmp32 = WEBRTC_SPL_MUL_16_U16(tmp16, kLog10_2) + 1; // Q14 + inLevel = WebRtcSpl_DivW32W16(tmp32, kCompRatio); // Q14 + + // Calculate diffGain-inLevel, to map using the genFuncTable + inLevel = WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)diffGain, 14) - inLevel; // Q14 + + // Make calculations on abs(inLevel) and compensate for the sign afterwards. 
+ absInLevel = (WebRtc_UWord32)WEBRTC_SPL_ABS_W32(inLevel); // Q14 + + // LUT with interpolation + intPart = (WebRtc_UWord16)WEBRTC_SPL_RSHIFT_U32(absInLevel, 14); + fracPart = (WebRtc_UWord16)(absInLevel & 0x00003FFF); // extract the fractional part + tmpU16 = kGenFuncTable[intPart + 1] - kGenFuncTable[intPart]; // Q8 + tmpU32no1 = WEBRTC_SPL_UMUL_16_16(tmpU16, fracPart); // Q22 + tmpU32no1 += WEBRTC_SPL_LSHIFT_U32((WebRtc_UWord32)kGenFuncTable[intPart], 14); // Q22 + logApprox = WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 8); // Q14 + // Compensate for negative exponent using the relation: + // log2(1 + 2^-x) = log2(1 + 2^x) - x + if (inLevel < 0) + { + zeros = WebRtcSpl_NormU32(absInLevel); + zerosScale = 0; + if (zeros < 15) + { + // Not enough space for multiplication + tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(absInLevel, 15 - zeros); // Q(zeros-1) + tmpU32no2 = WEBRTC_SPL_UMUL_32_16(tmpU32no2, kLogE_1); // Q(zeros+13) + if (zeros < 9) + { + tmpU32no1 = WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 9 - zeros); // Q(zeros+13) + zerosScale = 9 - zeros; + } else + { + tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(tmpU32no2, zeros - 9); // Q22 + } + } else + { + tmpU32no2 = WEBRTC_SPL_UMUL_32_16(absInLevel, kLogE_1); // Q28 + tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(tmpU32no2, 6); // Q22 + } + logApprox = 0; + if (tmpU32no2 < tmpU32no1) + { + logApprox = WEBRTC_SPL_RSHIFT_U32(tmpU32no1 - tmpU32no2, 8 - zerosScale); //Q14 + } + } + numFIX = WEBRTC_SPL_LSHIFT_W32(WEBRTC_SPL_MUL_16_U16(maxGain, constMaxGain), 6); // Q14 + numFIX -= WEBRTC_SPL_MUL_32_16((WebRtc_Word32)logApprox, diffGain); // Q14 + + // Calculate ratio + // Shift numFIX as much as possible + zeros = WebRtcSpl_NormW32(numFIX); + numFIX = WEBRTC_SPL_LSHIFT_W32(numFIX, zeros); // Q(14+zeros) + + // Shift den so we end up in Qy1 + tmp32no1 = WEBRTC_SPL_SHIFT_W32(den, zeros - 8); // Q(zeros) + if (numFIX < 0) + { + numFIX -= WEBRTC_SPL_RSHIFT_W32(tmp32no1, 1); + } else + { + numFIX += WEBRTC_SPL_RSHIFT_W32(tmp32no1, 1); + } + y32 = WEBRTC_SPL_DIV(numFIX, 
tmp32no1); // in Q14 + if (limiterEnable && (i < limiterIdx)) + { + tmp32 = WEBRTC_SPL_MUL_16_U16(i - 1, kLog10_2); // Q14 + tmp32 -= WEBRTC_SPL_LSHIFT_W32(limiterLvl, 14); // Q14 + y32 = WebRtcSpl_DivW32W16(tmp32 + 10, 20); + } + if (y32 > 39000) + { + tmp32 = WEBRTC_SPL_MUL(y32 >> 1, kLog10) + 4096; // in Q27 + tmp32 = WEBRTC_SPL_RSHIFT_W32(tmp32, 13); // in Q14 + } else + { + tmp32 = WEBRTC_SPL_MUL(y32, kLog10) + 8192; // in Q28 + tmp32 = WEBRTC_SPL_RSHIFT_W32(tmp32, 14); // in Q14 + } + tmp32 += WEBRTC_SPL_LSHIFT_W32(16, 14); // in Q14 (Make sure final output is in Q16) + + // Calculate power + if (tmp32 > 0) + { + intPart = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 14); + fracPart = (WebRtc_UWord16)(tmp32 & 0x00003FFF); // in Q14 + if (WEBRTC_SPL_RSHIFT_W32(fracPart, 13)) + { + tmp16 = WEBRTC_SPL_LSHIFT_W16(2, 14) - constLinApprox; + tmp32no2 = WEBRTC_SPL_LSHIFT_W32(1, 14) - fracPart; + tmp32no2 = WEBRTC_SPL_MUL_32_16(tmp32no2, tmp16); + tmp32no2 = WEBRTC_SPL_RSHIFT_W32(tmp32no2, 13); + tmp32no2 = WEBRTC_SPL_LSHIFT_W32(1, 14) - tmp32no2; + } else + { + tmp16 = constLinApprox - WEBRTC_SPL_LSHIFT_W16(1, 14); + tmp32no2 = WEBRTC_SPL_MUL_32_16(fracPart, tmp16); + tmp32no2 = WEBRTC_SPL_RSHIFT_W32(tmp32no2, 13); + } + fracPart = (WebRtc_UWord16)tmp32no2; + gainTable[i] = WEBRTC_SPL_LSHIFT_W32(1, intPart) + + WEBRTC_SPL_SHIFT_W32(fracPart, intPart - 14); + } else + { + gainTable[i] = 0; + } + } + + return 0; +} + +WebRtc_Word32 WebRtcAgc_InitDigital(DigitalAgc_t *stt, WebRtc_Word16 agcMode) +{ + + if (agcMode == kAgcModeFixedDigital) + { + // start at minimum to find correct gain faster + stt->capacitorSlow = 0; + } else + { + // start out with 0 dB gain + stt->capacitorSlow = 134217728; // (WebRtc_Word32)(0.125f * 32768.0f * 32768.0f); + } + stt->capacitorFast = 0; + stt->gain = 65536; + stt->gatePrevious = 0; + stt->agcMode = agcMode; +#ifdef AGC_DEBUG + stt->frameCounter = 0; +#endif + + // initialize VADs + WebRtcAgc_InitVad(&stt->vadNearend); + 
WebRtcAgc_InitVad(&stt->vadFarend); + + return 0; +} + +// Runs the far-end VAD on one frame of |nrSamples| samples from |in_far|. +// Returns 0 on success, -1 if |stt| is NULL. +WebRtc_Word32 WebRtcAgc_AddFarendToDigital(DigitalAgc_t *stt, const WebRtc_Word16 *in_far, + WebRtc_Word16 nrSamples) +{ + // Check for valid pointer. Test the instance itself: the address of a member + // (&stt->vadFarend) is non-NULL even when stt is NULL, so it cannot be used. + if (stt == NULL) + { + return -1; + } + + // VAD for far end + WebRtcAgc_ProcessVad(&stt->vadFarend, in_far, nrSamples); + + return 0; +} + +WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *in_near, + const WebRtc_Word16 *in_near_H, WebRtc_Word16 *out, + WebRtc_Word16 *out_H, WebRtc_UWord32 FS, + WebRtc_Word16 lowlevelSignal) +{ + // array for gains (one value per ms, incl start & end) + WebRtc_Word32 gains[11]; + + WebRtc_Word32 out_tmp, tmp32; + WebRtc_Word32 env[10]; + WebRtc_Word32 nrg, max_nrg; + WebRtc_Word32 cur_level; + WebRtc_Word32 gain32, delta; + WebRtc_Word16 logratio; + WebRtc_Word16 lower_thr, upper_thr; + WebRtc_Word16 zeros, zeros_fast, frac; + WebRtc_Word16 decay; + WebRtc_Word16 gate, gain_adj; + WebRtc_Word16 k, n; + WebRtc_Word16 L, L2; // samples/subframe + + // determine number of samples per ms + if (FS == 8000) + { + L = 8; + L2 = 3; + } else if (FS == 16000) + { + L = 16; + L2 = 4; + } else if (FS == 32000) + { + L = 16; + L2 = 4; + } else + { + return -1; + } + + // memmove rather than memcpy: the copy stays well defined if a caller + // passes the same (or an overlapping) buffer as input and output. + memmove(out, in_near, 10 * L * sizeof(WebRtc_Word16)); + if (FS == 32000) + { + memmove(out_H, in_near_H, 10 * L * sizeof(WebRtc_Word16)); + } + // VAD for near end + logratio = WebRtcAgc_ProcessVad(&stt->vadNearend, out, L * 10); + + // Account for far end VAD + if (stt->vadFarend.counter > 10) + { + tmp32 = WEBRTC_SPL_MUL_16_16(3, logratio); + logratio = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32 - stt->vadFarend.logRatio, 2); + } + + // Determine decay factor depending on VAD + // upper_thr = 1.0f; + // lower_thr = 0.25f; + upper_thr = 1024; // Q10 + lower_thr = 0; // Q10 + if (logratio > upper_thr) + { + // decay = -2^17 / DecayTime; -> -65 + decay = -65; + } else if (logratio < lower_thr) + { + decay = 0; + } else + { + // decay = 
(WebRtc_Word16)(((lower_thr - logratio) + // * (2^27/(DecayTime*(upper_thr-lower_thr)))) >> 10); + // SUBSTITUTED: 2^27/(DecayTime*(upper_thr-lower_thr)) -> 65 + tmp32 = WEBRTC_SPL_MUL_16_16((lower_thr - logratio), 65); + decay = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 10); + } + + // adjust decay factor for long silence (detected as low standard deviation) + // This is only done in the adaptive modes + if (stt->agcMode != kAgcModeFixedDigital) + { + if (stt->vadNearend.stdLongTerm < 4000) + { + decay = 0; + } else if (stt->vadNearend.stdLongTerm < 8096) + { + // decay = (WebRtc_Word16)(((stt->vadNearend.stdLongTerm - 4000) * decay) >> 12); + tmp32 = WEBRTC_SPL_MUL_16_16((stt->vadNearend.stdLongTerm - 4000), decay); + decay = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 12); + } + + if (lowlevelSignal != 0) + { + decay = 0; + } + } +#ifdef AGC_DEBUG + stt->frameCounter++; + fprintf(stt->logFile, "%5.2f\t%d\t%d\t%d\t", (float)(stt->frameCounter) / 100, logratio, decay, stt->vadNearend.stdLongTerm); +#endif + // Find max amplitude per sub frame + // iterate over sub frames + for (k = 0; k < 10; k++) + { + // iterate over samples + max_nrg = 0; + for (n = 0; n < L; n++) + { + nrg = WEBRTC_SPL_MUL_16_16(out[k * L + n], out[k * L + n]); + if (nrg > max_nrg) + { + max_nrg = nrg; + } + } + env[k] = max_nrg; + } + + // Calculate gain per sub frame + gains[0] = stt->gain; + for (k = 0; k < 10; k++) + { + // Fast envelope follower + // decay time = -131000 / -1000 = 131 (ms) + stt->capacitorFast = AGC_SCALEDIFF32(-1000, stt->capacitorFast, stt->capacitorFast); + if (env[k] > stt->capacitorFast) + { + stt->capacitorFast = env[k]; + } + // Slow envelope follower + if (env[k] > stt->capacitorSlow) + { + // increase capacitorSlow + stt->capacitorSlow + = AGC_SCALEDIFF32(500, (env[k] - stt->capacitorSlow), stt->capacitorSlow); + } else + { + // decrease capacitorSlow + stt->capacitorSlow + = AGC_SCALEDIFF32(decay, stt->capacitorSlow, stt->capacitorSlow); + } + + // use maximum 
of both capacitors as current level + if (stt->capacitorFast > stt->capacitorSlow) + { + cur_level = stt->capacitorFast; + } else + { + cur_level = stt->capacitorSlow; + } + // Translate signal level into gain, using a piecewise linear approximation + // find number of leading zeros + zeros = WebRtcSpl_NormU32((WebRtc_UWord32)cur_level); + if (cur_level == 0) + { + zeros = 31; + } + tmp32 = (WEBRTC_SPL_LSHIFT_W32(cur_level, zeros) & 0x7FFFFFFF); + frac = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 19); // Q12 + tmp32 = WEBRTC_SPL_MUL((stt->gainTable[zeros-1] - stt->gainTable[zeros]), frac); + gains[k + 1] = stt->gainTable[zeros] + WEBRTC_SPL_RSHIFT_W32(tmp32, 12); +#ifdef AGC_DEBUG + if (k == 0) + { + fprintf(stt->logFile, "%d\t%d\t%d\t%d\t%d\n", env[0], cur_level, stt->capacitorFast, stt->capacitorSlow, zeros); + } +#endif + } + + // Gate processing (lower gain during absence of speech) + zeros = WEBRTC_SPL_LSHIFT_W16(zeros, 9) - WEBRTC_SPL_RSHIFT_W16(frac, 3); + // find number of leading zeros + zeros_fast = WebRtcSpl_NormU32((WebRtc_UWord32)stt->capacitorFast); + if (stt->capacitorFast == 0) + { + zeros_fast = 31; + } + tmp32 = (WEBRTC_SPL_LSHIFT_W32(stt->capacitorFast, zeros_fast) & 0x7FFFFFFF); + zeros_fast = WEBRTC_SPL_LSHIFT_W16(zeros_fast, 9); + zeros_fast -= (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 22); + + gate = 1000 + zeros_fast - zeros - stt->vadNearend.stdShortTerm; + + if (gate < 0) + { + stt->gatePrevious = 0; + } else + { + tmp32 = WEBRTC_SPL_MUL_16_16(stt->gatePrevious, 7); + gate = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32((WebRtc_Word32)gate + tmp32, 3); + stt->gatePrevious = gate; + } + // gate < 0 -> no gate + // gate > 2500 -> max gate + if (gate > 0) + { + if (gate < 2500) + { + gain_adj = WEBRTC_SPL_RSHIFT_W16(2500 - gate, 5); + } else + { + gain_adj = 0; + } + for (k = 0; k < 10; k++) + { + if ((gains[k + 1] - stt->gainTable[0]) > 8388608) + { + // To prevent wraparound + tmp32 = WEBRTC_SPL_RSHIFT_W32((gains[k+1] - stt->gainTable[0]), 8); + 
tmp32 = WEBRTC_SPL_MUL(tmp32, (178 + gain_adj)); + } else + { + tmp32 = WEBRTC_SPL_MUL((gains[k+1] - stt->gainTable[0]), (178 + gain_adj)); + tmp32 = WEBRTC_SPL_RSHIFT_W32(tmp32, 8); + } + gains[k + 1] = stt->gainTable[0] + tmp32; + } + } + + // Limit gain to avoid overload distortion + for (k = 0; k < 10; k++) + { + // To prevent wrap around + zeros = 10; + if (gains[k + 1] > 47453132) + { + zeros = 16 - WebRtcSpl_NormW32(gains[k + 1]); + } + gain32 = WEBRTC_SPL_RSHIFT_W32(gains[k+1], zeros) + 1; + gain32 = WEBRTC_SPL_MUL(gain32, gain32); + // check for overflow + while (AGC_MUL32(WEBRTC_SPL_RSHIFT_W32(env[k], 12) + 1, gain32) + > WEBRTC_SPL_SHIFT_W32((WebRtc_Word32)32767, 2 * (1 - zeros + 10))) + { + // multiply by 253/256 ==> -0.1 dB + if (gains[k + 1] > 8388607) + { + // Prevent wrap around + gains[k + 1] = WEBRTC_SPL_MUL(WEBRTC_SPL_RSHIFT_W32(gains[k+1], 8), 253); + } else + { + gains[k + 1] = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL(gains[k+1], 253), 8); + } + gain32 = WEBRTC_SPL_RSHIFT_W32(gains[k+1], zeros) + 1; + gain32 = WEBRTC_SPL_MUL(gain32, gain32); + } + } + // gain reductions should be done 1 ms earlier than gain increases + for (k = 1; k < 10; k++) + { + if (gains[k] > gains[k + 1]) + { + gains[k] = gains[k + 1]; + } + } + // save start gain for next frame + stt->gain = gains[10]; + + // Apply gain + // handle first sub frame separately + delta = WEBRTC_SPL_LSHIFT_W32(gains[1] - gains[0], (4 - L2)); + gain32 = WEBRTC_SPL_LSHIFT_W32(gains[0], 4); + // iterate over samples + for (n = 0; n < L; n++) + { + // For lower band + tmp32 = WEBRTC_SPL_MUL((WebRtc_Word32)out[n], WEBRTC_SPL_RSHIFT_W32(gain32 + 127, 7)); + out_tmp = WEBRTC_SPL_RSHIFT_W32(tmp32 , 16); + if (out_tmp > 4095) + { + out[n] = (WebRtc_Word16)32767; + } else if (out_tmp < -4096) + { + out[n] = (WebRtc_Word16)-32768; + } else + { + tmp32 = WEBRTC_SPL_MUL((WebRtc_Word32)out[n], WEBRTC_SPL_RSHIFT_W32(gain32, 4)); + out[n] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32 , 16); + } + // For higher 
band + if (FS == 32000) + { + tmp32 = WEBRTC_SPL_MUL((WebRtc_Word32)out_H[n], + WEBRTC_SPL_RSHIFT_W32(gain32 + 127, 7)); + out_tmp = WEBRTC_SPL_RSHIFT_W32(tmp32 , 16); + if (out_tmp > 4095) + { + out_H[n] = (WebRtc_Word16)32767; + } else if (out_tmp < -4096) + { + out_H[n] = (WebRtc_Word16)-32768; + } else + { + tmp32 = WEBRTC_SPL_MUL((WebRtc_Word32)out_H[n], + WEBRTC_SPL_RSHIFT_W32(gain32, 4)); + out_H[n] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32 , 16); + } + } + // + + gain32 += delta; + } + // iterate over subframes + for (k = 1; k < 10; k++) + { + delta = WEBRTC_SPL_LSHIFT_W32(gains[k+1] - gains[k], (4 - L2)); + gain32 = WEBRTC_SPL_LSHIFT_W32(gains[k], 4); + // iterate over samples + for (n = 0; n < L; n++) + { + // For lower band + tmp32 = WEBRTC_SPL_MUL((WebRtc_Word32)out[k * L + n], + WEBRTC_SPL_RSHIFT_W32(gain32, 4)); + out[k * L + n] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32 , 16); + // For higher band + if (FS == 32000) + { + tmp32 = WEBRTC_SPL_MUL((WebRtc_Word32)out_H[k * L + n], + WEBRTC_SPL_RSHIFT_W32(gain32, 4)); + out_H[k * L + n] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32 , 16); + } + gain32 += delta; + } + } + + return 0; +} + +void WebRtcAgc_InitVad(AgcVad_t *state) +{ + WebRtc_Word16 k; + + state->HPstate = 0; // state of high pass filter + state->logRatio = 0; // log( P(active) / P(inactive) ) + // average input level (Q10) + state->meanLongTerm = WEBRTC_SPL_LSHIFT_W16(15, 10); + + // variance of input level (Q8) + state->varianceLongTerm = WEBRTC_SPL_LSHIFT_W32(500, 8); + + state->stdLongTerm = 0; // standard deviation of input level in dB + // short-term average input level (Q10) + state->meanShortTerm = WEBRTC_SPL_LSHIFT_W16(15, 10); + + // short-term variance of input level (Q8) + state->varianceShortTerm = WEBRTC_SPL_LSHIFT_W32(500, 8); + + state->stdShortTerm = 0; // short-term standard deviation of input level in dB + state->counter = 3; // counts updates + for (k = 0; k < 8; k++) + { + // downsampling filter + state->downState[k] 
= 0; + } +} + +WebRtc_Word16 WebRtcAgc_ProcessVad(AgcVad_t *state, // (i) VAD state + const WebRtc_Word16 *in, // (i) Speech signal + WebRtc_Word16 nrSamples) // (i) number of samples +{ + WebRtc_Word32 out, nrg, tmp32, tmp32b; + WebRtc_UWord16 tmpU16; + WebRtc_Word16 k, subfr, tmp16; + WebRtc_Word16 buf1[8]; + WebRtc_Word16 buf2[4]; + WebRtc_Word16 HPstate; + WebRtc_Word16 zeros, dB; + WebRtc_Word16 *buf1_ptr; + + // process in 10 sub frames of 1 ms (to save on memory) + nrg = 0; + buf1_ptr = &buf1[0]; + HPstate = state->HPstate; + for (subfr = 0; subfr < 10; subfr++) + { + // downsample to 4 kHz + if (nrSamples == 160) + { + for (k = 0; k < 8; k++) + { + tmp32 = (WebRtc_Word32)in[2 * k] + (WebRtc_Word32)in[2 * k + 1]; + tmp32 = WEBRTC_SPL_RSHIFT_W32(tmp32, 1); + buf1[k] = (WebRtc_Word16)tmp32; + } + in += 16; + + WebRtcSpl_DownsampleBy2(buf1, 8, buf2, state->downState); + } else + { + WebRtcSpl_DownsampleBy2(in, 8, buf2, state->downState); + in += 8; + } + + // high pass filter and compute energy + for (k = 0; k < 4; k++) + { + out = buf2[k] + HPstate; + tmp32 = WEBRTC_SPL_MUL(600, out); + HPstate = (WebRtc_Word16)(WEBRTC_SPL_RSHIFT_W32(tmp32, 10) - buf2[k]); + tmp32 = WEBRTC_SPL_MUL(out, out); + nrg += WEBRTC_SPL_RSHIFT_W32(tmp32, 6); + } + } + state->HPstate = HPstate; + + // find number of leading zeros + if (!(0xFFFF0000 & nrg)) + { + zeros = 16; + } else + { + zeros = 0; + } + if (!(0xFF000000 & (nrg << zeros))) + { + zeros += 8; + } + if (!(0xF0000000 & (nrg << zeros))) + { + zeros += 4; + } + if (!(0xC0000000 & (nrg << zeros))) + { + zeros += 2; + } + if (!(0x80000000 & (nrg << zeros))) + { + zeros += 1; + } + + // energy level (range {-32..30}) (Q10) + dB = WEBRTC_SPL_LSHIFT_W16(15 - zeros, 11); + + // Update statistics + + if (state->counter < kAvgDecayTime) + { + // decay time = AvgDecTime * 10 ms + state->counter++; + } + + // update short-term estimate of mean energy level (Q10) + tmp32 = (WEBRTC_SPL_MUL_16_16(state->meanShortTerm, 15) + 
(WebRtc_Word32)dB); + state->meanShortTerm = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 4); + + // update short-term estimate of variance in energy level (Q8) + tmp32 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_16_16(dB, dB), 12); + tmp32 += WEBRTC_SPL_MUL(state->varianceShortTerm, 15); + state->varianceShortTerm = WEBRTC_SPL_RSHIFT_W32(tmp32, 4); + + // update short-term estimate of standard deviation in energy level (Q10) + tmp32 = WEBRTC_SPL_MUL_16_16(state->meanShortTerm, state->meanShortTerm); + tmp32 = WEBRTC_SPL_LSHIFT_W32(state->varianceShortTerm, 12) - tmp32; + state->stdShortTerm = (WebRtc_Word16)WebRtcSpl_Sqrt(tmp32); + + // update long-term estimate of mean energy level (Q10) + tmp32 = WEBRTC_SPL_MUL_16_16(state->meanLongTerm, state->counter) + (WebRtc_Word32)dB; + state->meanLongTerm = WebRtcSpl_DivW32W16ResW16(tmp32, + WEBRTC_SPL_ADD_SAT_W16(state->counter, 1)); + + // update long-term estimate of variance in energy level (Q8) + tmp32 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_16_16(dB, dB), 12); + tmp32 += WEBRTC_SPL_MUL(state->varianceLongTerm, state->counter); + state->varianceLongTerm = WebRtcSpl_DivW32W16(tmp32, + WEBRTC_SPL_ADD_SAT_W16(state->counter, 1)); + + // update long-term estimate of standard deviation in energy level (Q10) + tmp32 = WEBRTC_SPL_MUL_16_16(state->meanLongTerm, state->meanLongTerm); + tmp32 = WEBRTC_SPL_LSHIFT_W32(state->varianceLongTerm, 12) - tmp32; + state->stdLongTerm = (WebRtc_Word16)WebRtcSpl_Sqrt(tmp32); + + // update voice activity measure (Q10) + tmp16 = WEBRTC_SPL_LSHIFT_W16(3, 12); + tmp32 = WEBRTC_SPL_MUL_16_16(tmp16, (dB - state->meanLongTerm)); + tmp32 = WebRtcSpl_DivW32W16(tmp32, state->stdLongTerm); + tmpU16 = WEBRTC_SPL_LSHIFT_U16((WebRtc_UWord16)13, 12); + tmp32b = WEBRTC_SPL_MUL_16_U16(state->logRatio, tmpU16); + tmp32 += WEBRTC_SPL_RSHIFT_W32(tmp32b, 10); + + state->logRatio = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 6); + + // limit + if (state->logRatio > 2048) + { + state->logRatio = 2048; + } + if 
(state->logRatio < -2048) + { + state->logRatio = -2048; + } + + return state->logRatio; // Q10 +} diff --git a/src/modules/audio_processing/agc/main/source/digital_agc.h b/src/modules/audio_processing/agc/main/source/digital_agc.h new file mode 100644 index 0000000000..240b220661 --- /dev/null +++ b/src/modules/audio_processing/agc/main/source/digital_agc.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MAIN_SOURCE_DIGITAL_AGC_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MAIN_SOURCE_DIGITAL_AGC_H_ + +#ifdef AGC_DEBUG +#include <stdio.h> +#endif +#include "typedefs.h" +#include "signal_processing_library.h" + +// the 32 most significant bits of A(19) * B(26) >> 13 +#define AGC_MUL32(A, B) (((B)>>13)*(A) + ( ((0x00001FFF & (B))*(A)) >> 13 )) +// C + the 32 most significant bits of A * B +#define AGC_SCALEDIFF32(A, B, C) ((C) + ((B)>>16)*(A) + ( ((0x0000FFFF & (B))*(A)) >> 16 )) + +typedef struct +{ + WebRtc_Word32 downState[8]; + WebRtc_Word16 HPstate; + WebRtc_Word16 counter; + WebRtc_Word16 logRatio; // log( P(active) / P(inactive) ) (Q10) + WebRtc_Word16 meanLongTerm; // Q10 + WebRtc_Word32 varianceLongTerm; // Q8 + WebRtc_Word16 stdLongTerm; // Q10 + WebRtc_Word16 meanShortTerm; // Q10 + WebRtc_Word32 varianceShortTerm; // Q8 + WebRtc_Word16 stdShortTerm; // Q10 +} AgcVad_t; // total = 54 bytes + +typedef struct +{ + WebRtc_Word32 capacitorSlow; + WebRtc_Word32 capacitorFast; + WebRtc_Word32 gain; + WebRtc_Word32 gainTable[32]; + WebRtc_Word16 gatePrevious; + WebRtc_Word16 agcMode; + AgcVad_t vadNearend; + AgcVad_t vadFarend; +#ifdef 
AGC_DEBUG + FILE* logFile; + int frameCounter; +#endif +} DigitalAgc_t; + +// Resets the digital AGC state for |agcMode| and initializes both VADs. Returns 0. +WebRtc_Word32 WebRtcAgc_InitDigital(DigitalAgc_t *digitalAgcInst, WebRtc_Word16 agcMode); + +// Copies one frame (10 subframes) from inNear to out and applies the digital gain +// in place on out; the _H buffers are only touched when FS is 32000. +// Returns 0, or -1 if FS is not 8000, 16000 or 32000. +WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *digitalAgcInst, const WebRtc_Word16 *inNear, + const WebRtc_Word16 *inNear_H, WebRtc_Word16 *out, + WebRtc_Word16 *out_H, WebRtc_UWord32 FS, + WebRtc_Word16 lowLevelSignal); + +// Runs the far-end VAD on one frame of nrSamples samples. +WebRtc_Word32 WebRtcAgc_AddFarendToDigital(DigitalAgc_t *digitalAgcInst, const WebRtc_Word16 *inFar, + WebRtc_Word16 nrSamples); + +// Puts a VAD instance into its initial state. +void WebRtcAgc_InitVad(AgcVad_t *vadInst); + +// Updates the VAD statistics with one frame and returns the new log ratio +// in Q10, limited to [-2048, 2048]. +WebRtc_Word16 WebRtcAgc_ProcessVad(AgcVad_t *vadInst, // (i) VAD state + const WebRtc_Word16 *in, // (i) Speech signal + WebRtc_Word16 nrSamples); // (i) number of samples + +// Fills the 32-entry compressor gain table (Q16). Returns 0 on success, +// -1 if the gain derived from compressionGaindB is rejected. +WebRtc_Word32 WebRtcAgc_CalculateGainTable(WebRtc_Word32 *gainTable, // Q16 + WebRtc_Word16 compressionGaindB, // Q0 (in dB) + WebRtc_Word16 targetLevelDbfs,// Q0 (in dB) + WebRtc_UWord8 limiterEnable, WebRtc_Word16 analogTarget); + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MAIN_SOURCE_DIGITAL_AGC_H_ diff --git a/src/modules/audio_processing/main/apm_tests.gyp b/src/modules/audio_processing/main/apm_tests.gyp new file mode 100644 index 0000000000..441abebb49 --- /dev/null +++ b/src/modules/audio_processing/main/apm_tests.gyp @@ -0,0 +1,60 @@ +# Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. 
+ +{ + 'includes': [ + '../../../common_settings.gypi', + ], + 'targets': [ + { + 'target_name': 'unit_test', + 'type': 'executable', + 'dependencies': [ + 'source/apm.gyp:audio_processing', + '../../../system_wrappers/source/system_wrappers.gyp:system_wrappers', + '../../../common_audio/signal_processing_library/main/source/spl.gyp:spl', + + '../../../../testing/gtest.gyp:gtest', + '../../../../testing/gtest.gyp:gtest_main', + '../../../../third_party/protobuf/protobuf.gyp:protobuf_lite', + ], + 'include_dirs': [ + '../../../../testing/gtest/include', + ], + 'sources': [ + 'test/unit_test/unit_test.cc', + 'test/unit_test/audio_processing_unittest.pb.cc', + 'test/unit_test/audio_processing_unittest.pb.h', + ], + }, + { + 'target_name': 'process_test', + 'type': 'executable', + 'dependencies': [ + 'source/apm.gyp:audio_processing', + '../../../system_wrappers/source/system_wrappers.gyp:system_wrappers', + + '../../../../testing/gtest.gyp:gtest', + '../../../../testing/gtest.gyp:gtest_main', + ], + 'include_dirs': [ + '../../../../testing/gtest/include', + ], + 'sources': [ + 'test/process_test/process_test.cc', + ], + }, + + ], +} + +# Local Variables: +# tab-width:2 +# indent-tabs-mode:nil +# End: +# vim: set expandtab tabstop=2 shiftwidth=2: diff --git a/src/modules/audio_processing/main/interface/audio_processing.h b/src/modules/audio_processing/main/interface/audio_processing.h new file mode 100644 index 0000000000..dc9c2325a5 --- /dev/null +++ b/src/modules/audio_processing/main/interface/audio_processing.h @@ -0,0 +1,564 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_INTERFACE_AUDIO_PROCESSING_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_INTERFACE_AUDIO_PROCESSING_H_ + +#include "typedefs.h" +#include "module.h" + +namespace webrtc { + +class AudioFrame; +class EchoCancellation; +class EchoControlMobile; +class GainControl; +class HighPassFilter; +class LevelEstimator; +class NoiseSuppression; +class VoiceDetection; + +// The Audio Processing Module (APM) provides a collection of voice processing +// components designed for real-time communications software. +// +// APM operates on two audio streams on a frame-by-frame basis. Frames of the +// primary stream, on which all processing is applied, are passed to +// |ProcessStream()|. Frames of the reverse direction stream, which are used for +// analysis by some components, are passed to |AnalyzeReverseStream()|. On the +// client-side, this will typically be the near-end (capture) and far-end +// (render) streams, respectively. APM should be placed in the signal chain as +// close to the audio hardware abstraction layer (HAL) as possible. +// +// On the server-side, the reverse stream will normally not be used, with +// processing occurring on each incoming stream. +// +// Component interfaces follow a similar pattern and are accessed through +// corresponding getters in APM. All components are disabled at create-time, +// with default settings that are recommended for most situations. New settings +// can be applied without enabling a component. Enabling a component triggers +// memory allocation and initialization to allow it to start processing the +// streams. +// +// Thread safety is provided with the following assumptions to reduce locking +// overhead: +// 1. The stream getters and setters are called from the same thread as +// ProcessStream(). More precisely, stream functions are never called +// concurrently with ProcessStream(). +// 2. 
Parameter getters are never called concurrently with the corresponding +// setter. +// +// APM accepts only 16-bit linear PCM audio data in frames of 10 ms. Multiple +// channels should be interleaved. +// +// Usage example, omitting error checking: +// AudioProcessing* apm = AudioProcessing::Create(0); +// apm->set_sample_rate_hz(32000); // Super-wideband processing. +// +// // Mono capture and stereo render. +// apm->set_num_channels(1, 1); +// apm->set_num_reverse_channels(2); +// +// apm->high_pass_filter()->Enable(true); +// +// apm->echo_cancellation()->enable_drift_compensation(false); +// apm->echo_cancellation()->Enable(true); +// +// apm->noise_suppression()->set_level(kHigh); +// apm->noise_suppression()->Enable(true); +// +// apm->gain_control()->set_analog_level_limits(0, 255); +// apm->gain_control()->set_mode(kAdaptiveAnalog); +// apm->gain_control()->Enable(true); +// +// apm->voice_detection()->Enable(true); +// +// // Start a voice call... +// +// // ... Render frame arrives bound for the audio HAL ... +// apm->AnalyzeReverseStream(render_frame); +// +// // ... Capture frame arrives from the audio HAL ... +// // Call required set_stream_ functions. +// apm->set_stream_delay_ms(delay_ms); +// apm->gain_control()->set_stream_analog_level(analog_level); +// +// apm->ProcessStream(capture_frame); +// +// // Call required stream_ functions. +// analog_level = apm->gain_control()->stream_analog_level(); +// has_voice = apm->voice_detection()->stream_has_voice(); +// +// // Repeat render and capture processing for the duration of the call... +// // Start a new call... +// apm->Initialize(); +// +// // Close the application... +// AudioProcessing::Destroy(apm); +// apm = NULL; +// +class AudioProcessing : public Module { + public: + // Creates an APM instance, with identifier |id|. Use one instance for every + // primary audio stream requiring processing.
On the client-side, this would + // typically be one instance for the near-end stream, and additional instances + // for each far-end stream which requires processing. On the server-side, + // this would typically be one instance for every incoming stream. + static AudioProcessing* Create(int id); + + // Destroys a |apm| instance. + static void Destroy(AudioProcessing* apm); + + // Initializes internal states, while retaining all user settings. This + // should be called before beginning to process a new audio stream. However, + // it is not necessary to call before processing the first stream after + // creation. + virtual int Initialize() = 0; + + // Sets the sample |rate| in Hz for both the primary and reverse audio + // streams. 8000, 16000 or 32000 Hz are permitted. + virtual int set_sample_rate_hz(int rate) = 0; + virtual int sample_rate_hz() const = 0; + + // Sets the number of channels for the primary audio stream. Input frames must + // contain a number of channels given by |input_channels|, while output frames + // will be returned with number of channels given by |output_channels|. + virtual int set_num_channels(int input_channels, int output_channels) = 0; + virtual int num_input_channels() const = 0; + virtual int num_output_channels() const = 0; + + // Sets the number of channels for the reverse audio stream. Input frames must + // contain a number of channels given by |channels|. + virtual int set_num_reverse_channels(int channels) = 0; + virtual int num_reverse_channels() const = 0; + + // Processes a 10 ms |frame| of the primary audio stream. On the client-side, + // this is the near-end (or captured) audio. + // + // If needed for enabled functionality, any function with the set_stream_ tag + // must be called prior to processing the current frame. Any getter function + // with the stream_ tag which is needed should be called after processing. 
+ // + // The |_frequencyInHz|, |_audioChannel|, and |_payloadDataLengthInSamples| + // members of |frame| must be valid, and correspond to settings supplied + // to APM. + virtual int ProcessStream(AudioFrame* frame) = 0; + + // Analyzes a 10 ms |frame| of the reverse direction audio stream. The frame + // will not be modified. On the client-side, this is the far-end (or to be + // rendered) audio. + // + // It is only necessary to provide this if echo processing is enabled, as the + // reverse stream forms the echo reference signal. It is recommended, but not + // necessary, to provide if gain control is enabled. On the server-side this + // typically will not be used. If you're not sure what to pass in here, + // chances are you don't need to use it. + // + // The |_frequencyInHz|, |_audioChannel|, and |_payloadDataLengthInSamples| + // members of |frame| must be valid. + // + // TODO(ajm): add const to input; requires an implementation fix. + virtual int AnalyzeReverseStream(AudioFrame* frame) = 0; + + // This must be called if and only if echo processing is enabled. + // + // Sets the |delay| in ms between AnalyzeReverseStream() receiving a far-end + // frame and ProcessStream() receiving a near-end frame containing the + // corresponding echo. On the client-side this can be expressed as + // delay = (t_render - t_analyze) + (t_process - t_capture) + // where, + // - t_analyze is the time a frame is passed to AnalyzeReverseStream() and + // t_render is the time the first sample of the same frame is rendered by + // the audio hardware. + // - t_capture is the time the first sample of a frame is captured by the + // audio hardware and t_process is the time the same frame is passed to + // ProcessStream(). + virtual int set_stream_delay_ms(int delay) = 0; + virtual int stream_delay_ms() const = 0; + + // Starts recording debugging information to a file specified by |filename|, + // a NULL-terminated string.
If there is an ongoing recording, the old file + // will be closed, and recording will continue in the newly specified file. + // An already existing file will be overwritten without warning. + static const int kMaxFilenameSize = 1024; + virtual int StartDebugRecording(const char filename[kMaxFilenameSize]) = 0; + + // Stops recording debugging information, and closes the file. Recording + // cannot be resumed in the same file (without overwriting it). + virtual int StopDebugRecording() = 0; + + // These provide access to the component interfaces and should never return + // NULL. The pointers will be valid for the lifetime of the APM instance. + // The memory for these objects is entirely managed internally. + virtual EchoCancellation* echo_cancellation() const = 0; + virtual EchoControlMobile* echo_control_mobile() const = 0; + virtual GainControl* gain_control() const = 0; + virtual HighPassFilter* high_pass_filter() const = 0; + virtual LevelEstimator* level_estimator() const = 0; + virtual NoiseSuppression* noise_suppression() const = 0; + virtual VoiceDetection* voice_detection() const = 0; + + struct Statistic { + int instant; // Instantaneous value. + int average; // Long-term average. + int maximum; // Long-term maximum. + int minimum; // Long-term minimum. + }; + + // Fatal errors. + enum Errors { + kNoError = 0, + kUnspecifiedError = -1, + kCreationFailedError = -2, + kUnsupportedComponentError = -3, + kUnsupportedFunctionError = -4, + kNullPointerError = -5, + kBadParameterError = -6, + kBadSampleRateError = -7, + kBadDataLengthError = -8, + kBadNumberChannelsError = -9, + kFileError = -10, + kStreamParameterNotSetError = -11, + kNotEnabledError = -12 + }; + + // Warnings are non-fatal. + enum Warnings { + // This results when a set_stream_ parameter is out of range. Processing + // will continue, but the parameter may have been truncated. + kBadStreamParameterWarning = -13, + }; + + // Inherited from Module. 
+ virtual WebRtc_Word32 TimeUntilNextProcess() { return -1; }; + virtual WebRtc_Word32 Process() { return -1; }; + + protected: + virtual ~AudioProcessing() {}; +}; + +// The acoustic echo cancellation (AEC) component provides better performance +// than AECM but also requires more processing power and is dependent on delay +// stability and reporting accuracy. As such it is well-suited and recommended +// for PC and IP phone applications. +// +// Not recommended to be enabled on the server-side. +class EchoCancellation { + public: + // EchoCancellation and EchoControlMobile may not be enabled simultaneously. + // Enabling one will disable the other. + virtual int Enable(bool enable) = 0; + virtual bool is_enabled() const = 0; + + // Differences in clock speed on the primary and reverse streams can impact + // the AEC performance. On the client-side, this could be seen when different + // render and capture devices are used, particularly with webcams. + // + // This enables a compensation mechanism, and requires that + // |set_device_sample_rate_hz()| and |set_stream_drift_samples()| be called. + virtual int enable_drift_compensation(bool enable) = 0; + virtual bool is_drift_compensation_enabled() const = 0; + + // Provides the sampling rate of the audio devices. It is assumed the render + // and capture devices use the same nominal sample rate. Required if and only + // if drift compensation is enabled. + virtual int set_device_sample_rate_hz(int rate) = 0; + virtual int device_sample_rate_hz() const = 0; + + // Sets the difference between the number of samples rendered and captured by + // the audio devices since the last call to |ProcessStream()|. Must be called + // if and only if drift compensation is enabled, prior to |ProcessStream()|. 
+ virtual int set_stream_drift_samples(int drift) = 0; + virtual int stream_drift_samples() const = 0; + + enum SuppressionLevel { + kLowSuppression, + kModerateSuppression, + kHighSuppression + }; + + // Sets the aggressiveness of the suppressor. A higher level trades off + // double-talk performance for increased echo suppression. + virtual int set_suppression_level(SuppressionLevel level) = 0; + virtual SuppressionLevel suppression_level() const = 0; + + // Returns false if the current frame almost certainly contains no echo + // and true if it _might_ contain echo. + virtual bool stream_has_echo() const = 0; + + // Enables the computation of various echo metrics. These are obtained + // through |GetMetrics()|. + virtual int enable_metrics(bool enable) = 0; + virtual bool are_metrics_enabled() const = 0; + + // Each statistic is reported in dB. + // P_far: Far-end (render) signal power. + // P_echo: Near-end (capture) echo signal power. + // P_out: Signal power at the output of the AEC. + // P_a: Internal signal power at the point before the AEC's non-linear + // processor. + struct Metrics { + // RERL = ERL + ERLE + AudioProcessing::Statistic residual_echo_return_loss; + + // ERL = 10log_10(P_far / P_echo) + AudioProcessing::Statistic echo_return_loss; + + // ERLE = 10log_10(P_echo / P_out) + AudioProcessing::Statistic echo_return_loss_enhancement; + + // (Pre non-linear processing suppression) A_NLP = 10log_10(P_echo / P_a) + AudioProcessing::Statistic a_nlp; + }; + + // TODO(ajm): discuss the metrics update period. + virtual int GetMetrics(Metrics* metrics) = 0; + + protected: + virtual ~EchoCancellation() {}; +}; + +// The acoustic echo control for mobile (AECM) component is a low complexity +// robust option intended for use on mobile devices. +// +// Not recommended to be enabled on the server-side. +class EchoControlMobile { + public: + // EchoCancellation and EchoControlMobile may not be enabled simultaneously. + // Enabling one will disable the other. 
+ virtual int Enable(bool enable) = 0; + virtual bool is_enabled() const = 0; + + // Recommended settings for particular audio routes. In general, the louder + // the echo is expected to be, the higher this value should be set. The + // preferred setting may vary from device to device. + enum RoutingMode { + kQuietEarpieceOrHeadset, + kEarpiece, + kLoudEarpiece, + kSpeakerphone, + kLoudSpeakerphone + }; + + // Sets echo control appropriate for the audio routing |mode| on the device. + // It can and should be updated during a call if the audio routing changes. + virtual int set_routing_mode(RoutingMode mode) = 0; + virtual RoutingMode routing_mode() const = 0; + + // Comfort noise replaces suppressed background noise to maintain a + // consistent signal level. + virtual int enable_comfort_noise(bool enable) = 0; + virtual bool is_comfort_noise_enabled() const = 0; + + protected: + virtual ~EchoControlMobile() {}; +}; + +// The automatic gain control (AGC) component brings the signal to an +// appropriate range. This is done by applying a digital gain directly and, in +// the analog mode, prescribing an analog gain to be applied at the audio HAL. +// +// Recommended to be enabled on the client-side. +class GainControl { + public: + virtual int Enable(bool enable) = 0; + virtual bool is_enabled() const = 0; + + // When an analog mode is set, this must be called prior to |ProcessStream()| + // to pass the current analog level from the audio HAL. Must be within the + // range provided to |set_analog_level_limits()|. + virtual int set_stream_analog_level(int level) = 0; + + // When an analog mode is set, this should be called after |ProcessStream()| + // to obtain the recommended new analog level for the audio HAL. It is the + // user's responsibility to apply this level. + virtual int stream_analog_level() = 0; + + enum Mode { + // Adaptive mode intended for use if an analog volume control is available + // on the capture device.
It will require the user to provide coupling + // between the OS mixer controls and AGC through the |stream_analog_level()| + // functions. + // + // It consists of an analog gain prescription for the audio device and a + // digital compression stage. + kAdaptiveAnalog, + + // Adaptive mode intended for situations in which an analog volume control + // is unavailable. It operates in a similar fashion to the adaptive analog + // mode, but with scaling instead applied in the digital domain. As with + // the analog mode, it additionally uses a digital compression stage. + kAdaptiveDigital, + + // Fixed mode which enables only the digital compression stage also used by + // the two adaptive modes. + // + // It is distinguished from the adaptive modes by considering only a + // short time-window of the input signal. It applies a fixed gain through + // most of the input level range, and compresses (gradually reduces gain + // with increasing level) the input signal at higher levels. This mode is + // preferred on embedded devices where the capture signal level is + // predictable, so that a known gain can be applied. + kFixedDigital + }; + + virtual int set_mode(Mode mode) = 0; + virtual Mode mode() const = 0; + + // Sets the target peak |level| (or envelope) of the AGC in dBFs (decibels + // from digital full-scale). The convention is to use positive values. For + // instance, passing in a value of 3 corresponds to -3 dBFs, or a target + // level 3 dB below full-scale. Limited to [0, 31]. + // + // TODO(ajm): use a negative value here instead, if/when VoE will similarly + // update its interface. + virtual int set_target_level_dbfs(int level) = 0; + virtual int target_level_dbfs() const = 0; + + // Sets the maximum |gain| the digital compression stage may apply, in dB. A + // higher number corresponds to greater compression, while a value of 0 will + // leave the signal uncompressed. Limited to [0, 90]. 
+ virtual int set_compression_gain_db(int gain) = 0; + virtual int compression_gain_db() const = 0; + + // When enabled, the compression stage will hard limit the signal to the + // target level. Otherwise, the signal will be compressed but not limited + // above the target level. + virtual int enable_limiter(bool enable) = 0; + virtual bool is_limiter_enabled() const = 0; + + // Sets the |minimum| and |maximum| analog levels of the audio capture device. + // Must be set if and only if an analog mode is used. Limited to [0, 65535]. + virtual int set_analog_level_limits(int minimum, + int maximum) = 0; + virtual int analog_level_minimum() const = 0; + virtual int analog_level_maximum() const = 0; + + // Returns true if the AGC has detected a saturation event (period where the + // signal reaches digital full-scale) in the current frame and the analog + // level cannot be reduced. + // + // This could be used as an indicator to reduce or disable analog mic gain at + // the audio HAL. + virtual bool stream_is_saturated() const = 0; + + protected: + virtual ~GainControl() {}; +}; + +// A filtering component which removes DC offset and low-frequency noise. +// Recommended to be enabled on the client-side. +class HighPassFilter { + public: + virtual int Enable(bool enable) = 0; + virtual bool is_enabled() const = 0; + + protected: + virtual ~HighPassFilter() {}; +}; + +// An estimation component used to retrieve level metrics. +class LevelEstimator { + public: + virtual int Enable(bool enable) = 0; + virtual bool is_enabled() const = 0; + + // The metrics are reported in dBFs calculated as: + // Level = 10log_10(P_s / P_max) [dBFs], where + // P_s is the signal power and P_max is the maximum possible (or peak) + // power. With 16-bit signals, P_max = (2^15)^2. + struct Metrics { + AudioProcessing::Statistic signal; // Overall signal level. + AudioProcessing::Statistic speech; // Speech level. + AudioProcessing::Statistic noise; // Noise level. 
+ }; + + virtual int GetMetrics(Metrics* metrics, Metrics* reverse_metrics) = 0; + + //virtual int enable_noise_warning(bool enable) = 0; + //bool is_noise_warning_enabled() const = 0; + //virtual bool stream_has_high_noise() const = 0; + + protected: + virtual ~LevelEstimator() {}; +}; + +// The noise suppression (NS) component attempts to remove noise while +// retaining speech. Recommended to be enabled on the client-side. +// +// Recommended to be enabled on the client-side. +class NoiseSuppression { + public: + virtual int Enable(bool enable) = 0; + virtual bool is_enabled() const = 0; + + // Determines the aggressiveness of the suppression. Increasing the level + // will reduce the noise level at the expense of a higher speech distortion. + enum Level { + kLow, + kModerate, + kHigh, + kVeryHigh + }; + + virtual int set_level(Level level) = 0; + virtual Level level() const = 0; + + protected: + virtual ~NoiseSuppression() {}; +}; + +// The voice activity detection (VAD) component analyzes the stream to +// determine if voice is present. A facility is also provided to pass in an +// external VAD decision. +class VoiceDetection { + public: + virtual int Enable(bool enable) = 0; + virtual bool is_enabled() const = 0; + + // Returns true if voice is detected in the current frame. Should be called + // after |ProcessStream()|. + virtual bool stream_has_voice() const = 0; + + // Some of the APM functionality requires a VAD decision. In the case that + // a decision is externally available for the current frame, it can be passed + // in here, before |ProcessStream()| is called. + // + // VoiceDetection does _not_ need to be enabled to use this. If it happens to + // be enabled, detection will be skipped for any frame in which an external + // VAD decision is provided. + virtual int set_stream_has_voice(bool has_voice) = 0; + + // Specifies the likelihood that a frame will be declared to contain voice. 
+ // A higher value makes it more likely that speech will not be clipped, at + // the expense of more noise being detected as voice. + enum Likelihood { + kVeryLowLikelihood, + kLowLikelihood, + kModerateLikelihood, + kHighLikelihood + }; + + virtual int set_likelihood(Likelihood likelihood) = 0; + virtual Likelihood likelihood() const = 0; + + // Sets the |size| of the frames in ms on which the VAD will operate. Larger + // frames will improve detection accuracy, but reduce the frequency of + // updates. + // + // This does not impact the size of frames passed to |ProcessStream()|. + virtual int set_frame_size_ms(int size) = 0; + virtual int frame_size_ms() const = 0; + + protected: + virtual ~VoiceDetection() {}; +}; +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_INTERFACE_AUDIO_PROCESSING_H_ diff --git a/src/modules/audio_processing/main/source/Android.mk b/src/modules/audio_processing/main/source/Android.mk new file mode 100644 index 0000000000..634ad6ad4b --- /dev/null +++ b/src/modules/audio_processing/main/source/Android.mk @@ -0,0 +1,75 @@ +# Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. 
+ +LOCAL_PATH := $(call my-dir) + +include $(CLEAR_VARS) + +LOCAL_ARM_MODE := arm +LOCAL_MODULE := libwebrtc_apm +LOCAL_MODULE_TAGS := optional +LOCAL_CPP_EXTENSION := .cc +LOCAL_GENERATED_SOURCES := +LOCAL_SRC_FILES := audio_buffer.cc \ + audio_processing_impl.cc \ + echo_cancellation_impl.cc \ + echo_control_mobile_impl.cc \ + gain_control_impl.cc \ + high_pass_filter_impl.cc \ + level_estimator_impl.cc \ + noise_suppression_impl.cc \ + splitting_filter.cc \ + processing_component.cc \ + voice_detection_impl.cc + +# Flags passed to both C and C++ files. +MY_CFLAGS := +MY_CFLAGS_C := +MY_DEFS := '-DNO_TCMALLOC' \ + '-DNO_HEAPCHECKER' \ + '-DWEBRTC_TARGET_PC' \ + '-DWEBRTC_LINUX' \ + '-DWEBRTC_THREAD_RR' \ + '-DWEBRTC_NS_FIXED' +# floating point +# -DWEBRTC_NS_FLOAT' +ifeq ($(TARGET_ARCH),arm) +MY_DEFS += \ + '-DWEBRTC_ANDROID' \ + '-DANDROID' +endif +LOCAL_CFLAGS := $(MY_CFLAGS_C) $(MY_CFLAGS) $(MY_DEFS) + +# Include paths placed before CFLAGS/CPPFLAGS +LOCAL_C_INCLUDES := $(LOCAL_PATH)/../../../.. \ + $(LOCAL_PATH)/../interface \ + $(LOCAL_PATH)/../../../interface \ + $(LOCAL_PATH)/../../../../system_wrappers/interface \ + $(LOCAL_PATH)/../../aec/main/interface \ + $(LOCAL_PATH)/../../aecm/main/interface \ + $(LOCAL_PATH)/../../agc/main/interface \ + $(LOCAL_PATH)/../../ns/main/interface \ + $(LOCAL_PATH)/../../../../common_audio/signal_processing_library/main/interface \ + $(LOCAL_PATH)/../../../../common_audio/vad/main/interface + +# Flags passed to only C++ (and not C) files. 
+LOCAL_CPPFLAGS := + +LOCAL_LDFLAGS := + +LOCAL_STATIC_LIBRARIES := + +LOCAL_SHARED_LIBRARIES := libcutils \ + libdl \ + libstlport + +LOCAL_ADDITIONAL_DEPENDENCIES := + +include external/stlport/libstlport.mk +include $(BUILD_STATIC_LIBRARY) + diff --git a/src/modules/audio_processing/main/source/apm.gyp b/src/modules/audio_processing/main/source/apm.gyp new file mode 100644 index 0000000000..93811c71f9 --- /dev/null +++ b/src/modules/audio_processing/main/source/apm.gyp @@ -0,0 +1,77 @@ +# Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +{ + 'includes': [ + '../../../../common_settings.gypi', # Common settings + ], + 'targets': [ + { + 'target_name': 'audio_processing', + 'type': '<(library)', + 'conditions': [ + ['prefer_fixed_point==1', { + 'dependencies': ['../../ns/main/source/ns.gyp:ns_fix'], + 'defines': ['WEBRTC_NS_FIXED'], + }, { # else: prefer_fixed_point==0 + 'dependencies': ['../../ns/main/source/ns.gyp:ns'], + 'defines': ['WEBRTC_NS_FLOAT'], + }], + ], + 'dependencies': [ + '../../../../system_wrappers/source/system_wrappers.gyp:system_wrappers', + '../../aec/main/source/aec.gyp:aec', + '../../aecm/main/source/aecm.gyp:aecm', + '../../agc/main/source/agc.gyp:agc', + '../../../../common_audio/signal_processing_library/main/source/spl.gyp:spl', + '../../../../common_audio/vad/main/source/vad.gyp:vad', + ], + 'include_dirs': [ + '../interface', + '../../../interface', + ], + 'direct_dependent_settings': { + 'include_dirs': [ + '../interface', + '../../../interface', + ], + }, + 'sources': [ + '../interface/audio_processing.h', + 'audio_buffer.cc', + 'audio_buffer.h', + 'audio_processing_impl.cc', + 
'audio_processing_impl.h', + 'echo_cancellation_impl.cc', + 'echo_cancellation_impl.h', + 'echo_control_mobile_impl.cc', + 'echo_control_mobile_impl.h', + 'gain_control_impl.cc', + 'gain_control_impl.h', + 'high_pass_filter_impl.cc', + 'high_pass_filter_impl.h', + 'level_estimator_impl.cc', + 'level_estimator_impl.h', + 'noise_suppression_impl.cc', + 'noise_suppression_impl.h', + 'splitting_filter.cc', + 'splitting_filter.h', + 'processing_component.cc', + 'processing_component.h', + 'voice_detection_impl.cc', + 'voice_detection_impl.h', + ], + }, + ], +} + +# Local Variables: +# tab-width:2 +# indent-tabs-mode:nil +# End: +# vim: set expandtab tabstop=2 shiftwidth=2: diff --git a/src/modules/audio_processing/main/source/audio_buffer.cc b/src/modules/audio_processing/main/source/audio_buffer.cc new file mode 100644 index 0000000000..6b20fcecee --- /dev/null +++ b/src/modules/audio_processing/main/source/audio_buffer.cc @@ -0,0 +1,278 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "audio_buffer.h" + +#include "module_common_types.h" + +namespace webrtc { +namespace { + +enum { + kSamplesPer8kHzChannel = 80, + kSamplesPer16kHzChannel = 160, + kSamplesPer32kHzChannel = 320 +}; + +void StereoToMono(const WebRtc_Word16* left, const WebRtc_Word16* right, + WebRtc_Word16* out, int samples_per_channel) { + WebRtc_Word32 data_int32 = 0; + for (int i = 0; i < samples_per_channel; i++) { + data_int32 = (left[i] + right[i]) >> 1; + if (data_int32 > 32767) { + data_int32 = 32767; + } else if (data_int32 < -32768) { + data_int32 = -32768; + } + + out[i] = static_cast<WebRtc_Word16>(data_int32); + } +} +} // namespace + +struct AudioChannel { + AudioChannel() { + memset(data, 0, sizeof(data)); + } + + WebRtc_Word16 data[kSamplesPer32kHzChannel]; +}; + +struct SplitAudioChannel { + SplitAudioChannel() { + memset(low_pass_data, 0, sizeof(low_pass_data)); + memset(high_pass_data, 0, sizeof(high_pass_data)); + memset(analysis_filter_state1, 0, sizeof(analysis_filter_state1)); + memset(analysis_filter_state2, 0, sizeof(analysis_filter_state2)); + memset(synthesis_filter_state1, 0, sizeof(synthesis_filter_state1)); + memset(synthesis_filter_state2, 0, sizeof(synthesis_filter_state2)); + } + + WebRtc_Word16 low_pass_data[kSamplesPer16kHzChannel]; + WebRtc_Word16 high_pass_data[kSamplesPer16kHzChannel]; + + WebRtc_Word32 analysis_filter_state1[6]; + WebRtc_Word32 analysis_filter_state2[6]; + WebRtc_Word32 synthesis_filter_state1[6]; + WebRtc_Word32 synthesis_filter_state2[6]; +}; + +// TODO(am): check range of input parameters? 
+AudioBuffer::AudioBuffer(WebRtc_Word32 max_num_channels, + WebRtc_Word32 samples_per_channel) + : max_num_channels_(max_num_channels), + num_channels_(0), + num_mixed_channels_(0), + num_mixed_low_pass_channels_(0), + samples_per_channel_(samples_per_channel), + samples_per_split_channel_(samples_per_channel), + reference_copied_(false), + data_(NULL), + channels_(NULL), + split_channels_(NULL), + mixed_low_pass_channels_(NULL), + low_pass_reference_channels_(NULL) { + if (max_num_channels_ > 1) { + channels_ = new AudioChannel[max_num_channels_]; + mixed_low_pass_channels_ = new AudioChannel[max_num_channels_]; + } + low_pass_reference_channels_ = new AudioChannel[max_num_channels_]; + + if (samples_per_channel_ == kSamplesPer32kHzChannel) { + split_channels_ = new SplitAudioChannel[max_num_channels_]; + samples_per_split_channel_ = kSamplesPer16kHzChannel; + } +} + +AudioBuffer::~AudioBuffer() { + if (channels_ != NULL) { + delete [] channels_; + } + + if (mixed_low_pass_channels_ != NULL) { + delete [] mixed_low_pass_channels_; + } + + if (low_pass_reference_channels_ != NULL) { + delete [] low_pass_reference_channels_; + } + + if (split_channels_ != NULL) { + delete [] split_channels_; + } +} + +WebRtc_Word16* AudioBuffer::data(WebRtc_Word32 channel) const { + assert(channel >= 0 && channel < num_channels_); + if (data_ != NULL) { + return data_; + } + + return channels_[channel].data; +} + +WebRtc_Word16* AudioBuffer::low_pass_split_data(WebRtc_Word32 channel) const { + assert(channel >= 0 && channel < num_channels_); + if (split_channels_ == NULL) { + return data(channel); + } + + return split_channels_[channel].low_pass_data; +} + +WebRtc_Word16* AudioBuffer::high_pass_split_data(WebRtc_Word32 channel) const { + assert(channel >= 0 && channel < num_channels_); + if (split_channels_ == NULL) { + return NULL; + } + + return split_channels_[channel].high_pass_data; +} + +WebRtc_Word16* AudioBuffer::mixed_low_pass_data(WebRtc_Word32 channel) const { + 
assert(channel >= 0 && channel < num_mixed_low_pass_channels_); + + return mixed_low_pass_channels_[channel].data; +} + +WebRtc_Word16* AudioBuffer::low_pass_reference(WebRtc_Word32 channel) const { + assert(channel >= 0 && channel < num_channels_); + if (!reference_copied_) { + return NULL; + } + + return low_pass_reference_channels_[channel].data; +} + +WebRtc_Word32* AudioBuffer::analysis_filter_state1(WebRtc_Word32 channel) const { + assert(channel >= 0 && channel < num_channels_); + return split_channels_[channel].analysis_filter_state1; +} + +WebRtc_Word32* AudioBuffer::analysis_filter_state2(WebRtc_Word32 channel) const { + assert(channel >= 0 && channel < num_channels_); + return split_channels_[channel].analysis_filter_state2; +} + +WebRtc_Word32* AudioBuffer::synthesis_filter_state1(WebRtc_Word32 channel) const { + assert(channel >= 0 && channel < num_channels_); + return split_channels_[channel].synthesis_filter_state1; +} + +WebRtc_Word32* AudioBuffer::synthesis_filter_state2(WebRtc_Word32 channel) const { + assert(channel >= 0 && channel < num_channels_); + return split_channels_[channel].synthesis_filter_state2; +} + +WebRtc_Word32 AudioBuffer::num_channels() const { + return num_channels_; +} + +WebRtc_Word32 AudioBuffer::samples_per_channel() const { + return samples_per_channel_; +} + +WebRtc_Word32 AudioBuffer::samples_per_split_channel() const { + return samples_per_split_channel_; +} + +// TODO(ajm): Do deinterleaving and mixing in one step? +void AudioBuffer::DeinterleaveFrom(AudioFrame* audioFrame) { + assert(audioFrame->_audioChannel <= max_num_channels_); + assert(audioFrame->_payloadDataLengthInSamples == samples_per_channel_); + + num_channels_ = audioFrame->_audioChannel; + num_mixed_channels_ = 0; + num_mixed_low_pass_channels_ = 0; + reference_copied_ = false; + + if (num_channels_ == 1) { + // We can get away with a pointer assignment in this case. 
+ data_ = audioFrame->_payloadData; + return; + } + + for (int i = 0; i < num_channels_; i++) { + WebRtc_Word16* deinterleaved = channels_[i].data; + WebRtc_Word16* interleaved = audioFrame->_payloadData; + WebRtc_Word32 interleaved_idx = i; + for (int j = 0; j < samples_per_channel_; j++) { + deinterleaved[j] = interleaved[interleaved_idx]; + interleaved_idx += num_channels_; + } + } +} + +void AudioBuffer::InterleaveTo(AudioFrame* audioFrame) const { + assert(audioFrame->_audioChannel == num_channels_); + assert(audioFrame->_payloadDataLengthInSamples == samples_per_channel_); + + if (num_channels_ == 1) { + if (num_mixed_channels_ == 1) { + memcpy(audioFrame->_payloadData, + channels_[0].data, + sizeof(WebRtc_Word16) * samples_per_channel_); + } else { + // These should point to the same buffer in this case. + assert(data_ == audioFrame->_payloadData); + } + + return; + } + + for (int i = 0; i < num_channels_; i++) { + WebRtc_Word16* deinterleaved = channels_[i].data; + WebRtc_Word16* interleaved = audioFrame->_payloadData; + WebRtc_Word32 interleaved_idx = i; + for (int j = 0; j < samples_per_channel_; j++) { + interleaved[interleaved_idx] = deinterleaved[j]; + interleaved_idx += num_channels_; + } + } +} + +// TODO(ajm): would be good to support the no-mix case with pointer assignment. +// TODO(ajm): handle mixing to multiple channels? +void AudioBuffer::Mix(WebRtc_Word32 num_mixed_channels) { + // We currently only support the stereo to mono case. + assert(num_channels_ == 2); + assert(num_mixed_channels == 1); + + StereoToMono(channels_[0].data, + channels_[1].data, + channels_[0].data, + samples_per_channel_); + + num_channels_ = num_mixed_channels; + num_mixed_channels_ = num_mixed_channels; +} + +void AudioBuffer::CopyAndMixLowPass(WebRtc_Word32 num_mixed_channels) { + // We currently only support the stereo to mono case. 
+ assert(num_channels_ == 2); + assert(num_mixed_channels == 1); + + StereoToMono(low_pass_split_data(0), + low_pass_split_data(1), + mixed_low_pass_channels_[0].data, + samples_per_split_channel_); + + num_mixed_low_pass_channels_ = num_mixed_channels; +} + +void AudioBuffer::CopyLowPassToReference() { + reference_copied_ = true; + for (int i = 0; i < num_channels_; i++) { + memcpy(low_pass_reference_channels_[i].data, + low_pass_split_data(i), + sizeof(WebRtc_Word16) * samples_per_split_channel_); + } +} +} // namespace webrtc diff --git a/src/modules/audio_processing/main/source/audio_buffer.h b/src/modules/audio_processing/main/source/audio_buffer.h new file mode 100644 index 0000000000..15f850b67b --- /dev/null +++ b/src/modules/audio_processing/main/source/audio_buffer.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_BUFFER_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_BUFFER_H_ + +#include "typedefs.h" + + +namespace webrtc { + +struct AudioChannel; +struct SplitAudioChannel; +class AudioFrame; + +class AudioBuffer { + public: + AudioBuffer(WebRtc_Word32 max_num_channels, WebRtc_Word32 samples_per_channel); + virtual ~AudioBuffer(); + + WebRtc_Word32 num_channels() const; + WebRtc_Word32 samples_per_channel() const; + WebRtc_Word32 samples_per_split_channel() const; + + WebRtc_Word16* data(WebRtc_Word32 channel) const; + WebRtc_Word16* low_pass_split_data(WebRtc_Word32 channel) const; + WebRtc_Word16* high_pass_split_data(WebRtc_Word32 channel) const; + WebRtc_Word16* mixed_low_pass_data(WebRtc_Word32 channel) const; + WebRtc_Word16* low_pass_reference(WebRtc_Word32 channel) const; + + WebRtc_Word32* analysis_filter_state1(WebRtc_Word32 channel) const; + WebRtc_Word32* analysis_filter_state2(WebRtc_Word32 channel) const; + WebRtc_Word32* synthesis_filter_state1(WebRtc_Word32 channel) const; + WebRtc_Word32* synthesis_filter_state2(WebRtc_Word32 channel) const; + + void DeinterleaveFrom(AudioFrame* audioFrame); + void InterleaveTo(AudioFrame* audioFrame) const; + void Mix(WebRtc_Word32 num_mixed_channels); + void CopyAndMixLowPass(WebRtc_Word32 num_mixed_channels); + void CopyLowPassToReference(); + + private: + const WebRtc_Word32 max_num_channels_; + WebRtc_Word32 num_channels_; + WebRtc_Word32 num_mixed_channels_; + WebRtc_Word32 num_mixed_low_pass_channels_; + const WebRtc_Word32 samples_per_channel_; + WebRtc_Word32 samples_per_split_channel_; + bool reference_copied_; + + WebRtc_Word16* data_; + // TODO(ajm): Prefer to make these vectors if permitted... + AudioChannel* channels_; + SplitAudioChannel* split_channels_; + // TODO(ajm): improve this, we don't need the full 32 kHz space here. 
+ AudioChannel* mixed_low_pass_channels_; + AudioChannel* low_pass_reference_channels_; +}; +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_BUFFER_H_ diff --git a/src/modules/audio_processing/main/source/audio_processing_impl.cc b/src/modules/audio_processing/main/source/audio_processing_impl.cc new file mode 100644 index 0000000000..6440e36ec4 --- /dev/null +++ b/src/modules/audio_processing/main/source/audio_processing_impl.cc @@ -0,0 +1,636 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "audio_processing_impl.h" + +#include <cassert> + +#include "module_common_types.h" + +#include "critical_section_wrapper.h" +#include "file_wrapper.h" + +#include "audio_buffer.h" +#include "echo_cancellation_impl.h" +#include "echo_control_mobile_impl.h" +#include "high_pass_filter_impl.h" +#include "gain_control_impl.h" +#include "level_estimator_impl.h" +#include "noise_suppression_impl.h" +#include "processing_component.h" +#include "splitting_filter.h" +#include "voice_detection_impl.h" + +namespace webrtc { +namespace { + +enum Events { + kInitializeEvent, + kRenderEvent, + kCaptureEvent +}; + +const char kMagicNumber[] = "#!vqetrace1.2"; +} // namespace + +AudioProcessing* AudioProcessing::Create(int id) { + /*WEBRTC_TRACE(webrtc::kTraceModuleCall, + webrtc::kTraceAudioProcessing, + id, + "AudioProcessing::Create()");*/ + + AudioProcessingImpl* apm = new AudioProcessingImpl(id); + if (apm->Initialize() != kNoError) { + delete apm; + apm = NULL; + } + + return apm; +} + +void AudioProcessing::Destroy(AudioProcessing* apm) { + delete 
static_cast<AudioProcessingImpl*>(apm); +} + +AudioProcessingImpl::AudioProcessingImpl(int id) + : id_(id), + echo_cancellation_(NULL), + echo_control_mobile_(NULL), + gain_control_(NULL), + high_pass_filter_(NULL), + level_estimator_(NULL), + noise_suppression_(NULL), + voice_detection_(NULL), + debug_file_(FileWrapper::Create()), + crit_(CriticalSectionWrapper::CreateCriticalSection()), + render_audio_(NULL), + capture_audio_(NULL), + sample_rate_hz_(kSampleRate16kHz), + split_sample_rate_hz_(kSampleRate16kHz), + samples_per_channel_(sample_rate_hz_ / 100), + stream_delay_ms_(0), + was_stream_delay_set_(false), + num_render_input_channels_(1), + num_capture_input_channels_(1), + num_capture_output_channels_(1) { + + echo_cancellation_ = new EchoCancellationImpl(this); + component_list_.push_back(echo_cancellation_); + + echo_control_mobile_ = new EchoControlMobileImpl(this); + component_list_.push_back(echo_control_mobile_); + + gain_control_ = new GainControlImpl(this); + component_list_.push_back(gain_control_); + + high_pass_filter_ = new HighPassFilterImpl(this); + component_list_.push_back(high_pass_filter_); + + level_estimator_ = new LevelEstimatorImpl(this); + component_list_.push_back(level_estimator_); + + noise_suppression_ = new NoiseSuppressionImpl(this); + component_list_.push_back(noise_suppression_); + + voice_detection_ = new VoiceDetectionImpl(this); + component_list_.push_back(voice_detection_); +} + +AudioProcessingImpl::~AudioProcessingImpl() { + while (!component_list_.empty()) { + ProcessingComponent* component = component_list_.front(); + component->Destroy(); + delete component; + component_list_.pop_front(); + } + + if (debug_file_->Open()) { + debug_file_->CloseFile(); + } + delete debug_file_; + debug_file_ = NULL; + + delete crit_; + crit_ = NULL; + + if (render_audio_ != NULL) { + delete render_audio_; + render_audio_ = NULL; + } + + if (capture_audio_ != NULL) { + delete capture_audio_; + capture_audio_ = NULL; + } +} + 
+CriticalSectionWrapper* AudioProcessingImpl::crit() const { + return crit_; +} + +int AudioProcessingImpl::split_sample_rate_hz() const { + return split_sample_rate_hz_; +} + +int AudioProcessingImpl::Initialize() { + CriticalSectionScoped crit_scoped(*crit_); + return InitializeLocked(); +} + +int AudioProcessingImpl::InitializeLocked() { + if (render_audio_ != NULL) { + delete render_audio_; + render_audio_ = NULL; + } + + if (capture_audio_ != NULL) { + delete capture_audio_; + capture_audio_ = NULL; + } + + render_audio_ = new AudioBuffer(num_render_input_channels_, + samples_per_channel_); + capture_audio_ = new AudioBuffer(num_capture_input_channels_, + samples_per_channel_); + + was_stream_delay_set_ = false; + + // Initialize all components. + std::list<ProcessingComponent*>::iterator it; + for (it = component_list_.begin(); it != component_list_.end(); it++) { + int err = (*it)->Initialize(); + if (err != kNoError) { + return err; + } + } + + return kNoError; +} + +int AudioProcessingImpl::set_sample_rate_hz(int rate) { + CriticalSectionScoped crit_scoped(*crit_); + if (rate != kSampleRate8kHz && + rate != kSampleRate16kHz && + rate != kSampleRate32kHz) { + return kBadParameterError; + } + + sample_rate_hz_ = rate; + samples_per_channel_ = rate / 100; + + if (sample_rate_hz_ == kSampleRate32kHz) { + split_sample_rate_hz_ = kSampleRate16kHz; + } else { + split_sample_rate_hz_ = sample_rate_hz_; + } + + return InitializeLocked(); +} + +int AudioProcessingImpl::sample_rate_hz() const { + return sample_rate_hz_; +} + +int AudioProcessingImpl::set_num_reverse_channels(int channels) { + CriticalSectionScoped crit_scoped(*crit_); + // Only stereo supported currently. 
+ if (channels > 2 || channels < 1) { + return kBadParameterError; + } + + num_render_input_channels_ = channels; + + return InitializeLocked(); +} + +int AudioProcessingImpl::num_reverse_channels() const { + return num_render_input_channels_; +} + +int AudioProcessingImpl::set_num_channels( + int input_channels, + int output_channels) { + CriticalSectionScoped crit_scoped(*crit_); + if (output_channels > input_channels) { + return kBadParameterError; + } + + // Only stereo supported currently. + if (input_channels > 2 || input_channels < 1) { + return kBadParameterError; + } + + if (output_channels > 2 || output_channels < 1) { + return kBadParameterError; + } + + num_capture_input_channels_ = input_channels; + num_capture_output_channels_ = output_channels; + + return InitializeLocked(); +} + +int AudioProcessingImpl::num_input_channels() const { + return num_capture_input_channels_; +} + +int AudioProcessingImpl::num_output_channels() const { + return num_capture_output_channels_; +} + +int AudioProcessingImpl::ProcessStream(AudioFrame* frame) { + CriticalSectionScoped crit_scoped(*crit_); + int err = kNoError; + + if (frame == NULL) { + return kNullPointerError; + } + + if (frame->_frequencyInHz != + static_cast<WebRtc_UWord32>(sample_rate_hz_)) { + return kBadSampleRateError; + } + + if (frame->_audioChannel != num_capture_input_channels_) { + return kBadNumberChannelsError; + } + + if (frame->_payloadDataLengthInSamples != samples_per_channel_) { + return kBadDataLengthError; + } + + if (debug_file_->Open()) { + WebRtc_UWord8 event = kCaptureEvent; + if (!debug_file_->Write(&event, sizeof(event))) { + return kFileError; + } + + if (!debug_file_->Write(&frame->_frequencyInHz, + sizeof(frame->_frequencyInHz))) { + return kFileError; + } + + if (!debug_file_->Write(&frame->_audioChannel, + sizeof(frame->_audioChannel))) { + return kFileError; + } + + if (!debug_file_->Write(&frame->_payloadDataLengthInSamples, + sizeof(frame->_payloadDataLengthInSamples))) { + 
return kFileError; + } + + if (!debug_file_->Write(frame->_payloadData, + sizeof(WebRtc_Word16) * frame->_payloadDataLengthInSamples * + frame->_audioChannel)) { + return kFileError; + } + } + + capture_audio_->DeinterleaveFrom(frame); + + // TODO(ajm): experiment with mixing and AEC placement. + if (num_capture_output_channels_ < num_capture_input_channels_) { + capture_audio_->Mix(num_capture_output_channels_); + + frame->_audioChannel = num_capture_output_channels_; + } + + if (sample_rate_hz_ == kSampleRate32kHz) { + for (int i = 0; i < num_capture_input_channels_; i++) { + // Split into a low and high band. + SplittingFilterAnalysis(capture_audio_->data(i), + capture_audio_->low_pass_split_data(i), + capture_audio_->high_pass_split_data(i), + capture_audio_->analysis_filter_state1(i), + capture_audio_->analysis_filter_state2(i)); + } + } + + err = high_pass_filter_->ProcessCaptureAudio(capture_audio_); + if (err != kNoError) { + return err; + } + + err = gain_control_->AnalyzeCaptureAudio(capture_audio_); + if (err != kNoError) { + return err; + } + + err = echo_cancellation_->ProcessCaptureAudio(capture_audio_); + if (err != kNoError) { + return err; + } + + if (echo_control_mobile_->is_enabled() && + noise_suppression_->is_enabled()) { + capture_audio_->CopyLowPassToReference(); + } + + err = noise_suppression_->ProcessCaptureAudio(capture_audio_); + if (err != kNoError) { + return err; + } + + err = echo_control_mobile_->ProcessCaptureAudio(capture_audio_); + if (err != kNoError) { + return err; + } + + err = voice_detection_->ProcessCaptureAudio(capture_audio_); + if (err != kNoError) { + return err; + } + + err = gain_control_->ProcessCaptureAudio(capture_audio_); + if (err != kNoError) { + return err; + } + + //err = level_estimator_->ProcessCaptureAudio(capture_audio_); + //if (err != kNoError) { + // return err; + //} + + if (sample_rate_hz_ == kSampleRate32kHz) { + for (int i = 0; i < num_capture_output_channels_; i++) { + // Recombine low and high 
bands. + SplittingFilterSynthesis(capture_audio_->low_pass_split_data(i), + capture_audio_->high_pass_split_data(i), + capture_audio_->data(i), + capture_audio_->synthesis_filter_state1(i), + capture_audio_->synthesis_filter_state2(i)); + } + } + + capture_audio_->InterleaveTo(frame); + + return kNoError; +} + +int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) { + CriticalSectionScoped crit_scoped(*crit_); + int err = kNoError; + + if (frame == NULL) { + return kNullPointerError; + } + + if (frame->_frequencyInHz != + static_cast<WebRtc_UWord32>(sample_rate_hz_)) { + return kBadSampleRateError; + } + + if (frame->_audioChannel != num_render_input_channels_) { + return kBadNumberChannelsError; + } + + if (frame->_payloadDataLengthInSamples != samples_per_channel_) { + return kBadDataLengthError; + } + + if (debug_file_->Open()) { + WebRtc_UWord8 event = kRenderEvent; + if (!debug_file_->Write(&event, sizeof(event))) { + return kFileError; + } + + if (!debug_file_->Write(&frame->_frequencyInHz, + sizeof(frame->_frequencyInHz))) { + return kFileError; + } + + if (!debug_file_->Write(&frame->_audioChannel, + sizeof(frame->_audioChannel))) { + return kFileError; + } + + if (!debug_file_->Write(&frame->_payloadDataLengthInSamples, + sizeof(frame->_payloadDataLengthInSamples))) { + return kFileError; + } + + if (!debug_file_->Write(frame->_payloadData, + sizeof(WebRtc_Word16) * frame->_payloadDataLengthInSamples * + frame->_audioChannel)) { + return kFileError; + } + } + + render_audio_->DeinterleaveFrom(frame); + + // TODO(ajm): turn the splitting filter into a component? + if (sample_rate_hz_ == kSampleRate32kHz) { + for (int i = 0; i < num_render_input_channels_; i++) { + // Split into low and high band. 
+ SplittingFilterAnalysis(render_audio_->data(i), + render_audio_->low_pass_split_data(i), + render_audio_->high_pass_split_data(i), + render_audio_->analysis_filter_state1(i), + render_audio_->analysis_filter_state2(i)); + } + } + + // TODO(ajm): warnings possible from components? + err = echo_cancellation_->ProcessRenderAudio(render_audio_); + if (err != kNoError) { + return err; + } + + err = echo_control_mobile_->ProcessRenderAudio(render_audio_); + if (err != kNoError) { + return err; + } + + err = gain_control_->ProcessRenderAudio(render_audio_); + if (err != kNoError) { + return err; + } + + //err = level_estimator_->AnalyzeReverseStream(render_audio_); + //if (err != kNoError) { + // return err; + //} + + was_stream_delay_set_ = false; + return err; // TODO(ajm): this is for returning warnings; necessary? +} + +int AudioProcessingImpl::set_stream_delay_ms(int delay) { + was_stream_delay_set_ = true; + if (delay < 0) { + return kBadParameterError; + } + + // TODO(ajm): the max is rather arbitrarily chosen; investigate. + if (delay > 500) { + stream_delay_ms_ = 500; + return kBadStreamParameterWarning; + } + + stream_delay_ms_ = delay; + return kNoError; +} + +int AudioProcessingImpl::stream_delay_ms() const { + return stream_delay_ms_; +} + +bool AudioProcessingImpl::was_stream_delay_set() const { + return was_stream_delay_set_; +} + +int AudioProcessingImpl::StartDebugRecording( + const char filename[AudioProcessing::kMaxFilenameSize]) { + CriticalSectionScoped crit_scoped(*crit_); + assert(kMaxFilenameSize == FileWrapper::kMaxFileNameSize); + + if (filename == NULL) { + return kNullPointerError; + } + + // Stop any ongoing recording. 
+ if (debug_file_->Open()) { + if (debug_file_->CloseFile() == -1) { + return kFileError; + } + } + + if (debug_file_->OpenFile(filename, false) == -1) { + debug_file_->CloseFile(); + return kFileError; + } + + if (debug_file_->WriteText("%s\n", kMagicNumber) == -1) { + debug_file_->CloseFile(); + return kFileError; + } + + // TODO(ajm): should we do this? If so, we need the number of channels etc. + // Record the default sample rate. + WebRtc_UWord8 event = kInitializeEvent; + if (!debug_file_->Write(&event, sizeof(event))) { + return kFileError; + } + + if (!debug_file_->Write(&sample_rate_hz_, sizeof(sample_rate_hz_))) { + return kFileError; + } + + return kNoError; +} + +int AudioProcessingImpl::StopDebugRecording() { + CriticalSectionScoped crit_scoped(*crit_); + // We just return if recording hasn't started. + if (debug_file_->Open()) { + if (debug_file_->CloseFile() == -1) { + return kFileError; + } + } + + return kNoError; +} + +EchoCancellation* AudioProcessingImpl::echo_cancellation() const { + return echo_cancellation_; +} + +EchoControlMobile* AudioProcessingImpl::echo_control_mobile() const { + return echo_control_mobile_; +} + +GainControl* AudioProcessingImpl::gain_control() const { + return gain_control_; +} + +HighPassFilter* AudioProcessingImpl::high_pass_filter() const { + return high_pass_filter_; +} + +LevelEstimator* AudioProcessingImpl::level_estimator() const { + return level_estimator_; +} + +NoiseSuppression* AudioProcessingImpl::noise_suppression() const { + return noise_suppression_; +} + +VoiceDetection* AudioProcessingImpl::voice_detection() const { + return voice_detection_; +} + +WebRtc_Word32 AudioProcessingImpl::Version(WebRtc_Word8* version, + WebRtc_UWord32& bytes_remaining, WebRtc_UWord32& position) const { + if (version == NULL) { + /*WEBRTC_TRACE(webrtc::kTraceError, + webrtc::kTraceAudioProcessing, + -1, + "Null version pointer");*/ + return kNullPointerError; + } + memset(&version[position], 0, bytes_remaining); + + 
WebRtc_Word8 my_version[] = "AudioProcessing 1.0.0"; + // Includes null termination. + WebRtc_UWord32 length = static_cast<WebRtc_UWord32>(strlen(my_version)); + if (bytes_remaining < length) { + /*WEBRTC_TRACE(webrtc::kTraceError, + webrtc::kTraceAudioProcessing, + -1, + "Buffer of insufficient length");*/ + return kBadParameterError; + } + memcpy(&version[position], my_version, length); + bytes_remaining -= length; + position += length; + + std::list<ProcessingComponent*>::const_iterator it; + for (it = component_list_.begin(); it != component_list_.end(); it++) { + char component_version[256]; + strcpy(component_version, "\n"); + int err = (*it)->get_version(&component_version[1], + sizeof(component_version) - 1); + if (err != kNoError) { + return err; + } + if (strncmp(&component_version[1], "\0", 1) == 0) { + // Assume empty if first byte is NULL. + continue; + } + + length = static_cast<WebRtc_UWord32>(strlen(component_version)); + if (bytes_remaining < length) { + /*WEBRTC_TRACE(webrtc::kTraceError, + webrtc::kTraceAudioProcessing, + -1, + "Buffer of insufficient length");*/ + return kBadParameterError; + } + memcpy(&version[position], component_version, length); + bytes_remaining -= length; + position += length; + } + + return kNoError; +} + +WebRtc_Word32 AudioProcessingImpl::ChangeUniqueId(const WebRtc_Word32 id) { + CriticalSectionScoped crit_scoped(*crit_); + /*WEBRTC_TRACE(webrtc::kTraceModuleCall, + webrtc::kTraceAudioProcessing, + id_, + "ChangeUniqueId(new id = %d)", + id);*/ + id_ = id; + + return kNoError; +} +} // namespace webrtc diff --git a/src/modules/audio_processing/main/source/audio_processing_impl.h b/src/modules/audio_processing/main/source/audio_processing_impl.h new file mode 100644 index 0000000000..9707bde248 --- /dev/null +++ b/src/modules/audio_processing/main/source/audio_processing_impl.h @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_PROCESSING_IMPL_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_PROCESSING_IMPL_H_ + +#include <list> + +#include "audio_processing.h" + +namespace webrtc { +class CriticalSectionWrapper; +class FileWrapper; + +class AudioBuffer; +class EchoCancellationImpl; +class EchoControlMobileImpl; +class GainControlImpl; +class HighPassFilterImpl; +class LevelEstimatorImpl; +class NoiseSuppressionImpl; +class ProcessingComponent; +class VoiceDetectionImpl; + +class AudioProcessingImpl : public AudioProcessing { + public: + enum { + kSampleRate8kHz = 8000, + kSampleRate16kHz = 16000, + kSampleRate32kHz = 32000 + }; + + explicit AudioProcessingImpl(int id); + virtual ~AudioProcessingImpl(); + + CriticalSectionWrapper* crit() const; + + int split_sample_rate_hz() const; + bool was_stream_delay_set() const; + + // AudioProcessing methods. 
+ virtual int Initialize(); + virtual int InitializeLocked(); + virtual int set_sample_rate_hz(int rate); + virtual int sample_rate_hz() const; + virtual int set_num_channels(int input_channels, int output_channels); + virtual int num_input_channels() const; + virtual int num_output_channels() const; + virtual int set_num_reverse_channels(int channels); + virtual int num_reverse_channels() const; + virtual int ProcessStream(AudioFrame* frame); + virtual int AnalyzeReverseStream(AudioFrame* frame); + virtual int set_stream_delay_ms(int delay); + virtual int stream_delay_ms() const; + virtual int StartDebugRecording(const char filename[kMaxFilenameSize]); + virtual int StopDebugRecording(); + virtual EchoCancellation* echo_cancellation() const; + virtual EchoControlMobile* echo_control_mobile() const; + virtual GainControl* gain_control() const; + virtual HighPassFilter* high_pass_filter() const; + virtual LevelEstimator* level_estimator() const; + virtual NoiseSuppression* noise_suppression() const; + virtual VoiceDetection* voice_detection() const; + + // Module methods. 
+ virtual WebRtc_Word32 Version(WebRtc_Word8* version, + WebRtc_UWord32& remainingBufferInBytes, + WebRtc_UWord32& position) const; + virtual WebRtc_Word32 ChangeUniqueId(const WebRtc_Word32 id); + + private: + int id_; + + EchoCancellationImpl* echo_cancellation_; + EchoControlMobileImpl* echo_control_mobile_; + GainControlImpl* gain_control_; + HighPassFilterImpl* high_pass_filter_; + LevelEstimatorImpl* level_estimator_; + NoiseSuppressionImpl* noise_suppression_; + VoiceDetectionImpl* voice_detection_; + + std::list<ProcessingComponent*> component_list_; + + FileWrapper* debug_file_; + CriticalSectionWrapper* crit_; + + AudioBuffer* render_audio_; + AudioBuffer* capture_audio_; + + int sample_rate_hz_; + int split_sample_rate_hz_; + int samples_per_channel_; + int stream_delay_ms_; + bool was_stream_delay_set_; + + int num_render_input_channels_; + int num_capture_input_channels_; + int num_capture_output_channels_; +}; +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_PROCESSING_IMPL_H_ diff --git a/src/modules/audio_processing/main/source/echo_cancellation_impl.cc b/src/modules/audio_processing/main/source/echo_cancellation_impl.cc new file mode 100644 index 0000000000..886d5f158c --- /dev/null +++ b/src/modules/audio_processing/main/source/echo_cancellation_impl.cc @@ -0,0 +1,348 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "echo_cancellation_impl.h" + +#include <cassert> +#include <string.h> + +#include "critical_section_wrapper.h" +#include "echo_cancellation.h" + +#include "audio_processing_impl.h" +#include "audio_buffer.h" + +namespace webrtc { + +typedef void Handle; + +namespace { +WebRtc_Word16 MapSetting(EchoCancellation::SuppressionLevel level) { + switch (level) { + case EchoCancellation::kLowSuppression: + return kAecNlpConservative; + case EchoCancellation::kModerateSuppression: + return kAecNlpModerate; + case EchoCancellation::kHighSuppression: + return kAecNlpAggressive; + default: + return -1; + } +} + +int MapError(int err) { + switch (err) { + case AEC_UNSUPPORTED_FUNCTION_ERROR: + return AudioProcessing::kUnsupportedFunctionError; + break; + case AEC_BAD_PARAMETER_ERROR: + return AudioProcessing::kBadParameterError; + break; + case AEC_BAD_PARAMETER_WARNING: + return AudioProcessing::kBadStreamParameterWarning; + break; + default: + // AEC_UNSPECIFIED_ERROR + // AEC_UNINITIALIZED_ERROR + // AEC_NULL_POINTER_ERROR + return AudioProcessing::kUnspecifiedError; + } +} +} // namespace + +EchoCancellationImpl::EchoCancellationImpl(const AudioProcessingImpl* apm) + : ProcessingComponent(apm), + apm_(apm), + drift_compensation_enabled_(false), + metrics_enabled_(false), + suppression_level_(kModerateSuppression), + device_sample_rate_hz_(48000), + stream_drift_samples_(0), + was_stream_drift_set_(false), + stream_has_echo_(false) {} + +EchoCancellationImpl::~EchoCancellationImpl() {} + +int EchoCancellationImpl::ProcessRenderAudio(const AudioBuffer* audio) { + if (!is_component_enabled()) { + return apm_->kNoError; + } + + assert(audio->samples_per_split_channel() <= 160); + assert(audio->num_channels() == apm_->num_reverse_channels()); + + int err = apm_->kNoError; + + // The ordering convention must be followed to pass to the correct AEC. 
+ size_t handle_index = 0; + for (int i = 0; i < apm_->num_output_channels(); i++) { + for (int j = 0; j < audio->num_channels(); j++) { + Handle* my_handle = static_cast<Handle*>(handle(handle_index)); + err = WebRtcAec_BufferFarend( + my_handle, + audio->low_pass_split_data(j), + static_cast<WebRtc_Word16>(audio->samples_per_split_channel())); + + if (err != apm_->kNoError) { + return GetHandleError(my_handle); // TODO(ajm): warning possible? + } + + handle_index++; + } + } + + return apm_->kNoError; +} + +int EchoCancellationImpl::ProcessCaptureAudio(AudioBuffer* audio) { + if (!is_component_enabled()) { + return apm_->kNoError; + } + + if (!apm_->was_stream_delay_set()) { + return apm_->kStreamParameterNotSetError; + } + + if (drift_compensation_enabled_ && !was_stream_drift_set_) { + return apm_->kStreamParameterNotSetError; + } + + assert(audio->samples_per_split_channel() <= 160); + assert(audio->num_channels() == apm_->num_output_channels()); + + int err = apm_->kNoError; + + // The ordering convention must be followed to pass to the correct AEC. + size_t handle_index = 0; + stream_has_echo_ = false; + for (int i = 0; i < audio->num_channels(); i++) { + for (int j = 0; j < apm_->num_reverse_channels(); j++) { + Handle* my_handle = handle(handle_index); + err = WebRtcAec_Process( + my_handle, + audio->low_pass_split_data(i), + audio->high_pass_split_data(i), + audio->low_pass_split_data(i), + audio->high_pass_split_data(i), + static_cast<WebRtc_Word16>(audio->samples_per_split_channel()), + apm_->stream_delay_ms(), + stream_drift_samples_); + + if (err != apm_->kNoError) { + err = GetHandleError(my_handle); + // TODO(ajm): Figure out how to return warnings properly. 
+ if (err != apm_->kBadStreamParameterWarning) { + return err; + } + } + + WebRtc_Word16 status = 0; + err = WebRtcAec_get_echo_status(my_handle, &status); + if (err != apm_->kNoError) { + return GetHandleError(my_handle); + } + + if (status == 1) { + stream_has_echo_ = true; + } + + handle_index++; + } + } + + was_stream_drift_set_ = false; + return apm_->kNoError; +} + +int EchoCancellationImpl::Enable(bool enable) { + CriticalSectionScoped crit_scoped(*apm_->crit()); + // Ensure AEC and AECM are not both enabled. + if (enable && apm_->echo_control_mobile()->is_enabled()) { + return apm_->kBadParameterError; + } + + return EnableComponent(enable); +} + +bool EchoCancellationImpl::is_enabled() const { + return is_component_enabled(); +} + +int EchoCancellationImpl::set_suppression_level(SuppressionLevel level) { + CriticalSectionScoped crit_scoped(*apm_->crit()); + if (MapSetting(level) == -1) { + return apm_->kBadParameterError; + } + + suppression_level_ = level; + return Configure(); +} + +EchoCancellation::SuppressionLevel EchoCancellationImpl::suppression_level() + const { + return suppression_level_; +} + +int EchoCancellationImpl::enable_drift_compensation(bool enable) { + CriticalSectionScoped crit_scoped(*apm_->crit()); + drift_compensation_enabled_ = enable; + return Configure(); +} + +bool EchoCancellationImpl::is_drift_compensation_enabled() const { + return drift_compensation_enabled_; +} + +int EchoCancellationImpl::set_device_sample_rate_hz(int rate) { + CriticalSectionScoped crit_scoped(*apm_->crit()); + if (rate < 8000 || rate > 96000) { + return apm_->kBadParameterError; + } + + device_sample_rate_hz_ = rate; + return Initialize(); +} + +int EchoCancellationImpl::device_sample_rate_hz() const { + return device_sample_rate_hz_; +} + +int EchoCancellationImpl::set_stream_drift_samples(int drift) { + was_stream_drift_set_ = true; + stream_drift_samples_ = drift; + return apm_->kNoError; +} + +int EchoCancellationImpl::stream_drift_samples() const { 
+ return stream_drift_samples_; +} + +int EchoCancellationImpl::enable_metrics(bool enable) { + CriticalSectionScoped crit_scoped(*apm_->crit()); + metrics_enabled_ = enable; + return Configure(); +} + +bool EchoCancellationImpl::are_metrics_enabled() const { + return metrics_enabled_; +} + +// TODO(ajm): we currently just use the metrics from the first AEC. Think more +// about the best way to extend this to multi-channel. +int EchoCancellationImpl::GetMetrics(Metrics* metrics) { + CriticalSectionScoped crit_scoped(*apm_->crit()); + if (metrics == NULL) { + return apm_->kNullPointerError; + } + + if (!is_component_enabled() || !metrics_enabled_) { + return apm_->kNotEnabledError; + } + + AecMetrics my_metrics; + memset(&my_metrics, 0, sizeof(my_metrics)); + memset(metrics, 0, sizeof(Metrics)); + + Handle* my_handle = static_cast<Handle*>(handle(0)); + int err = WebRtcAec_GetMetrics(my_handle, &my_metrics); + if (err != apm_->kNoError) { + return GetHandleError(my_handle); + } + + metrics->residual_echo_return_loss.instant = my_metrics.rerl.instant; + metrics->residual_echo_return_loss.average = my_metrics.rerl.average; + metrics->residual_echo_return_loss.maximum = my_metrics.rerl.max; + metrics->residual_echo_return_loss.minimum = my_metrics.rerl.min; + + metrics->echo_return_loss.instant = my_metrics.erl.instant; + metrics->echo_return_loss.average = my_metrics.erl.average; + metrics->echo_return_loss.maximum = my_metrics.erl.max; + metrics->echo_return_loss.minimum = my_metrics.erl.min; + + metrics->echo_return_loss_enhancement.instant = my_metrics.erle.instant; + metrics->echo_return_loss_enhancement.average = my_metrics.erle.average; + metrics->echo_return_loss_enhancement.maximum = my_metrics.erle.max; + metrics->echo_return_loss_enhancement.minimum = my_metrics.erle.min; + + metrics->a_nlp.instant = my_metrics.aNlp.instant; + metrics->a_nlp.average = my_metrics.aNlp.average; + metrics->a_nlp.maximum = my_metrics.aNlp.max; + metrics->a_nlp.minimum =
my_metrics.aNlp.min; + + return apm_->kNoError; +} + +bool EchoCancellationImpl::stream_has_echo() const { + return stream_has_echo_; +} + +int EchoCancellationImpl::Initialize() { + int err = ProcessingComponent::Initialize(); + if (err != apm_->kNoError || !is_component_enabled()) { + return err; + } + + was_stream_drift_set_ = false; + + return apm_->kNoError; +} + +int EchoCancellationImpl::get_version(char* version, + int version_len_bytes) const { + if (WebRtcAec_get_version(version, version_len_bytes) != 0) { + return apm_->kBadParameterError; + } + + return apm_->kNoError; +} + +void* EchoCancellationImpl::CreateHandle() const { + Handle* handle = NULL; + if (WebRtcAec_Create(&handle) != apm_->kNoError) { + handle = NULL; + } else { + assert(handle != NULL); + } + + return handle; +} + +int EchoCancellationImpl::DestroyHandle(void* handle) const { + assert(handle != NULL); + return WebRtcAec_Free(static_cast<Handle*>(handle)); +} + +int EchoCancellationImpl::InitializeHandle(void* handle) const { + assert(handle != NULL); + return WebRtcAec_Init(static_cast<Handle*>(handle), + apm_->sample_rate_hz(), + device_sample_rate_hz_); +} + +int EchoCancellationImpl::ConfigureHandle(void* handle) const { + assert(handle != NULL); + AecConfig config; + config.metricsMode = metrics_enabled_; + config.nlpMode = MapSetting(suppression_level_); + config.skewMode = drift_compensation_enabled_; + + return WebRtcAec_set_config(static_cast<Handle*>(handle), config); +} + +int EchoCancellationImpl::num_handles_required() const { + return apm_->num_output_channels() * + apm_->num_reverse_channels(); +} + +int EchoCancellationImpl::GetHandleError(void* handle) const { + assert(handle != NULL); + return MapError(WebRtcAec_get_error_code(static_cast<Handle*>(handle))); +} +} // namespace webrtc diff --git a/src/modules/audio_processing/main/source/echo_cancellation_impl.h b/src/modules/audio_processing/main/source/echo_cancellation_impl.h new file mode 100644 index 
0000000000..380a69849f --- /dev/null +++ b/src/modules/audio_processing/main/source/echo_cancellation_impl.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_ECHO_CANCELLATION_IMPL_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_ECHO_CANCELLATION_IMPL_H_ + +#include "audio_processing.h" +#include "processing_component.h" + +namespace webrtc { +class AudioProcessingImpl; +class AudioBuffer; + +class EchoCancellationImpl : public EchoCancellation, + public ProcessingComponent { + public: + explicit EchoCancellationImpl(const AudioProcessingImpl* apm); + virtual ~EchoCancellationImpl(); + + int ProcessRenderAudio(const AudioBuffer* audio); + int ProcessCaptureAudio(AudioBuffer* audio); + + // EchoCancellation implementation. + virtual bool is_enabled() const; + + // ProcessingComponent implementation. + virtual int Initialize(); + virtual int get_version(char* version, int version_len_bytes) const; + + private: + // EchoCancellation implementation. 
+ virtual int Enable(bool enable); + virtual int enable_drift_compensation(bool enable); + virtual bool is_drift_compensation_enabled() const; + virtual int set_device_sample_rate_hz(int rate); + virtual int device_sample_rate_hz() const; + virtual int set_stream_drift_samples(int drift); + virtual int stream_drift_samples() const; + virtual int set_suppression_level(SuppressionLevel level); + virtual SuppressionLevel suppression_level() const; + virtual int enable_metrics(bool enable); + virtual bool are_metrics_enabled() const; + virtual bool stream_has_echo() const; + virtual int GetMetrics(Metrics* metrics); + + // ProcessingComponent implementation. + virtual void* CreateHandle() const; + virtual int InitializeHandle(void* handle) const; + virtual int ConfigureHandle(void* handle) const; + virtual int DestroyHandle(void* handle) const; + virtual int num_handles_required() const; + virtual int GetHandleError(void* handle) const; + + const AudioProcessingImpl* apm_; + bool drift_compensation_enabled_; + bool metrics_enabled_; + SuppressionLevel suppression_level_; + int device_sample_rate_hz_; + int stream_drift_samples_; + bool was_stream_drift_set_; + bool stream_has_echo_; +}; +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_ECHO_CANCELLATION_IMPL_H_ diff --git a/src/modules/audio_processing/main/source/echo_control_mobile_impl.cc b/src/modules/audio_processing/main/source/echo_control_mobile_impl.cc new file mode 100644 index 0000000000..1cd2502e2f --- /dev/null +++ b/src/modules/audio_processing/main/source/echo_control_mobile_impl.cc @@ -0,0 +1,245 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "echo_control_mobile_impl.h" + +#include <cassert> + +#include "critical_section_wrapper.h" +#include "echo_control_mobile.h" + +#include "audio_processing_impl.h" +#include "audio_buffer.h" + +namespace webrtc { + +typedef void Handle; + +namespace { +WebRtc_Word16 MapSetting(EchoControlMobile::RoutingMode mode) { + switch (mode) { + case EchoControlMobile::kQuietEarpieceOrHeadset: + return 0; + case EchoControlMobile::kEarpiece: + return 1; + case EchoControlMobile::kLoudEarpiece: + return 2; + case EchoControlMobile::kSpeakerphone: + return 3; + case EchoControlMobile::kLoudSpeakerphone: + return 4; + default: + return -1; + } +} + +int MapError(int err) { + switch (err) { + case AECM_UNSUPPORTED_FUNCTION_ERROR: + return AudioProcessing::kUnsupportedFunctionError; + case AECM_BAD_PARAMETER_ERROR: + return AudioProcessing::kBadParameterError; + case AECM_BAD_PARAMETER_WARNING: + return AudioProcessing::kBadStreamParameterWarning; + default: + // AECMOBFIX_UNSPECIFIED_ERROR + // AECMOBFIX_UNINITIALIZED_ERROR + // AECMOBFIX_NULL_POINTER_ERROR + return AudioProcessing::kUnspecifiedError; + } +} +} // namespace + +EchoControlMobileImpl::EchoControlMobileImpl(const AudioProcessingImpl* apm) + : ProcessingComponent(apm), + apm_(apm), + routing_mode_(kSpeakerphone), + comfort_noise_enabled_(true) {} + +EchoControlMobileImpl::~EchoControlMobileImpl() {} + +int EchoControlMobileImpl::ProcessRenderAudio(const AudioBuffer* audio) { + if (!is_component_enabled()) { + return apm_->kNoError; + } + + assert(audio->samples_per_split_channel() <= 160); + assert(audio->num_channels() == apm_->num_reverse_channels()); + + int err = apm_->kNoError; + + // The ordering convention must be followed to pass to the correct AECM. 
+ size_t handle_index = 0; + for (int i = 0; i < apm_->num_output_channels(); i++) { + for (int j = 0; j < audio->num_channels(); j++) { + Handle* my_handle = static_cast<Handle*>(handle(handle_index)); + err = WebRtcAecm_BufferFarend( + my_handle, + audio->low_pass_split_data(j), + static_cast<WebRtc_Word16>(audio->samples_per_split_channel())); + + if (err != apm_->kNoError) { + return GetHandleError(my_handle); // TODO(ajm): warning possible? + } + + handle_index++; + } + } + + return apm_->kNoError; +} + +int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio) { + if (!is_component_enabled()) { + return apm_->kNoError; + } + + if (!apm_->was_stream_delay_set()) { + return apm_->kStreamParameterNotSetError; + } + + assert(audio->samples_per_split_channel() <= 160); + assert(audio->num_channels() == apm_->num_output_channels()); + + int err = apm_->kNoError; + + // The ordering convention must be followed to pass to the correct AECM. + size_t handle_index = 0; + for (int i = 0; i < audio->num_channels(); i++) { + // TODO(ajm): improve how this works, possibly inside AECM. + // This is kind of hacked up. + WebRtc_Word16* noisy = audio->low_pass_reference(i); + WebRtc_Word16* clean = audio->low_pass_split_data(i); + if (noisy == NULL) { + noisy = clean; + clean = NULL; + } + for (int j = 0; j < apm_->num_reverse_channels(); j++) { + Handle* my_handle = static_cast<Handle*>(handle(handle_index)); + err = WebRtcAecm_Process( + my_handle, + noisy, + clean, + audio->low_pass_split_data(i), + static_cast<WebRtc_Word16>(audio->samples_per_split_channel()), + apm_->stream_delay_ms()); + + if (err != apm_->kNoError) { + return GetHandleError(my_handle); // TODO(ajm): warning possible? + } + + handle_index++; + } + } + + return apm_->kNoError; +} + +int EchoControlMobileImpl::Enable(bool enable) { + CriticalSectionScoped crit_scoped(*apm_->crit()); + // Ensure AEC and AECM are not both enabled. 
+ if (enable && apm_->echo_cancellation()->is_enabled()) { + return apm_->kBadParameterError; + } + + return EnableComponent(enable); +} + +bool EchoControlMobileImpl::is_enabled() const { + return is_component_enabled(); +} + +int EchoControlMobileImpl::set_routing_mode(RoutingMode mode) { + CriticalSectionScoped crit_scoped(*apm_->crit()); + if (MapSetting(mode) == -1) { + return apm_->kBadParameterError; + } + + routing_mode_ = mode; + return Configure(); +} + +EchoControlMobile::RoutingMode EchoControlMobileImpl::routing_mode() + const { + return routing_mode_; +} + +int EchoControlMobileImpl::enable_comfort_noise(bool enable) { + CriticalSectionScoped crit_scoped(*apm_->crit()); + comfort_noise_enabled_ = enable; + return Configure(); +} + +bool EchoControlMobileImpl::is_comfort_noise_enabled() const { + return comfort_noise_enabled_; +} + +int EchoControlMobileImpl::Initialize() { + if (!is_component_enabled()) { + return apm_->kNoError; + } + + if (apm_->sample_rate_hz() == apm_->kSampleRate32kHz) { + // AECM doesn't support super-wideband. + return apm_->kBadSampleRateError; + } + + return ProcessingComponent::Initialize(); +} + +int EchoControlMobileImpl::get_version(char* version, + int version_len_bytes) const { + if (WebRtcAecm_get_version(version, version_len_bytes) != 0) { + return apm_->kBadParameterError; + } + + return apm_->kNoError; +} + +void* EchoControlMobileImpl::CreateHandle() const { + Handle* handle = NULL; + if (WebRtcAecm_Create(&handle) != apm_->kNoError) { + handle = NULL; + } else { + assert(handle != NULL); + } + + return handle; +} + +int EchoControlMobileImpl::DestroyHandle(void* handle) const { + return WebRtcAecm_Free(static_cast<Handle*>(handle)); +} + +int EchoControlMobileImpl::InitializeHandle(void* handle) const { + return WebRtcAecm_Init(static_cast<Handle*>(handle), + apm_->sample_rate_hz(), + 48000); // Dummy value. This isn't actually + // required by AECM. 
+} + +int EchoControlMobileImpl::ConfigureHandle(void* handle) const { + AecmConfig config; + config.cngMode = comfort_noise_enabled_; + config.echoMode = MapSetting(routing_mode_); + + return WebRtcAecm_set_config(static_cast<Handle*>(handle), config); +} + +int EchoControlMobileImpl::num_handles_required() const { + return apm_->num_output_channels() * + apm_->num_reverse_channels(); +} + +int EchoControlMobileImpl::GetHandleError(void* handle) const { + assert(handle != NULL); + return MapError(WebRtcAecm_get_error_code(static_cast<Handle*>(handle))); +} +} // namespace webrtc diff --git a/src/modules/audio_processing/main/source/echo_control_mobile_impl.h b/src/modules/audio_processing/main/source/echo_control_mobile_impl.h new file mode 100644 index 0000000000..2fd624810a --- /dev/null +++ b/src/modules/audio_processing/main/source/echo_control_mobile_impl.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_ECHO_CONTROL_MOBILE_IMPL_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_ECHO_CONTROL_MOBILE_IMPL_H_ + +#include "audio_processing.h" +#include "processing_component.h" + +namespace webrtc { +class AudioProcessingImpl; +class AudioBuffer; + +class EchoControlMobileImpl : public EchoControlMobile, + public ProcessingComponent { + public: + explicit EchoControlMobileImpl(const AudioProcessingImpl* apm); + virtual ~EchoControlMobileImpl(); + + int ProcessRenderAudio(const AudioBuffer* audio); + int ProcessCaptureAudio(AudioBuffer* audio); + + // EchoControlMobile implementation. 
+ virtual bool is_enabled() const; + + // ProcessingComponent implementation. + virtual int Initialize(); + virtual int get_version(char* version, int version_len_bytes) const; + + private: + // EchoControlMobile implementation. + virtual int Enable(bool enable); + virtual int set_routing_mode(RoutingMode mode); + virtual RoutingMode routing_mode() const; + virtual int enable_comfort_noise(bool enable); + virtual bool is_comfort_noise_enabled() const; + + // ProcessingComponent implementation. + virtual void* CreateHandle() const; + virtual int InitializeHandle(void* handle) const; + virtual int ConfigureHandle(void* handle) const; + virtual int DestroyHandle(void* handle) const; + virtual int num_handles_required() const; + virtual int GetHandleError(void* handle) const; + + const AudioProcessingImpl* apm_; + RoutingMode routing_mode_; + bool comfort_noise_enabled_; +}; +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_ECHO_CONTROL_MOBILE_IMPL_H_ diff --git a/src/modules/audio_processing/main/source/gain_control_impl.cc b/src/modules/audio_processing/main/source/gain_control_impl.cc new file mode 100644 index 0000000000..dc3e565589 --- /dev/null +++ b/src/modules/audio_processing/main/source/gain_control_impl.cc @@ -0,0 +1,391 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "gain_control_impl.h" + +#include <cassert> + +#include "critical_section_wrapper.h" +#include "gain_control.h" + +#include "audio_processing_impl.h" +#include "audio_buffer.h" + +namespace webrtc { + +typedef void Handle; + +/*template <class T> +class GainControlHandle : public ComponentHandle<T> { + public: + GainControlHandle(); + virtual ~GainControlHandle(); + + virtual int Create(); + virtual T* ptr() const; + + private: + T* handle; +};*/ + +namespace { +WebRtc_Word16 MapSetting(GainControl::Mode mode) { + switch (mode) { + case GainControl::kAdaptiveAnalog: + return kAgcModeAdaptiveAnalog; + break; + case GainControl::kAdaptiveDigital: + return kAgcModeAdaptiveDigital; + break; + case GainControl::kFixedDigital: + return kAgcModeFixedDigital; + break; + default: + return -1; + } +} +} // namespace + +GainControlImpl::GainControlImpl(const AudioProcessingImpl* apm) + : ProcessingComponent(apm), + apm_(apm), + mode_(kAdaptiveAnalog), + minimum_capture_level_(0), + maximum_capture_level_(255), + limiter_enabled_(true), + target_level_dbfs_(3), + compression_gain_db_(9), + analog_capture_level_(0), + was_analog_level_set_(false), + stream_is_saturated_(false) {} + +GainControlImpl::~GainControlImpl() {} + +int GainControlImpl::ProcessRenderAudio(AudioBuffer* audio) { + if (!is_component_enabled()) { + return apm_->kNoError; + } + + assert(audio->samples_per_split_channel() <= 160); + + WebRtc_Word16* mixed_data = audio->low_pass_split_data(0); + if (audio->num_channels() > 1) { + audio->CopyAndMixLowPass(1); + mixed_data = audio->mixed_low_pass_data(0); + } + + for (int i = 0; i < num_handles(); i++) { + Handle* my_handle = static_cast<Handle*>(handle(i)); + int err = WebRtcAgc_AddFarend( + my_handle, + mixed_data, + static_cast<WebRtc_Word16>(audio->samples_per_split_channel())); + + if (err != apm_->kNoError) { + return GetHandleError(my_handle); + } + } + + return apm_->kNoError; +} + +int GainControlImpl::AnalyzeCaptureAudio(AudioBuffer* 
audio) { + if (!is_component_enabled()) { + return apm_->kNoError; + } + + assert(audio->samples_per_split_channel() <= 160); + assert(audio->num_channels() == num_handles()); + + int err = apm_->kNoError; + + if (mode_ == kAdaptiveAnalog) { + for (int i = 0; i < num_handles(); i++) { + Handle* my_handle = static_cast<Handle*>(handle(i)); + err = WebRtcAgc_AddMic( + my_handle, + audio->low_pass_split_data(i), + audio->high_pass_split_data(i), + static_cast<WebRtc_Word16>(audio->samples_per_split_channel())); + + if (err != apm_->kNoError) { + return GetHandleError(my_handle); + } + } + } else if (mode_ == kAdaptiveDigital) { + + for (int i = 0; i < num_handles(); i++) { + Handle* my_handle = static_cast<Handle*>(handle(i)); + WebRtc_Word32 capture_level_out = 0; + + err = WebRtcAgc_VirtualMic( + my_handle, + audio->low_pass_split_data(i), + audio->high_pass_split_data(i), + static_cast<WebRtc_Word16>(audio->samples_per_split_channel()), + //capture_levels_[i], + analog_capture_level_, + &capture_level_out); + + capture_levels_[i] = capture_level_out; + + if (err != apm_->kNoError) { + return GetHandleError(my_handle); + } + + } + } + + return apm_->kNoError; +} + +int GainControlImpl::ProcessCaptureAudio(AudioBuffer* audio) { + if (!is_component_enabled()) { + return apm_->kNoError; + } + + if (mode_ == kAdaptiveAnalog && !was_analog_level_set_) { + return apm_->kStreamParameterNotSetError; + } + + assert(audio->samples_per_split_channel() <= 160); + assert(audio->num_channels() == num_handles()); + + stream_is_saturated_ = false; + for (int i = 0; i < num_handles(); i++) { + Handle* my_handle = static_cast<Handle*>(handle(i)); + WebRtc_Word32 capture_level_out = 0; + WebRtc_UWord8 saturation_warning = 0; + + int err = WebRtcAgc_Process( + my_handle, + audio->low_pass_split_data(i), + audio->high_pass_split_data(i), + static_cast<WebRtc_Word16>(audio->samples_per_split_channel()), + audio->low_pass_split_data(i), + audio->high_pass_split_data(i), + 
capture_levels_[i], + &capture_level_out, + apm_->echo_cancellation()->stream_has_echo(), + &saturation_warning); + + if (err != apm_->kNoError) { + return GetHandleError(my_handle); + } + + capture_levels_[i] = capture_level_out; + if (saturation_warning == 1) { + stream_is_saturated_ = true; + } + } + + if (mode_ == kAdaptiveAnalog) { + // Take the analog level to be the average across the handles. + analog_capture_level_ = 0; + for (int i = 0; i < num_handles(); i++) { + analog_capture_level_ += capture_levels_[i]; + } + + analog_capture_level_ /= num_handles(); + } + + was_analog_level_set_ = false; + return apm_->kNoError; +} + +// TODO(ajm): ensure this is called under kAdaptiveAnalog. +int GainControlImpl::set_stream_analog_level(int level) { + was_analog_level_set_ = true; + if (level < minimum_capture_level_ || level > maximum_capture_level_) { + return apm_->kBadParameterError; + } + + if (mode_ == kAdaptiveAnalog) { + if (level != analog_capture_level_) { + // The analog level has been changed; update our internal levels. + capture_levels_.assign(num_handles(), level); + } + } + analog_capture_level_ = level; + + return apm_->kNoError; +} + +int GainControlImpl::stream_analog_level() { + // TODO(ajm): enable this assertion? 
+ //assert(mode_ == kAdaptiveAnalog); + + return analog_capture_level_; +} + +int GainControlImpl::Enable(bool enable) { + CriticalSectionScoped crit_scoped(*apm_->crit()); + return EnableComponent(enable); +} + +bool GainControlImpl::is_enabled() const { + return is_component_enabled(); +} + +int GainControlImpl::set_mode(Mode mode) { + CriticalSectionScoped crit_scoped(*apm_->crit()); + if (MapSetting(mode) == -1) { + return apm_->kBadParameterError; + } + + mode_ = mode; + return Initialize(); +} + +GainControl::Mode GainControlImpl::mode() const { + return mode_; +} + +int GainControlImpl::set_analog_level_limits(int minimum, + int maximum) { + CriticalSectionScoped crit_scoped(*apm_->crit()); + if (minimum < 0) { + return apm_->kBadParameterError; + } + + if (maximum > 65535) { + return apm_->kBadParameterError; + } + + if (maximum < minimum) { + return apm_->kBadParameterError; + } + + minimum_capture_level_ = minimum; + maximum_capture_level_ = maximum; + + return Initialize(); +} + +int GainControlImpl::analog_level_minimum() const { + return minimum_capture_level_; +} + +int GainControlImpl::analog_level_maximum() const { + return maximum_capture_level_; +} + +bool GainControlImpl::stream_is_saturated() const { + return stream_is_saturated_; +} + +int GainControlImpl::set_target_level_dbfs(int level) { + CriticalSectionScoped crit_scoped(*apm_->crit()); + if (level > 31 || level < 0) { + return apm_->kBadParameterError; + } + + target_level_dbfs_ = level; + return Configure(); +} + +int GainControlImpl::target_level_dbfs() const { + return target_level_dbfs_; +} + +int GainControlImpl::set_compression_gain_db(int gain) { + CriticalSectionScoped crit_scoped(*apm_->crit()); + if (gain < 0 || gain > 90) { + return apm_->kBadParameterError; + } + + compression_gain_db_ = gain; + return Configure(); +} + +int GainControlImpl::compression_gain_db() const { + return compression_gain_db_; +} + +int GainControlImpl::enable_limiter(bool enable) { + 
CriticalSectionScoped crit_scoped(*apm_->crit()); + limiter_enabled_ = enable; + return Configure(); +} + +bool GainControlImpl::is_limiter_enabled() const { + return limiter_enabled_; +} + +/* Re-initializes the component through ProcessingComponent::Initialize(). If that call fails, or the component is disabled, its result is returned unchanged. Otherwise every handle's capture level is reset to the midpoint of the configured analog range and the "analog level was set" flag is cleared. */ +int GainControlImpl::Initialize() { + int err = ProcessingComponent::Initialize(); + if (err != apm_->kNoError || !is_component_enabled()) { + return err; + } + + /* Midpoint of [minimum, maximum]. The minimum has to be added back: half of the range width on its own drops below the minimum whenever the minimum exceeds that half-width, and set_stream_analog_level() rejects such a level. */ + const int half_range = + (maximum_capture_level_ - minimum_capture_level_) >> 1; + analog_capture_level_ = minimum_capture_level_ + half_range; + capture_levels_.assign(num_handles(), analog_capture_level_); + was_analog_level_set_ = false; + + return apm_->kNoError; +} + +int GainControlImpl::get_version(char* version, int version_len_bytes) const { + if (WebRtcAgc_Version(version, version_len_bytes) != 0) { + return apm_->kBadParameterError; + } + + return apm_->kNoError; +} + +void* GainControlImpl::CreateHandle() const { + Handle* handle = NULL; + if (WebRtcAgc_Create(&handle) != apm_->kNoError) { + handle = NULL; + } else { + assert(handle != NULL); + } + + return handle; +} + +int GainControlImpl::DestroyHandle(void* handle) const { + return WebRtcAgc_Free(static_cast<Handle*>(handle)); +} + +int GainControlImpl::InitializeHandle(void* handle) const { + return WebRtcAgc_Init(static_cast<Handle*>(handle), + minimum_capture_level_, + maximum_capture_level_, + MapSetting(mode_), + apm_->sample_rate_hz()); +} + +int GainControlImpl::ConfigureHandle(void* handle) const { + WebRtcAgc_config_t config; + // TODO(ajm): Flip the sign here (since AGC expects a positive value) if we + // change the interface.
+ //assert(target_level_dbfs_ <= 0); + //config.targetLevelDbfs = static_cast<WebRtc_Word16>(-target_level_dbfs_); + config.targetLevelDbfs = static_cast<WebRtc_Word16>(target_level_dbfs_); + config.compressionGaindB = + static_cast<WebRtc_Word16>(compression_gain_db_); + config.limiterEnable = limiter_enabled_; + + return WebRtcAgc_set_config(static_cast<Handle*>(handle), config); +} + +int GainControlImpl::num_handles_required() const { + return apm_->num_output_channels(); +} + +int GainControlImpl::GetHandleError(void* handle) const { + // The AGC has no get_error() function. + // (Despite listing errors in its interface...) + assert(handle != NULL); + return apm_->kUnspecifiedError; +} +} // namespace webrtc diff --git a/src/modules/audio_processing/main/source/gain_control_impl.h b/src/modules/audio_processing/main/source/gain_control_impl.h new file mode 100644 index 0000000000..a11d606f45 --- /dev/null +++ b/src/modules/audio_processing/main/source/gain_control_impl.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_GAIN_CONTROL_IMPL_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_GAIN_CONTROL_IMPL_H_ + +#include <vector> + +#include "audio_processing.h" +#include "processing_component.h" + +namespace webrtc { +class AudioProcessingImpl; +class AudioBuffer; + +class GainControlImpl : public GainControl, + public ProcessingComponent { + public: + explicit GainControlImpl(const AudioProcessingImpl* apm); + virtual ~GainControlImpl(); + + int ProcessRenderAudio(AudioBuffer* audio); + int AnalyzeCaptureAudio(AudioBuffer* audio); + int ProcessCaptureAudio(AudioBuffer* audio); + + // ProcessingComponent implementation. + virtual int Initialize(); + virtual int get_version(char* version, int version_len_bytes) const; + + // GainControl implementation. + virtual bool is_enabled() const; + + private: + // GainControl implementation. + virtual int Enable(bool enable); + virtual int set_stream_analog_level(int level); + virtual int stream_analog_level(); + virtual int set_mode(Mode mode); + virtual Mode mode() const; + virtual int set_target_level_dbfs(int level); + virtual int target_level_dbfs() const; + virtual int set_compression_gain_db(int gain); + virtual int compression_gain_db() const; + virtual int enable_limiter(bool enable); + virtual bool is_limiter_enabled() const; + virtual int set_analog_level_limits(int minimum, int maximum); + virtual int analog_level_minimum() const; + virtual int analog_level_maximum() const; + virtual bool stream_is_saturated() const; + + // ProcessingComponent implementation. 
+ virtual void* CreateHandle() const; + virtual int InitializeHandle(void* handle) const; + virtual int ConfigureHandle(void* handle) const; + virtual int DestroyHandle(void* handle) const; + virtual int num_handles_required() const; + virtual int GetHandleError(void* handle) const; + + const AudioProcessingImpl* apm_; + Mode mode_; + int minimum_capture_level_; + int maximum_capture_level_; + bool limiter_enabled_; + int target_level_dbfs_; + int compression_gain_db_; + std::vector<int> capture_levels_; + int analog_capture_level_; + bool was_analog_level_set_; + bool stream_is_saturated_; +}; +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_GAIN_CONTROL_IMPL_H_ diff --git a/src/modules/audio_processing/main/source/high_pass_filter_impl.cc b/src/modules/audio_processing/main/source/high_pass_filter_impl.cc new file mode 100644 index 0000000000..fa6d5d5ece --- /dev/null +++ b/src/modules/audio_processing/main/source/high_pass_filter_impl.cc @@ -0,0 +1,180 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "high_pass_filter_impl.h" + +#include <cassert> + +#include "critical_section_wrapper.h" +#include "typedefs.h" +#include "signal_processing_library.h" + +#include "audio_processing_impl.h" +#include "audio_buffer.h" + +namespace webrtc { +namespace { +const WebRtc_Word16 kFilterCoefficients8kHz[5] = + {3798, -7596, 3798, 7807, -3733}; + +const WebRtc_Word16 kFilterCoefficients[5] = + {4012, -8024, 4012, 8002, -3913}; + +struct FilterState { + WebRtc_Word16 y[4]; + WebRtc_Word16 x[2]; + const WebRtc_Word16* ba; +}; + +int InitializeFilter(FilterState* hpf, int sample_rate_hz) { + assert(hpf != NULL); + + if (sample_rate_hz == AudioProcessingImpl::kSampleRate8kHz) { + hpf->ba = kFilterCoefficients8kHz; + } else { + hpf->ba = kFilterCoefficients; + } + + WebRtcSpl_MemSetW16(hpf->x, 0, 2); + WebRtcSpl_MemSetW16(hpf->y, 0, 4); + + return AudioProcessing::kNoError; +} + +int Filter(FilterState* hpf, WebRtc_Word16* data, int length) { + assert(hpf != NULL); + + WebRtc_Word32 tmp_int32 = 0; + WebRtc_Word16* y = hpf->y; + WebRtc_Word16* x = hpf->x; + const WebRtc_Word16* ba = hpf->ba; + + for (int i = 0; i < length; i++) { + // y[i] = b[0] * x[i] + b[1] * x[i-1] + b[2] * x[i-2] + // + -a[1] * y[i-1] + -a[2] * y[i-2]; + + tmp_int32 = + WEBRTC_SPL_MUL_16_16(y[1], ba[3]); // -a[1] * y[i-1] (low part) + tmp_int32 += + WEBRTC_SPL_MUL_16_16(y[3], ba[4]); // -a[2] * y[i-2] (low part) + tmp_int32 = (tmp_int32 >> 15); + tmp_int32 += + WEBRTC_SPL_MUL_16_16(y[0], ba[3]); // -a[1] * y[i-1] (high part) + tmp_int32 += + WEBRTC_SPL_MUL_16_16(y[2], ba[4]); // -a[2] * y[i-2] (high part) + tmp_int32 = (tmp_int32 << 1); + + tmp_int32 += WEBRTC_SPL_MUL_16_16(data[i], ba[0]); // b[0]*x[0] + tmp_int32 += WEBRTC_SPL_MUL_16_16(x[0], ba[1]); // b[1]*x[i-1] + tmp_int32 += WEBRTC_SPL_MUL_16_16(x[1], ba[2]); // b[2]*x[i-2] + + // Update state (input part) + x[1] = x[0]; + x[0] = data[i]; + + // Update state (filtered part) + y[2] = y[0]; + y[3] = y[1]; + y[0] = 
static_cast<WebRtc_Word16>(tmp_int32 >> 13); + y[1] = static_cast<WebRtc_Word16>((tmp_int32 - + WEBRTC_SPL_LSHIFT_W32(static_cast<WebRtc_Word32>(y[0]), 13)) << 2); + + // Rounding in Q12, i.e. add 2^11 + tmp_int32 += 2048; + + // Saturate (to 2^27) so that the HP filtered signal does not overflow + tmp_int32 = WEBRTC_SPL_SAT(static_cast<WebRtc_Word32>(134217727), + tmp_int32, + static_cast<WebRtc_Word32>(-134217728)); + + // Convert back to Q0 and use rounding + data[i] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp_int32, 12); + + } + + return AudioProcessing::kNoError; +} +} // namespace + +typedef FilterState Handle; + +HighPassFilterImpl::HighPassFilterImpl(const AudioProcessingImpl* apm) + : ProcessingComponent(apm), + apm_(apm) {} + +HighPassFilterImpl::~HighPassFilterImpl() {} + +int HighPassFilterImpl::ProcessCaptureAudio(AudioBuffer* audio) { + int err = apm_->kNoError; + + if (!is_component_enabled()) { + return apm_->kNoError; + } + + assert(audio->samples_per_split_channel() <= 160); + + for (int i = 0; i < num_handles(); i++) { + Handle* my_handle = static_cast<Handle*>(handle(i)); + err = Filter(my_handle, + audio->low_pass_split_data(i), + audio->samples_per_split_channel()); + + if (err != apm_->kNoError) { + return GetHandleError(my_handle); + } + } + + return apm_->kNoError; +} + +int HighPassFilterImpl::Enable(bool enable) { + CriticalSectionScoped crit_scoped(*apm_->crit()); + return EnableComponent(enable); +} + +bool HighPassFilterImpl::is_enabled() const { + return is_component_enabled(); +} + +int HighPassFilterImpl::get_version(char* version, + int version_len_bytes) const { + // An empty string is used to indicate no version information. 
+ memset(version, 0, version_len_bytes); + return apm_->kNoError; +} + +void* HighPassFilterImpl::CreateHandle() const { + return new FilterState; +} + +int HighPassFilterImpl::DestroyHandle(void* handle) const { + delete static_cast<Handle*>(handle); + return apm_->kNoError; +} + +int HighPassFilterImpl::InitializeHandle(void* handle) const { + return InitializeFilter(static_cast<Handle*>(handle), + apm_->sample_rate_hz()); +} + +int HighPassFilterImpl::ConfigureHandle(void* /*handle*/) const { + return apm_->kNoError; // Not configurable. +} + +int HighPassFilterImpl::num_handles_required() const { + return apm_->num_output_channels(); +} + +int HighPassFilterImpl::GetHandleError(void* handle) const { + // The component has no detailed errors. + assert(handle != NULL); + return apm_->kUnspecifiedError; +} +} // namespace webrtc diff --git a/src/modules/audio_processing/main/source/high_pass_filter_impl.h b/src/modules/audio_processing/main/source/high_pass_filter_impl.h new file mode 100644 index 0000000000..4c23754270 --- /dev/null +++ b/src/modules/audio_processing/main/source/high_pass_filter_impl.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_HIGH_PASS_FILTER_IMPL_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_HIGH_PASS_FILTER_IMPL_H_ + +#include "audio_processing.h" +#include "processing_component.h" + +namespace webrtc { +class AudioProcessingImpl; +class AudioBuffer; + +class HighPassFilterImpl : public HighPassFilter, + public ProcessingComponent { + public: + explicit HighPassFilterImpl(const AudioProcessingImpl* apm); + virtual ~HighPassFilterImpl(); + + int ProcessCaptureAudio(AudioBuffer* audio); + + // HighPassFilter implementation. + virtual bool is_enabled() const; + + // ProcessingComponent implementation. + virtual int get_version(char* version, int version_len_bytes) const; + + private: + // HighPassFilter implementation. + virtual int Enable(bool enable); + + // ProcessingComponent implementation. + virtual void* CreateHandle() const; + virtual int InitializeHandle(void* handle) const; + virtual int ConfigureHandle(void* handle) const; + virtual int DestroyHandle(void* handle) const; + virtual int num_handles_required() const; + virtual int GetHandleError(void* handle) const; + + const AudioProcessingImpl* apm_; +}; +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_HIGH_PASS_FILTER_IMPL_H_ diff --git a/src/modules/audio_processing/main/source/level_estimator_impl.cc b/src/modules/audio_processing/main/source/level_estimator_impl.cc new file mode 100644 index 0000000000..799a9624f7 --- /dev/null +++ b/src/modules/audio_processing/main/source/level_estimator_impl.cc @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "level_estimator_impl.h" + +#include <cassert> +#include <cstring> + +#include "critical_section_wrapper.h" + +#include "audio_processing_impl.h" +#include "audio_buffer.h" + +// TODO(ajm): implement the underlying level estimator component. + +namespace webrtc { + +typedef void Handle; + +namespace { +/*int EstimateLevel(AudioBuffer* audio, Handle* my_handle) { + assert(audio->samples_per_split_channel() <= 160); + + WebRtc_Word16* mixed_data = audio->low_pass_split_data(0); + if (audio->num_channels() > 1) { + audio->CopyAndMixLowPass(1); + mixed_data = audio->mixed_low_pass_data(0); + } + + int err = UpdateLvlEst(my_handle, + mixed_data, + audio->samples_per_split_channel()); + if (err != AudioProcessing::kNoError) { + return GetHandleError(my_handle); + } + + return AudioProcessing::kNoError; +} + +int GetMetricsLocal(Handle* my_handle, LevelEstimator::Metrics* metrics) { + level_t levels; + memset(&levels, 0, sizeof(levels)); + + int err = ExportLevels(my_handle, &levels, 2); + if (err != AudioProcessing::kNoError) { + return err; + } + metrics->signal.instant = levels.instant; + metrics->signal.average = levels.average; + metrics->signal.maximum = levels.max; + metrics->signal.minimum = levels.min; + + err = ExportLevels(my_handle, &levels, 1); + if (err != AudioProcessing::kNoError) { + return err; + } + metrics->speech.instant = levels.instant; + metrics->speech.average = levels.average; + metrics->speech.maximum = levels.max; + metrics->speech.minimum = levels.min; + + err = ExportLevels(my_handle, &levels, 0); + if (err != AudioProcessing::kNoError) { + return err; + } + metrics->noise.instant = levels.instant; + metrics->noise.average = levels.average; + metrics->noise.maximum = levels.max; + metrics->noise.minimum = levels.min; + + return AudioProcessing::kNoError; +}*/ +} // namespace + +LevelEstimatorImpl::LevelEstimatorImpl(const AudioProcessingImpl* apm) + : ProcessingComponent(apm), + apm_(apm) {} + 
+LevelEstimatorImpl::~LevelEstimatorImpl() {} + +int LevelEstimatorImpl::AnalyzeReverseStream(AudioBuffer* /*audio*/) { + return apm_->kUnsupportedComponentError; + /*if (!is_component_enabled()) { + return apm_->kNoError; + } + + return EstimateLevel(audio, static_cast<Handle*>(handle(1)));*/ +} + +int LevelEstimatorImpl::ProcessCaptureAudio(AudioBuffer* /*audio*/) { + return apm_->kUnsupportedComponentError; + /*if (!is_component_enabled()) { + return apm_->kNoError; + } + + return EstimateLevel(audio, static_cast<Handle*>(handle(0)));*/ +} + +int LevelEstimatorImpl::Enable(bool /*enable*/) { + CriticalSectionScoped crit_scoped(*apm_->crit()); + return apm_->kUnsupportedComponentError; + //return EnableComponent(enable); +} + +bool LevelEstimatorImpl::is_enabled() const { + return is_component_enabled(); +} + +int LevelEstimatorImpl::GetMetrics(LevelEstimator::Metrics* /*metrics*/, + LevelEstimator::Metrics* /*reverse_metrics*/) { + return apm_->kUnsupportedComponentError; + /*if (!is_component_enabled()) { + return apm_->kNotEnabledError; + } + + int err = GetMetricsLocal(static_cast<Handle*>(handle(0)), metrics); + if (err != apm_->kNoError) { + return err; + } + + err = GetMetricsLocal(static_cast<Handle*>(handle(1)), reverse_metrics); + if (err != apm_->kNoError) { + return err; + } + + return apm_->kNoError;*/ +} + +int LevelEstimatorImpl::get_version(char* version, + int version_len_bytes) const { + // An empty string is used to indicate no version information. 
+ memset(version, 0, version_len_bytes); + return apm_->kNoError; +} + +void* LevelEstimatorImpl::CreateHandle() const { + Handle* handle = NULL; + /*if (CreateLvlEst(&handle) != apm_->kNoError) { + handle = NULL; + } else { + assert(handle != NULL); + }*/ + + return handle; +} + +int LevelEstimatorImpl::DestroyHandle(void* /*handle*/) const { + return apm_->kUnsupportedComponentError; + //return FreeLvlEst(static_cast<Handle*>(handle)); +} + +int LevelEstimatorImpl::InitializeHandle(void* /*handle*/) const { + return apm_->kUnsupportedComponentError; + /*const double kIntervalSeconds = 1.5; + return InitLvlEst(static_cast<Handle*>(handle), + apm_->sample_rate_hz(), + kIntervalSeconds);*/ +} + +int LevelEstimatorImpl::ConfigureHandle(void* /*handle*/) const { + return apm_->kUnsupportedComponentError; + //return apm_->kNoError; +} + +int LevelEstimatorImpl::num_handles_required() const { + return apm_->kUnsupportedComponentError; + //return 2; +} + +int LevelEstimatorImpl::GetHandleError(void* handle) const { + // The component has no detailed errors. + assert(handle != NULL); + return apm_->kUnspecifiedError; +} +} // namespace webrtc diff --git a/src/modules/audio_processing/main/source/level_estimator_impl.h b/src/modules/audio_processing/main/source/level_estimator_impl.h new file mode 100644 index 0000000000..1515722df4 --- /dev/null +++ b/src/modules/audio_processing/main/source/level_estimator_impl.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_LEVEL_ESTIMATOR_IMPL_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_LEVEL_ESTIMATOR_IMPL_H_ + +#include "audio_processing.h" +#include "processing_component.h" + +namespace webrtc { +class AudioProcessingImpl; +class AudioBuffer; + +class LevelEstimatorImpl : public LevelEstimator, + public ProcessingComponent { + public: + explicit LevelEstimatorImpl(const AudioProcessingImpl* apm); + virtual ~LevelEstimatorImpl(); + + int AnalyzeReverseStream(AudioBuffer* audio); + int ProcessCaptureAudio(AudioBuffer* audio); + + // LevelEstimator implementation. + virtual bool is_enabled() const; + + // ProcessingComponent implementation. + virtual int get_version(char* version, int version_len_bytes) const; + + private: + // LevelEstimator implementation. + virtual int Enable(bool enable); + virtual int GetMetrics(Metrics* metrics, Metrics* reverse_metrics); + + // ProcessingComponent implementation. + virtual void* CreateHandle() const; + virtual int InitializeHandle(void* handle) const; + virtual int ConfigureHandle(void* handle) const; + virtual int DestroyHandle(void* handle) const; + virtual int num_handles_required() const; + virtual int GetHandleError(void* handle) const; + + const AudioProcessingImpl* apm_; +}; +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_LEVEL_ESTIMATOR_IMPL_H_ diff --git a/src/modules/audio_processing/main/source/noise_suppression_impl.cc b/src/modules/audio_processing/main/source/noise_suppression_impl.cc new file mode 100644 index 0000000000..f899f350ca --- /dev/null +++ b/src/modules/audio_processing/main/source/noise_suppression_impl.cc @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "noise_suppression_impl.h" + +#include <cassert> + +#include "critical_section_wrapper.h" +#if defined(WEBRTC_NS_FLOAT) +#include "noise_suppression.h" +#elif defined(WEBRTC_NS_FIXED) +#include "noise_suppression_x.h" +#endif + +#include "audio_processing_impl.h" +#include "audio_buffer.h" + +namespace webrtc { + +#if defined(WEBRTC_NS_FLOAT) +typedef NsHandle Handle; +#elif defined(WEBRTC_NS_FIXED) +typedef NsxHandle Handle; +#endif + +namespace { +int MapSetting(NoiseSuppression::Level level) { + switch (level) { + case NoiseSuppression::kLow: + return 0; + case NoiseSuppression::kModerate: + return 1; + case NoiseSuppression::kHigh: + return 2; + case NoiseSuppression::kVeryHigh: + return 3; + default: + return -1; + } +} +} // namespace + +NoiseSuppressionImpl::NoiseSuppressionImpl(const AudioProcessingImpl* apm) + : ProcessingComponent(apm), + apm_(apm), + level_(kModerate) {} + +NoiseSuppressionImpl::~NoiseSuppressionImpl() {} + +int NoiseSuppressionImpl::ProcessCaptureAudio(AudioBuffer* audio) { + int err = apm_->kNoError; + + if (!is_component_enabled()) { + return apm_->kNoError; + } + assert(audio->samples_per_split_channel() <= 160); + assert(audio->num_channels() == num_handles()); + + for (int i = 0; i < num_handles(); i++) { + Handle* my_handle = static_cast<Handle*>(handle(i)); +#if defined(WEBRTC_NS_FLOAT) + err = WebRtcNs_Process(static_cast<Handle*>(handle(i)), + audio->low_pass_split_data(i), + audio->high_pass_split_data(i), + audio->low_pass_split_data(i), + audio->high_pass_split_data(i)); +#elif defined(WEBRTC_NS_FIXED) + err = WebRtcNsx_Process(static_cast<Handle*>(handle(i)), + audio->low_pass_split_data(i), + audio->high_pass_split_data(i), + audio->low_pass_split_data(i), + audio->high_pass_split_data(i)); +#endif + + if (err != 
apm_->kNoError) { + return GetHandleError(my_handle); + } + } + + return apm_->kNoError; +} + +int NoiseSuppressionImpl::Enable(bool enable) { + CriticalSectionScoped crit_scoped(*apm_->crit()); + return EnableComponent(enable); +} + +bool NoiseSuppressionImpl::is_enabled() const { + return is_component_enabled(); +} + +int NoiseSuppressionImpl::set_level(Level level) { + CriticalSectionScoped crit_scoped(*apm_->crit()); + if (MapSetting(level) == -1) { + return apm_->kBadParameterError; + } + + level_ = level; + return Configure(); +} + +NoiseSuppression::Level NoiseSuppressionImpl::level() const { + return level_; +} + +int NoiseSuppressionImpl::get_version(char* version, + int version_len_bytes) const { +#if defined(WEBRTC_NS_FLOAT) + if (WebRtcNs_get_version(version, version_len_bytes) != 0) +#elif defined(WEBRTC_NS_FIXED) + if (WebRtcNsx_get_version(version, version_len_bytes) != 0) +#endif + { + return apm_->kBadParameterError; + } + + return apm_->kNoError; +} + +void* NoiseSuppressionImpl::CreateHandle() const { + Handle* handle = NULL; +#if defined(WEBRTC_NS_FLOAT) + if (WebRtcNs_Create(&handle) != apm_->kNoError) +#elif defined(WEBRTC_NS_FIXED) + if (WebRtcNsx_Create(&handle) != apm_->kNoError) +#endif + { + handle = NULL; + } else { + assert(handle != NULL); + } + + return handle; +} + +int NoiseSuppressionImpl::DestroyHandle(void* handle) const { +#if defined(WEBRTC_NS_FLOAT) + return WebRtcNs_Free(static_cast<Handle*>(handle)); +#elif defined(WEBRTC_NS_FIXED) + return WebRtcNsx_Free(static_cast<Handle*>(handle)); +#endif +} + +int NoiseSuppressionImpl::InitializeHandle(void* handle) const { +#if defined(WEBRTC_NS_FLOAT) + return WebRtcNs_Init(static_cast<Handle*>(handle), apm_->sample_rate_hz()); +#elif defined(WEBRTC_NS_FIXED) + return WebRtcNsx_Init(static_cast<Handle*>(handle), apm_->sample_rate_hz()); +#endif +} + +int NoiseSuppressionImpl::ConfigureHandle(void* handle) const { +#if defined(WEBRTC_NS_FLOAT) + return 
WebRtcNs_set_policy(static_cast<Handle*>(handle), + MapSetting(level_)); +#elif defined(WEBRTC_NS_FIXED) + return WebRtcNsx_set_policy(static_cast<Handle*>(handle), + MapSetting(level_)); +#endif +} + +int NoiseSuppressionImpl::num_handles_required() const { + return apm_->num_output_channels(); +} + +int NoiseSuppressionImpl::GetHandleError(void* handle) const { + // The NS has no get_error() function. + assert(handle != NULL); + return apm_->kUnspecifiedError; +} +} // namespace webrtc + diff --git a/src/modules/audio_processing/main/source/noise_suppression_impl.h b/src/modules/audio_processing/main/source/noise_suppression_impl.h new file mode 100644 index 0000000000..c9ff9b31af --- /dev/null +++ b/src/modules/audio_processing/main/source/noise_suppression_impl.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_NOISE_SUPPRESSION_IMPL_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_NOISE_SUPPRESSION_IMPL_H_ + +#include "audio_processing.h" +#include "processing_component.h" + +namespace webrtc { +class AudioProcessingImpl; +class AudioBuffer; + +class NoiseSuppressionImpl : public NoiseSuppression, + public ProcessingComponent { + public: + explicit NoiseSuppressionImpl(const AudioProcessingImpl* apm); + virtual ~NoiseSuppressionImpl(); + + int ProcessCaptureAudio(AudioBuffer* audio); + + // NoiseSuppression implementation. + virtual bool is_enabled() const; + + // ProcessingComponent implementation. + virtual int get_version(char* version, int version_len_bytes) const; + + private: + // NoiseSuppression implementation. 
+ virtual int Enable(bool enable); + virtual int set_level(Level level); + virtual Level level() const; + + // ProcessingComponent implementation. + virtual void* CreateHandle() const; + virtual int InitializeHandle(void* handle) const; + virtual int ConfigureHandle(void* handle) const; + virtual int DestroyHandle(void* handle) const; + virtual int num_handles_required() const; + virtual int GetHandleError(void* handle) const; + + const AudioProcessingImpl* apm_; + Level level_; +}; +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_NOISE_SUPPRESSION_IMPL_H_ diff --git a/src/modules/audio_processing/main/source/processing_component.cc b/src/modules/audio_processing/main/source/processing_component.cc new file mode 100644 index 0000000000..9ac125794c --- /dev/null +++ b/src/modules/audio_processing/main/source/processing_component.cc @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "processing_component.h" + +#include <cassert> + +#include "audio_processing_impl.h" + +namespace webrtc { + +ProcessingComponent::ProcessingComponent(const AudioProcessingImpl* apm) + : apm_(apm), + initialized_(false), + enabled_(false), + num_handles_(0) {} + +ProcessingComponent::~ProcessingComponent() { + assert(initialized_ == false); +} + +int ProcessingComponent::Destroy() { + while (!handles_.empty()) { + DestroyHandle(handles_.back()); + handles_.pop_back(); + } + initialized_ = false; + + return apm_->kNoError; +} + +int ProcessingComponent::EnableComponent(bool enable) { + if (enable && !enabled_) { + enabled_ = enable; // Must be set before Initialize() is called. + + int err = Initialize(); + if (err != apm_->kNoError) { + enabled_ = false; + return err; + } + } else { + enabled_ = enable; + } + + return apm_->kNoError; +} + +bool ProcessingComponent::is_component_enabled() const { + return enabled_; +} + +void* ProcessingComponent::handle(int index) const { + assert(index < num_handles_); + return handles_[index]; +} + +int ProcessingComponent::num_handles() const { + return num_handles_; +} + +int ProcessingComponent::Initialize() { + if (!enabled_) { + return apm_->kNoError; + } + + num_handles_ = num_handles_required(); + if (num_handles_ > static_cast<int>(handles_.size())) { + handles_.resize(num_handles_, NULL); + } + + assert(static_cast<int>(handles_.size()) >= num_handles_); + for (int i = 0; i < num_handles_; i++) { + if (handles_[i] == NULL) { + handles_[i] = CreateHandle(); + if (handles_[i] == NULL) { + return apm_->kCreationFailedError; + } + } + + int err = InitializeHandle(handles_[i]); + if (err != apm_->kNoError) { + return GetHandleError(handles_[i]); + } + } + + initialized_ = true; + return Configure(); +} + +int ProcessingComponent::Configure() { + if (!initialized_) { + return apm_->kNoError; + } + + assert(static_cast<int>(handles_.size()) >= num_handles_); + for (int i = 0; i < num_handles_; i++) { + int err 
= ConfigureHandle(handles_[i]); + if (err != apm_->kNoError) { + return GetHandleError(handles_[i]); + } + } + + return apm_->kNoError; +} +} // namespace webrtc diff --git a/src/modules/audio_processing/main/source/processing_component.h b/src/modules/audio_processing/main/source/processing_component.h new file mode 100644 index 0000000000..3d8a02bd3e --- /dev/null +++ b/src/modules/audio_processing/main/source/processing_component.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_PROCESSING_COMPONENT_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_PROCESSING_COMPONENT_H_ + +#include <vector> + +#include "audio_processing.h" + +namespace webrtc { +class AudioProcessingImpl; + +/*template <class T> +class ComponentHandle { + public: + ComponentHandle(); + virtual ~ComponentHandle(); + + virtual int Create() = 0; + virtual T* ptr() const = 0; +};*/ + +class ProcessingComponent { + public: + explicit ProcessingComponent(const AudioProcessingImpl* apm); + virtual ~ProcessingComponent(); + + virtual int Initialize(); + virtual int Destroy(); + virtual int get_version(char* version, int version_len_bytes) const = 0; + + protected: + virtual int Configure(); + int EnableComponent(bool enable); + bool is_component_enabled() const; + void* handle(int index) const; + int num_handles() const; + + private: + virtual void* CreateHandle() const = 0; + virtual int InitializeHandle(void* handle) const = 0; + virtual int ConfigureHandle(void* handle) const = 0; + virtual int DestroyHandle(void* handle) const = 0; + virtual int 
num_handles_required() const = 0; + virtual int GetHandleError(void* handle) const = 0; + + const AudioProcessingImpl* apm_; + std::vector<void*> handles_; + bool initialized_; + bool enabled_; + int num_handles_; +}; +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_PROCESSING_COMPONENT_H__ diff --git a/src/modules/audio_processing/main/source/splitting_filter.cc b/src/modules/audio_processing/main/source/splitting_filter.cc new file mode 100644 index 0000000000..1526141cc9 --- /dev/null +++ b/src/modules/audio_processing/main/source/splitting_filter.cc @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "splitting_filter.h" +#include "signal_processing_library.h" + +namespace webrtc { + +void SplittingFilterAnalysis(const WebRtc_Word16* in_data, + WebRtc_Word16* low_band, + WebRtc_Word16* high_band, + WebRtc_Word32* filter_state1, + WebRtc_Word32* filter_state2) +{ + WebRtcSpl_AnalysisQMF(in_data, low_band, high_band, filter_state1, filter_state2); +} + +void SplittingFilterSynthesis(const WebRtc_Word16* low_band, + const WebRtc_Word16* high_band, + WebRtc_Word16* out_data, + WebRtc_Word32* filt_state1, + WebRtc_Word32* filt_state2) +{ + WebRtcSpl_SynthesisQMF(low_band, high_band, out_data, filt_state1, filt_state2); +} +} // namespace webrtc diff --git a/src/modules/audio_processing/main/source/splitting_filter.h b/src/modules/audio_processing/main/source/splitting_filter.h new file mode 100644 index 0000000000..661bfb2f6e --- /dev/null +++ b/src/modules/audio_processing/main/source/splitting_filter.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_SPLITTING_FILTER_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_SPLITTING_FILTER_H_ + +#include "typedefs.h" +#include "signal_processing_library.h" + +namespace webrtc { +/* + * SplittingFilterbank_analysisQMF(...) + * + * Splits a super-wb signal into two subbands: 0-8 kHz and 8-16 kHz. 
+ * + * Input: + * - in_data : super-wb audio signal + * + * Input & Output: + * - filt_state1: Filter state for first all-pass filter + * - filt_state2: Filter state for second all-pass filter + * + * Output: + * - low_band : The signal from the 0-4 kHz band + * - high_band : The signal from the 4-8 kHz band + */ +void SplittingFilterAnalysis(const WebRtc_Word16* in_data, + WebRtc_Word16* low_band, + WebRtc_Word16* high_band, + WebRtc_Word32* filt_state1, + WebRtc_Word32* filt_state2); + +/* + * SplittingFilterbank_synthesisQMF(...) + * + * Combines the two subbands (0-8 and 8-16 kHz) into a super-wb signal. + * + * Input: + * - low_band : The signal with the 0-8 kHz band + * - high_band : The signal with the 8-16 kHz band + * + * Input & Output: + * - filt_state1: Filter state for first all-pass filter + * - filt_state2: Filter state for second all-pass filter + * + * Output: + * - out_data : super-wb speech signal + */ +void SplittingFilterSynthesis(const WebRtc_Word16* low_band, + const WebRtc_Word16* high_band, + WebRtc_Word16* out_data, + WebRtc_Word32* filt_state1, + WebRtc_Word32* filt_state2); +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_SPLITTING_FILTER_H_ diff --git a/src/modules/audio_processing/main/source/voice_detection_impl.cc b/src/modules/audio_processing/main/source/voice_detection_impl.cc new file mode 100644 index 0000000000..3eb446e911 --- /dev/null +++ b/src/modules/audio_processing/main/source/voice_detection_impl.cc @@ -0,0 +1,202 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "voice_detection_impl.h" + +#include <cassert> + +#include "critical_section_wrapper.h" +#include "webrtc_vad.h" + +#include "audio_processing_impl.h" +#include "audio_buffer.h" + +namespace webrtc { + +typedef VadInst Handle; + +namespace { +WebRtc_Word16 MapSetting(VoiceDetection::Likelihood likelihood) { + switch (likelihood) { + case VoiceDetection::kVeryLowLikelihood: + return 3; + break; + case VoiceDetection::kLowLikelihood: + return 2; + break; + case VoiceDetection::kModerateLikelihood: + return 1; + break; + case VoiceDetection::kHighLikelihood: + return 0; + break; + default: + return -1; + } +} +} // namespace + + +VoiceDetectionImpl::VoiceDetectionImpl(const AudioProcessingImpl* apm) + : ProcessingComponent(apm), + apm_(apm), + stream_has_voice_(false), + using_external_vad_(false), + likelihood_(kLowLikelihood), + frame_size_ms_(10), + frame_size_samples_(0) {} + +VoiceDetectionImpl::~VoiceDetectionImpl() {} + +int VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) { + if (!is_component_enabled()) { + return apm_->kNoError; + } + + if (using_external_vad_) { + using_external_vad_ = false; + return apm_->kNoError; + } + assert(audio->samples_per_split_channel() <= 160); + + WebRtc_Word16* mixed_data = audio->low_pass_split_data(0); + if (audio->num_channels() > 1) { + audio->CopyAndMixLowPass(1); + mixed_data = audio->mixed_low_pass_data(0); + } + + // TODO(ajm): concatenate data in frame buffer here. 
+ + int vad_ret_val; + vad_ret_val = WebRtcVad_Process(static_cast<Handle*>(handle(0)), + apm_->split_sample_rate_hz(), + mixed_data, + frame_size_samples_); + + if (vad_ret_val == 0) { + stream_has_voice_ = false; + } else if (vad_ret_val == 1) { + stream_has_voice_ = true; + } else { + return apm_->kUnspecifiedError; + } + + return apm_->kNoError; +} + +int VoiceDetectionImpl::Enable(bool enable) { + CriticalSectionScoped crit_scoped(*apm_->crit()); + return EnableComponent(enable); +} + +bool VoiceDetectionImpl::is_enabled() const { + return is_component_enabled(); +} + +int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) { + using_external_vad_ = true; + stream_has_voice_ = has_voice; + return apm_->kNoError; +} + +bool VoiceDetectionImpl::stream_has_voice() const { + // TODO(ajm): enable this assertion? + //assert(using_external_vad_ || is_component_enabled()); + return stream_has_voice_; +} + +int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) { + CriticalSectionScoped crit_scoped(*apm_->crit()); + if (MapSetting(likelihood) == -1) { + return apm_->kBadParameterError; + } + + likelihood_ = likelihood; + return Configure(); +} + +VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const { + return likelihood_; +} + +int VoiceDetectionImpl::set_frame_size_ms(int size) { + CriticalSectionScoped crit_scoped(*apm_->crit()); + assert(size == 10); // TODO(ajm): remove when supported. + if (size != 10 && + size != 20 && + size != 30) { + return apm_->kBadParameterError; + } + + frame_size_ms_ = size; + + return Initialize(); +} + +int VoiceDetectionImpl::frame_size_ms() const { + return frame_size_ms_; +} + +int VoiceDetectionImpl::Initialize() { + int err = ProcessingComponent::Initialize(); + if (err != apm_->kNoError || !is_component_enabled()) { + return err; + } + + using_external_vad_ = false; + frame_size_samples_ = frame_size_ms_ * (apm_->split_sample_rate_hz() / 1000); + // TODO(ajm): intialize frame buffer here. 
+ + return apm_->kNoError; +} + +int VoiceDetectionImpl::get_version(char* version, + int version_len_bytes) const { + if (WebRtcVad_get_version(version, version_len_bytes) != 0) { + return apm_->kBadParameterError; + } + + return apm_->kNoError; +} + +void* VoiceDetectionImpl::CreateHandle() const { + Handle* handle = NULL; + if (WebRtcVad_Create(&handle) != apm_->kNoError) { + handle = NULL; + } else { + assert(handle != NULL); + } + + return handle; +} + +int VoiceDetectionImpl::DestroyHandle(void* handle) const { + return WebRtcVad_Free(static_cast<Handle*>(handle)); +} + +int VoiceDetectionImpl::InitializeHandle(void* handle) const { + return WebRtcVad_Init(static_cast<Handle*>(handle)); +} + +int VoiceDetectionImpl::ConfigureHandle(void* handle) const { + return WebRtcVad_set_mode(static_cast<Handle*>(handle), + MapSetting(likelihood_)); +} + +int VoiceDetectionImpl::num_handles_required() const { + return 1; +} + +int VoiceDetectionImpl::GetHandleError(void* handle) const { + // The VAD has no get_error() function. + assert(handle != NULL); + return apm_->kUnspecifiedError; +} +} // namespace webrtc diff --git a/src/modules/audio_processing/main/source/voice_detection_impl.h b/src/modules/audio_processing/main/source/voice_detection_impl.h new file mode 100644 index 0000000000..ef212d11b9 --- /dev/null +++ b/src/modules/audio_processing/main/source/voice_detection_impl.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_VOICE_DETECTION_IMPL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_VOICE_DETECTION_IMPL_H_

#include "audio_processing.h"
#include "processing_component.h"

namespace webrtc {
// Forward declarations: only pointers to these types appear in this header.
class AudioProcessingImpl;
class AudioBuffer;

// Implements the public VoiceDetection interface and plugs into the
// ProcessingComponent handle-management scheme (create / initialize /
// configure / destroy of an opaque per-component handle).
class VoiceDetectionImpl : public VoiceDetection,
                           public ProcessingComponent {
 public:
  // |apm| is stored as a raw pointer (see apm_ below).
  // NOTE(review): ownership is not visible here; presumably the owning
  // AudioProcessingImpl outlives this object -- confirm.
  explicit VoiceDetectionImpl(const AudioProcessingImpl* apm);
  virtual ~VoiceDetectionImpl();

  // Runs voice detection on a capture-side buffer. Returns an error code.
  // NOTE(review): exact semantics live in the .cc, which is not visible from
  // this header.
  int ProcessCaptureAudio(AudioBuffer* audio);

  // VoiceDetection implementation.
  virtual bool is_enabled() const;

  // ProcessingComponent implementation.
  virtual int Initialize();
  // Writes the VAD version string into the caller-provided |version| buffer
  // of |version_len_bytes| bytes.
  virtual int get_version(char* version, int version_len_bytes) const;

 private:
  // VoiceDetection implementation.
  // Declared private here; callers presumably reach these through a
  // VoiceDetection pointer or reference -- confirm.
  virtual int Enable(bool enable);
  virtual int set_stream_has_voice(bool has_voice);
  virtual bool stream_has_voice() const;
  virtual int set_likelihood(Likelihood likelihood);
  virtual Likelihood likelihood() const;
  virtual int set_frame_size_ms(int size);
  virtual int frame_size_ms() const;

  // ProcessingComponent implementation.
  // Handle lifecycle hooks; |handle| is the opaque pointer produced by
  // CreateHandle().
  virtual void* CreateHandle() const;
  virtual int InitializeHandle(void* handle) const;
  virtual int ConfigureHandle(void* handle) const;
  virtual int DestroyHandle(void* handle) const;
  virtual int num_handles_required() const;
  virtual int GetHandleError(void* handle) const;

  const AudioProcessingImpl* apm_;  // Source of the error-code constants.
  bool stream_has_voice_;           // Backs stream_has_voice().
  // NOTE(review): presumably set when the decision is supplied through
  // set_stream_has_voice() rather than computed internally -- confirm.
  bool using_external_vad_;
  Likelihood likelihood_;           // Backs likelihood().
  int frame_size_ms_;               // Backs frame_size_ms().
  // NOTE(review): presumably frame_size_ms_ expressed in samples at the
  // current sample rate -- confirm against the .cc.
  int frame_size_samples_;
};
}  // namespace webrtc

#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_VOICE_DETECTION_IMPL_H_
--> + <activity android:name="android.app.NativeActivity" + android:label="@string/app_name" + android:configChanges="orientation|keyboardHidden"> + <!-- Tell NativeActivity the name of our .so --> + <meta-data android:name="android.app.lib_name" + android:value="apmtest-activity" /> + <intent-filter> + <action android:name="android.intent.action.MAIN" /> + <category android:name="android.intent.category.LAUNCHER" /> + </intent-filter> + </activity> + </application> + +</manifest> +<!-- END_INCLUDE(manifest) --> diff --git a/src/modules/audio_processing/main/test/android/apmtest/default.properties b/src/modules/audio_processing/main/test/android/apmtest/default.properties new file mode 100644 index 0000000000..9a2c9f6c88 --- /dev/null +++ b/src/modules/audio_processing/main/test/android/apmtest/default.properties @@ -0,0 +1,11 @@ +# This file is automatically generated by Android Tools. +# Do not modify this file -- YOUR CHANGES WILL BE ERASED! +# +# This file must be checked in Version Control Systems. +# +# To customize properties used by the Ant build system use, +# "build.properties", and override values to adapt the script to your +# project structure. + +# Project target. +target=android-9 diff --git a/src/modules/audio_processing/main/test/android/apmtest/jni/Android.mk b/src/modules/audio_processing/main/test/android/apmtest/jni/Android.mk new file mode 100644 index 0000000000..eaf3c9d86f --- /dev/null +++ b/src/modules/audio_processing/main/test/android/apmtest/jni/Android.mk @@ -0,0 +1,26 @@ +# Copyright (C) 2010 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +LOCAL_PATH := $(call my-dir) + +include $(CLEAR_VARS) + +LOCAL_MODULE := apmtest-activity +LOCAL_SRC_FILES := main.c +LOCAL_LDLIBS := -llog -landroid -lEGL -lGLESv1_CM +LOCAL_STATIC_LIBRARIES := android_native_app_glue + +include $(BUILD_SHARED_LIBRARY) + +$(call import-module,android/native_app_glue) diff --git a/src/modules/audio_processing/main/test/android/apmtest/jni/Application.mk b/src/modules/audio_processing/main/test/android/apmtest/jni/Application.mk new file mode 100644 index 0000000000..22d188e595 --- /dev/null +++ b/src/modules/audio_processing/main/test/android/apmtest/jni/Application.mk @@ -0,0 +1 @@ +APP_PLATFORM := android-9 diff --git a/src/modules/audio_processing/main/test/android/apmtest/jni/main.c b/src/modules/audio_processing/main/test/android/apmtest/jni/main.c new file mode 100644 index 0000000000..2e19635683 --- /dev/null +++ b/src/modules/audio_processing/main/test/android/apmtest/jni/main.c @@ -0,0 +1,307 @@ +/* + * Copyright (C) 2010 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +//BEGIN_INCLUDE(all) +#include <jni.h> +#include <errno.h> + +#include <EGL/egl.h> +#include <GLES/gl.h> + +#include <android/sensor.h> +#include <android/log.h> +#include <android_native_app_glue.h> + +#define LOGI(...) ((void)__android_log_print(ANDROID_LOG_INFO, "native-activity", __VA_ARGS__)) +#define LOGW(...) ((void)__android_log_print(ANDROID_LOG_WARN, "native-activity", __VA_ARGS__)) + +/** + * Our saved state data. + */ +struct saved_state { + float angle; + int32_t x; + int32_t y; +}; + +/** + * Shared state for our app. + */ +struct engine { + struct android_app* app; + + ASensorManager* sensorManager; + const ASensor* accelerometerSensor; + ASensorEventQueue* sensorEventQueue; + + int animating; + EGLDisplay display; + EGLSurface surface; + EGLContext context; + int32_t width; + int32_t height; + struct saved_state state; +}; + +/** + * Initialize an EGL context for the current display. + */ +static int engine_init_display(struct engine* engine) { + // initialize OpenGL ES and EGL + + /* + * Here specify the attributes of the desired configuration. + * Below, we select an EGLConfig with at least 8 bits per color + * component compatible with on-screen windows + */ + const EGLint attribs[] = { + EGL_SURFACE_TYPE, EGL_WINDOW_BIT, + EGL_BLUE_SIZE, 8, + EGL_GREEN_SIZE, 8, + EGL_RED_SIZE, 8, + EGL_NONE + }; + EGLint w, h, dummy, format; + EGLint numConfigs; + EGLConfig config; + EGLSurface surface; + EGLContext context; + + EGLDisplay display = eglGetDisplay(EGL_DEFAULT_DISPLAY); + + eglInitialize(display, 0, 0); + + /* Here, the application chooses the configuration it desires. In this + * sample, we have a very simplified selection process, where we pick + * the first EGLConfig that matches our criteria */ + eglChooseConfig(display, attribs, &config, 1, &numConfigs); + + /* EGL_NATIVE_VISUAL_ID is an attribute of the EGLConfig that is + * guaranteed to be accepted by ANativeWindow_setBuffersGeometry(). 
+ * As soon as we picked a EGLConfig, we can safely reconfigure the + * ANativeWindow buffers to match, using EGL_NATIVE_VISUAL_ID. */ + eglGetConfigAttrib(display, config, EGL_NATIVE_VISUAL_ID, &format); + + ANativeWindow_setBuffersGeometry(engine->app->window, 0, 0, format); + + surface = eglCreateWindowSurface(display, config, engine->app->window, NULL); + context = eglCreateContext(display, config, NULL, NULL); + + if (eglMakeCurrent(display, surface, surface, context) == EGL_FALSE) { + LOGW("Unable to eglMakeCurrent"); + return -1; + } + + eglQuerySurface(display, surface, EGL_WIDTH, &w); + eglQuerySurface(display, surface, EGL_HEIGHT, &h); + + engine->display = display; + engine->context = context; + engine->surface = surface; + engine->width = w; + engine->height = h; + engine->state.angle = 0; + + // Initialize GL state. + glHint(GL_PERSPECTIVE_CORRECTION_HINT, GL_FASTEST); + glEnable(GL_CULL_FACE); + glShadeModel(GL_SMOOTH); + glDisable(GL_DEPTH_TEST); + + return 0; +} + +/** + * Just the current frame in the display. + */ +static void engine_draw_frame(struct engine* engine) { + if (engine->display == NULL) { + // No display. + return; + } + + // Just fill the screen with a color. + glClearColor(((float)engine->state.x)/engine->width, engine->state.angle, + ((float)engine->state.y)/engine->height, 1); + glClear(GL_COLOR_BUFFER_BIT); + + eglSwapBuffers(engine->display, engine->surface); +} + +/** + * Tear down the EGL context currently associated with the display. 
+ */ +static void engine_term_display(struct engine* engine) { + if (engine->display != EGL_NO_DISPLAY) { + eglMakeCurrent(engine->display, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT); + if (engine->context != EGL_NO_CONTEXT) { + eglDestroyContext(engine->display, engine->context); + } + if (engine->surface != EGL_NO_SURFACE) { + eglDestroySurface(engine->display, engine->surface); + } + eglTerminate(engine->display); + } + engine->animating = 0; + engine->display = EGL_NO_DISPLAY; + engine->context = EGL_NO_CONTEXT; + engine->surface = EGL_NO_SURFACE; +} + +/** + * Process the next input event. + */ +static int32_t engine_handle_input(struct android_app* app, AInputEvent* event) { + struct engine* engine = (struct engine*)app->userData; + if (AInputEvent_getType(event) == AINPUT_EVENT_TYPE_MOTION) { + engine->animating = 1; + engine->state.x = AMotionEvent_getX(event, 0); + engine->state.y = AMotionEvent_getY(event, 0); + return 1; + } + return 0; +} + +/** + * Process the next main command. + */ +static void engine_handle_cmd(struct android_app* app, int32_t cmd) { + struct engine* engine = (struct engine*)app->userData; + switch (cmd) { + case APP_CMD_SAVE_STATE: + // The system has asked us to save our current state. Do so. + engine->app->savedState = malloc(sizeof(struct saved_state)); + *((struct saved_state*)engine->app->savedState) = engine->state; + engine->app->savedStateSize = sizeof(struct saved_state); + break; + case APP_CMD_INIT_WINDOW: + // The window is being shown, get it ready. + if (engine->app->window != NULL) { + engine_init_display(engine); + engine_draw_frame(engine); + } + break; + case APP_CMD_TERM_WINDOW: + // The window is being hidden or closed, clean it up. + engine_term_display(engine); + break; + case APP_CMD_GAINED_FOCUS: + // When our app gains focus, we start monitoring the accelerometer. 
+ if (engine->accelerometerSensor != NULL) { + ASensorEventQueue_enableSensor(engine->sensorEventQueue, + engine->accelerometerSensor); + // We'd like to get 60 events per second (in us). + ASensorEventQueue_setEventRate(engine->sensorEventQueue, + engine->accelerometerSensor, (1000L/60)*1000); + } + break; + case APP_CMD_LOST_FOCUS: + // When our app loses focus, we stop monitoring the accelerometer. + // This is to avoid consuming battery while not being used. + if (engine->accelerometerSensor != NULL) { + ASensorEventQueue_disableSensor(engine->sensorEventQueue, + engine->accelerometerSensor); + } + // Also stop animating. + engine->animating = 0; + engine_draw_frame(engine); + break; + } +} + +/** + * This is the main entry point of a native application that is using + * android_native_app_glue. It runs in its own thread, with its own + * event loop for receiving input events and doing other things. + */ +void android_main(struct android_app* state) { + struct engine engine; + + // Make sure glue isn't stripped. + app_dummy(); + + memset(&engine, 0, sizeof(engine)); + state->userData = &engine; + state->onAppCmd = engine_handle_cmd; + state->onInputEvent = engine_handle_input; + engine.app = state; + + // Prepare to monitor accelerometer + engine.sensorManager = ASensorManager_getInstance(); + engine.accelerometerSensor = ASensorManager_getDefaultSensor(engine.sensorManager, + ASENSOR_TYPE_ACCELEROMETER); + engine.sensorEventQueue = ASensorManager_createEventQueue(engine.sensorManager, + state->looper, LOOPER_ID_USER, NULL, NULL); + + if (state->savedState != NULL) { + // We are starting with a previous saved state; restore from it. + engine.state = *(struct saved_state*)state->savedState; + } + + // loop waiting for stuff to do. + + while (1) { + // Read all pending events. + int ident; + int events; + struct android_poll_source* source; + + // If not animating, we will block forever waiting for events. 
+ // If animating, we loop until all events are read, then continue + // to draw the next frame of animation. + while ((ident=ALooper_pollAll(engine.animating ? 0 : -1, NULL, &events, + (void**)&source)) >= 0) { + + // Process this event. + if (source != NULL) { + source->process(state, source); + } + + // If a sensor has data, process it now. + if (ident == LOOPER_ID_USER) { + if (engine.accelerometerSensor != NULL) { + ASensorEvent event; + while (ASensorEventQueue_getEvents(engine.sensorEventQueue, + &event, 1) > 0) { + LOGI("accelerometer: x=%f y=%f z=%f", + event.acceleration.x, event.acceleration.y, + event.acceleration.z); + } + } + } + + // Check if we are exiting. + if (state->destroyRequested != 0) { + engine_term_display(&engine); + return; + } + } + + if (engine.animating) { + // Done with events; draw next animation frame. + engine.state.angle += .01f; + if (engine.state.angle > 1) { + engine.state.angle = 0; + } + + // Drawing is throttled to the screen update rate, so there + // is no need to do timing here. + engine_draw_frame(&engine); + } + } +} +//END_INCLUDE(all) diff --git a/src/modules/audio_processing/main/test/android/apmtest/res/values/strings.xml b/src/modules/audio_processing/main/test/android/apmtest/res/values/strings.xml new file mode 100644 index 0000000000..d0bd0f3051 --- /dev/null +++ b/src/modules/audio_processing/main/test/android/apmtest/res/values/strings.xml @@ -0,0 +1,4 @@ +<?xml version="1.0" encoding="utf-8"?> +<resources> + <string name="app_name">apmtest</string> +</resources> diff --git a/src/modules/audio_processing/main/test/process_test/Android.mk b/src/modules/audio_processing/main/test/process_test/Android.mk new file mode 100644 index 0000000000..23080aab23 --- /dev/null +++ b/src/modules/audio_processing/main/test/process_test/Android.mk @@ -0,0 +1,48 @@ +# Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 
function apmtest(task, testname, casenumber, legacy)
%APMTEST is a tool to process APM file sets and easily display the output.
%   APMTEST(TASK, TESTNAME, CASENUMBER, LEGACY) performs one of several TASKs:
%     'test'  Processes the files to produce test output (default).
%     'list'  Prints a list of cases in the test set, preceded by their
%             CASENUMBERs.
%     'show'  Uses spclab to show the test case specified by the
%             CASENUMBER parameter.
%
%   using a set of test files determined by TESTNAME:
%     'all'   All tests (default).
%     'apm'   The standard APM test set.
%     'apmm'  The mobile APM test set.
%     'aec'   The AEC test set.
%     'aecm'  The AECM test set.
%     'agc'   The AGC test set.
%     'ns'    The NS test set.
%     'vad'   The VAD test set.
%
%   CASENUMBER can be used to select a single test case. Omit CASENUMBER,
%   or set to zero, to use all test cases.
%
%   LEGACY (default false) runs the old VQE recordings with the old VQE
%   test program and file names.
%
%   NOTE: only 'apm', 'apm-swb' and 'apmm' have options configured below;
%   any other TESTNAME (including those expanded from 'all') raises an error.

if nargin < 4
  % Set to true to run old VQE recordings.
  legacy = false;
end

if nargin < 3
  casenumber = 0;
end

% TESTNAME is the second argument and TASK the first, so each default is
% applied only when that argument is actually missing.
if nargin < 2
  testname = 'all';
end

if nargin < 1
  task = 'test';
end

if ~strcmp(task, 'test') && ~strcmp(task, 'list') && ~strcmp(task, 'show')
  error(['TASK ' task ' is not recognized']);
end

if casenumber == 0 && strcmp(task, 'show')
  error(['CASENUMBER must be specified for TASK ' task]);
end

filepath = 'data/';
inpath = [filepath 'input/'];
outpath = [filepath 'output/'];
refpath = [filepath 'reference/'];

% Temporary
if legacy
  refpath = [filepath 'output/'];
  outpath = [filepath 'reference/'];
end

if strcmp(testname, 'all')
  tests = {'apm','apmm','aec','aecm','agc','ns','vad'};
else
  tests = {testname};
end

if legacy
  progname = '/usr/local/google/p4/dev/depot/test';
else
  progname = './process_test';
end

global farFile;
global nearFile;
global eventFile;
global delayFile;
global driftFile;

if legacy
  farFile = 'vqeFar.pcm';
  nearFile = 'vqeNear.pcm';
  eventFile = 'vqeEvent.dat';
  delayFile = 'vqeBuf.dat';
  driftFile = 'vqeDrift.dat';
else
  farFile = 'apm_far.pcm';
  nearFile = 'apm_near.pcm';
  eventFile = 'apm_event.dat';
  delayFile = 'apm_delay.dat';
  driftFile = 'apm_drift.dat';
end

nErr = 0;
nCases = 0;
for i=1:length(tests)
  simulateMode = false;

  if strcmp(tests{i}, 'apm')
    testdir = 'apm/';
    outfile = 'out';
    if legacy
      opt = '-ec 1 -agc 2 -nc 2 -vad 3';
    else
      opt = ['--no_progress -hpf' ...
          ' -aec --drift_compensation -agc --fixed_digital' ...
          ' -ns --ns_moderate -vad'];
    end

  elseif strcmp(tests{i}, 'apm-swb')
    simulateMode = true;
    testdir = 'apm-swb/';
    outfile = 'out';
    if legacy
      opt = '-fs 32000 -ec 1 -agc 2 -nc 2';
    else
      opt = ['--no_progress -fs 32000 -hpf' ...
          ' -aec --drift_compensation -agc --adaptive_digital' ...
          ' -ns --ns_moderate -vad'];
    end
  elseif strcmp(tests{i}, 'apmm')
    testdir = 'apmm/';
    outfile = 'out';
    opt = ['-aec --drift_compensation -agc --fixed_digital -hpf -ns ' ...
        '--ns_moderate'];

  else
    error(['TESTNAME ' tests{i} ' is not recognized']);
  end

  % Per-test directories. These are kept separate from the base paths so
  % that one test's sub-directory is not carried over into the next test.
  testInpath = [inpath testdir];
  testOutpath = [outpath testdir];
  testRefpath = [refpath testdir];

  if ~exist(testInpath,'dir')
    error(['Input directory ' testInpath ' does not exist']);
  end

  if ~exist(testRefpath,'dir')
    warning(['Reference directory ' testRefpath ' does not exist']);
  end

  [status, errMsg] = mkdir(testOutpath);
  if (status == 0)
    error(errMsg);
  end

  [nErr, nCases] = recurseDir(testInpath, testOutpath, testRefpath, ...
      outfile, progname, opt, simulateMode, nErr, nCases, task, ...
      casenumber, legacy);

  % Remove the symlinks that recurseDir created in the working directory.
  if strcmp(task, 'test') || strcmp(task, 'show')
    system(['rm ' farFile]);
    system(['rm ' nearFile]);
    if simulateMode == false
      system(['rm ' eventFile]);
      system(['rm ' delayFile]);
      system(['rm ' driftFile]);
    end
  end
end

if ~strcmp(task, 'list')
  if nErr == 0
    fprintf(1, '\nAll files are bit-exact to reference\n');
  else
    fprintf(1, '\n%d files are NOT bit-exact to reference\n', nErr);
  end
end


function [nErrOut, nCases] = recurseDir(inpath, outpath, refpath, ...
    outfile, progname, opt, simulateMode, nErr, nCases, task, casenumber, ...
    legacy)
%RECURSEDIR walks INPATH depth-first; every directory without
%   sub-directories is one test case. For the selected case(s) it either
%   lists the case or links the input files into the working directory, runs
%   PROGNAME and compares the output with the reference. Returns the updated
%   error and case counters.

global farFile;
global nearFile;
global eventFile;
global delayFile;
global driftFile;

dirs = dir(inpath);
nDirs = 0;
nErrOut = nErr;
for i=3:length(dirs) % skip . and ..
  nDirs = nDirs + dirs(i).isdir;
end


if nDirs == 0
  nCases = nCases + 1;

  if casenumber == nCases || casenumber == 0

    if strcmp(task, 'list')
      % Explicit format so that file names are never parsed as format specs.
      fprintf('%d. %s\n', nCases, outfile);
    else
      vadoutfile = ['vad_' outfile '.dat'];
      outfile = [outfile '.pcm'];

      % Check for VAD test
      vadTest = 0;
      if ~isempty(strfind(opt, '-vad'))
        vadTest = 1;
        if legacy
          opt = [opt ' ' outpath vadoutfile];
        else
          opt = [opt ' --vad_out_file ' outpath vadoutfile];
        end
      end

      if exist([inpath 'vqeFar.pcm'])
        system(['ln -s -f ' inpath 'vqeFar.pcm ' farFile]);
      elseif exist([inpath 'apm_far.pcm'])
        system(['ln -s -f ' inpath 'apm_far.pcm ' farFile]);
      end

      if exist([inpath 'vqeNear.pcm'])
        system(['ln -s -f ' inpath 'vqeNear.pcm ' nearFile]);
      elseif exist([inpath 'apm_near.pcm'])
        system(['ln -s -f ' inpath 'apm_near.pcm ' nearFile]);
      end

      if exist([inpath 'vqeEvent.dat'])
        system(['ln -s -f ' inpath 'vqeEvent.dat ' eventFile]);
      elseif exist([inpath 'apm_event.dat'])
        system(['ln -s -f ' inpath 'apm_event.dat ' eventFile]);
      end

      if exist([inpath 'vqeBuf.dat'])
        system(['ln -s -f ' inpath 'vqeBuf.dat ' delayFile]);
      elseif exist([inpath 'apm_delay.dat'])
        system(['ln -s -f ' inpath 'apm_delay.dat ' delayFile]);
      end

      if exist([inpath 'vqeSkew.dat'])
        system(['ln -s -f ' inpath 'vqeSkew.dat ' driftFile]);
      elseif exist([inpath 'vqeDrift.dat'])
        system(['ln -s -f ' inpath 'vqeDrift.dat ' driftFile]);
      elseif exist([inpath 'apm_drift.dat'])
        system(['ln -s -f ' inpath 'apm_drift.dat ' driftFile]);
      end

      if simulateMode == false
        command = [progname ' -o ' outpath outfile ' ' opt];
      else
        if legacy
          inputCmd = [' -in ' nearFile];
        else
          inputCmd = [' -i ' nearFile];
        end

        if exist([farFile])
          if legacy
            inputCmd = [' -if ' farFile inputCmd];
          else
            inputCmd = [' -ir ' farFile inputCmd];
          end
        end
        command = [progname inputCmd ' -o ' outpath outfile ' ' opt];
      end
      % This prevents MATLAB from using its own C libraries.
      shellcmd = 'bash -c "unset LD_LIBRARY_PATH;';
      fprintf('%s\n', command);
      [status, result] = system([shellcmd command '"']);
      % Program output may contain '%' or '\'; print it verbatim.
      fprintf('%s', result);

      fprintf('Reference file: %s\n', [refpath outfile]);

      if vadTest == 1
        equal_to_ref = are_files_equal([outpath vadoutfile], ...
                                       [refpath vadoutfile], ...
                                       'int8');
        if ~equal_to_ref
          nErr = nErr + 1;
        end
      end

      [equal_to_ref, diffvector] = are_files_equal([outpath outfile], ...
                                                   [refpath outfile], ...
                                                   'int16');
      if ~equal_to_ref
        nErr = nErr + 1;
      end

      if strcmp(task, 'show')
        % Assume the last init gives the sample rate of interest.
        str_idx = strfind(result, 'Sample rate:');
        fs = str2num(result(str_idx(end) + 13:str_idx(end) + 17));
        fprintf('Using %d Hz\n', fs);

        if exist([farFile])
          spclab(fs, farFile, nearFile, [refpath outfile], ...
              [outpath outfile], diffvector);
          %spclab(fs, diffvector);
        else
          spclab(fs, nearFile, [refpath outfile], [outpath outfile], ...
              diffvector);
          %spclab(fs, diffvector);
        end

        if vadTest == 1
          spclab([refpath vadoutfile], [outpath vadoutfile]);
        end
      end
    end
  end
else

  for i=3:length(dirs)
    if dirs(i).isdir
      [nErr, nCases] = recurseDir([inpath dirs(i).name '/'], outpath, ...
          refpath,[outfile '_' dirs(i).name], progname, opt, ...
          simulateMode, nErr, nCases, task, casenumber, legacy);
    end
  end
end
nErrOut = nErr;

function [are_equal, diffvector] = ...
    are_files_equal(newfile, reffile, precision, diffvector)
%ARE_FILES_EQUAL compares NEWFILE with REFFILE, both read as PRECISION
%   samples. ARE_EQUAL is true only when the two files are identical.
%   DIFFVECTOR is new - ref over the common length, or 0 when either file is
%   missing. The DIFFVECTOR input is accepted for compatibility and ignored.

are_equal = false;
diffvector = 0;
if ~exist(newfile,'file')
  warning(['Output file ' newfile ' does not exist']);
  return
end

if ~exist(reffile,'file')
  warning(['Reference file ' reffile ' does not exist']);
  return
end

fid = fopen(newfile,'rb');
new = fread(fid,inf,precision);
fclose(fid);

fid = fopen(reffile,'rb');
ref = fread(fid,inf,precision);
fclose(fid);

if length(new) ~= length(ref)
  warning('Reference is not the same length as output');
  minlength = min(length(new), length(ref));
  new = new(1:minlength);
  ref = ref(1:minlength);
end
diffvector = new - ref;

if isequal(new, ref)
  fprintf('%s is bit-exact to reference\n', newfile);
  are_equal = true;
else
  if isempty(new)
    warning([newfile ' is empty']);
    return
  end
  snr = snrseg(new,ref,80);
  fprintf('\n');
  are_equal = false;
end
+ */ + +#include <stdio.h> +#include <string.h> +#ifdef WEBRTC_ANDROID +#include <sys/stat.h> +#endif + +#include "tick_util.h" +#include "gtest/gtest.h" +#include "module_common_types.h" + +#include "audio_processing.h" + +#include "cpu_features_wrapper.h" + +using webrtc::AudioFrame; +using webrtc::TickInterval; +using webrtc::TickTime; + +using webrtc::AudioProcessing; +using webrtc::GainControl; +using webrtc::NoiseSuppression; + +void usage() { + printf( + "Usage: process_test [options] [-ir REVERSE_FILE] [-i PRIMARY_FILE]\n"); + printf( + " [-o OUT_FILE]\n"); + printf( + "process_test is a test application for AudioProcessing.\n\n" + "When -ir or -i is specified the files will be processed directly in a\n" + "simulation mode. Otherwise the full set of test files is expected to be\n" + "present in the working directory.\n"); + printf("\n"); + printf("Options\n"); + printf("General configuration:\n"); + printf(" -fs SAMPLE_RATE_HZ\n"); + printf(" -ch CHANNELS_IN CHANNELS_OUT\n"); + printf(" -rch REVERSE_CHANNELS\n"); + printf("\n"); + printf("Component configuration:\n"); + printf( + "All components are disabled by default. Each block below begins with a\n" + "flag to enable the component with default settings. 
The subsequent flags\n" + "in the block are used to provide configuration settings.\n"); + printf("\n -aec Echo cancellation\n"); + printf(" --drift_compensation\n"); + printf(" --no_drift_compensation\n"); + printf("\n -aecm Echo control mobile\n"); + printf("\n -agc Gain control\n"); + printf(" --analog\n"); + printf(" --adaptive_digital\n"); + printf(" --fixed_digital\n"); + printf(" --target_level LEVEL\n"); + printf(" --compression_gain GAIN\n"); + printf(" --limiter\n"); + printf(" --no_limiter\n"); + printf("\n -hpf High pass filter\n"); + printf("\n -ns Noise suppression\n"); + printf(" --ns_low\n"); + printf(" --ns_moderate\n"); + printf(" --ns_high\n"); + printf(" --ns_very_high\n"); + printf("\n -vad Voice activity detection\n"); + printf(" --vad_out_file FILE"); + printf("\n"); + printf("Modifiers:\n"); + printf(" --perf Measure performance.\n"); + printf(" --quiet Suppress text output.\n"); + printf(" --no_progress Suppress progress.\n"); + printf(" --version Print version information and exit.\n"); +} + +// void function for gtest. 
+void void_main(int argc, char* argv[]) { + if (argc > 1 && strcmp(argv[1], "--help") == 0) { + usage(); + return; + } + + if (argc < 2) { + printf("Did you mean to run without arguments?\n"); + printf("Try `process_test --help' for more information.\n\n"); + } + + AudioProcessing* apm = AudioProcessing::Create(0); + ASSERT_TRUE(apm != NULL); + + WebRtc_Word8 version[1024]; + WebRtc_UWord32 version_bytes_remaining = sizeof(version); + WebRtc_UWord32 version_position = 0; + + const char* far_filename = NULL; + const char* near_filename = NULL; + const char* out_filename = NULL; + const char* vad_out_filename = NULL; + + int32_t sample_rate_hz = 16000; + int32_t device_sample_rate_hz = 16000; + + int num_capture_input_channels = 1; + int num_capture_output_channels = 1; + int num_render_channels = 1; + + int samples_per_channel = sample_rate_hz / 100; + + bool simulating = false; + bool perf_testing = false; + bool verbose = true; + bool progress = true; + //bool interleaved = true; + + for (int i = 1; i < argc; i++) { + if (strcmp(argv[i], "-ir") == 0) { + i++; + ASSERT_LT(i, argc) << "Specify filename after -ir"; + far_filename = argv[i]; + simulating = true; + + } else if (strcmp(argv[i], "-i") == 0) { + i++; + ASSERT_LT(i, argc) << "Specify filename after -i"; + near_filename = argv[i]; + simulating = true; + + } else if (strcmp(argv[i], "-o") == 0) { + i++; + ASSERT_LT(i, argc) << "Specify filename after -o"; + out_filename = argv[i]; + + } else if (strcmp(argv[i], "-fs") == 0) { + i++; + ASSERT_LT(i, argc) << "Specify sample rate after -fs"; + ASSERT_EQ(1, sscanf(argv[i], "%d", &sample_rate_hz)); + samples_per_channel = sample_rate_hz / 100; + + ASSERT_EQ(apm->kNoError, + apm->set_sample_rate_hz(sample_rate_hz)); + + } else if (strcmp(argv[i], "-ch") == 0) { + i++; + ASSERT_LT(i + 1, argc) << "Specify number of channels after -ch"; + ASSERT_EQ(1, sscanf(argv[i], "%d", &num_capture_input_channels)); + i++; + ASSERT_EQ(1, sscanf(argv[i], "%d", 
&num_capture_output_channels)); + + ASSERT_EQ(apm->kNoError, + apm->set_num_channels(num_capture_input_channels, + num_capture_output_channels)); + + } else if (strcmp(argv[i], "-rch") == 0) { + i++; + ASSERT_LT(i, argc) << "Specify number of channels after -rch"; + ASSERT_EQ(1, sscanf(argv[i], "%d", &num_render_channels)); + + ASSERT_EQ(apm->kNoError, + apm->set_num_reverse_channels(num_render_channels)); + + } else if (strcmp(argv[i], "-aec") == 0) { + ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(true)); + + } else if (strcmp(argv[i], "-noasm") == 0) { + WebRtc_GetCPUInfo = WebRtc_GetCPUInfoNoASM; + + } else if (strcmp(argv[i], "--drift_compensation") == 0) { + ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(true)); + // TODO(ajm): this is enabled in the VQE test app by default. Investigate + // why it can give better performance despite passing zeros. + ASSERT_EQ(apm->kNoError, + apm->echo_cancellation()->enable_drift_compensation(true)); + } else if (strcmp(argv[i], "--no_drift_compensation") == 0) { + ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(true)); + ASSERT_EQ(apm->kNoError, + apm->echo_cancellation()->enable_drift_compensation(false)); + + } else if (strcmp(argv[i], "-aecm") == 0) { + ASSERT_EQ(apm->kNoError, apm->echo_control_mobile()->Enable(true)); + + } else if (strcmp(argv[i], "-agc") == 0) { + ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); + + } else if (strcmp(argv[i], "--analog") == 0) { + ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); + ASSERT_EQ(apm->kNoError, + apm->gain_control()->set_mode(GainControl::kAdaptiveAnalog)); + + } else if (strcmp(argv[i], "--adaptive_digital") == 0) { + ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); + ASSERT_EQ(apm->kNoError, + apm->gain_control()->set_mode(GainControl::kAdaptiveDigital)); + + } else if (strcmp(argv[i], "--fixed_digital") == 0) { + ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); + ASSERT_EQ(apm->kNoError, + 
apm->gain_control()->set_mode(GainControl::kFixedDigital)); + + } else if (strcmp(argv[i], "--target_level") == 0) { + i++; + int level; + ASSERT_EQ(1, sscanf(argv[i], "%d", &level)); + + ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); + ASSERT_EQ(apm->kNoError, + apm->gain_control()->set_target_level_dbfs(level)); + + } else if (strcmp(argv[i], "--compression_gain") == 0) { + i++; + int gain; + ASSERT_EQ(1, sscanf(argv[i], "%d", &gain)); + + ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); + ASSERT_EQ(apm->kNoError, + apm->gain_control()->set_compression_gain_db(gain)); + + } else if (strcmp(argv[i], "--limiter") == 0) { + ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); + ASSERT_EQ(apm->kNoError, + apm->gain_control()->enable_limiter(true)); + + } else if (strcmp(argv[i], "--no_limiter") == 0) { + ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); + ASSERT_EQ(apm->kNoError, + apm->gain_control()->enable_limiter(false)); + + } else if (strcmp(argv[i], "-hpf") == 0) { + ASSERT_EQ(apm->kNoError, apm->high_pass_filter()->Enable(true)); + + } else if (strcmp(argv[i], "-ns") == 0) { + ASSERT_EQ(apm->kNoError, apm->noise_suppression()->Enable(true)); + + } else if (strcmp(argv[i], "--ns_low") == 0) { + ASSERT_EQ(apm->kNoError, apm->noise_suppression()->Enable(true)); + ASSERT_EQ(apm->kNoError, + apm->noise_suppression()->set_level(NoiseSuppression::kLow)); + + } else if (strcmp(argv[i], "--ns_moderate") == 0) { + ASSERT_EQ(apm->kNoError, apm->noise_suppression()->Enable(true)); + ASSERT_EQ(apm->kNoError, + apm->noise_suppression()->set_level(NoiseSuppression::kModerate)); + + } else if (strcmp(argv[i], "--ns_high") == 0) { + ASSERT_EQ(apm->kNoError, apm->noise_suppression()->Enable(true)); + ASSERT_EQ(apm->kNoError, + apm->noise_suppression()->set_level(NoiseSuppression::kHigh)); + + } else if (strcmp(argv[i], "--ns_very_high") == 0) { + ASSERT_EQ(apm->kNoError, apm->noise_suppression()->Enable(true)); + 
ASSERT_EQ(apm->kNoError, + apm->noise_suppression()->set_level(NoiseSuppression::kVeryHigh)); + + } else if (strcmp(argv[i], "-vad") == 0) { + ASSERT_EQ(apm->kNoError, apm->voice_detection()->Enable(true)); + + } else if (strcmp(argv[i], "--vad_out_file") == 0) { + i++; + ASSERT_LT(i, argc) << "Specify filename after --vad_out_file"; + vad_out_filename = argv[i]; + + } else if (strcmp(argv[i], "--perf") == 0) { + perf_testing = true; + + } else if (strcmp(argv[i], "--quiet") == 0) { + verbose = false; + progress = false; + + } else if (strcmp(argv[i], "--no_progress") == 0) { + progress = false; + + } else if (strcmp(argv[i], "--version") == 0) { + ASSERT_EQ(apm->kNoError, apm->Version(version, + version_bytes_remaining, + version_position)); + printf("%s\n", version); + return; + + } else { + FAIL() << "Unrecognized argument " << argv[i]; + } + } + + if (verbose) { + printf("Sample rate: %d Hz\n", sample_rate_hz); + printf("Primary channels: %d (in), %d (out)\n", + num_capture_input_channels, + num_capture_output_channels); + printf("Reverse channels: %d \n", num_render_channels); + } + + const char far_file_default[] = "apm_far.pcm"; + const char near_file_default[] = "apm_near.pcm"; + const char out_file_default[] = "out.pcm"; + const char event_filename[] = "apm_event.dat"; + const char delay_filename[] = "apm_delay.dat"; + const char drift_filename[] = "apm_drift.dat"; + const char vad_file_default[] = "vad_out.dat"; + + if (!simulating) { + far_filename = far_file_default; + near_filename = near_file_default; + } + + if (out_filename == NULL) { + out_filename = out_file_default; + } + + if (vad_out_filename == NULL) { + vad_out_filename = vad_file_default; + } + + FILE* far_file = NULL; + FILE* near_file = NULL; + FILE* out_file = NULL; + FILE* event_file = NULL; + FILE* delay_file = NULL; + FILE* drift_file = NULL; + FILE* vad_out_file = NULL; + + if (far_filename != NULL) { + far_file = fopen(far_filename, "rb"); + ASSERT_TRUE(NULL != far_file) << "Unable 
to open far-end audio file " + << far_filename; + } + + near_file = fopen(near_filename, "rb"); + ASSERT_TRUE(NULL != near_file) << "Unable to open near-end audio file " + << near_filename; + struct stat st; + stat(near_filename, &st); + int near_size_samples = st.st_size / sizeof(int16_t); + + out_file = fopen(out_filename, "wb"); + ASSERT_TRUE(NULL != out_file) << "Unable to open output audio file " + << out_filename; + + if (!simulating) { + event_file = fopen(event_filename, "rb"); + ASSERT_TRUE(NULL != event_file) << "Unable to open event file " + << event_filename; + + delay_file = fopen(delay_filename, "rb"); + ASSERT_TRUE(NULL != delay_file) << "Unable to open buffer file " + << delay_filename; + + drift_file = fopen(drift_filename, "rb"); + ASSERT_TRUE(NULL != drift_file) << "Unable to open drift file " + << drift_filename; + } + + if (apm->voice_detection()->is_enabled()) { + vad_out_file = fopen(vad_out_filename, "wb"); + ASSERT_TRUE(NULL != vad_out_file) << "Unable to open VAD output file " + << vad_out_file; + } + + enum Events { + kInitializeEvent, + kRenderEvent, + kCaptureEvent, + kResetEventDeprecated + }; + int16_t event = 0; + size_t read_count = 0; + int reverse_count = 0; + int primary_count = 0; + int near_read_samples = 0; + TickInterval acc_ticks; + + AudioFrame far_frame; + far_frame._frequencyInHz = sample_rate_hz; + + AudioFrame near_frame; + near_frame._frequencyInHz = sample_rate_hz; + + int delay_ms = 0; + int drift_samples = 0; + int capture_level = 127; + int8_t stream_has_voice = 0; + + TickTime t0 = TickTime::Now(); + TickTime t1 = t0; + WebRtc_Word64 max_time_us = 0; + WebRtc_Word64 max_time_reverse_us = 0; + WebRtc_Word64 min_time_us = 1e6; + WebRtc_Word64 min_time_reverse_us = 1e6; + + while (simulating || feof(event_file) == 0) { + std::ostringstream trace_stream; + trace_stream << "Processed frames: " << reverse_count << " (reverse), " + << primary_count << " (primary)"; + SCOPED_TRACE(trace_stream.str()); + + + if 
(simulating) { + if (far_file == NULL) { + event = kCaptureEvent; + } else { + if (event == kRenderEvent) { + event = kCaptureEvent; + } else { + event = kRenderEvent; + } + } + } else { + read_count = fread(&event, sizeof(event), 1, event_file); + if (read_count != 1) { + break; + } + //if (fread(&event, sizeof(event), 1, event_file) != 1) { + // break; // This is expected. + //} + } + + if (event == kInitializeEvent || event == kResetEventDeprecated) { + ASSERT_EQ(1u, + fread(&sample_rate_hz, sizeof(sample_rate_hz), 1, event_file)); + samples_per_channel = sample_rate_hz / 100; + + ASSERT_EQ(1u, + fread(&device_sample_rate_hz, + sizeof(device_sample_rate_hz), + 1, + event_file)); + + ASSERT_EQ(apm->kNoError, + apm->set_sample_rate_hz(sample_rate_hz)); + + ASSERT_EQ(apm->kNoError, + apm->echo_cancellation()->set_device_sample_rate_hz( + device_sample_rate_hz)); + + far_frame._frequencyInHz = sample_rate_hz; + near_frame._frequencyInHz = sample_rate_hz; + + if (verbose) { + printf("Init at frame: %d (primary), %d (reverse)\n", + primary_count, reverse_count); + printf(" Sample rate: %d Hz\n", sample_rate_hz); + } + + } else if (event == kRenderEvent) { + reverse_count++; + far_frame._audioChannel = num_render_channels; + far_frame._payloadDataLengthInSamples = + num_render_channels * samples_per_channel; + + read_count = fread(far_frame._payloadData, + sizeof(WebRtc_Word16), + far_frame._payloadDataLengthInSamples, + far_file); + + if (simulating) { + if (read_count != far_frame._payloadDataLengthInSamples) { + break; // This is expected. 
+ } + } else { + ASSERT_EQ(read_count, + far_frame._payloadDataLengthInSamples); + } + + if (perf_testing) { + t0 = TickTime::Now(); + } + + ASSERT_EQ(apm->kNoError, + apm->AnalyzeReverseStream(&far_frame)); + + if (perf_testing) { + t1 = TickTime::Now(); + TickInterval tick_diff = t1 - t0; + acc_ticks += tick_diff; + if (tick_diff.Microseconds() > max_time_reverse_us) { + max_time_reverse_us = tick_diff.Microseconds(); + } + if (tick_diff.Microseconds() < min_time_reverse_us) { + min_time_reverse_us = tick_diff.Microseconds(); + } + } + + } else if (event == kCaptureEvent) { + primary_count++; + near_frame._audioChannel = num_capture_input_channels; + near_frame._payloadDataLengthInSamples = + num_capture_input_channels * samples_per_channel; + + read_count = fread(near_frame._payloadData, + sizeof(WebRtc_Word16), + near_frame._payloadDataLengthInSamples, + near_file); + + near_read_samples += read_count; + if (progress && primary_count % 100 == 0) { + printf("%.0f%% complete\r", + (near_read_samples * 100.0) / near_size_samples); + fflush(stdout); + } + if (simulating) { + if (read_count != near_frame._payloadDataLengthInSamples) { + break; // This is expected. + } + + delay_ms = 0; + drift_samples = 0; + } else { + ASSERT_EQ(read_count, + near_frame._payloadDataLengthInSamples); + + // TODO(ajm): sizeof(delay_ms) for current files? + ASSERT_EQ(1u, + fread(&delay_ms, 2, 1, delay_file)); + ASSERT_EQ(1u, + fread(&drift_samples, sizeof(drift_samples), 1, drift_file)); + } + + if (perf_testing) { + t0 = TickTime::Now(); + } + + // TODO(ajm): fake an analog gain while simulating. 
+ + int capture_level_in = capture_level; + ASSERT_EQ(apm->kNoError, + apm->gain_control()->set_stream_analog_level(capture_level)); + ASSERT_EQ(apm->kNoError, + apm->set_stream_delay_ms(delay_ms)); + ASSERT_EQ(apm->kNoError, + apm->echo_cancellation()->set_stream_drift_samples(drift_samples)); + + int err = apm->ProcessStream(&near_frame); + if (err == apm->kBadStreamParameterWarning) { + printf("Bad parameter warning. %s\n", trace_stream.str().c_str()); + } + ASSERT_TRUE(err == apm->kNoError || + err == apm->kBadStreamParameterWarning); + + capture_level = apm->gain_control()->stream_analog_level(); + + stream_has_voice = + static_cast<int8_t>(apm->voice_detection()->stream_has_voice()); + if (vad_out_file != NULL) { + ASSERT_EQ(1u, fwrite(&stream_has_voice, + sizeof(stream_has_voice), + 1, + vad_out_file)); + } + + if (apm->gain_control()->mode() != GainControl::kAdaptiveAnalog) { + ASSERT_EQ(capture_level_in, capture_level); + } + + if (perf_testing) { + t1 = TickTime::Now(); + TickInterval tick_diff = t1 - t0; + acc_ticks += tick_diff; + if (tick_diff.Microseconds() > max_time_us) { + max_time_us = tick_diff.Microseconds(); + } + if (tick_diff.Microseconds() < min_time_us) { + min_time_us = tick_diff.Microseconds(); + } + } + + ASSERT_EQ(near_frame._payloadDataLengthInSamples, + fwrite(near_frame._payloadData, + sizeof(WebRtc_Word16), + near_frame._payloadDataLengthInSamples, + out_file)); + } + else { + FAIL() << "Event " << event << " is unrecognized"; + } + } + + if (verbose) { + printf("\nProcessed frames: %d (primary), %d (reverse)\n", + primary_count, reverse_count); + } + + int8_t temp_int8; + if (far_file != NULL) { + read_count = fread(&temp_int8, sizeof(temp_int8), 1, far_file); + EXPECT_NE(0, feof(far_file)) << "Far-end file not fully processed"; + } + read_count = fread(&temp_int8, sizeof(temp_int8), 1, near_file); + EXPECT_NE(0, feof(near_file)) << "Near-end file not fully processed"; + + if (!simulating) { + read_count = fread(&temp_int8, 
sizeof(temp_int8), 1, event_file); + EXPECT_NE(0, feof(event_file)) << "Event file not fully processed"; + read_count = fread(&temp_int8, sizeof(temp_int8), 1, delay_file); + EXPECT_NE(0, feof(delay_file)) << "Delay file not fully processed"; + read_count = fread(&temp_int8, sizeof(temp_int8), 1, drift_file); + EXPECT_NE(0, feof(drift_file)) << "Drift file not fully processed"; + } + + if (perf_testing) { + if (primary_count > 0) { + WebRtc_Word64 exec_time = acc_ticks.Milliseconds(); + printf("\nTotal time: %.3f s, file time: %.2f s\n", + exec_time * 0.001, primary_count * 0.01); + printf("Time per frame: %.3f ms (average), %.3f ms (max)," + " %.3f ms (min)\n", + (exec_time * 1.0) / primary_count, + (max_time_us + max_time_reverse_us) / 1000.0, + (min_time_us + min_time_reverse_us) / 1000.0); + } else { + printf("Warning: no capture frames\n"); + } + } + + AudioProcessing::Destroy(apm); + apm = NULL; +} + +int main(int argc, char* argv[]) +{ + void_main(argc, argv); + + return 0; +} diff --git a/src/modules/audio_processing/main/test/unit_test/Android.mk b/src/modules/audio_processing/main/test/unit_test/Android.mk new file mode 100644 index 0000000000..b2029cfb4d --- /dev/null +++ b/src/modules/audio_processing/main/test/unit_test/Android.mk @@ -0,0 +1,49 @@ +# Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +LOCAL_PATH:= $(call my-dir) + +# apm test app + +include $(CLEAR_VARS) + +LOCAL_MODULE_TAGS := tests +LOCAL_CPP_EXTENSION := .cc +LOCAL_SRC_FILES:= \ + unit_test.cc + +# Flags passed to both C and C++ files. 
+LOCAL_CFLAGS := \ + '-DWEBRTC_TARGET_PC' \ + '-DWEBRTC_LINUX' \ + '-DWEBRTC_THREAD_RR' \ + '-DWEBRTC_ANDROID' \ + '-DANDROID' + +LOCAL_CPPFLAGS := +LOCAL_LDFLAGS := +LOCAL_C_INCLUDES := \ + external/gtest/include \ + $(LOCAL_PATH)/../../../../../system_wrappers/interface \ + $(LOCAL_PATH)/../../../../../common_audio/signal_processing_library/main/interface \ + $(LOCAL_PATH)/../../interface \ + $(LOCAL_PATH)/../../../../interface \ + $(LOCAL_PATH)/../../../../.. + +LOCAL_STATIC_LIBRARIES := \ + libgtest + +LOCAL_SHARED_LIBRARIES := \ + libutils \ + libstlport \ + libwebrtc_audio_preprocessing + +LOCAL_MODULE:= webrtc_apm_unit_test + +include external/stlport/libstlport.mk +include $(BUILD_EXECUTABLE) diff --git a/src/modules/audio_processing/main/test/unit_test/audio_processing_unittest.pb.cc b/src/modules/audio_processing/main/test/unit_test/audio_processing_unittest.pb.cc new file mode 100644 index 0000000000..c82ffdb43e --- /dev/null +++ b/src/modules/audio_processing/main/test/unit_test/audio_processing_unittest.pb.cc @@ -0,0 +1,1111 @@ +// Generated by the protocol buffer compiler. DO NOT EDIT! 
+ +#define INTERNAL_SUPPRESS_PROTOBUF_FIELD_DEPRECATION +#include "audio_processing_unittest.pb.h" + +#include <algorithm> + +#include <google/protobuf/stubs/once.h> +#include <google/protobuf/io/coded_stream.h> +#include <google/protobuf/wire_format_lite_inl.h> +// @@protoc_insertion_point(includes) + +namespace audio_processing_unittest { + +void protobuf_ShutdownFile_audio_5fprocessing_5funittest_2eproto() { + delete Test::default_instance_; + delete Test_Statistic::default_instance_; + delete Test_EchoMetrics::default_instance_; + delete OutputData::default_instance_; +} + +void protobuf_AddDesc_audio_5fprocessing_5funittest_2eproto() { + static bool already_here = false; + if (already_here) return; + already_here = true; + GOOGLE_PROTOBUF_VERIFY_VERSION; + + Test::default_instance_ = new Test(); + Test_Statistic::default_instance_ = new Test_Statistic(); + Test_EchoMetrics::default_instance_ = new Test_EchoMetrics(); + OutputData::default_instance_ = new OutputData(); + Test::default_instance_->InitAsDefaultInstance(); + Test_Statistic::default_instance_->InitAsDefaultInstance(); + Test_EchoMetrics::default_instance_->InitAsDefaultInstance(); + OutputData::default_instance_->InitAsDefaultInstance(); + ::google::protobuf::internal::OnShutdown(&protobuf_ShutdownFile_audio_5fprocessing_5funittest_2eproto); +} + +// Force AddDescriptors() to be called at static initialization time. 
+struct StaticDescriptorInitializer_audio_5fprocessing_5funittest_2eproto { + StaticDescriptorInitializer_audio_5fprocessing_5funittest_2eproto() { + protobuf_AddDesc_audio_5fprocessing_5funittest_2eproto(); + } +} static_descriptor_initializer_audio_5fprocessing_5funittest_2eproto_; + + +// =================================================================== + +#ifndef _MSC_VER +const int Test_Statistic::kInstantFieldNumber; +const int Test_Statistic::kAverageFieldNumber; +const int Test_Statistic::kMaximumFieldNumber; +const int Test_Statistic::kMinimumFieldNumber; +#endif // !_MSC_VER + +Test_Statistic::Test_Statistic() + : ::google::protobuf::MessageLite() { + SharedCtor(); +} + +void Test_Statistic::InitAsDefaultInstance() { +} + +Test_Statistic::Test_Statistic(const Test_Statistic& from) + : ::google::protobuf::MessageLite() { + SharedCtor(); + MergeFrom(from); +} + +void Test_Statistic::SharedCtor() { + _cached_size_ = 0; + instant_ = 0; + average_ = 0; + maximum_ = 0; + minimum_ = 0; + ::memset(_has_bits_, 0, sizeof(_has_bits_)); +} + +Test_Statistic::~Test_Statistic() { + SharedDtor(); +} + +void Test_Statistic::SharedDtor() { + if (this != default_instance_) { + } +} + +void Test_Statistic::SetCachedSize(int size) const { + GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); + _cached_size_ = size; + GOOGLE_SAFE_CONCURRENT_WRITES_END(); +} +const Test_Statistic& Test_Statistic::default_instance() { + if (default_instance_ == NULL) protobuf_AddDesc_audio_5fprocessing_5funittest_2eproto(); return *default_instance_; +} + +Test_Statistic* Test_Statistic::default_instance_ = NULL; + +Test_Statistic* Test_Statistic::New() const { + return new Test_Statistic; +} + +void Test_Statistic::Clear() { + if (_has_bits_[0 / 32] & (0xffu << (0 % 32))) { + instant_ = 0; + average_ = 0; + maximum_ = 0; + minimum_ = 0; + } + ::memset(_has_bits_, 0, sizeof(_has_bits_)); +} + +bool Test_Statistic::MergePartialFromCodedStream( + ::google::protobuf::io::CodedInputStream* input) { +#define 
DO_(EXPRESSION) if (!(EXPRESSION)) return false + ::google::protobuf::uint32 tag; + while ((tag = input->ReadTag()) != 0) { + switch (::google::protobuf::internal::WireFormatLite::GetTagFieldNumber(tag)) { + // optional int32 instant = 1; + case 1: { + if (::google::protobuf::internal::WireFormatLite::GetTagWireType(tag) == + ::google::protobuf::internal::WireFormatLite::WIRETYPE_VARINT) { + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( + input, &instant_))); + set_has_instant(); + } else { + goto handle_uninterpreted; + } + if (input->ExpectTag(16)) goto parse_average; + break; + } + + // optional int32 average = 2; + case 2: { + if (::google::protobuf::internal::WireFormatLite::GetTagWireType(tag) == + ::google::protobuf::internal::WireFormatLite::WIRETYPE_VARINT) { + parse_average: + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( + input, &average_))); + set_has_average(); + } else { + goto handle_uninterpreted; + } + if (input->ExpectTag(24)) goto parse_maximum; + break; + } + + // optional int32 maximum = 3; + case 3: { + if (::google::protobuf::internal::WireFormatLite::GetTagWireType(tag) == + ::google::protobuf::internal::WireFormatLite::WIRETYPE_VARINT) { + parse_maximum: + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( + input, &maximum_))); + set_has_maximum(); + } else { + goto handle_uninterpreted; + } + if (input->ExpectTag(32)) goto parse_minimum; + break; + } + + // optional int32 minimum = 4; + case 4: { + if (::google::protobuf::internal::WireFormatLite::GetTagWireType(tag) == + ::google::protobuf::internal::WireFormatLite::WIRETYPE_VARINT) { + parse_minimum: + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + 
::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( + input, &minimum_))); + set_has_minimum(); + } else { + goto handle_uninterpreted; + } + if (input->ExpectAtEnd()) return true; + break; + } + + default: { + handle_uninterpreted: + if (::google::protobuf::internal::WireFormatLite::GetTagWireType(tag) == + ::google::protobuf::internal::WireFormatLite::WIRETYPE_END_GROUP) { + return true; + } + DO_(::google::protobuf::internal::WireFormatLite::SkipField(input, tag, NULL)); + break; + } + } + } + return true; +#undef DO_ +} + +void Test_Statistic::SerializeWithCachedSizes( + ::google::protobuf::io::CodedOutputStream* output) const { + // optional int32 instant = 1; + if (has_instant()) { + ::google::protobuf::internal::WireFormatLite::WriteInt32(1, this->instant(), output); + } + + // optional int32 average = 2; + if (has_average()) { + ::google::protobuf::internal::WireFormatLite::WriteInt32(2, this->average(), output); + } + + // optional int32 maximum = 3; + if (has_maximum()) { + ::google::protobuf::internal::WireFormatLite::WriteInt32(3, this->maximum(), output); + } + + // optional int32 minimum = 4; + if (has_minimum()) { + ::google::protobuf::internal::WireFormatLite::WriteInt32(4, this->minimum(), output); + } + +} + +int Test_Statistic::ByteSize() const { + int total_size = 0; + + if (_has_bits_[0 / 32] & (0xffu << (0 % 32))) { + // optional int32 instant = 1; + if (has_instant()) { + total_size += 1 + + ::google::protobuf::internal::WireFormatLite::Int32Size( + this->instant()); + } + + // optional int32 average = 2; + if (has_average()) { + total_size += 1 + + ::google::protobuf::internal::WireFormatLite::Int32Size( + this->average()); + } + + // optional int32 maximum = 3; + if (has_maximum()) { + total_size += 1 + + ::google::protobuf::internal::WireFormatLite::Int32Size( + this->maximum()); + } + + // optional int32 minimum = 4; + if (has_minimum()) { + total_size += 1 + + 
::google::protobuf::internal::WireFormatLite::Int32Size( + this->minimum()); + } + + } + GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); + _cached_size_ = total_size; + GOOGLE_SAFE_CONCURRENT_WRITES_END(); + return total_size; +} + +void Test_Statistic::CheckTypeAndMergeFrom( + const ::google::protobuf::MessageLite& from) { + MergeFrom(*::google::protobuf::down_cast<const Test_Statistic*>(&from)); +} + +void Test_Statistic::MergeFrom(const Test_Statistic& from) { + GOOGLE_CHECK_NE(&from, this); + if (from._has_bits_[0 / 32] & (0xffu << (0 % 32))) { + if (from.has_instant()) { + set_instant(from.instant()); + } + if (from.has_average()) { + set_average(from.average()); + } + if (from.has_maximum()) { + set_maximum(from.maximum()); + } + if (from.has_minimum()) { + set_minimum(from.minimum()); + } + } +} + +void Test_Statistic::CopyFrom(const Test_Statistic& from) { + if (&from == this) return; + Clear(); + MergeFrom(from); +} + +bool Test_Statistic::IsInitialized() const { + + return true; +} + +void Test_Statistic::Swap(Test_Statistic* other) { + if (other != this) { + std::swap(instant_, other->instant_); + std::swap(average_, other->average_); + std::swap(maximum_, other->maximum_); + std::swap(minimum_, other->minimum_); + std::swap(_has_bits_[0], other->_has_bits_[0]); + std::swap(_cached_size_, other->_cached_size_); + } +} + +::std::string Test_Statistic::GetTypeName() const { + return "audio_processing_unittest.Test.Statistic"; +} + + +// ------------------------------------------------------------------- + +#ifndef _MSC_VER +const int Test_EchoMetrics::kResidualEchoReturnLossFieldNumber; +const int Test_EchoMetrics::kEchoReturnLossFieldNumber; +const int Test_EchoMetrics::kEchoReturnLossEnhancementFieldNumber; +const int Test_EchoMetrics::kANlpFieldNumber; +#endif // !_MSC_VER + +Test_EchoMetrics::Test_EchoMetrics() + : ::google::protobuf::MessageLite() { + SharedCtor(); +} + +void Test_EchoMetrics::InitAsDefaultInstance() { + residualechoreturnloss_ = const_cast< 
::audio_processing_unittest::Test_Statistic*>(&::audio_processing_unittest::Test_Statistic::default_instance()); + echoreturnloss_ = const_cast< ::audio_processing_unittest::Test_Statistic*>(&::audio_processing_unittest::Test_Statistic::default_instance()); + echoreturnlossenhancement_ = const_cast< ::audio_processing_unittest::Test_Statistic*>(&::audio_processing_unittest::Test_Statistic::default_instance()); + anlp_ = const_cast< ::audio_processing_unittest::Test_Statistic*>(&::audio_processing_unittest::Test_Statistic::default_instance()); +} + +Test_EchoMetrics::Test_EchoMetrics(const Test_EchoMetrics& from) + : ::google::protobuf::MessageLite() { + SharedCtor(); + MergeFrom(from); +} + +void Test_EchoMetrics::SharedCtor() { + _cached_size_ = 0; + residualechoreturnloss_ = NULL; + echoreturnloss_ = NULL; + echoreturnlossenhancement_ = NULL; + anlp_ = NULL; + ::memset(_has_bits_, 0, sizeof(_has_bits_)); +} + +Test_EchoMetrics::~Test_EchoMetrics() { + SharedDtor(); +} + +void Test_EchoMetrics::SharedDtor() { + if (this != default_instance_) { + delete residualechoreturnloss_; + delete echoreturnloss_; + delete echoreturnlossenhancement_; + delete anlp_; + } +} + +void Test_EchoMetrics::SetCachedSize(int size) const { + GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); + _cached_size_ = size; + GOOGLE_SAFE_CONCURRENT_WRITES_END(); +} +const Test_EchoMetrics& Test_EchoMetrics::default_instance() { + if (default_instance_ == NULL) protobuf_AddDesc_audio_5fprocessing_5funittest_2eproto(); return *default_instance_; +} + +Test_EchoMetrics* Test_EchoMetrics::default_instance_ = NULL; + +Test_EchoMetrics* Test_EchoMetrics::New() const { + return new Test_EchoMetrics; +} + +void Test_EchoMetrics::Clear() { + if (_has_bits_[0 / 32] & (0xffu << (0 % 32))) { + if (has_residualechoreturnloss()) { + if (residualechoreturnloss_ != NULL) residualechoreturnloss_->::audio_processing_unittest::Test_Statistic::Clear(); + } + if (has_echoreturnloss()) { + if (echoreturnloss_ != NULL) 
echoreturnloss_->::audio_processing_unittest::Test_Statistic::Clear(); + } + if (has_echoreturnlossenhancement()) { + if (echoreturnlossenhancement_ != NULL) echoreturnlossenhancement_->::audio_processing_unittest::Test_Statistic::Clear(); + } + if (has_anlp()) { + if (anlp_ != NULL) anlp_->::audio_processing_unittest::Test_Statistic::Clear(); + } + } + ::memset(_has_bits_, 0, sizeof(_has_bits_)); +} + +bool Test_EchoMetrics::MergePartialFromCodedStream( + ::google::protobuf::io::CodedInputStream* input) { +#define DO_(EXPRESSION) if (!(EXPRESSION)) return false + ::google::protobuf::uint32 tag; + while ((tag = input->ReadTag()) != 0) { + switch (::google::protobuf::internal::WireFormatLite::GetTagFieldNumber(tag)) { + // optional .audio_processing_unittest.Test.Statistic residualEchoReturnLoss = 1; + case 1: { + if (::google::protobuf::internal::WireFormatLite::GetTagWireType(tag) == + ::google::protobuf::internal::WireFormatLite::WIRETYPE_LENGTH_DELIMITED) { + DO_(::google::protobuf::internal::WireFormatLite::ReadMessageNoVirtual( + input, mutable_residualechoreturnloss())); + } else { + goto handle_uninterpreted; + } + if (input->ExpectTag(18)) goto parse_echoReturnLoss; + break; + } + + // optional .audio_processing_unittest.Test.Statistic echoReturnLoss = 2; + case 2: { + if (::google::protobuf::internal::WireFormatLite::GetTagWireType(tag) == + ::google::protobuf::internal::WireFormatLite::WIRETYPE_LENGTH_DELIMITED) { + parse_echoReturnLoss: + DO_(::google::protobuf::internal::WireFormatLite::ReadMessageNoVirtual( + input, mutable_echoreturnloss())); + } else { + goto handle_uninterpreted; + } + if (input->ExpectTag(26)) goto parse_echoReturnLossEnhancement; + break; + } + + // optional .audio_processing_unittest.Test.Statistic echoReturnLossEnhancement = 3; + case 3: { + if (::google::protobuf::internal::WireFormatLite::GetTagWireType(tag) == + ::google::protobuf::internal::WireFormatLite::WIRETYPE_LENGTH_DELIMITED) { + parse_echoReturnLossEnhancement: + 
DO_(::google::protobuf::internal::WireFormatLite::ReadMessageNoVirtual( + input, mutable_echoreturnlossenhancement())); + } else { + goto handle_uninterpreted; + } + if (input->ExpectTag(34)) goto parse_aNlp; + break; + } + + // optional .audio_processing_unittest.Test.Statistic aNlp = 4; + case 4: { + if (::google::protobuf::internal::WireFormatLite::GetTagWireType(tag) == + ::google::protobuf::internal::WireFormatLite::WIRETYPE_LENGTH_DELIMITED) { + parse_aNlp: + DO_(::google::protobuf::internal::WireFormatLite::ReadMessageNoVirtual( + input, mutable_anlp())); + } else { + goto handle_uninterpreted; + } + if (input->ExpectAtEnd()) return true; + break; + } + + default: { + handle_uninterpreted: + if (::google::protobuf::internal::WireFormatLite::GetTagWireType(tag) == + ::google::protobuf::internal::WireFormatLite::WIRETYPE_END_GROUP) { + return true; + } + DO_(::google::protobuf::internal::WireFormatLite::SkipField(input, tag, NULL)); + break; + } + } + } + return true; +#undef DO_ +} + +void Test_EchoMetrics::SerializeWithCachedSizes( + ::google::protobuf::io::CodedOutputStream* output) const { + // optional .audio_processing_unittest.Test.Statistic residualEchoReturnLoss = 1; + if (has_residualechoreturnloss()) { + ::google::protobuf::internal::WireFormatLite::WriteMessage( + 1, this->residualechoreturnloss(), output); + } + + // optional .audio_processing_unittest.Test.Statistic echoReturnLoss = 2; + if (has_echoreturnloss()) { + ::google::protobuf::internal::WireFormatLite::WriteMessage( + 2, this->echoreturnloss(), output); + } + + // optional .audio_processing_unittest.Test.Statistic echoReturnLossEnhancement = 3; + if (has_echoreturnlossenhancement()) { + ::google::protobuf::internal::WireFormatLite::WriteMessage( + 3, this->echoreturnlossenhancement(), output); + } + + // optional .audio_processing_unittest.Test.Statistic aNlp = 4; + if (has_anlp()) { + ::google::protobuf::internal::WireFormatLite::WriteMessage( + 4, this->anlp(), output); + } + +} + 
+int Test_EchoMetrics::ByteSize() const { + int total_size = 0; + + if (_has_bits_[0 / 32] & (0xffu << (0 % 32))) { + // optional .audio_processing_unittest.Test.Statistic residualEchoReturnLoss = 1; + if (has_residualechoreturnloss()) { + total_size += 1 + + ::google::protobuf::internal::WireFormatLite::MessageSizeNoVirtual( + this->residualechoreturnloss()); + } + + // optional .audio_processing_unittest.Test.Statistic echoReturnLoss = 2; + if (has_echoreturnloss()) { + total_size += 1 + + ::google::protobuf::internal::WireFormatLite::MessageSizeNoVirtual( + this->echoreturnloss()); + } + + // optional .audio_processing_unittest.Test.Statistic echoReturnLossEnhancement = 3; + if (has_echoreturnlossenhancement()) { + total_size += 1 + + ::google::protobuf::internal::WireFormatLite::MessageSizeNoVirtual( + this->echoreturnlossenhancement()); + } + + // optional .audio_processing_unittest.Test.Statistic aNlp = 4; + if (has_anlp()) { + total_size += 1 + + ::google::protobuf::internal::WireFormatLite::MessageSizeNoVirtual( + this->anlp()); + } + + } + GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); + _cached_size_ = total_size; + GOOGLE_SAFE_CONCURRENT_WRITES_END(); + return total_size; +} + +void Test_EchoMetrics::CheckTypeAndMergeFrom( + const ::google::protobuf::MessageLite& from) { + MergeFrom(*::google::protobuf::down_cast<const Test_EchoMetrics*>(&from)); +} + +void Test_EchoMetrics::MergeFrom(const Test_EchoMetrics& from) { + GOOGLE_CHECK_NE(&from, this); + if (from._has_bits_[0 / 32] & (0xffu << (0 % 32))) { + if (from.has_residualechoreturnloss()) { + mutable_residualechoreturnloss()->::audio_processing_unittest::Test_Statistic::MergeFrom(from.residualechoreturnloss()); + } + if (from.has_echoreturnloss()) { + mutable_echoreturnloss()->::audio_processing_unittest::Test_Statistic::MergeFrom(from.echoreturnloss()); + } + if (from.has_echoreturnlossenhancement()) { + 
mutable_echoreturnlossenhancement()->::audio_processing_unittest::Test_Statistic::MergeFrom(from.echoreturnlossenhancement()); + } + if (from.has_anlp()) { + mutable_anlp()->::audio_processing_unittest::Test_Statistic::MergeFrom(from.anlp()); + } + } +} + +void Test_EchoMetrics::CopyFrom(const Test_EchoMetrics& from) { + if (&from == this) return; + Clear(); + MergeFrom(from); +} + +bool Test_EchoMetrics::IsInitialized() const { + + return true; +} + +void Test_EchoMetrics::Swap(Test_EchoMetrics* other) { + if (other != this) { + std::swap(residualechoreturnloss_, other->residualechoreturnloss_); + std::swap(echoreturnloss_, other->echoreturnloss_); + std::swap(echoreturnlossenhancement_, other->echoreturnlossenhancement_); + std::swap(anlp_, other->anlp_); + std::swap(_has_bits_[0], other->_has_bits_[0]); + std::swap(_cached_size_, other->_cached_size_); + } +} + +::std::string Test_EchoMetrics::GetTypeName() const { + return "audio_processing_unittest.Test.EchoMetrics"; +} + + +// ------------------------------------------------------------------- + +#ifndef _MSC_VER +const int Test::kNumReverseChannelsFieldNumber; +const int Test::kNumChannelsFieldNumber; +const int Test::kSampleRateFieldNumber; +const int Test::kHasEchoCountFieldNumber; +const int Test::kHasVoiceCountFieldNumber; +const int Test::kIsSaturatedCountFieldNumber; +const int Test::kEchoMetricsFieldNumber; +#endif // !_MSC_VER + +Test::Test() + : ::google::protobuf::MessageLite() { + SharedCtor(); +} + +void Test::InitAsDefaultInstance() { + echometrics_ = const_cast< ::audio_processing_unittest::Test_EchoMetrics*>(&::audio_processing_unittest::Test_EchoMetrics::default_instance()); +} + +Test::Test(const Test& from) + : ::google::protobuf::MessageLite() { + SharedCtor(); + MergeFrom(from); +} + +void Test::SharedCtor() { + _cached_size_ = 0; + numreversechannels_ = 0; + numchannels_ = 0; + samplerate_ = 0; + hasechocount_ = 0; + hasvoicecount_ = 0; + issaturatedcount_ = 0; + echometrics_ = NULL; + 
::memset(_has_bits_, 0, sizeof(_has_bits_)); +} + +Test::~Test() { + SharedDtor(); +} + +void Test::SharedDtor() { + if (this != default_instance_) { + delete echometrics_; + } +} + +void Test::SetCachedSize(int size) const { + GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); + _cached_size_ = size; + GOOGLE_SAFE_CONCURRENT_WRITES_END(); +} +const Test& Test::default_instance() { + if (default_instance_ == NULL) protobuf_AddDesc_audio_5fprocessing_5funittest_2eproto(); return *default_instance_; +} + +Test* Test::default_instance_ = NULL; + +Test* Test::New() const { + return new Test; +} + +void Test::Clear() { + if (_has_bits_[0 / 32] & (0xffu << (0 % 32))) { + numreversechannels_ = 0; + numchannels_ = 0; + samplerate_ = 0; + hasechocount_ = 0; + hasvoicecount_ = 0; + issaturatedcount_ = 0; + if (has_echometrics()) { + if (echometrics_ != NULL) echometrics_->::audio_processing_unittest::Test_EchoMetrics::Clear(); + } + } + ::memset(_has_bits_, 0, sizeof(_has_bits_)); +} + +bool Test::MergePartialFromCodedStream( + ::google::protobuf::io::CodedInputStream* input) { +#define DO_(EXPRESSION) if (!(EXPRESSION)) return false + ::google::protobuf::uint32 tag; + while ((tag = input->ReadTag()) != 0) { + switch (::google::protobuf::internal::WireFormatLite::GetTagFieldNumber(tag)) { + // optional int32 numReverseChannels = 1; + case 1: { + if (::google::protobuf::internal::WireFormatLite::GetTagWireType(tag) == + ::google::protobuf::internal::WireFormatLite::WIRETYPE_VARINT) { + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( + input, &numreversechannels_))); + set_has_numreversechannels(); + } else { + goto handle_uninterpreted; + } + if (input->ExpectTag(16)) goto parse_numChannels; + break; + } + + // optional int32 numChannels = 2; + case 2: { + if (::google::protobuf::internal::WireFormatLite::GetTagWireType(tag) == + 
::google::protobuf::internal::WireFormatLite::WIRETYPE_VARINT) { + parse_numChannels: + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( + input, &numchannels_))); + set_has_numchannels(); + } else { + goto handle_uninterpreted; + } + if (input->ExpectTag(24)) goto parse_sampleRate; + break; + } + + // optional int32 sampleRate = 3; + case 3: { + if (::google::protobuf::internal::WireFormatLite::GetTagWireType(tag) == + ::google::protobuf::internal::WireFormatLite::WIRETYPE_VARINT) { + parse_sampleRate: + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( + input, &samplerate_))); + set_has_samplerate(); + } else { + goto handle_uninterpreted; + } + if (input->ExpectTag(32)) goto parse_hasEchoCount; + break; + } + + // optional int32 hasEchoCount = 4; + case 4: { + if (::google::protobuf::internal::WireFormatLite::GetTagWireType(tag) == + ::google::protobuf::internal::WireFormatLite::WIRETYPE_VARINT) { + parse_hasEchoCount: + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( + input, &hasechocount_))); + set_has_hasechocount(); + } else { + goto handle_uninterpreted; + } + if (input->ExpectTag(40)) goto parse_hasVoiceCount; + break; + } + + // optional int32 hasVoiceCount = 5; + case 5: { + if (::google::protobuf::internal::WireFormatLite::GetTagWireType(tag) == + ::google::protobuf::internal::WireFormatLite::WIRETYPE_VARINT) { + parse_hasVoiceCount: + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( + input, &hasvoicecount_))); + set_has_hasvoicecount(); + } else { + goto handle_uninterpreted; + } + if (input->ExpectTag(48)) goto parse_isSaturatedCount; + 
break; + } + + // optional int32 isSaturatedCount = 6; + case 6: { + if (::google::protobuf::internal::WireFormatLite::GetTagWireType(tag) == + ::google::protobuf::internal::WireFormatLite::WIRETYPE_VARINT) { + parse_isSaturatedCount: + DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< + ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( + input, &issaturatedcount_))); + set_has_issaturatedcount(); + } else { + goto handle_uninterpreted; + } + if (input->ExpectTag(58)) goto parse_echoMetrics; + break; + } + + // optional .audio_processing_unittest.Test.EchoMetrics echoMetrics = 7; + case 7: { + if (::google::protobuf::internal::WireFormatLite::GetTagWireType(tag) == + ::google::protobuf::internal::WireFormatLite::WIRETYPE_LENGTH_DELIMITED) { + parse_echoMetrics: + DO_(::google::protobuf::internal::WireFormatLite::ReadMessageNoVirtual( + input, mutable_echometrics())); + } else { + goto handle_uninterpreted; + } + if (input->ExpectAtEnd()) return true; + break; + } + + default: { + handle_uninterpreted: + if (::google::protobuf::internal::WireFormatLite::GetTagWireType(tag) == + ::google::protobuf::internal::WireFormatLite::WIRETYPE_END_GROUP) { + return true; + } + DO_(::google::protobuf::internal::WireFormatLite::SkipField(input, tag, NULL)); + break; + } + } + } + return true; +#undef DO_ +} + +void Test::SerializeWithCachedSizes( + ::google::protobuf::io::CodedOutputStream* output) const { + // optional int32 numReverseChannels = 1; + if (has_numreversechannels()) { + ::google::protobuf::internal::WireFormatLite::WriteInt32(1, this->numreversechannels(), output); + } + + // optional int32 numChannels = 2; + if (has_numchannels()) { + ::google::protobuf::internal::WireFormatLite::WriteInt32(2, this->numchannels(), output); + } + + // optional int32 sampleRate = 3; + if (has_samplerate()) { + ::google::protobuf::internal::WireFormatLite::WriteInt32(3, this->samplerate(), output); + } + + // optional int32 
hasEchoCount = 4; + if (has_hasechocount()) { + ::google::protobuf::internal::WireFormatLite::WriteInt32(4, this->hasechocount(), output); + } + + // optional int32 hasVoiceCount = 5; + if (has_hasvoicecount()) { + ::google::protobuf::internal::WireFormatLite::WriteInt32(5, this->hasvoicecount(), output); + } + + // optional int32 isSaturatedCount = 6; + if (has_issaturatedcount()) { + ::google::protobuf::internal::WireFormatLite::WriteInt32(6, this->issaturatedcount(), output); + } + + // optional .audio_processing_unittest.Test.EchoMetrics echoMetrics = 7; + if (has_echometrics()) { + ::google::protobuf::internal::WireFormatLite::WriteMessage( + 7, this->echometrics(), output); + } + +} + +int Test::ByteSize() const { + int total_size = 0; + + if (_has_bits_[0 / 32] & (0xffu << (0 % 32))) { + // optional int32 numReverseChannels = 1; + if (has_numreversechannels()) { + total_size += 1 + + ::google::protobuf::internal::WireFormatLite::Int32Size( + this->numreversechannels()); + } + + // optional int32 numChannels = 2; + if (has_numchannels()) { + total_size += 1 + + ::google::protobuf::internal::WireFormatLite::Int32Size( + this->numchannels()); + } + + // optional int32 sampleRate = 3; + if (has_samplerate()) { + total_size += 1 + + ::google::protobuf::internal::WireFormatLite::Int32Size( + this->samplerate()); + } + + // optional int32 hasEchoCount = 4; + if (has_hasechocount()) { + total_size += 1 + + ::google::protobuf::internal::WireFormatLite::Int32Size( + this->hasechocount()); + } + + // optional int32 hasVoiceCount = 5; + if (has_hasvoicecount()) { + total_size += 1 + + ::google::protobuf::internal::WireFormatLite::Int32Size( + this->hasvoicecount()); + } + + // optional int32 isSaturatedCount = 6; + if (has_issaturatedcount()) { + total_size += 1 + + ::google::protobuf::internal::WireFormatLite::Int32Size( + this->issaturatedcount()); + } + + // optional .audio_processing_unittest.Test.EchoMetrics echoMetrics = 7; + if (has_echometrics()) { + total_size 
+= 1 + + ::google::protobuf::internal::WireFormatLite::MessageSizeNoVirtual( + this->echometrics()); + } + + } + GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); + _cached_size_ = total_size; + GOOGLE_SAFE_CONCURRENT_WRITES_END(); + return total_size; +} + +void Test::CheckTypeAndMergeFrom( + const ::google::protobuf::MessageLite& from) { + MergeFrom(*::google::protobuf::down_cast<const Test*>(&from)); +} + +void Test::MergeFrom(const Test& from) { + GOOGLE_CHECK_NE(&from, this); + if (from._has_bits_[0 / 32] & (0xffu << (0 % 32))) { + if (from.has_numreversechannels()) { + set_numreversechannels(from.numreversechannels()); + } + if (from.has_numchannels()) { + set_numchannels(from.numchannels()); + } + if (from.has_samplerate()) { + set_samplerate(from.samplerate()); + } + if (from.has_hasechocount()) { + set_hasechocount(from.hasechocount()); + } + if (from.has_hasvoicecount()) { + set_hasvoicecount(from.hasvoicecount()); + } + if (from.has_issaturatedcount()) { + set_issaturatedcount(from.issaturatedcount()); + } + if (from.has_echometrics()) { + mutable_echometrics()->::audio_processing_unittest::Test_EchoMetrics::MergeFrom(from.echometrics()); + } + } +} + +void Test::CopyFrom(const Test& from) { + if (&from == this) return; + Clear(); + MergeFrom(from); +} + +bool Test::IsInitialized() const { + + return true; +} + +void Test::Swap(Test* other) { + if (other != this) { + std::swap(numreversechannels_, other->numreversechannels_); + std::swap(numchannels_, other->numchannels_); + std::swap(samplerate_, other->samplerate_); + std::swap(hasechocount_, other->hasechocount_); + std::swap(hasvoicecount_, other->hasvoicecount_); + std::swap(issaturatedcount_, other->issaturatedcount_); + std::swap(echometrics_, other->echometrics_); + std::swap(_has_bits_[0], other->_has_bits_[0]); + std::swap(_cached_size_, other->_cached_size_); + } +} + +::std::string Test::GetTypeName() const { + return "audio_processing_unittest.Test"; +} + + +// 
=================================================================== + +#ifndef _MSC_VER +const int OutputData::kTestFieldNumber; +#endif // !_MSC_VER + +OutputData::OutputData() + : ::google::protobuf::MessageLite() { + SharedCtor(); +} + +void OutputData::InitAsDefaultInstance() { +} + +OutputData::OutputData(const OutputData& from) + : ::google::protobuf::MessageLite() { + SharedCtor(); + MergeFrom(from); +} + +void OutputData::SharedCtor() { + _cached_size_ = 0; + ::memset(_has_bits_, 0, sizeof(_has_bits_)); +} + +OutputData::~OutputData() { + SharedDtor(); +} + +void OutputData::SharedDtor() { + if (this != default_instance_) { + } +} + +void OutputData::SetCachedSize(int size) const { + GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); + _cached_size_ = size; + GOOGLE_SAFE_CONCURRENT_WRITES_END(); +} +const OutputData& OutputData::default_instance() { + if (default_instance_ == NULL) protobuf_AddDesc_audio_5fprocessing_5funittest_2eproto(); return *default_instance_; +} + +OutputData* OutputData::default_instance_ = NULL; + +OutputData* OutputData::New() const { + return new OutputData; +} + +void OutputData::Clear() { + test_.Clear(); + ::memset(_has_bits_, 0, sizeof(_has_bits_)); +} + +bool OutputData::MergePartialFromCodedStream( + ::google::protobuf::io::CodedInputStream* input) { +#define DO_(EXPRESSION) if (!(EXPRESSION)) return false + ::google::protobuf::uint32 tag; + while ((tag = input->ReadTag()) != 0) { + switch (::google::protobuf::internal::WireFormatLite::GetTagFieldNumber(tag)) { + // repeated .audio_processing_unittest.Test test = 1; + case 1: { + if (::google::protobuf::internal::WireFormatLite::GetTagWireType(tag) == + ::google::protobuf::internal::WireFormatLite::WIRETYPE_LENGTH_DELIMITED) { + parse_test: + DO_(::google::protobuf::internal::WireFormatLite::ReadMessageNoVirtual( + input, add_test())); + } else { + goto handle_uninterpreted; + } + if (input->ExpectTag(10)) goto parse_test; + if (input->ExpectAtEnd()) return true; + break; + } + + 
default: { + handle_uninterpreted: + if (::google::protobuf::internal::WireFormatLite::GetTagWireType(tag) == + ::google::protobuf::internal::WireFormatLite::WIRETYPE_END_GROUP) { + return true; + } + DO_(::google::protobuf::internal::WireFormatLite::SkipField(input, tag, NULL)); + break; + } + } + } + return true; +#undef DO_ +} + +void OutputData::SerializeWithCachedSizes( + ::google::protobuf::io::CodedOutputStream* output) const { + // repeated .audio_processing_unittest.Test test = 1; + for (int i = 0; i < this->test_size(); i++) { + ::google::protobuf::internal::WireFormatLite::WriteMessage( + 1, this->test(i), output); + } + +} + +int OutputData::ByteSize() const { + int total_size = 0; + + // repeated .audio_processing_unittest.Test test = 1; + total_size += 1 * this->test_size(); + for (int i = 0; i < this->test_size(); i++) { + total_size += + ::google::protobuf::internal::WireFormatLite::MessageSizeNoVirtual( + this->test(i)); + } + + GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN(); + _cached_size_ = total_size; + GOOGLE_SAFE_CONCURRENT_WRITES_END(); + return total_size; +} + +void OutputData::CheckTypeAndMergeFrom( + const ::google::protobuf::MessageLite& from) { + MergeFrom(*::google::protobuf::down_cast<const OutputData*>(&from)); +} + +void OutputData::MergeFrom(const OutputData& from) { + GOOGLE_CHECK_NE(&from, this); + test_.MergeFrom(from.test_); +} + +void OutputData::CopyFrom(const OutputData& from) { + if (&from == this) return; + Clear(); + MergeFrom(from); +} + +bool OutputData::IsInitialized() const { + + return true; +} + +void OutputData::Swap(OutputData* other) { + if (other != this) { + test_.Swap(&other->test_); + std::swap(_has_bits_[0], other->_has_bits_[0]); + std::swap(_cached_size_, other->_cached_size_); + } +} + +::std::string OutputData::GetTypeName() const { + return "audio_processing_unittest.OutputData"; +} + + +// @@protoc_insertion_point(namespace_scope) + +} // namespace audio_processing_unittest + +// 
@@protoc_insertion_point(global_scope) diff --git a/src/modules/audio_processing/main/test/unit_test/audio_processing_unittest.pb.h b/src/modules/audio_processing/main/test/unit_test/audio_processing_unittest.pb.h new file mode 100644 index 0000000000..34c21b2f40 --- /dev/null +++ b/src/modules/audio_processing/main/test/unit_test/audio_processing_unittest.pb.h @@ -0,0 +1,862 @@ +// Generated by the protocol buffer compiler. DO NOT EDIT! +// source: audio_processing_unittest.proto + +#ifndef PROTOBUF_audio_5fprocessing_5funittest_2eproto__INCLUDED +#define PROTOBUF_audio_5fprocessing_5funittest_2eproto__INCLUDED + +#include <string> + +#include <google/protobuf/stubs/common.h> + +#if GOOGLE_PROTOBUF_VERSION < 2004000 +#error This file was generated by a newer version of protoc which is +#error incompatible with your Protocol Buffer headers. Please update +#error your headers. +#endif +#if 2004000 < GOOGLE_PROTOBUF_MIN_PROTOC_VERSION +#error This file was generated by an older version of protoc which is +#error incompatible with your Protocol Buffer headers. Please +#error regenerate this file with a newer version of protoc. +#endif + +#include <google/protobuf/generated_message_util.h> +#include <google/protobuf/repeated_field.h> +#include <google/protobuf/extension_set.h> +// @@protoc_insertion_point(includes) + +namespace audio_processing_unittest { + +// Internal implementation detail -- do not call these. 
+void protobuf_AddDesc_audio_5fprocessing_5funittest_2eproto(); +void protobuf_AssignDesc_audio_5fprocessing_5funittest_2eproto(); +void protobuf_ShutdownFile_audio_5fprocessing_5funittest_2eproto(); + +class Test; +class Test_Statistic; +class Test_EchoMetrics; +class OutputData; + +// =================================================================== + +class Test_Statistic : public ::google::protobuf::MessageLite { + public: + Test_Statistic(); + virtual ~Test_Statistic(); + + Test_Statistic(const Test_Statistic& from); + + inline Test_Statistic& operator=(const Test_Statistic& from) { + CopyFrom(from); + return *this; + } + + static const Test_Statistic& default_instance(); + + void Swap(Test_Statistic* other); + + // implements Message ---------------------------------------------- + + Test_Statistic* New() const; + void CheckTypeAndMergeFrom(const ::google::protobuf::MessageLite& from); + void CopyFrom(const Test_Statistic& from); + void MergeFrom(const Test_Statistic& from); + void Clear(); + bool IsInitialized() const; + + int ByteSize() const; + bool MergePartialFromCodedStream( + ::google::protobuf::io::CodedInputStream* input); + void SerializeWithCachedSizes( + ::google::protobuf::io::CodedOutputStream* output) const; + int GetCachedSize() const { return _cached_size_; } + private: + void SharedCtor(); + void SharedDtor(); + void SetCachedSize(int size) const; + public: + + ::std::string GetTypeName() const; + + // nested types ---------------------------------------------------- + + // accessors ------------------------------------------------------- + + // optional int32 instant = 1; + inline bool has_instant() const; + inline void clear_instant(); + static const int kInstantFieldNumber = 1; + inline ::google::protobuf::int32 instant() const; + inline void set_instant(::google::protobuf::int32 value); + + // optional int32 average = 2; + inline bool has_average() const; + inline void clear_average(); + static const int kAverageFieldNumber = 2; + 
inline ::google::protobuf::int32 average() const; + inline void set_average(::google::protobuf::int32 value); + + // optional int32 maximum = 3; + inline bool has_maximum() const; + inline void clear_maximum(); + static const int kMaximumFieldNumber = 3; + inline ::google::protobuf::int32 maximum() const; + inline void set_maximum(::google::protobuf::int32 value); + + // optional int32 minimum = 4; + inline bool has_minimum() const; + inline void clear_minimum(); + static const int kMinimumFieldNumber = 4; + inline ::google::protobuf::int32 minimum() const; + inline void set_minimum(::google::protobuf::int32 value); + + // @@protoc_insertion_point(class_scope:audio_processing_unittest.Test.Statistic) + private: + inline void set_has_instant(); + inline void clear_has_instant(); + inline void set_has_average(); + inline void clear_has_average(); + inline void set_has_maximum(); + inline void clear_has_maximum(); + inline void set_has_minimum(); + inline void clear_has_minimum(); + + ::google::protobuf::int32 instant_; + ::google::protobuf::int32 average_; + ::google::protobuf::int32 maximum_; + ::google::protobuf::int32 minimum_; + + mutable int _cached_size_; + ::google::protobuf::uint32 _has_bits_[(4 + 31) / 32]; + + friend void protobuf_AddDesc_audio_5fprocessing_5funittest_2eproto(); + friend void protobuf_AssignDesc_audio_5fprocessing_5funittest_2eproto(); + friend void protobuf_ShutdownFile_audio_5fprocessing_5funittest_2eproto(); + + void InitAsDefaultInstance(); + static Test_Statistic* default_instance_; +}; +// ------------------------------------------------------------------- + +class Test_EchoMetrics : public ::google::protobuf::MessageLite { + public: + Test_EchoMetrics(); + virtual ~Test_EchoMetrics(); + + Test_EchoMetrics(const Test_EchoMetrics& from); + + inline Test_EchoMetrics& operator=(const Test_EchoMetrics& from) { + CopyFrom(from); + return *this; + } + + static const Test_EchoMetrics& default_instance(); + + void Swap(Test_EchoMetrics* 
other); + + // implements Message ---------------------------------------------- + + Test_EchoMetrics* New() const; + void CheckTypeAndMergeFrom(const ::google::protobuf::MessageLite& from); + void CopyFrom(const Test_EchoMetrics& from); + void MergeFrom(const Test_EchoMetrics& from); + void Clear(); + bool IsInitialized() const; + + int ByteSize() const; + bool MergePartialFromCodedStream( + ::google::protobuf::io::CodedInputStream* input); + void SerializeWithCachedSizes( + ::google::protobuf::io::CodedOutputStream* output) const; + int GetCachedSize() const { return _cached_size_; } + private: + void SharedCtor(); + void SharedDtor(); + void SetCachedSize(int size) const; + public: + + ::std::string GetTypeName() const; + + // nested types ---------------------------------------------------- + + // accessors ------------------------------------------------------- + + // optional .audio_processing_unittest.Test.Statistic residualEchoReturnLoss = 1; + inline bool has_residualechoreturnloss() const; + inline void clear_residualechoreturnloss(); + static const int kResidualEchoReturnLossFieldNumber = 1; + inline const ::audio_processing_unittest::Test_Statistic& residualechoreturnloss() const; + inline ::audio_processing_unittest::Test_Statistic* mutable_residualechoreturnloss(); + inline ::audio_processing_unittest::Test_Statistic* release_residualechoreturnloss(); + + // optional .audio_processing_unittest.Test.Statistic echoReturnLoss = 2; + inline bool has_echoreturnloss() const; + inline void clear_echoreturnloss(); + static const int kEchoReturnLossFieldNumber = 2; + inline const ::audio_processing_unittest::Test_Statistic& echoreturnloss() const; + inline ::audio_processing_unittest::Test_Statistic* mutable_echoreturnloss(); + inline ::audio_processing_unittest::Test_Statistic* release_echoreturnloss(); + + // optional .audio_processing_unittest.Test.Statistic echoReturnLossEnhancement = 3; + inline bool has_echoreturnlossenhancement() const; + inline void 
clear_echoreturnlossenhancement(); + static const int kEchoReturnLossEnhancementFieldNumber = 3; + inline const ::audio_processing_unittest::Test_Statistic& echoreturnlossenhancement() const; + inline ::audio_processing_unittest::Test_Statistic* mutable_echoreturnlossenhancement(); + inline ::audio_processing_unittest::Test_Statistic* release_echoreturnlossenhancement(); + + // optional .audio_processing_unittest.Test.Statistic aNlp = 4; + inline bool has_anlp() const; + inline void clear_anlp(); + static const int kANlpFieldNumber = 4; + inline const ::audio_processing_unittest::Test_Statistic& anlp() const; + inline ::audio_processing_unittest::Test_Statistic* mutable_anlp(); + inline ::audio_processing_unittest::Test_Statistic* release_anlp(); + + // @@protoc_insertion_point(class_scope:audio_processing_unittest.Test.EchoMetrics) + private: + inline void set_has_residualechoreturnloss(); + inline void clear_has_residualechoreturnloss(); + inline void set_has_echoreturnloss(); + inline void clear_has_echoreturnloss(); + inline void set_has_echoreturnlossenhancement(); + inline void clear_has_echoreturnlossenhancement(); + inline void set_has_anlp(); + inline void clear_has_anlp(); + + ::audio_processing_unittest::Test_Statistic* residualechoreturnloss_; + ::audio_processing_unittest::Test_Statistic* echoreturnloss_; + ::audio_processing_unittest::Test_Statistic* echoreturnlossenhancement_; + ::audio_processing_unittest::Test_Statistic* anlp_; + + mutable int _cached_size_; + ::google::protobuf::uint32 _has_bits_[(4 + 31) / 32]; + + friend void protobuf_AddDesc_audio_5fprocessing_5funittest_2eproto(); + friend void protobuf_AssignDesc_audio_5fprocessing_5funittest_2eproto(); + friend void protobuf_ShutdownFile_audio_5fprocessing_5funittest_2eproto(); + + void InitAsDefaultInstance(); + static Test_EchoMetrics* default_instance_; +}; +// ------------------------------------------------------------------- + +class Test : public ::google::protobuf::MessageLite { + 
public: + Test(); + virtual ~Test(); + + Test(const Test& from); + + inline Test& operator=(const Test& from) { + CopyFrom(from); + return *this; + } + + static const Test& default_instance(); + + void Swap(Test* other); + + // implements Message ---------------------------------------------- + + Test* New() const; + void CheckTypeAndMergeFrom(const ::google::protobuf::MessageLite& from); + void CopyFrom(const Test& from); + void MergeFrom(const Test& from); + void Clear(); + bool IsInitialized() const; + + int ByteSize() const; + bool MergePartialFromCodedStream( + ::google::protobuf::io::CodedInputStream* input); + void SerializeWithCachedSizes( + ::google::protobuf::io::CodedOutputStream* output) const; + int GetCachedSize() const { return _cached_size_; } + private: + void SharedCtor(); + void SharedDtor(); + void SetCachedSize(int size) const; + public: + + ::std::string GetTypeName() const; + + // nested types ---------------------------------------------------- + + typedef Test_Statistic Statistic; + typedef Test_EchoMetrics EchoMetrics; + + // accessors ------------------------------------------------------- + + // optional int32 numReverseChannels = 1; + inline bool has_numreversechannels() const; + inline void clear_numreversechannels(); + static const int kNumReverseChannelsFieldNumber = 1; + inline ::google::protobuf::int32 numreversechannels() const; + inline void set_numreversechannels(::google::protobuf::int32 value); + + // optional int32 numChannels = 2; + inline bool has_numchannels() const; + inline void clear_numchannels(); + static const int kNumChannelsFieldNumber = 2; + inline ::google::protobuf::int32 numchannels() const; + inline void set_numchannels(::google::protobuf::int32 value); + + // optional int32 sampleRate = 3; + inline bool has_samplerate() const; + inline void clear_samplerate(); + static const int kSampleRateFieldNumber = 3; + inline ::google::protobuf::int32 samplerate() const; + inline void 
set_samplerate(::google::protobuf::int32 value); + + // optional int32 hasEchoCount = 4; + inline bool has_hasechocount() const; + inline void clear_hasechocount(); + static const int kHasEchoCountFieldNumber = 4; + inline ::google::protobuf::int32 hasechocount() const; + inline void set_hasechocount(::google::protobuf::int32 value); + + // optional int32 hasVoiceCount = 5; + inline bool has_hasvoicecount() const; + inline void clear_hasvoicecount(); + static const int kHasVoiceCountFieldNumber = 5; + inline ::google::protobuf::int32 hasvoicecount() const; + inline void set_hasvoicecount(::google::protobuf::int32 value); + + // optional int32 isSaturatedCount = 6; + inline bool has_issaturatedcount() const; + inline void clear_issaturatedcount(); + static const int kIsSaturatedCountFieldNumber = 6; + inline ::google::protobuf::int32 issaturatedcount() const; + inline void set_issaturatedcount(::google::protobuf::int32 value); + + // optional .audio_processing_unittest.Test.EchoMetrics echoMetrics = 7; + inline bool has_echometrics() const; + inline void clear_echometrics(); + static const int kEchoMetricsFieldNumber = 7; + inline const ::audio_processing_unittest::Test_EchoMetrics& echometrics() const; + inline ::audio_processing_unittest::Test_EchoMetrics* mutable_echometrics(); + inline ::audio_processing_unittest::Test_EchoMetrics* release_echometrics(); + + // @@protoc_insertion_point(class_scope:audio_processing_unittest.Test) + private: + inline void set_has_numreversechannels(); + inline void clear_has_numreversechannels(); + inline void set_has_numchannels(); + inline void clear_has_numchannels(); + inline void set_has_samplerate(); + inline void clear_has_samplerate(); + inline void set_has_hasechocount(); + inline void clear_has_hasechocount(); + inline void set_has_hasvoicecount(); + inline void clear_has_hasvoicecount(); + inline void set_has_issaturatedcount(); + inline void clear_has_issaturatedcount(); + inline void set_has_echometrics(); + inline 
void clear_has_echometrics(); + + ::google::protobuf::int32 numreversechannels_; + ::google::protobuf::int32 numchannels_; + ::google::protobuf::int32 samplerate_; + ::google::protobuf::int32 hasechocount_; + ::google::protobuf::int32 hasvoicecount_; + ::google::protobuf::int32 issaturatedcount_; + ::audio_processing_unittest::Test_EchoMetrics* echometrics_; + + mutable int _cached_size_; + ::google::protobuf::uint32 _has_bits_[(7 + 31) / 32]; + + friend void protobuf_AddDesc_audio_5fprocessing_5funittest_2eproto(); + friend void protobuf_AssignDesc_audio_5fprocessing_5funittest_2eproto(); + friend void protobuf_ShutdownFile_audio_5fprocessing_5funittest_2eproto(); + + void InitAsDefaultInstance(); + static Test* default_instance_; +}; +// ------------------------------------------------------------------- + +class OutputData : public ::google::protobuf::MessageLite { + public: + OutputData(); + virtual ~OutputData(); + + OutputData(const OutputData& from); + + inline OutputData& operator=(const OutputData& from) { + CopyFrom(from); + return *this; + } + + static const OutputData& default_instance(); + + void Swap(OutputData* other); + + // implements Message ---------------------------------------------- + + OutputData* New() const; + void CheckTypeAndMergeFrom(const ::google::protobuf::MessageLite& from); + void CopyFrom(const OutputData& from); + void MergeFrom(const OutputData& from); + void Clear(); + bool IsInitialized() const; + + int ByteSize() const; + bool MergePartialFromCodedStream( + ::google::protobuf::io::CodedInputStream* input); + void SerializeWithCachedSizes( + ::google::protobuf::io::CodedOutputStream* output) const; + int GetCachedSize() const { return _cached_size_; } + private: + void SharedCtor(); + void SharedDtor(); + void SetCachedSize(int size) const; + public: + + ::std::string GetTypeName() const; + + // nested types ---------------------------------------------------- + + // accessors 
------------------------------------------------------- + + // repeated .audio_processing_unittest.Test test = 1; + inline int test_size() const; + inline void clear_test(); + static const int kTestFieldNumber = 1; + inline const ::audio_processing_unittest::Test& test(int index) const; + inline ::audio_processing_unittest::Test* mutable_test(int index); + inline ::audio_processing_unittest::Test* add_test(); + inline const ::google::protobuf::RepeatedPtrField< ::audio_processing_unittest::Test >& + test() const; + inline ::google::protobuf::RepeatedPtrField< ::audio_processing_unittest::Test >* + mutable_test(); + + // @@protoc_insertion_point(class_scope:audio_processing_unittest.OutputData) + private: + + ::google::protobuf::RepeatedPtrField< ::audio_processing_unittest::Test > test_; + + mutable int _cached_size_; + ::google::protobuf::uint32 _has_bits_[(1 + 31) / 32]; + + friend void protobuf_AddDesc_audio_5fprocessing_5funittest_2eproto(); + friend void protobuf_AssignDesc_audio_5fprocessing_5funittest_2eproto(); + friend void protobuf_ShutdownFile_audio_5fprocessing_5funittest_2eproto(); + + void InitAsDefaultInstance(); + static OutputData* default_instance_; +}; +// =================================================================== + + +// =================================================================== + +// Test_Statistic + +// optional int32 instant = 1; +inline bool Test_Statistic::has_instant() const { + return (_has_bits_[0] & 0x00000001u) != 0; +} +inline void Test_Statistic::set_has_instant() { + _has_bits_[0] |= 0x00000001u; +} +inline void Test_Statistic::clear_has_instant() { + _has_bits_[0] &= ~0x00000001u; +} +inline void Test_Statistic::clear_instant() { + instant_ = 0; + clear_has_instant(); +} +inline ::google::protobuf::int32 Test_Statistic::instant() const { + return instant_; +} +inline void Test_Statistic::set_instant(::google::protobuf::int32 value) { + set_has_instant(); + instant_ = value; +} + +// optional int32 average = 2; 
// Inline field accessors for the messages declared in
// audio_processing_unittest.proto: Test.Statistic (Test_Statistic),
// Test.EchoMetrics (Test_EchoMetrics), Test and OutputData.
//
// Pattern used by every optional field below:
//   - has_x() / set_has_x() / clear_has_x() test, set and clear the field's
//     presence bit in _has_bits_[0]; the mask follows the field's position in
//     its message (0x1, 0x2, 0x4, ...).
//   - int32 fields: x() returns the stored value, set_x() marks the field
//     present and stores the value, clear_x() resets it to 0 and clears the
//     presence bit.
//   - message fields: x() returns the sub-message, or the corresponding member
//     of default_instance_ while the pointer is NULL; mutable_x() marks the
//     field present and allocates the sub-message on first use; clear_x()
//     calls Clear() on an already allocated sub-message; release_x() clears
//     the presence bit, returns the raw pointer and resets the member to NULL
//     (presumably handing ownership to the caller -- confirm against the
//     protobuf version in use).
// The repeated field OutputData::test forwards to the RepeatedPtrField member
// test_ (size(), Clear(), Get(), Mutable(), Add()).
//
// NOTE(review): the @@protoc_insertion_point markers indicate protoc output;
// presumably this should be regenerated from the .proto rather than edited by
// hand.
+inline bool Test_Statistic::has_average() const { + return (_has_bits_[0] & 0x00000002u) != 0; +} +inline void Test_Statistic::set_has_average() { + _has_bits_[0] |= 0x00000002u; +} +inline void Test_Statistic::clear_has_average() { + _has_bits_[0] &= ~0x00000002u; +} +inline void Test_Statistic::clear_average() { + average_ = 0; + clear_has_average(); +} +inline ::google::protobuf::int32 Test_Statistic::average() const { + return average_; +} +inline void Test_Statistic::set_average(::google::protobuf::int32 value) { + set_has_average(); + average_ = value; +} + +// optional int32 maximum = 3; +inline bool Test_Statistic::has_maximum() const { + return (_has_bits_[0] & 0x00000004u) != 0; +} +inline void Test_Statistic::set_has_maximum() { + _has_bits_[0] |= 0x00000004u; +} +inline void Test_Statistic::clear_has_maximum() { + _has_bits_[0] &= ~0x00000004u; +} +inline void Test_Statistic::clear_maximum() { + maximum_ = 0; + clear_has_maximum(); +} +inline ::google::protobuf::int32 Test_Statistic::maximum() const { + return maximum_; +} +inline void Test_Statistic::set_maximum(::google::protobuf::int32 value) { + set_has_maximum(); + maximum_ = value; +} + +// optional int32 minimum = 4; +inline bool Test_Statistic::has_minimum() const { + return (_has_bits_[0] & 0x00000008u) != 0; +} +inline void Test_Statistic::set_has_minimum() { + _has_bits_[0] |= 0x00000008u; +} +inline void Test_Statistic::clear_has_minimum() { + _has_bits_[0] &= ~0x00000008u; +} +inline void Test_Statistic::clear_minimum() { + minimum_ = 0; + clear_has_minimum(); +} +inline ::google::protobuf::int32 Test_Statistic::minimum() const { + return minimum_; +} +inline void Test_Statistic::set_minimum(::google::protobuf::int32 value) { + set_has_minimum(); + minimum_ = value; +} + +// ------------------------------------------------------------------- + +// Test_EchoMetrics + +// optional .audio_processing_unittest.Test.Statistic residualEchoReturnLoss = 1; +inline bool 
Test_EchoMetrics::has_residualechoreturnloss() const { + return (_has_bits_[0] & 0x00000001u) != 0; +} +inline void Test_EchoMetrics::set_has_residualechoreturnloss() { + _has_bits_[0] |= 0x00000001u; +} +inline void Test_EchoMetrics::clear_has_residualechoreturnloss() { + _has_bits_[0] &= ~0x00000001u; +} +inline void Test_EchoMetrics::clear_residualechoreturnloss() { + if (residualechoreturnloss_ != NULL) residualechoreturnloss_->::audio_processing_unittest::Test_Statistic::Clear(); + clear_has_residualechoreturnloss(); +} +inline const ::audio_processing_unittest::Test_Statistic& Test_EchoMetrics::residualechoreturnloss() const { + return residualechoreturnloss_ != NULL ? *residualechoreturnloss_ : *default_instance_->residualechoreturnloss_; +} +inline ::audio_processing_unittest::Test_Statistic* Test_EchoMetrics::mutable_residualechoreturnloss() { + set_has_residualechoreturnloss(); + if (residualechoreturnloss_ == NULL) residualechoreturnloss_ = new ::audio_processing_unittest::Test_Statistic; + return residualechoreturnloss_; +} +inline ::audio_processing_unittest::Test_Statistic* Test_EchoMetrics::release_residualechoreturnloss() { + clear_has_residualechoreturnloss(); + ::audio_processing_unittest::Test_Statistic* temp = residualechoreturnloss_; + residualechoreturnloss_ = NULL; + return temp; +} + +// optional .audio_processing_unittest.Test.Statistic echoReturnLoss = 2; +inline bool Test_EchoMetrics::has_echoreturnloss() const { + return (_has_bits_[0] & 0x00000002u) != 0; +} +inline void Test_EchoMetrics::set_has_echoreturnloss() { + _has_bits_[0] |= 0x00000002u; +} +inline void Test_EchoMetrics::clear_has_echoreturnloss() { + _has_bits_[0] &= ~0x00000002u; +} +inline void Test_EchoMetrics::clear_echoreturnloss() { + if (echoreturnloss_ != NULL) echoreturnloss_->::audio_processing_unittest::Test_Statistic::Clear(); + clear_has_echoreturnloss(); +} +inline const ::audio_processing_unittest::Test_Statistic& Test_EchoMetrics::echoreturnloss() const { + 
return echoreturnloss_ != NULL ? *echoreturnloss_ : *default_instance_->echoreturnloss_; +} +inline ::audio_processing_unittest::Test_Statistic* Test_EchoMetrics::mutable_echoreturnloss() { + set_has_echoreturnloss(); + if (echoreturnloss_ == NULL) echoreturnloss_ = new ::audio_processing_unittest::Test_Statistic; + return echoreturnloss_; +} +inline ::audio_processing_unittest::Test_Statistic* Test_EchoMetrics::release_echoreturnloss() { + clear_has_echoreturnloss(); + ::audio_processing_unittest::Test_Statistic* temp = echoreturnloss_; + echoreturnloss_ = NULL; + return temp; +} + +// optional .audio_processing_unittest.Test.Statistic echoReturnLossEnhancement = 3; +inline bool Test_EchoMetrics::has_echoreturnlossenhancement() const { + return (_has_bits_[0] & 0x00000004u) != 0; +} +inline void Test_EchoMetrics::set_has_echoreturnlossenhancement() { + _has_bits_[0] |= 0x00000004u; +} +inline void Test_EchoMetrics::clear_has_echoreturnlossenhancement() { + _has_bits_[0] &= ~0x00000004u; +} +inline void Test_EchoMetrics::clear_echoreturnlossenhancement() { + if (echoreturnlossenhancement_ != NULL) echoreturnlossenhancement_->::audio_processing_unittest::Test_Statistic::Clear(); + clear_has_echoreturnlossenhancement(); +} +inline const ::audio_processing_unittest::Test_Statistic& Test_EchoMetrics::echoreturnlossenhancement() const { + return echoreturnlossenhancement_ != NULL ? 
*echoreturnlossenhancement_ : *default_instance_->echoreturnlossenhancement_; +} +inline ::audio_processing_unittest::Test_Statistic* Test_EchoMetrics::mutable_echoreturnlossenhancement() { + set_has_echoreturnlossenhancement(); + if (echoreturnlossenhancement_ == NULL) echoreturnlossenhancement_ = new ::audio_processing_unittest::Test_Statistic; + return echoreturnlossenhancement_; +} +inline ::audio_processing_unittest::Test_Statistic* Test_EchoMetrics::release_echoreturnlossenhancement() { + clear_has_echoreturnlossenhancement(); + ::audio_processing_unittest::Test_Statistic* temp = echoreturnlossenhancement_; + echoreturnlossenhancement_ = NULL; + return temp; +} + +// optional .audio_processing_unittest.Test.Statistic aNlp = 4; +inline bool Test_EchoMetrics::has_anlp() const { + return (_has_bits_[0] & 0x00000008u) != 0; +} +inline void Test_EchoMetrics::set_has_anlp() { + _has_bits_[0] |= 0x00000008u; +} +inline void Test_EchoMetrics::clear_has_anlp() { + _has_bits_[0] &= ~0x00000008u; +} +inline void Test_EchoMetrics::clear_anlp() { + if (anlp_ != NULL) anlp_->::audio_processing_unittest::Test_Statistic::Clear(); + clear_has_anlp(); +} +inline const ::audio_processing_unittest::Test_Statistic& Test_EchoMetrics::anlp() const { + return anlp_ != NULL ? 
*anlp_ : *default_instance_->anlp_; +} +inline ::audio_processing_unittest::Test_Statistic* Test_EchoMetrics::mutable_anlp() { + set_has_anlp(); + if (anlp_ == NULL) anlp_ = new ::audio_processing_unittest::Test_Statistic; + return anlp_; +} +inline ::audio_processing_unittest::Test_Statistic* Test_EchoMetrics::release_anlp() { + clear_has_anlp(); + ::audio_processing_unittest::Test_Statistic* temp = anlp_; + anlp_ = NULL; + return temp; +} + +// ------------------------------------------------------------------- + +// Test + +// optional int32 numReverseChannels = 1; +inline bool Test::has_numreversechannels() const { + return (_has_bits_[0] & 0x00000001u) != 0; +} +inline void Test::set_has_numreversechannels() { + _has_bits_[0] |= 0x00000001u; +} +inline void Test::clear_has_numreversechannels() { + _has_bits_[0] &= ~0x00000001u; +} +inline void Test::clear_numreversechannels() { + numreversechannels_ = 0; + clear_has_numreversechannels(); +} +inline ::google::protobuf::int32 Test::numreversechannels() const { + return numreversechannels_; +} +inline void Test::set_numreversechannels(::google::protobuf::int32 value) { + set_has_numreversechannels(); + numreversechannels_ = value; +} + +// optional int32 numChannels = 2; +inline bool Test::has_numchannels() const { + return (_has_bits_[0] & 0x00000002u) != 0; +} +inline void Test::set_has_numchannels() { + _has_bits_[0] |= 0x00000002u; +} +inline void Test::clear_has_numchannels() { + _has_bits_[0] &= ~0x00000002u; +} +inline void Test::clear_numchannels() { + numchannels_ = 0; + clear_has_numchannels(); +} +inline ::google::protobuf::int32 Test::numchannels() const { + return numchannels_; +} +inline void Test::set_numchannels(::google::protobuf::int32 value) { + set_has_numchannels(); + numchannels_ = value; +} + +// optional int32 sampleRate = 3; +inline bool Test::has_samplerate() const { + return (_has_bits_[0] & 0x00000004u) != 0; +} +inline void Test::set_has_samplerate() { + _has_bits_[0] |= 0x00000004u; 
+} +inline void Test::clear_has_samplerate() { + _has_bits_[0] &= ~0x00000004u; +} +inline void Test::clear_samplerate() { + samplerate_ = 0; + clear_has_samplerate(); +} +inline ::google::protobuf::int32 Test::samplerate() const { + return samplerate_; +} +inline void Test::set_samplerate(::google::protobuf::int32 value) { + set_has_samplerate(); + samplerate_ = value; +} + +// optional int32 hasEchoCount = 4; +inline bool Test::has_hasechocount() const { + return (_has_bits_[0] & 0x00000008u) != 0; +} +inline void Test::set_has_hasechocount() { + _has_bits_[0] |= 0x00000008u; +} +inline void Test::clear_has_hasechocount() { + _has_bits_[0] &= ~0x00000008u; +} +inline void Test::clear_hasechocount() { + hasechocount_ = 0; + clear_has_hasechocount(); +} +inline ::google::protobuf::int32 Test::hasechocount() const { + return hasechocount_; +} +inline void Test::set_hasechocount(::google::protobuf::int32 value) { + set_has_hasechocount(); + hasechocount_ = value; +} + +// optional int32 hasVoiceCount = 5; +inline bool Test::has_hasvoicecount() const { + return (_has_bits_[0] & 0x00000010u) != 0; +} +inline void Test::set_has_hasvoicecount() { + _has_bits_[0] |= 0x00000010u; +} +inline void Test::clear_has_hasvoicecount() { + _has_bits_[0] &= ~0x00000010u; +} +inline void Test::clear_hasvoicecount() { + hasvoicecount_ = 0; + clear_has_hasvoicecount(); +} +inline ::google::protobuf::int32 Test::hasvoicecount() const { + return hasvoicecount_; +} +inline void Test::set_hasvoicecount(::google::protobuf::int32 value) { + set_has_hasvoicecount(); + hasvoicecount_ = value; +} + +// optional int32 isSaturatedCount = 6; +inline bool Test::has_issaturatedcount() const { + return (_has_bits_[0] & 0x00000020u) != 0; +} +inline void Test::set_has_issaturatedcount() { + _has_bits_[0] |= 0x00000020u; +} +inline void Test::clear_has_issaturatedcount() { + _has_bits_[0] &= ~0x00000020u; +} +inline void Test::clear_issaturatedcount() { + issaturatedcount_ = 0; + 
clear_has_issaturatedcount(); +} +inline ::google::protobuf::int32 Test::issaturatedcount() const { + return issaturatedcount_; +} +inline void Test::set_issaturatedcount(::google::protobuf::int32 value) { + set_has_issaturatedcount(); + issaturatedcount_ = value; +} + +// optional .audio_processing_unittest.Test.EchoMetrics echoMetrics = 7; +inline bool Test::has_echometrics() const { + return (_has_bits_[0] & 0x00000040u) != 0; +} +inline void Test::set_has_echometrics() { + _has_bits_[0] |= 0x00000040u; +} +inline void Test::clear_has_echometrics() { + _has_bits_[0] &= ~0x00000040u; +} +inline void Test::clear_echometrics() { + if (echometrics_ != NULL) echometrics_->::audio_processing_unittest::Test_EchoMetrics::Clear(); + clear_has_echometrics(); +} +inline const ::audio_processing_unittest::Test_EchoMetrics& Test::echometrics() const { + return echometrics_ != NULL ? *echometrics_ : *default_instance_->echometrics_; +} +inline ::audio_processing_unittest::Test_EchoMetrics* Test::mutable_echometrics() { + set_has_echometrics(); + if (echometrics_ == NULL) echometrics_ = new ::audio_processing_unittest::Test_EchoMetrics; + return echometrics_; +} +inline ::audio_processing_unittest::Test_EchoMetrics* Test::release_echometrics() { + clear_has_echometrics(); + ::audio_processing_unittest::Test_EchoMetrics* temp = echometrics_; + echometrics_ = NULL; + return temp; +} + +// ------------------------------------------------------------------- + +// OutputData + +// repeated .audio_processing_unittest.Test test = 1; +inline int OutputData::test_size() const { + return test_.size(); +} +inline void OutputData::clear_test() { + test_.Clear(); +} +inline const ::audio_processing_unittest::Test& OutputData::test(int index) const { + return test_.Get(index); +} +inline ::audio_processing_unittest::Test* OutputData::mutable_test(int index) { + return test_.Mutable(index); +} +inline ::audio_processing_unittest::Test* OutputData::add_test() { + return test_.Add(); +} 
+inline const ::google::protobuf::RepeatedPtrField< ::audio_processing_unittest::Test >& +OutputData::test() const { + return test_; +} +inline ::google::protobuf::RepeatedPtrField< ::audio_processing_unittest::Test >* +OutputData::mutable_test() { + return &test_; +} + + +// @@protoc_insertion_point(namespace_scope) + +} // namespace audio_processing_unittest + +// @@protoc_insertion_point(global_scope) + +#endif // PROTOBUF_audio_5fprocessing_5funittest_2eproto__INCLUDED diff --git a/src/modules/audio_processing/main/test/unit_test/audio_processing_unittest.proto b/src/modules/audio_processing/main/test/unit_test/audio_processing_unittest.proto new file mode 100644 index 0000000000..8520e64f22 --- /dev/null +++ b/src/modules/audio_processing/main/test/unit_test/audio_processing_unittest.proto @@ -0,0 +1,33 @@ +package audio_processing_unittest; +option optimize_for = LITE_RUNTIME; + +message Test { + optional int32 numReverseChannels = 1; + optional int32 numChannels = 2; + optional int32 sampleRate = 3; + + optional int32 hasEchoCount = 4; + optional int32 hasVoiceCount = 5; + optional int32 isSaturatedCount = 6; + + message Statistic { + optional int32 instant = 1; + optional int32 average = 2; + optional int32 maximum = 3; + optional int32 minimum = 4; + } + + message EchoMetrics { + optional Statistic residualEchoReturnLoss = 1; + optional Statistic echoReturnLoss = 2; + optional Statistic echoReturnLossEnhancement = 3; + optional Statistic aNlp = 4; + } + + optional EchoMetrics echoMetrics = 7; +} + +message OutputData { + repeated Test test = 1; +} + diff --git a/src/modules/audio_processing/main/test/unit_test/unit_test.cc b/src/modules/audio_processing/main/test/unit_test/unit_test.cc new file mode 100644 index 0000000000..3a6fce5a3f --- /dev/null +++ b/src/modules/audio_processing/main/test/unit_test/unit_test.cc @@ -0,0 +1,881 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <cstdio> + +#include <gtest/gtest.h> + +#include "audio_processing.h" +#include "audio_processing_unittest.pb.h" +#include "event_wrapper.h" +#include "module_common_types.h" +#include "thread_wrapper.h" +#include "trace.h" +#include "signal_processing_library.h" + +using webrtc::AudioProcessing; +using webrtc::AudioFrame; +using webrtc::GainControl; +using webrtc::NoiseSuppression; +using webrtc::EchoCancellation; +using webrtc::EventWrapper; +using webrtc::Trace; +using webrtc::LevelEstimator; +using webrtc::EchoCancellation; +using webrtc::EchoControlMobile; +using webrtc::VoiceDetection; + +namespace { +// When true, this will compare the output data with the results stored to +// file. This is the typical case. When the file should be updated, it can +// be set to false with the command-line switch --write_output_data. 
+bool global_read_output_data = true; + +class ApmEnvironment : public ::testing::Environment { + public: + virtual void SetUp() { + Trace::CreateTrace(); + ASSERT_EQ(0, Trace::SetTraceFile("apm_trace.txt")); + } + + virtual void TearDown() { + Trace::ReturnTrace(); + } +}; + +class ApmTest : public ::testing::Test { + protected: + ApmTest(); + virtual void SetUp(); + virtual void TearDown(); + + webrtc::AudioProcessing* apm_; + webrtc::AudioFrame* frame_; + webrtc::AudioFrame* revframe_; + FILE* far_file_; + FILE* near_file_; + bool update_output_data_; +}; + +ApmTest::ApmTest() + : apm_(NULL), + far_file_(NULL), + near_file_(NULL), + frame_(NULL), + revframe_(NULL) {} + +void ApmTest::SetUp() { + apm_ = AudioProcessing::Create(0); + ASSERT_TRUE(apm_ != NULL); + + frame_ = new AudioFrame(); + revframe_ = new AudioFrame(); + + ASSERT_EQ(apm_->kNoError, apm_->set_sample_rate_hz(32000)); + ASSERT_EQ(apm_->kNoError, apm_->set_num_channels(2, 2)); + ASSERT_EQ(apm_->kNoError, apm_->set_num_reverse_channels(2)); + + frame_->_payloadDataLengthInSamples = 320; + frame_->_audioChannel = 2; + frame_->_frequencyInHz = 32000; + revframe_->_payloadDataLengthInSamples = 320; + revframe_->_audioChannel = 2; + revframe_->_frequencyInHz = 32000; + + far_file_ = fopen("aec_far.pcm", "rb"); + ASSERT_TRUE(far_file_ != NULL) << "Could not open input file aec_far.pcm\n"; + near_file_ = fopen("aec_near.pcm", "rb"); + ASSERT_TRUE(near_file_ != NULL) << "Could not open input file aec_near.pcm\n"; +} + +void ApmTest::TearDown() { + if (frame_) { + delete frame_; + } + frame_ = NULL; + + if (revframe_) { + delete revframe_; + } + revframe_ = NULL; + + if (far_file_) { + ASSERT_EQ(0, fclose(far_file_)); + } + far_file_ = NULL; + + if (near_file_) { + ASSERT_EQ(0, fclose(near_file_)); + } + near_file_ = NULL; + + if (apm_ != NULL) { + AudioProcessing::Destroy(apm_); + } + apm_ = NULL; +} + +void MixStereoToMono(WebRtc_Word16* stereo, + WebRtc_Word16* mono, + int numSamples) { + for (int i = 0; 
i < numSamples; i++) { + int int32 = (static_cast<int>(stereo[i * 2]) + + static_cast<int>(stereo[i * 2 + 1])) >> 1; + mono[i] = static_cast<WebRtc_Word16>(int32); + } +} + +void WriteMessageLiteToFile(const char* filename, + const ::google::protobuf::MessageLite& message) { + assert(filename != NULL); + + FILE* file = fopen(filename, "wb"); + ASSERT_TRUE(file != NULL) << "Could not open " << filename; + int size = message.ByteSize(); + ASSERT_GT(size, 0); + unsigned char* array = new unsigned char[size]; + ASSERT_TRUE(message.SerializeToArray(array, size)); + + ASSERT_EQ(1, fwrite(&size, sizeof(int), 1, file)); + ASSERT_EQ(size, fwrite(array, sizeof(unsigned char), size, file)); + + delete [] array; + fclose(file); +} + +void ReadMessageLiteFromFile(const char* filename, + ::google::protobuf::MessageLite* message) { + assert(filename != NULL); + assert(message != NULL); + + FILE* file = fopen(filename, "rb"); + ASSERT_TRUE(file != NULL) << "Could not open " << filename; + int size = 0; + ASSERT_EQ(1, fread(&size, sizeof(int), 1, file)); + ASSERT_GT(size, 0); + unsigned char* array = new unsigned char[size]; + ASSERT_EQ(size, fread(array, sizeof(unsigned char), size, file)); + + ASSERT_TRUE(message->ParseFromArray(array, size)); + + delete [] array; + fclose(file); +} + +struct ThreadData { + ThreadData(int thread_num_, AudioProcessing* ap_) + : thread_num(thread_num_), + error(false), + ap(ap_) {} + int thread_num; + bool error; + AudioProcessing* ap; +}; + +// Don't use GTest here; non-thread-safe on Windows (as of 1.5.0). 
+bool DeadlockProc(void* thread_object) { + ThreadData* thread_data = static_cast<ThreadData*>(thread_object); + AudioProcessing* ap = thread_data->ap; + int err = ap->kNoError; + + AudioFrame primary_frame; + AudioFrame reverse_frame; + primary_frame._payloadDataLengthInSamples = 320; + primary_frame._audioChannel = 2; + primary_frame._frequencyInHz = 32000; + reverse_frame._payloadDataLengthInSamples = 320; + reverse_frame._audioChannel = 2; + reverse_frame._frequencyInHz = 32000; + + ap->echo_cancellation()->Enable(true); + ap->gain_control()->Enable(true); + ap->high_pass_filter()->Enable(true); + ap->level_estimator()->Enable(true); + ap->noise_suppression()->Enable(true); + ap->voice_detection()->Enable(true); + + if (thread_data->thread_num % 2 == 0) { + err = ap->AnalyzeReverseStream(&reverse_frame); + if (err != ap->kNoError) { + printf("Error in AnalyzeReverseStream(): %d\n", err); + thread_data->error = true; + return false; + } + } + + if (thread_data->thread_num % 2 == 1) { + ap->set_stream_delay_ms(0); + ap->echo_cancellation()->set_stream_drift_samples(0); + ap->gain_control()->set_stream_analog_level(0); + err = ap->ProcessStream(&primary_frame); + if (err == ap->kStreamParameterNotSetError) { + printf("Expected kStreamParameterNotSetError in ProcessStream(): %d\n", + err); + } else if (err != ap->kNoError) { + printf("Error in ProcessStream(): %d\n", err); + thread_data->error = true; + return false; + } + ap->gain_control()->stream_analog_level(); + } + + EventWrapper* event = EventWrapper::Create(); + event->Wait(1); + delete event; + event = NULL; + + return true; +} + +/*TEST_F(ApmTest, Deadlock) { + const int num_threads = 16; + std::vector<ThreadWrapper*> threads(num_threads); + std::vector<ThreadData*> thread_data(num_threads); + + ASSERT_EQ(apm_->kNoError, apm_->set_sample_rate_hz(32000)); + ASSERT_EQ(apm_->kNoError, apm_->set_num_channels(2, 2)); + ASSERT_EQ(apm_->kNoError, apm_->set_num_reverse_channels(2)); + + for (int i = 0; i < 
num_threads; i++) { + thread_data[i] = new ThreadData(i, apm_); + threads[i] = ThreadWrapper::CreateThread(DeadlockProc, + thread_data[i], + kNormalPriority, + 0); + ASSERT_TRUE(threads[i] != NULL); + unsigned int thread_id = 0; + threads[i]->Start(thread_id); + } + + EventWrapper* event = EventWrapper::Create(); + ASSERT_EQ(kEventTimeout, event->Wait(5000)); + delete event; + event = NULL; + + for (int i = 0; i < num_threads; i++) { + // This will return false if the thread has deadlocked. + ASSERT_TRUE(threads[i]->Stop()); + ASSERT_FALSE(thread_data[i]->error); + delete threads[i]; + threads[i] = NULL; + delete thread_data[i]; + thread_data[i] = NULL; + } +}*/ + +TEST_F(ApmTest, StreamParameters) { + // No errors when the components are disabled. + EXPECT_EQ(apm_->kNoError, + apm_->ProcessStream(frame_)); + + // Missing agc level + EXPECT_EQ(apm_->kNoError, apm_->Initialize()); + EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(true)); + EXPECT_EQ(apm_->kStreamParameterNotSetError, + apm_->ProcessStream(frame_)); + EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(100)); + EXPECT_EQ(apm_->kNoError, + apm_->echo_cancellation()->set_stream_drift_samples(0)); + EXPECT_EQ(apm_->kStreamParameterNotSetError, + apm_->ProcessStream(frame_)); + EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(false)); + + // Missing delay + EXPECT_EQ(apm_->kNoError, apm_->Initialize()); + EXPECT_EQ(apm_->kNoError, apm_->echo_cancellation()->Enable(true)); + EXPECT_EQ(apm_->kStreamParameterNotSetError, + apm_->ProcessStream(frame_)); + EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(true)); + EXPECT_EQ(apm_->kNoError, + apm_->echo_cancellation()->set_stream_drift_samples(0)); + EXPECT_EQ(apm_->kNoError, + apm_->gain_control()->set_stream_analog_level(127)); + EXPECT_EQ(apm_->kStreamParameterNotSetError, + apm_->ProcessStream(frame_)); + EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(false)); + + // Missing drift + EXPECT_EQ(apm_->kNoError, apm_->Initialize()); 
+ EXPECT_EQ(apm_->kNoError, + apm_->echo_cancellation()->enable_drift_compensation(true)); + EXPECT_EQ(apm_->kStreamParameterNotSetError, + apm_->ProcessStream(frame_)); + EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(true)); + EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(100)); + EXPECT_EQ(apm_->kNoError, + apm_->gain_control()->set_stream_analog_level(127)); + EXPECT_EQ(apm_->kStreamParameterNotSetError, + apm_->ProcessStream(frame_)); + + // No stream parameters + EXPECT_EQ(apm_->kNoError, apm_->Initialize()); + EXPECT_EQ(apm_->kNoError, + apm_->AnalyzeReverseStream(revframe_)); + EXPECT_EQ(apm_->kStreamParameterNotSetError, + apm_->ProcessStream(frame_)); + + // All there + EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(true)); + EXPECT_EQ(apm_->kNoError, apm_->Initialize()); + EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(100)); + EXPECT_EQ(apm_->kNoError, + apm_->echo_cancellation()->set_stream_drift_samples(0)); + EXPECT_EQ(apm_->kNoError, + apm_->gain_control()->set_stream_analog_level(127)); + EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); +} + +TEST_F(ApmTest, Channels) { + // Testing number of invalid channels + EXPECT_EQ(apm_->kBadParameterError, apm_->set_num_channels(0, 1)); + EXPECT_EQ(apm_->kBadParameterError, apm_->set_num_channels(1, 0)); + EXPECT_EQ(apm_->kBadParameterError, apm_->set_num_channels(3, 1)); + EXPECT_EQ(apm_->kBadParameterError, apm_->set_num_channels(1, 3)); + EXPECT_EQ(apm_->kBadParameterError, apm_->set_num_reverse_channels(0)); + EXPECT_EQ(apm_->kBadParameterError, apm_->set_num_reverse_channels(3)); + // Testing number of valid channels + for (int i = 1; i < 3; i++) { + for (int j = 1; j < 3; j++) { + if (j > i) { + EXPECT_EQ(apm_->kBadParameterError, apm_->set_num_channels(i, j)); + } else { + EXPECT_EQ(apm_->kNoError, apm_->set_num_channels(i, j)); + EXPECT_EQ(j, apm_->num_output_channels()); + } + } + EXPECT_EQ(i, apm_->num_input_channels()); + EXPECT_EQ(apm_->kNoError, 
apm_->set_num_reverse_channels(i)); + EXPECT_EQ(i, apm_->num_reverse_channels()); + } +} + +TEST_F(ApmTest, SampleRates) { + // Testing invalid sample rates + EXPECT_EQ(apm_->kBadParameterError, apm_->set_sample_rate_hz(10000)); + // Testing valid sample rates + int fs[] = {8000, 16000, 32000}; + for (size_t i = 0; i < sizeof(fs) / sizeof(*fs); i++) { + EXPECT_EQ(apm_->kNoError, apm_->set_sample_rate_hz(fs[i])); + EXPECT_EQ(fs[i], apm_->sample_rate_hz()); + } +} + +TEST_F(ApmTest, Process) { + GOOGLE_PROTOBUF_VERIFY_VERSION; + audio_processing_unittest::OutputData output_data; + + if (global_read_output_data) { + ReadMessageLiteFromFile("output_data.pb", &output_data); + + } else { + // We don't have a file; add the required tests to the protobuf. + int rev_ch[] = {1, 2}; + int ch[] = {1, 2}; + int fs[] = {8000, 16000, 32000}; + for (size_t i = 0; i < sizeof(rev_ch) / sizeof(*rev_ch); i++) { + for (size_t j = 0; j < sizeof(ch) / sizeof(*ch); j++) { + for (size_t k = 0; k < sizeof(fs) / sizeof(*fs); k++) { + audio_processing_unittest::Test* test = output_data.add_test(); + test->set_numreversechannels(rev_ch[i]); + test->set_numchannels(ch[j]); + test->set_samplerate(fs[k]); + } + } + } + } + + EXPECT_EQ(apm_->kNoError, + apm_->echo_cancellation()->enable_drift_compensation(true)); + EXPECT_EQ(apm_->kNoError, + apm_->echo_cancellation()->enable_metrics(true)); + EXPECT_EQ(apm_->kNoError, apm_->echo_cancellation()->Enable(true)); + + EXPECT_EQ(apm_->kNoError, + apm_->gain_control()->set_mode(GainControl::kAdaptiveAnalog)); + EXPECT_EQ(apm_->kNoError, + apm_->gain_control()->set_analog_level_limits(0, 255)); + EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(true)); + + EXPECT_EQ(apm_->kNoError, + apm_->high_pass_filter()->Enable(true)); + + //EXPECT_EQ(apm_->kNoError, + // apm_->level_estimator()->Enable(true)); + + EXPECT_EQ(apm_->kNoError, + apm_->noise_suppression()->Enable(true)); + + EXPECT_EQ(apm_->kNoError, + apm_->voice_detection()->Enable(true)); + + 
for (int i = 0; i < output_data.test_size(); i++) { + printf("Running test %d of %d...\n", i + 1, output_data.test_size()); + + audio_processing_unittest::Test* test = output_data.mutable_test(i); + const int num_samples = test->samplerate() / 100; + revframe_->_payloadDataLengthInSamples = num_samples; + revframe_->_audioChannel = test->numreversechannels(); + revframe_->_frequencyInHz = test->samplerate(); + frame_->_payloadDataLengthInSamples = num_samples; + frame_->_audioChannel = test->numchannels(); + frame_->_frequencyInHz = test->samplerate(); + + EXPECT_EQ(apm_->kNoError, apm_->Initialize()); + ASSERT_EQ(apm_->kNoError, apm_->set_sample_rate_hz(test->samplerate())); + ASSERT_EQ(apm_->kNoError, apm_->set_num_channels(frame_->_audioChannel, + frame_->_audioChannel)); + ASSERT_EQ(apm_->kNoError, + apm_->set_num_reverse_channels(revframe_->_audioChannel)); + + + int has_echo_count = 0; + int has_voice_count = 0; + int is_saturated_count = 0; + + while (1) { + WebRtc_Word16 temp_data[640]; + int analog_level = 127; + + // Read far-end frame + size_t read_count = fread(temp_data, + sizeof(WebRtc_Word16), + num_samples * 2, + far_file_); + if (read_count != static_cast<size_t>(num_samples * 2)) { + // Check that the file really ended. + ASSERT_NE(0, feof(far_file_)); + break; // This is expected. 
+ } + + if (revframe_->_audioChannel == 1) { + MixStereoToMono(temp_data, revframe_->_payloadData, + revframe_->_payloadDataLengthInSamples); + } else { + memcpy(revframe_->_payloadData, + &temp_data[0], + sizeof(WebRtc_Word16) * read_count); + } + + EXPECT_EQ(apm_->kNoError, + apm_->AnalyzeReverseStream(revframe_)); + + EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(0)); + EXPECT_EQ(apm_->kNoError, + apm_->echo_cancellation()->set_stream_drift_samples(0)); + EXPECT_EQ(apm_->kNoError, + apm_->gain_control()->set_stream_analog_level(analog_level)); + + // Read near-end frame + read_count = fread(temp_data, + sizeof(WebRtc_Word16), + num_samples * 2, + near_file_); + if (read_count != static_cast<size_t>(num_samples * 2)) { + // Check that the file really ended. + ASSERT_NE(0, feof(near_file_)); + break; // This is expected. + } + + if (frame_->_audioChannel == 1) { + MixStereoToMono(temp_data, frame_->_payloadData, num_samples); + } else { + memcpy(frame_->_payloadData, + &temp_data[0], + sizeof(WebRtc_Word16) * read_count); + } + + EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); + + if (apm_->echo_cancellation()->stream_has_echo()) { + has_echo_count++; + } + + analog_level = apm_->gain_control()->stream_analog_level(); + if (apm_->gain_control()->stream_is_saturated()) { + is_saturated_count++; + } + if (apm_->voice_detection()->stream_has_voice()) { + has_voice_count++; + } + } + + //<-- Statistics --> + //LevelEstimator::Metrics far_metrics; + //LevelEstimator::Metrics near_metrics; + //EchoCancellation::Metrics echo_metrics; + //LevelEstimator::Metrics far_metrics_ref_; + //LevelEstimator::Metrics near_metrics_ref_; + //EchoCancellation::Metrics echo_metrics_ref_; + //EXPECT_EQ(apm_->kNoError, + // apm_->echo_cancellation()->GetMetrics(&echo_metrics)); + //EXPECT_EQ(apm_->kNoError, + // apm_->level_estimator()->GetMetrics(&near_metrics, + + // TODO(ajm): check echo metrics and output audio. 
+ if (global_read_output_data) { + EXPECT_EQ(has_echo_count, + test->hasechocount()); + EXPECT_EQ(has_voice_count, + test->hasvoicecount()); + EXPECT_EQ(is_saturated_count, + test->issaturatedcount()); + } else { + test->set_hasechocount(has_echo_count); + test->set_hasvoicecount(has_voice_count); + test->set_issaturatedcount(is_saturated_count); + } + + rewind(far_file_); + rewind(near_file_); + } + + if (!global_read_output_data) { + WriteMessageLiteToFile("output_data.pb", output_data); + } + + google::protobuf::ShutdownProtobufLibrary(); +} + +TEST_F(ApmTest, EchoCancellation) { + EXPECT_EQ(apm_->kNoError, + apm_->echo_cancellation()->enable_drift_compensation(true)); + EXPECT_TRUE(apm_->echo_cancellation()->is_drift_compensation_enabled()); + EXPECT_EQ(apm_->kNoError, + apm_->echo_cancellation()->enable_drift_compensation(false)); + EXPECT_FALSE(apm_->echo_cancellation()->is_drift_compensation_enabled()); + + EXPECT_EQ(apm_->kBadParameterError, + apm_->echo_cancellation()->set_device_sample_rate_hz(4000)); + EXPECT_EQ(apm_->kBadParameterError, + apm_->echo_cancellation()->set_device_sample_rate_hz(100000)); + + int rate[] = {16000, 44100, 48000}; + for (size_t i = 0; i < sizeof(rate)/sizeof(*rate); i++) { + EXPECT_EQ(apm_->kNoError, + apm_->echo_cancellation()->set_device_sample_rate_hz(rate[i])); + EXPECT_EQ(rate[i], + apm_->echo_cancellation()->device_sample_rate_hz()); + } + + EXPECT_EQ(apm_->kBadParameterError, + apm_->echo_cancellation()->set_suppression_level( + static_cast<EchoCancellation::SuppressionLevel>(-1))); + + EXPECT_EQ(apm_->kBadParameterError, + apm_->echo_cancellation()->set_suppression_level( + static_cast<EchoCancellation::SuppressionLevel>(4))); + + EchoCancellation::SuppressionLevel level[] = { + EchoCancellation::kLowSuppression, + EchoCancellation::kModerateSuppression, + EchoCancellation::kHighSuppression, + }; + for (size_t i = 0; i < sizeof(level)/sizeof(*level); i++) { + EXPECT_EQ(apm_->kNoError, + 
apm_->echo_cancellation()->set_suppression_level(level[i])); + EXPECT_EQ(level[i], + apm_->echo_cancellation()->suppression_level()); + } + + EchoCancellation::Metrics metrics; + EXPECT_EQ(apm_->kNotEnabledError, + apm_->echo_cancellation()->GetMetrics(&metrics)); + + EXPECT_EQ(apm_->kNoError, + apm_->echo_cancellation()->enable_metrics(true)); + EXPECT_TRUE(apm_->echo_cancellation()->are_metrics_enabled()); + EXPECT_EQ(apm_->kNoError, + apm_->echo_cancellation()->enable_metrics(false)); + EXPECT_FALSE(apm_->echo_cancellation()->are_metrics_enabled()); + + EXPECT_EQ(apm_->kNoError, apm_->echo_cancellation()->Enable(true)); + EXPECT_TRUE(apm_->echo_cancellation()->is_enabled()); + EXPECT_EQ(apm_->kNoError, apm_->echo_cancellation()->Enable(false)); + EXPECT_FALSE(apm_->echo_cancellation()->is_enabled()); +} + +TEST_F(ApmTest, EchoControlMobile) { + // AECM won't use super-wideband. + EXPECT_EQ(apm_->kNoError, apm_->set_sample_rate_hz(32000)); + EXPECT_EQ(apm_->kBadSampleRateError, apm_->echo_control_mobile()->Enable(true)); + EXPECT_EQ(apm_->kNoError, apm_->set_sample_rate_hz(16000)); + // Turn AECM on (and AEC off) + EXPECT_EQ(apm_->kNoError, apm_->echo_control_mobile()->Enable(true)); + EXPECT_TRUE(apm_->echo_control_mobile()->is_enabled()); + + EXPECT_EQ(apm_->kBadParameterError, + apm_->echo_control_mobile()->set_routing_mode( + static_cast<EchoControlMobile::RoutingMode>(-1))); + EXPECT_EQ(apm_->kBadParameterError, + apm_->echo_control_mobile()->set_routing_mode( + static_cast<EchoControlMobile::RoutingMode>(5))); + + // Toggle routing modes + EchoControlMobile::RoutingMode mode[] = { + EchoControlMobile::kQuietEarpieceOrHeadset, + EchoControlMobile::kEarpiece, + EchoControlMobile::kLoudEarpiece, + EchoControlMobile::kSpeakerphone, + EchoControlMobile::kLoudSpeakerphone, + }; + for (size_t i = 0; i < sizeof(mode)/sizeof(*mode); i++) { + EXPECT_EQ(apm_->kNoError, + apm_->echo_control_mobile()->set_routing_mode(mode[i])); + EXPECT_EQ(mode[i], + 
apm_->echo_control_mobile()->routing_mode()); + } + // Turn comfort noise off/on + EXPECT_EQ(apm_->kNoError, + apm_->echo_control_mobile()->enable_comfort_noise(false)); + EXPECT_FALSE(apm_->echo_control_mobile()->is_comfort_noise_enabled()); + EXPECT_EQ(apm_->kNoError, + apm_->echo_control_mobile()->enable_comfort_noise(true)); + EXPECT_TRUE(apm_->echo_control_mobile()->is_comfort_noise_enabled()); + // Turn AECM off + EXPECT_EQ(apm_->kNoError, apm_->echo_control_mobile()->Enable(false)); + EXPECT_FALSE(apm_->echo_control_mobile()->is_enabled()); +} + +TEST_F(ApmTest, GainControl) { + // Testing gain modes + EXPECT_EQ(apm_->kBadParameterError, + apm_->gain_control()->set_mode(static_cast<GainControl::Mode>(-1))); + + EXPECT_EQ(apm_->kBadParameterError, + apm_->gain_control()->set_mode(static_cast<GainControl::Mode>(3))); + + EXPECT_EQ(apm_->kNoError, + apm_->gain_control()->set_mode( + apm_->gain_control()->mode())); + + GainControl::Mode mode[] = { + GainControl::kAdaptiveAnalog, + GainControl::kAdaptiveDigital, + GainControl::kFixedDigital + }; + for (size_t i = 0; i < sizeof(mode)/sizeof(*mode); i++) { + EXPECT_EQ(apm_->kNoError, + apm_->gain_control()->set_mode(mode[i])); + EXPECT_EQ(mode[i], apm_->gain_control()->mode()); + } + // Testing invalid target levels + EXPECT_EQ(apm_->kBadParameterError, + apm_->gain_control()->set_target_level_dbfs(-3)); + EXPECT_EQ(apm_->kBadParameterError, + apm_->gain_control()->set_target_level_dbfs(-40)); + // Testing valid target levels + EXPECT_EQ(apm_->kNoError, + apm_->gain_control()->set_target_level_dbfs( + apm_->gain_control()->target_level_dbfs())); + + int level_dbfs[] = {0, 6, 31}; + for (size_t i = 0; i < sizeof(level_dbfs)/sizeof(*level_dbfs); i++) { + EXPECT_EQ(apm_->kNoError, + apm_->gain_control()->set_target_level_dbfs(level_dbfs[i])); + EXPECT_EQ(level_dbfs[i], apm_->gain_control()->target_level_dbfs()); + } + + // Testing invalid compression gains + EXPECT_EQ(apm_->kBadParameterError, + 
apm_->gain_control()->set_compression_gain_db(-1)); + EXPECT_EQ(apm_->kBadParameterError, + apm_->gain_control()->set_compression_gain_db(100)); + + // Testing valid compression gains + EXPECT_EQ(apm_->kNoError, + apm_->gain_control()->set_compression_gain_db( + apm_->gain_control()->compression_gain_db())); + + int gain_db[] = {0, 10, 90}; + for (size_t i = 0; i < sizeof(gain_db)/sizeof(*gain_db); i++) { + EXPECT_EQ(apm_->kNoError, + apm_->gain_control()->set_compression_gain_db(gain_db[i])); + EXPECT_EQ(gain_db[i], apm_->gain_control()->compression_gain_db()); + } + + // Testing limiter off/on + EXPECT_EQ(apm_->kNoError, apm_->gain_control()->enable_limiter(false)); + EXPECT_FALSE(apm_->gain_control()->is_limiter_enabled()); + EXPECT_EQ(apm_->kNoError, apm_->gain_control()->enable_limiter(true)); + EXPECT_TRUE(apm_->gain_control()->is_limiter_enabled()); + + // Testing invalid level limits + EXPECT_EQ(apm_->kBadParameterError, + apm_->gain_control()->set_analog_level_limits(-1, 512)); + EXPECT_EQ(apm_->kBadParameterError, + apm_->gain_control()->set_analog_level_limits(100000, 512)); + EXPECT_EQ(apm_->kBadParameterError, + apm_->gain_control()->set_analog_level_limits(512, -1)); + EXPECT_EQ(apm_->kBadParameterError, + apm_->gain_control()->set_analog_level_limits(512, 100000)); + EXPECT_EQ(apm_->kBadParameterError, + apm_->gain_control()->set_analog_level_limits(512, 255)); + + // Testing valid level limits + EXPECT_EQ(apm_->kNoError, + apm_->gain_control()->set_analog_level_limits( + apm_->gain_control()->analog_level_minimum(), + apm_->gain_control()->analog_level_maximum())); + + int min_level[] = {0, 255, 1024}; + for (size_t i = 0; i < sizeof(min_level)/sizeof(*min_level); i++) { + EXPECT_EQ(apm_->kNoError, + apm_->gain_control()->set_analog_level_limits(min_level[i], 1024)); + EXPECT_EQ(min_level[i], apm_->gain_control()->analog_level_minimum()); + } + + int max_level[] = {0, 1024, 65535}; + for (size_t i = 0; i < sizeof(max_level)/sizeof(*max_level); i++) 
{ + EXPECT_EQ(apm_->kNoError, + apm_->gain_control()->set_analog_level_limits(0, max_level[i])); + EXPECT_EQ(max_level[i], apm_->gain_control()->analog_level_maximum()); + } + + // TODO(ajm): stream_is_saturated() and stream_analog_level() + + // Turn AGC off + EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(false)); + EXPECT_FALSE(apm_->gain_control()->is_enabled()); +} + +TEST_F(ApmTest, NoiseSuppression) { + // Testing invalid suppression levels + EXPECT_EQ(apm_->kBadParameterError, + apm_->noise_suppression()->set_level( + static_cast<NoiseSuppression::Level>(-1))); + + EXPECT_EQ(apm_->kBadParameterError, + apm_->noise_suppression()->set_level( + static_cast<NoiseSuppression::Level>(5))); + + // Testing valid suppression levels + NoiseSuppression::Level level[] = { + NoiseSuppression::kLow, + NoiseSuppression::kModerate, + NoiseSuppression::kHigh, + NoiseSuppression::kVeryHigh + }; + for (size_t i = 0; i < sizeof(level)/sizeof(*level); i++) { + EXPECT_EQ(apm_->kNoError, + apm_->noise_suppression()->set_level(level[i])); + EXPECT_EQ(level[i], apm_->noise_suppression()->level()); + } + + // Turning NS on/off + EXPECT_EQ(apm_->kNoError, apm_->noise_suppression()->Enable(true)); + EXPECT_TRUE(apm_->noise_suppression()->is_enabled()); + EXPECT_EQ(apm_->kNoError, apm_->noise_suppression()->Enable(false)); + EXPECT_FALSE(apm_->noise_suppression()->is_enabled()); +} + +TEST_F(ApmTest, HighPassFilter) { + // Turning HP filter on/off + EXPECT_EQ(apm_->kNoError, apm_->high_pass_filter()->Enable(true)); + EXPECT_TRUE(apm_->high_pass_filter()->is_enabled()); + EXPECT_EQ(apm_->kNoError, apm_->high_pass_filter()->Enable(false)); + EXPECT_FALSE(apm_->high_pass_filter()->is_enabled()); +} + +TEST_F(ApmTest, LevelEstimator) { + // Level estimator is unsupported: Enable() must fail and leave it disabled + EXPECT_EQ(apm_->kUnsupportedComponentError, + apm_->level_estimator()->Enable(true)); + EXPECT_FALSE(apm_->level_estimator()->is_enabled()); + EXPECT_EQ(apm_->kUnsupportedComponentError, + 
apm_->level_estimator()->Enable(false)); + EXPECT_FALSE(apm_->level_estimator()->is_enabled()); +} + +TEST_F(ApmTest, VoiceDetection) { + // Test external VAD + EXPECT_EQ(apm_->kNoError, + apm_->voice_detection()->set_stream_has_voice(true)); + EXPECT_TRUE(apm_->voice_detection()->stream_has_voice()); + EXPECT_EQ(apm_->kNoError, + apm_->voice_detection()->set_stream_has_voice(false)); + EXPECT_FALSE(apm_->voice_detection()->stream_has_voice()); + + // Testing invalid likelihoods + EXPECT_EQ(apm_->kBadParameterError, + apm_->voice_detection()->set_likelihood( + static_cast<VoiceDetection::Likelihood>(-1))); + + EXPECT_EQ(apm_->kBadParameterError, + apm_->voice_detection()->set_likelihood( + static_cast<VoiceDetection::Likelihood>(5))); + + // Testing valid likelihoods + VoiceDetection::Likelihood likelihood[] = { + VoiceDetection::kVeryLowLikelihood, + VoiceDetection::kLowLikelihood, + VoiceDetection::kModerateLikelihood, + VoiceDetection::kHighLikelihood + }; + for (size_t i = 0; i < sizeof(likelihood)/sizeof(*likelihood); i++) { + EXPECT_EQ(apm_->kNoError, + apm_->voice_detection()->set_likelihood(likelihood[i])); + EXPECT_EQ(likelihood[i], apm_->voice_detection()->likelihood()); + } + + /* TODO(bjornv): Enable once VAD supports other frame lengths than 10 ms + // Testing invalid frame sizes + EXPECT_EQ(apm_->kBadParameterError, + apm_->voice_detection()->set_frame_size_ms(12)); + + // Testing valid frame sizes + for (int i = 10; i <= 30; i += 10) { + EXPECT_EQ(apm_->kNoError, + apm_->voice_detection()->set_frame_size_ms(i)); + EXPECT_EQ(i, apm_->voice_detection()->frame_size_ms()); + } + */ + + // Turning VAD on/off + EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(true)); + EXPECT_TRUE(apm_->voice_detection()->is_enabled()); + EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(false)); + EXPECT_FALSE(apm_->voice_detection()->is_enabled()); + + // TODO(bjornv): Add tests for streamed voice; stream_has_voice() +} + +// Below are some ideas for tests 
from VPM. + +/*TEST_F(VideoProcessingModuleTest, GetVersionTest) +{ +} + +TEST_F(VideoProcessingModuleTest, HandleNullBuffer) +{ +} + +TEST_F(VideoProcessingModuleTest, HandleBadSize) +{ +} + +TEST_F(VideoProcessingModuleTest, IdenticalResultsAfterReset) +{ +} +*/ +} // namespace + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + ApmEnvironment* env = new ApmEnvironment; // GTest takes ownership. + ::testing::AddGlobalTestEnvironment(env); + + for (int i = 1; i < argc; i++) { + if (strcmp(argv[i], "--write_output_data") == 0) { + global_read_output_data = false; + } + } + + return RUN_ALL_TESTS(); +} diff --git a/src/modules/audio_processing/ns/main/interface/noise_suppression.h b/src/modules/audio_processing/ns/main/interface/noise_suppression.h new file mode 100644 index 0000000000..b8983b077d --- /dev/null +++ b/src/modules/audio_processing/ns/main/interface/noise_suppression.h @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_INTERFACE_NOISE_SUPPRESSION_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_INTERFACE_NOISE_SUPPRESSION_H_ + +#include "typedefs.h" + +typedef struct NsHandleT NsHandle; + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * This function returns the version number of the code. + * + * Input: + * - version : Pointer to a character array where the version + * info is stored. + * - length : Length of version. 
+ * + * Return value : 0 - Ok + * -1 - Error (probably length is not sufficient) + */ +int WebRtcNs_get_version(char *version, short length); + + +/* + * This function creates an instance to the noise reduction structure + * + * Input: + * - NS_inst : Pointer to noise reduction instance that should be + * created + * + * Output: + * - NS_inst : Pointer to created noise reduction instance + * + * Return value : 0 - Ok + * -1 - Error + */ +int WebRtcNs_Create(NsHandle **NS_inst); + + +/* + * This function frees the dynamic memory of a specified Noise Reduction + * instance. + * + * Input: + * - NS_inst : Pointer to NS instance that should be freed + * + * Return value : 0 - Ok + * -1 - Error + */ +int WebRtcNs_Free(NsHandle *NS_inst); + + +/* + * This function initializes a NS instance + * + * Input: + * - NS_inst : Instance that should be initialized + * - fs : sampling frequency in Hz (8000, 16000 or 32000) + * + * Output: + * - NS_inst : Initialized instance + * + * Return value : 0 - Ok + * -1 - Error + */ +int WebRtcNs_Init(NsHandle *NS_inst, WebRtc_UWord32 fs); + +/* + * This changes the aggressiveness of the noise suppression method. + * + * Input: + * - NS_inst : Instance whose policy should be changed + * - mode : 0: Mild, 1: Medium, 2: Aggressive, 3: strongest setting (other values return -1) + * + * Output: + * - NS_inst : Updated instance + * + * Return value : 0 - Ok + * -1 - Error + */ +int WebRtcNs_set_policy(NsHandle *NS_inst, int mode); + + +/* + * This function does Noise Suppression for the inserted speech frame. The + * input and output signals should always be 10ms (80 or 160 samples). + * + * Input + * - NS_inst : NS instance. Needs to be initialized before call. 
+ * - spframe : Pointer to speech frame buffer for L band + * - spframe_H : Pointer to speech frame buffer for H band + * (the sampling frequency is not passed here; it is set with WebRtcNs_Init) + * + * Output: + * - NS_inst : Updated NS instance + * - outframe : Pointer to output frame for L band + * - outframe_H : Pointer to output frame for H band + * + * Return value : 0 - OK + * -1 - Error + */ +int WebRtcNs_Process(NsHandle *NS_inst, + short *spframe, + short *spframe_H, + short *outframe, + short *outframe_H); + +#ifdef __cplusplus +} +#endif + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_INTERFACE_NOISE_SUPPRESSION_H_ diff --git a/src/modules/audio_processing/ns/main/interface/noise_suppression_x.h b/src/modules/audio_processing/ns/main/interface/noise_suppression_x.h new file mode 100644 index 0000000000..35fea2f02c --- /dev/null +++ b/src/modules/audio_processing/ns/main/interface/noise_suppression_x.h @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_INTERFACE_NOISE_SUPPRESSION_X_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_INTERFACE_NOISE_SUPPRESSION_X_H_ + +#include "signal_processing_library.h" + +typedef struct NsxHandleT NsxHandle; + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * This function returns the version number of the code. + * + * Input: + * - version : Pointer to a character array where the version + * info is stored. + * - length : Length of version. 
+ * + * Return value : 0 - Ok + * -1 - Error (probably length is not sufficient) + */ +int WebRtcNsx_get_version(char *version, short length); + + +/* + * This function creates an instance to the noise reduction structure + * + * Input: + * - nsxInst : Pointer to noise reduction instance that should be + * created + * + * Output: + * - nsxInst : Pointer to created noise reduction instance + * + * Return value : 0 - Ok + * -1 - Error + */ +int WebRtcNsx_Create(NsxHandle **nsxInst); + + +/* + * This function frees the dynamic memory of a specified Noise Suppression + * instance. + * + * Input: + * - nsxInst : Pointer to NS instance that should be freed + * + * Return value : 0 - Ok + * -1 - Error + */ +int WebRtcNsx_Free(NsxHandle *nsxInst); + + +/* + * This function initializes a NS instance + * + * Input: + * - nsxInst : Instance that should be initialized + * - fs : sampling frequency + * + * Output: + * - nsxInst : Initialized instance + * + * Return value : 0 - Ok + * -1 - Error + */ +int WebRtcNsx_Init(NsxHandle *nsxInst, WebRtc_UWord32 fs); + +/* + * This changes the aggressiveness of the noise suppression method. + * + * Input: + * - nsxInst : Instance whose policy should be changed + * - mode : 0: Mild, 1: Medium , 2: Aggressive + * + * Output: + * - nsxInst : Updated instance + * + * Return value : 0 - Ok + * -1 - Error + */ +int WebRtcNsx_set_policy(NsxHandle *nsxInst, int mode); + +/* + * This function does noise suppression for the inserted speech frame. The + * input and output signals should always be 10ms (80 or 160 samples). + * + * Input + * - nsxInst : NSx instance. Needs to be initialized before call. 
+ * - speechFrame : Pointer to speech frame buffer for L band + * - speechFrameHB : Pointer to speech frame buffer for H band + * (the sampling frequency is not passed here; it is set with WebRtcNsx_Init) + * + * Output: + * - nsxInst : Updated NSx instance + * - outFrame : Pointer to output frame for L band + * - outFrameHB : Pointer to output frame for H band + * + * Return value : 0 - OK + * -1 - Error + */ +int WebRtcNsx_Process(NsxHandle *nsxInst, + short *speechFrame, + short *speechFrameHB, + short *outFrame, + short *outFrameHB); + +#ifdef __cplusplus +} +#endif + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_INTERFACE_NOISE_SUPPRESSION_X_H_ diff --git a/src/modules/audio_processing/ns/main/source/Android.mk b/src/modules/audio_processing/ns/main/source/Android.mk new file mode 100644 index 0000000000..07ec98ecd8 --- /dev/null +++ b/src/modules/audio_processing/ns/main/source/Android.mk @@ -0,0 +1,52 @@ +# This file is generated by gyp; do not edit. This means you! + +LOCAL_PATH := $(call my-dir) + +include $(CLEAR_VARS) + +LOCAL_MODULE_CLASS := STATIC_LIBRARIES +LOCAL_MODULE := libwebrtc_ns +LOCAL_MODULE_TAGS := optional +LOCAL_GENERATED_SOURCES := +LOCAL_SRC_FILES := \ + noise_suppression_x.c \ + nsx_core.c + +# floating point +# noise_suppression.c ns_core.c + +# Flags passed to both C and C++ files. +MY_CFLAGS := +MY_CFLAGS_C := +MY_DEFS := '-DNO_TCMALLOC' \ + '-DNO_HEAPCHECKER' \ + '-DWEBRTC_TARGET_PC' \ + '-DWEBRTC_LINUX' \ + '-DWEBRTC_THREAD_RR' +ifeq ($(TARGET_ARCH),arm) +MY_DEFS += \ + '-DWEBRTC_ANDROID' \ + '-DANDROID' +endif +LOCAL_CFLAGS := $(MY_CFLAGS_C) $(MY_CFLAGS) $(MY_DEFS) + +# Include paths placed before CFLAGS/CPPFLAGS +LOCAL_C_INCLUDES := $(LOCAL_PATH)/../../../../.. \ + $(LOCAL_PATH)/../interface \ + $(LOCAL_PATH)/../../../utility \ + $(LOCAL_PATH)/../../../../../common_audio/signal_processing_library/main/interface + +# Flags passed to only C++ (and not C) files. 
+LOCAL_CPPFLAGS := + +LOCAL_LDFLAGS := + +LOCAL_STATIC_LIBRARIES := + +LOCAL_SHARED_LIBRARIES := libcutils \ + libdl \ + libstlport +LOCAL_ADDITIONAL_DEPENDENCIES := + +include external/stlport/libstlport.mk +include $(BUILD_STATIC_LIBRARY) diff --git a/src/modules/audio_processing/ns/main/source/defines.h b/src/modules/audio_processing/ns/main/source/defines.h new file mode 100644 index 0000000000..d25396793d --- /dev/null +++ b/src/modules/audio_processing/ns/main/source/defines.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_DEFINES_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_DEFINES_H_ + +//#define PROCESS_FLOW_0 // Use the traditional method. +//#define PROCESS_FLOW_1 // Use traditional with DD estimate of prior SNR. +#define PROCESS_FLOW_2 // Use the new method of speech/noise classification. + +#define BLOCKL_MAX 160 // max processing block length: 160 +#define ANAL_BLOCKL_MAX 256 // max analysis block length: 256 +#define HALF_ANAL_BLOCKL 129 // half max analysis block length + 1 + +#define QUANTILE (float)0.25 + +#define SIMULT 3 +#define END_STARTUP_LONG 200 +#define END_STARTUP_SHORT 50 +#define FACTOR (float)40.0 +#define WIDTH (float)0.01 + +#define SMOOTH (float)0.75 // filter smoothing +// Length of fft work arrays. 
+#define IP_LENGTH (ANAL_BLOCKL_MAX >> 1) // must be at least ceil(2 + sqrt(ANAL_BLOCKL_MAX/2)) +#define W_LENGTH (ANAL_BLOCKL_MAX >> 1) + +//PARAMETERS FOR NEW METHOD +#define DD_PR_SNR (float)0.98 // DD update of prior SNR +#define LRT_TAVG (float)0.50 // tavg parameter for LRT (previously 0.90) +#define SPECT_FL_TAVG (float)0.30 // tavg parameter for spectral flatness measure +#define SPECT_DIFF_TAVG (float)0.30 // tavg parameter for spectral difference measure +#define PRIOR_UPDATE (float)0.10 // update parameter of prior model +#define NOISE_UPDATE (float)0.90 // update parameter for noise +#define SPEECH_UPDATE (float)0.99 // update parameter when likely speech +#define WIDTH_PR_MAP (float)4.0 // width parameter in sigmoid map for prior model +#define LRT_FEATURE_THR (float)0.5 // default threshold for LRT feature +#define SF_FEATURE_THR (float)0.5 // default threshold for Spectral Flatness feature +#define SD_FEATURE_THR (float)0.5 // default threshold for Spectral Difference feature +#define PROB_RANGE (float)0.20 // probability threshold for noise state in + // speech/noise likelihood +#define HIST_PAR_EST 1000 // histogram size for estimation of parameters +#define GAMMA_PAUSE (float)0.05 // update for conservative noise estimate +// +#define B_LIM (float)0.5 // threshold in final energy gain factor calculation +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_DEFINES_H_ diff --git a/src/modules/audio_processing/ns/main/source/noise_suppression.c b/src/modules/audio_processing/ns/main/source/noise_suppression.c new file mode 100644 index 0000000000..aed10b1460 --- /dev/null +++ b/src/modules/audio_processing/ns/main/source/noise_suppression.c @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <stdlib.h> +#include <string.h> + +#include "noise_suppression.h" +#include "ns_core.h" +#include "defines.h" + +// Copies the NUL-terminated version string into versionStr; returns -1 when +// versionStr is NULL or length cannot hold the string plus its terminator. +int WebRtcNs_get_version(char *versionStr, short length) +{ + const char version[] = "NS 2.2.0"; + const short versionLen = (short)strlen(version) + 1; // +1 for null-termination + + if (versionStr == NULL) { + return -1; + } + + if (versionLen > length) { + return -1; + } + + strncpy(versionStr, version, versionLen); + + return 0; +} + +// Allocates an NSinst_t, marks it uninitialized (initFlag = 0) and stores it in +// *NS_inst. Returns -1 when NS_inst is NULL or the allocation fails. +int WebRtcNs_Create(NsHandle **NS_inst) +{ + // Reject a NULL out-pointer instead of dereferencing it. + if (NS_inst == NULL) { + return -1; + } + + *NS_inst = (NsHandle*) malloc(sizeof(NSinst_t)); + if (*NS_inst!=NULL) { + (*(NSinst_t**)NS_inst)->initFlag=0; + return 0; + } else { + return -1; + } + +} + +// Releases the instance memory; always returns 0. +int WebRtcNs_Free(NsHandle *NS_inst) +{ + free(NS_inst); + return 0; +} + + +// Forwards to WebRtcNs_InitCore(). +int WebRtcNs_Init(NsHandle *NS_inst, WebRtc_UWord32 fs) +{ + return WebRtcNs_InitCore((NSinst_t*) NS_inst, fs); +} + +// Forwards to WebRtcNs_set_policy_core(), which dereferences the instance +// without checking it, so a NULL handle is rejected here with -1. +int WebRtcNs_set_policy(NsHandle *NS_inst, int mode) +{ + if (NS_inst == NULL) { + return -1; + } + + return WebRtcNs_set_policy_core((NSinst_t*) NS_inst, mode); +} + + +// Forwards to WebRtcNs_ProcessCore(). +int WebRtcNs_Process(NsHandle *NS_inst, short *spframe, short *spframe_H, short *outframe, short *outframe_H) +{ + return WebRtcNs_ProcessCore((NSinst_t*) NS_inst, spframe, spframe_H, outframe, outframe_H); +} diff --git a/src/modules/audio_processing/ns/main/source/noise_suppression_x.c b/src/modules/audio_processing/ns/main/source/noise_suppression_x.c new file mode 100644 index 0000000000..f1ad730611 --- /dev/null +++ b/src/modules/audio_processing/ns/main/source/noise_suppression_x.c @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <stdlib.h> +#include <string.h> + +#include "noise_suppression_x.h" +#include "nsx_core.h" +#include "nsx_defines.h" + +// Copies the NUL-terminated version string into versionStr; returns -1 when +// versionStr is NULL or length cannot hold the string plus its terminator. +int WebRtcNsx_get_version(char *versionStr, short length) +{ + const char version[] = "NS\t3.1.0"; + const short versionLen = (short)strlen(version) + 1; // +1 for null-termination + + if (versionStr == NULL) + { + return -1; + } + + if (versionLen > length) + { + return -1; + } + + strncpy(versionStr, version, versionLen); + + return 0; +} + +// Allocates an NsxInst_t, marks it uninitialized (initFlag = 0) and stores it in +// *nsxInst. Returns -1 when nsxInst is NULL or the allocation fails. +int WebRtcNsx_Create(NsxHandle **nsxInst) +{ + // Reject a NULL out-pointer instead of dereferencing it. + if (nsxInst == NULL) + { + return -1; + } + + *nsxInst = (NsxHandle*)malloc(sizeof(NsxInst_t)); + if (*nsxInst != NULL) + { + (*(NsxInst_t**)nsxInst)->initFlag = 0; + return 0; + } else + { + return -1; + } + +} + +// Releases the instance memory; always returns 0. +int WebRtcNsx_Free(NsxHandle *nsxInst) +{ + free(nsxInst); + return 0; +} + +// Forwards to WebRtcNsx_InitCore(). +int WebRtcNsx_Init(NsxHandle *nsxInst, WebRtc_UWord32 fs) +{ + return WebRtcNsx_InitCore((NsxInst_t*)nsxInst, fs); +} + +// Forwards to WebRtcNsx_set_policy_core(). +int WebRtcNsx_set_policy(NsxHandle *nsxInst, int mode) +{ + return WebRtcNsx_set_policy_core((NsxInst_t*)nsxInst, mode); +} + +// Forwards to WebRtcNsx_ProcessCore(). +int WebRtcNsx_Process(NsxHandle *nsxInst, short *speechFrame, short *speechFrameHB, + short *outFrame, short *outFrameHB) +{ + return WebRtcNsx_ProcessCore((NsxInst_t*)nsxInst, speechFrame, speechFrameHB, outFrame, + outFrameHB); +} + diff --git a/src/modules/audio_processing/ns/main/source/ns.gyp b/src/modules/audio_processing/ns/main/source/ns.gyp new file mode 100644 index 0000000000..c8488b27e3 --- /dev/null +++ b/src/modules/audio_processing/ns/main/source/ns.gyp @@ -0,0 +1,67 @@ +# Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. 
An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +{ + 'includes': [ + '../../../../../common_settings.gypi', + ], + 'targets': [ + { + 'target_name': 'ns', + 'type': '<(library)', + 'dependencies': [ + '../../../../../common_audio/signal_processing_library/main/source/spl.gyp:spl', + '../../../utility/util.gyp:apm_util' + ], + 'include_dirs': [ + '../interface', + ], + 'direct_dependent_settings': { + 'include_dirs': [ + '../interface', + ], + }, + 'sources': [ + '../interface/noise_suppression.h', + 'noise_suppression.c', + 'windows_private.h', + 'defines.h', + 'ns_core.c', + 'ns_core.h', + ], + }, + { + 'target_name': 'ns_fix', + 'type': '<(library)', + 'dependencies': [ + '../../../../../common_audio/signal_processing_library/main/source/spl.gyp:spl', + ], + 'include_dirs': [ + '../interface', + ], + 'direct_dependent_settings': { + 'include_dirs': [ + '../interface', + ], + }, + 'sources': [ + '../interface/noise_suppression_x.h', + 'noise_suppression_x.c', + 'nsx_defines.h', + 'nsx_core.c', + 'nsx_core.h', + ], + }, + ], +} + +# Local Variables: +# tab-width:2 +# indent-tabs-mode:nil +# End: +# vim: set expandtab tabstop=2 shiftwidth=2: diff --git a/src/modules/audio_processing/ns/main/source/ns_core.c b/src/modules/audio_processing/ns/main/source/ns_core.c new file mode 100644 index 0000000000..10a1b831f7 --- /dev/null +++ b/src/modules/audio_processing/ns/main/source/ns_core.c @@ -0,0 +1,1500 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <string.h> +#include <math.h> +//#include <stdio.h> +#include <stdlib.h> +#include "noise_suppression.h" +#include "ns_core.h" +#include "windows_private.h" +#include "fft4g.h" +#include "signal_processing_library.h" + +// Set Feature Extraction Parameters +void WebRtcNs_set_feature_extraction_parameters(NSinst_t *inst) +{ + //bin size of histogram + inst->featureExtractionParams.binSizeLrt = (float)0.1; + inst->featureExtractionParams.binSizeSpecFlat = (float)0.05; + inst->featureExtractionParams.binSizeSpecDiff = (float)0.1; + + //range of histogram over which lrt threshold is computed + inst->featureExtractionParams.rangeAvgHistLrt = (float)1.0; + + //scale parameters: multiply dominant peaks of the histograms by scale factor to obtain + // thresholds for prior model + inst->featureExtractionParams.factor1ModelPars = (float)1.20; //for lrt and spectral diff + inst->featureExtractionParams.factor2ModelPars = (float)0.9; //for spectral_flatness: + // used when noise is flatter than speech + + //peak limit for spectral flatness (varies between 0 and 1) + inst->featureExtractionParams.thresPosSpecFlat = (float)0.6; + + //limit on spacing of two highest peaks in histogram: spacing determined by bin size + inst->featureExtractionParams.limitPeakSpacingSpecFlat = 2 + * inst->featureExtractionParams.binSizeSpecFlat; + inst->featureExtractionParams.limitPeakSpacingSpecDiff = 2 + * inst->featureExtractionParams.binSizeSpecDiff; + + //limit on relevance of second peak: + inst->featureExtractionParams.limitPeakWeightsSpecFlat = (float)0.5; + inst->featureExtractionParams.limitPeakWeightsSpecDiff = (float)0.5; + + // fluctuation limit of lrt feature + inst->featureExtractionParams.thresFluctLrt = (float)0.05; + + //limit on the max and min values for the feature thresholds + inst->featureExtractionParams.maxLrt = (float)1.0; + inst->featureExtractionParams.minLrt = (float)0.20; + + inst->featureExtractionParams.maxSpecFlat = (float)0.95; + 
inst->featureExtractionParams.minSpecFlat = (float)0.10; + + inst->featureExtractionParams.maxSpecDiff = (float)1.0; + inst->featureExtractionParams.minSpecDiff = (float)0.16; + + //criteria of weight of histogram peak to accept/reject feature + inst->featureExtractionParams.thresWeightSpecFlat = (int)(0.3 + * (inst->modelUpdatePars[1])); //for spectral flatness + inst->featureExtractionParams.thresWeightSpecDiff = (int)(0.3 + * (inst->modelUpdatePars[1])); //for spectral difference +} + +// Initialize state +int WebRtcNs_InitCore(NSinst_t *inst, WebRtc_UWord32 fs) +{ + int i; + //We only support 10ms frames + + //check for valid pointer + if (inst == NULL) + { + return -1; + } + + // Initialization of struct + if (fs == 8000 || fs == 16000 || fs == 32000) + { + inst->fs = fs; + } + else + { + return -1; + } + inst->windShift = 0; + if (fs == 8000) + { + // We only support 10ms frames + inst->blockLen = 80; + inst->blockLen10ms = 80; + inst->anaLen = 128; + inst->window = kBlocks80w128; + inst->outLen = 0; + } + else if (fs == 16000) + { + // We only support 10ms frames + inst->blockLen = 160; + inst->blockLen10ms = 160; + inst->anaLen = 256; + inst->window = kBlocks160w256; + inst->outLen = 0; + } + else if (fs==32000) + { + // We only support 10ms frames + inst->blockLen = 160; + inst->blockLen10ms = 160; + inst->anaLen = 256; + inst->window = kBlocks160w256; + inst->outLen = 0; + } + inst->magnLen = inst->anaLen / 2 + 1; // Number of frequency bins + + // Initialize fft work arrays. + inst->ip[0] = 0; // Setting this triggers initialization. 
+ memset(inst->dataBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX); + rdft(inst->anaLen, 1, inst->dataBuf, inst->ip, inst->wfft); + + memset(inst->dataBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX); + memset(inst->syntBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX); + + //for HB processing + memset(inst->dataBufHB, 0, sizeof(float) * ANAL_BLOCKL_MAX); + + //for quantile noise estimation + memset(inst->quantile, 0, sizeof(float) * HALF_ANAL_BLOCKL); + for (i = 0; i < SIMULT * HALF_ANAL_BLOCKL; i++) + { + inst->lquantile[i] = (float)8.0; + inst->density[i] = (float)0.3; + } + + for (i = 0; i < SIMULT; i++) + { + inst->counter[i] = (int)floor((float)(END_STARTUP_LONG * (i + 1)) / (float)SIMULT); + } + + inst->updates = 0; + + // Wiener filter initialization + for (i = 0; i < HALF_ANAL_BLOCKL; i++) + { + inst->smooth[i] = (float)1.0; + } + + // Set the aggressiveness: default + inst->aggrMode = 0; + + //initialize variables for new method + inst->priorSpeechProb = (float)0.5; //prior prob for speech/noise + for (i = 0; i < HALF_ANAL_BLOCKL; i++) + { + inst->magnPrev[i] = (float)0.0; //previous mag spectrum + inst->noisePrev[i] = (float)0.0; //previous noise-spectrum + inst->logLrtTimeAvg[i] = LRT_FEATURE_THR; //smooth LR ratio (same as threshold) + inst->magnAvgPause[i] = (float)0.0; //conservative noise spectrum estimate + inst->speechProbHB[i] = (float)0.0; //for estimation of HB in second pass + inst->initMagnEst[i] = (float)0.0; //initial average mag spectrum + } + + //feature quantities + inst->featureData[0] = SF_FEATURE_THR; //spectral flatness (start on threshold) + inst->featureData[1] = (float)0.0; //spectral entropy: not used in this version + inst->featureData[2] = (float)0.0; //spectral variance: not used in this version + inst->featureData[3] = LRT_FEATURE_THR; //average lrt factor (start on threshold) + inst->featureData[4] = SD_FEATURE_THR; //spectral template diff (start on its own threshold) + inst->featureData[5] = (float)0.0; //normalization for spectral-diff + 
inst->featureData[6] = (float)0.0; //window time-average of input magnitude spectrum + + //histogram quantities: used to estimate/update thresholds for features + for (i = 0; i < HIST_PAR_EST; i++) + { + inst->histLrt[i] = 0; + inst->histSpecFlat[i] = 0; + inst->histSpecDiff[i] = 0; + } + + inst->blockInd = -1; //frame counter + inst->priorModelPars[0] = LRT_FEATURE_THR; //default threshold for lrt feature + inst->priorModelPars[1] = (float)0.5; //threshold for spectral flatness: + // determined on-line + inst->priorModelPars[2] = (float)1.0; //sgn_map par for spectral measure: + // 1 for flatness measure + inst->priorModelPars[3] = (float)0.5; //threshold for template-difference feature: + // determined on-line + inst->priorModelPars[4] = (float)1.0; //default weighting parameter for lrt feature + inst->priorModelPars[5] = (float)0.0; //default weighting parameter for + // spectral flatness feature + inst->priorModelPars[6] = (float)0.0; //default weighting parameter for + // spectral difference feature + + inst->modelUpdatePars[0] = 2; //update flag for parameters: + // 0 no update, 1=update once, 2=update every window + inst->modelUpdatePars[1] = 500; //window for update + inst->modelUpdatePars[2] = 0; //counter for update of conservative noise spectrum + //counter if the feature thresholds are updated during the sequence + inst->modelUpdatePars[3] = inst->modelUpdatePars[1]; + + inst->signalEnergy = 0.0; + inst->sumMagn = 0.0; + inst->whiteNoiseLevel = 0.0; + inst->pinkNoiseNumerator = 0.0; + inst->pinkNoiseExp = 0.0; + + WebRtcNs_set_feature_extraction_parameters(inst); // Set feature configuration + + //default mode + WebRtcNs_set_policy_core(inst, 0); + + + memset(inst->outBuf, 0, sizeof(float) * 3 * BLOCKL_MAX); + + inst->initFlag = 1; + return 0; +} + +int WebRtcNs_set_policy_core(NSinst_t *inst, int mode) +{ + // allow for modes:0,1,2,3 + if (mode < 0 || mode > 3) + { + return (-1); + } + + inst->aggrMode = mode; + if (mode == 0) + { + inst->overdrive = 
(float)1.0; + inst->denoiseBound = (float)0.5; + inst->gainmap = 0; + } + else if (mode == 1) + { + //inst->overdrive = (float)1.25; + inst->overdrive = (float)1.0; + inst->denoiseBound = (float)0.25; + inst->gainmap = 1; + } + else if (mode == 2) + { + //inst->overdrive = (float)1.25; + inst->overdrive = (float)1.1; + inst->denoiseBound = (float)0.125; + inst->gainmap = 1; + } + else if (mode == 3) + { + //inst->overdrive = (float)1.30; + inst->overdrive = (float)1.25; + inst->denoiseBound = (float)0.09; + inst->gainmap = 1; + } + return 0; +} + +// Estimate noise +void WebRtcNs_NoiseEstimation(NSinst_t *inst, float *magn, float *noise) +{ + int i, s, offset; + float lmagn[HALF_ANAL_BLOCKL], delta; + + if (inst->updates < END_STARTUP_LONG) + { + inst->updates++; + } + + for (i = 0; i < inst->magnLen; i++) + { + lmagn[i] = (float)log(magn[i]); + } + + // loop over simultaneous estimates + for (s = 0; s < SIMULT; s++) + { + offset = s * inst->magnLen; + + // newquantest(...) + for (i = 0; i < inst->magnLen; i++) + { + // compute delta + if (inst->density[offset + i] > 1.0) + { + delta = FACTOR * (float)1.0 / inst->density[offset + i]; + } + else + { + delta = FACTOR; + } + + // update log quantile estimate + if (lmagn[i] > inst->lquantile[offset + i]) + { + inst->lquantile[offset + i] += QUANTILE * delta + / (float)(inst->counter[s] + 1); + } + else + { + inst->lquantile[offset + i] -= ((float)1.0 - QUANTILE) * delta + / (float)(inst->counter[s] + 1); + } + + // update density estimate + if (fabs(lmagn[i] - inst->lquantile[offset + i]) < WIDTH) + { + inst->density[offset + i] = ((float)inst->counter[s] * inst->density[offset + + i] + (float)1.0 / ((float)2.0 * WIDTH)) / (float)(inst->counter[s] + + 1); + } + } // end loop over magnitude spectrum + + if (inst->counter[s] >= END_STARTUP_LONG) + { + inst->counter[s] = 0; + if (inst->updates >= END_STARTUP_LONG) + { + for (i = 0; i < inst->magnLen; i++) + { + inst->quantile[i] = (float)exp(inst->lquantile[offset + i]); 
+ } + } + } + + inst->counter[s]++; + } // end loop over simultaneous estimates + + // Sequentially update the noise during startup + if (inst->updates < END_STARTUP_LONG) + { + // Use the last "s" to get noise during startup that differ from zero. + for (i = 0; i < inst->magnLen; i++) + { + inst->quantile[i] = (float)exp(inst->lquantile[offset + i]); + } + } + + for (i = 0; i < inst->magnLen; i++) + { + noise[i] = inst->quantile[i]; + } +} + +// Extract thresholds for feature parameters +// histograms are computed over some window_size (given by inst->modelUpdatePars[1]) +// thresholds and weights are extracted every window +// flag 0 means update histogram only, flag 1 means compute the thresholds/weights +// threshold and weights are returned in: inst->priorModelPars +void WebRtcNs_FeatureParameterExtraction(NSinst_t *inst, int flag) +{ + int i, useFeatureSpecFlat, useFeatureSpecDiff, numHistLrt; + int maxPeak1, maxPeak2; + int weightPeak1SpecFlat, weightPeak2SpecFlat, weightPeak1SpecDiff, weightPeak2SpecDiff; + + float binMid, featureSum; + float posPeak1SpecFlat, posPeak2SpecFlat, posPeak1SpecDiff, posPeak2SpecDiff; + float fluctLrt, avgHistLrt, avgSquareHistLrt, avgHistLrtCompl; + + //3 features: lrt, flatness, difference + //lrt_feature = inst->featureData[3]; + //flat_feature = inst->featureData[0]; + //diff_feature = inst->featureData[4]; + + //update histograms + if (flag == 0) + { + // LRT + if ((inst->featureData[3] < HIST_PAR_EST * inst->featureExtractionParams.binSizeLrt) + && (inst->featureData[3] >= 0.0)) + { + i = (int)(inst->featureData[3] / inst->featureExtractionParams.binSizeLrt); + inst->histLrt[i]++; + } + // Spectral flatness + if ((inst->featureData[0] < HIST_PAR_EST + * inst->featureExtractionParams.binSizeSpecFlat) + && (inst->featureData[0] >= 0.0)) + { + i = (int)(inst->featureData[0] / inst->featureExtractionParams.binSizeSpecFlat); + inst->histSpecFlat[i]++; + } + // Spectral difference + if ((inst->featureData[4] < HIST_PAR_EST + * 
inst->featureExtractionParams.binSizeSpecDiff) + && (inst->featureData[4] >= 0.0)) + { + i = (int)(inst->featureData[4] / inst->featureExtractionParams.binSizeSpecDiff); + inst->histSpecDiff[i]++; + } + } + + // extract parameters for speech/noise probability + if (flag == 1) + { + //lrt feature: compute the average over inst->featureExtractionParams.rangeAvgHistLrt + avgHistLrt = 0.0; + avgHistLrtCompl = 0.0; + avgSquareHistLrt = 0.0; + numHistLrt = 0; + for (i = 0; i < HIST_PAR_EST; i++) + { + binMid = ((float)i + (float)0.5) * inst->featureExtractionParams.binSizeLrt; + if (binMid <= inst->featureExtractionParams.rangeAvgHistLrt) + { + avgHistLrt += inst->histLrt[i] * binMid; + numHistLrt += inst->histLrt[i]; + } + avgSquareHistLrt += inst->histLrt[i] * binMid * binMid; + avgHistLrtCompl += inst->histLrt[i] * binMid; + } + if (numHistLrt > 0) + { + avgHistLrt = avgHistLrt / ((float)numHistLrt); + } + avgHistLrtCompl = avgHistLrtCompl / ((float)inst->modelUpdatePars[1]); + avgSquareHistLrt = avgSquareHistLrt / ((float)inst->modelUpdatePars[1]); + fluctLrt = avgSquareHistLrt - avgHistLrt * avgHistLrtCompl; + // get threshold for lrt feature: + if (fluctLrt < inst->featureExtractionParams.thresFluctLrt) + { + //very low fluct, so likely noise + inst->priorModelPars[0] = inst->featureExtractionParams.maxLrt; + } + else + { + inst->priorModelPars[0] = inst->featureExtractionParams.factor1ModelPars + * avgHistLrt; + // check if value is within min/max range + if (inst->priorModelPars[0] < inst->featureExtractionParams.minLrt) + { + inst->priorModelPars[0] = inst->featureExtractionParams.minLrt; + } + if (inst->priorModelPars[0] > inst->featureExtractionParams.maxLrt) + { + inst->priorModelPars[0] = inst->featureExtractionParams.maxLrt; + } + } + // done with lrt feature + + // + // for spectral flatness and spectral difference: compute the main peaks of histogram + maxPeak1 = 0; + maxPeak2 = 0; + posPeak1SpecFlat = 0.0; + posPeak2SpecFlat = 0.0; + weightPeak1SpecFlat 
= 0; + weightPeak2SpecFlat = 0; + + // peaks for flatness + for (i = 0; i < HIST_PAR_EST; i++) + { + binMid = ((float)i + (float)0.5) * inst->featureExtractionParams.binSizeSpecFlat; + if (inst->histSpecFlat[i] > maxPeak1) + { + // Found new "first" peak + maxPeak2 = maxPeak1; + weightPeak2SpecFlat = weightPeak1SpecFlat; + posPeak2SpecFlat = posPeak1SpecFlat; + + maxPeak1 = inst->histSpecFlat[i]; + weightPeak1SpecFlat = inst->histSpecFlat[i]; + posPeak1SpecFlat = binMid; + } + else if (inst->histSpecFlat[i] > maxPeak2) + { + // Found new "second" peak + maxPeak2 = inst->histSpecFlat[i]; + weightPeak2SpecFlat = inst->histSpecFlat[i]; + posPeak2SpecFlat = binMid; + } + } + + //compute two peaks for spectral difference + maxPeak1 = 0; + maxPeak2 = 0; + posPeak1SpecDiff = 0.0; + posPeak2SpecDiff = 0.0; + weightPeak1SpecDiff = 0; + weightPeak2SpecDiff = 0; + // peaks for spectral difference + for (i = 0; i < HIST_PAR_EST; i++) + { + binMid = ((float)i + (float)0.5) * inst->featureExtractionParams.binSizeSpecDiff; + if (inst->histSpecDiff[i] > maxPeak1) + { + // Found new "first" peak + maxPeak2 = maxPeak1; + weightPeak2SpecDiff = weightPeak1SpecDiff; + posPeak2SpecDiff = posPeak1SpecDiff; + + maxPeak1 = inst->histSpecDiff[i]; + weightPeak1SpecDiff = inst->histSpecDiff[i]; + posPeak1SpecDiff = binMid; + } + else if (inst->histSpecDiff[i] > maxPeak2) + { + // Found new "second" peak + maxPeak2 = inst->histSpecDiff[i]; + weightPeak2SpecDiff = inst->histSpecDiff[i]; + posPeak2SpecDiff = binMid; + } + } + + // for spectrum flatness feature + useFeatureSpecFlat = 1; + // merge the two peaks if they are close + if ((fabs(posPeak2SpecFlat - posPeak1SpecFlat) + < inst->featureExtractionParams.limitPeakSpacingSpecFlat) + && (weightPeak2SpecFlat + > inst->featureExtractionParams.limitPeakWeightsSpecFlat + * weightPeak1SpecFlat)) + { + weightPeak1SpecFlat += weightPeak2SpecFlat; + posPeak1SpecFlat = (float)0.5 * (posPeak1SpecFlat + posPeak2SpecFlat); + } + //reject if weight of 
peaks is not large enough, or peak value too small + if (weightPeak1SpecFlat < inst->featureExtractionParams.thresWeightSpecFlat + || posPeak1SpecFlat < inst->featureExtractionParams.thresPosSpecFlat) + { + useFeatureSpecFlat = 0; + } + // if selected, get the threshold + if (useFeatureSpecFlat == 1) + { + // compute the threshold + inst->priorModelPars[1] = inst->featureExtractionParams.factor2ModelPars + * posPeak1SpecFlat; + //check if value is within min/max range + if (inst->priorModelPars[1] < inst->featureExtractionParams.minSpecFlat) + { + inst->priorModelPars[1] = inst->featureExtractionParams.minSpecFlat; + } + if (inst->priorModelPars[1] > inst->featureExtractionParams.maxSpecFlat) + { + inst->priorModelPars[1] = inst->featureExtractionParams.maxSpecFlat; + } + } + // done with flatness feature + + // for template feature + useFeatureSpecDiff = 1; + // merge the two peaks if they are close + if ((fabs(posPeak2SpecDiff - posPeak1SpecDiff) + < inst->featureExtractionParams.limitPeakSpacingSpecDiff) + && (weightPeak2SpecDiff + > inst->featureExtractionParams.limitPeakWeightsSpecDiff + * weightPeak1SpecDiff)) + { + weightPeak1SpecDiff += weightPeak2SpecDiff; + posPeak1SpecDiff = (float)0.5 * (posPeak1SpecDiff + posPeak2SpecDiff); + } + // get the threshold value + inst->priorModelPars[3] = inst->featureExtractionParams.factor1ModelPars + * posPeak1SpecDiff; + //reject if weight of peaks is not large enough + if (weightPeak1SpecDiff < inst->featureExtractionParams.thresWeightSpecDiff) + { + useFeatureSpecDiff = 0; + } + //check if value is within min/max range + if (inst->priorModelPars[3] < inst->featureExtractionParams.minSpecDiff) + { + inst->priorModelPars[3] = inst->featureExtractionParams.minSpecDiff; + } + if (inst->priorModelPars[3] > inst->featureExtractionParams.maxSpecDiff) + { + inst->priorModelPars[3] = inst->featureExtractionParams.maxSpecDiff; + } + // done with spectral difference feature + + // don't use template feature if fluctuation of lrt 
feature is very low: + // most likely just noise state + if (fluctLrt < inst->featureExtractionParams.thresFluctLrt) + { + useFeatureSpecDiff = 0; + } + + // select the weights between the features + // inst->priorModelPars[4] is weight for lrt: always selected + // inst->priorModelPars[5] is weight for spectral flatness + // inst->priorModelPars[6] is weight for spectral difference + featureSum = (float)(1 + useFeatureSpecFlat + useFeatureSpecDiff); + inst->priorModelPars[4] = (float)1.0 / featureSum; + inst->priorModelPars[5] = ((float)useFeatureSpecFlat) / featureSum; + inst->priorModelPars[6] = ((float)useFeatureSpecDiff) / featureSum; + + // set hists to zero for next update + if (inst->modelUpdatePars[0] >= 1) + { + for (i = 0; i < HIST_PAR_EST; i++) + { + inst->histLrt[i] = 0; + inst->histSpecFlat[i] = 0; + inst->histSpecDiff[i] = 0; + } + } + } // end of flag == 1 +} + +// Compute spectral flatness on input spectrum +// magnIn is the magnitude spectrum +// spectral flatness is returned in inst->featureData[0] +void WebRtcNs_ComputeSpectralFlatness(NSinst_t *inst, float *magnIn) +{ + int i; + int shiftLP = 1; //option to remove first bin(s) from spectral measures + float avgSpectralFlatnessNum, avgSpectralFlatnessDen, spectralTmp; + + // comute spectral measures + // for flatness + avgSpectralFlatnessNum = 0.0; + avgSpectralFlatnessDen = inst->sumMagn; + for (i = 0; i < shiftLP; i++) + { + avgSpectralFlatnessDen -= magnIn[i]; + } + // compute log of ratio of the geometric to arithmetic mean: check for log(0) case + for (i = shiftLP; i < inst->magnLen; i++) + { + if (magnIn[i] > 0.0) + { + avgSpectralFlatnessNum += (float)log(magnIn[i]); + } + else + { + inst->featureData[0] -= SPECT_FL_TAVG * inst->featureData[0]; + return; + } + } + //normalize + avgSpectralFlatnessDen = avgSpectralFlatnessDen / inst->magnLen; + avgSpectralFlatnessNum = avgSpectralFlatnessNum / inst->magnLen; + + //ratio and inverse log: check for case of log(0) + spectralTmp = 
(float)exp(avgSpectralFlatnessNum) / avgSpectralFlatnessDen; + + //time-avg update of spectral flatness feature + inst->featureData[0] += SPECT_FL_TAVG * (spectralTmp - inst->featureData[0]); + // done with flatness feature +} + +// Compute the difference measure between input spectrum and a template/learned noise spectrum +// magnIn is the input spectrum +// the reference/template spectrum is inst->magnAvgPause[i] +// returns (normalized) spectral difference in inst->featureData[4] +void WebRtcNs_ComputeSpectralDifference(NSinst_t *inst, float *magnIn) +{ + // avgDiffNormMagn = var(magnIn) - cov(magnIn, magnAvgPause)^2 / var(magnAvgPause) + int i; + float avgPause, avgMagn, covMagnPause, varPause, varMagn, avgDiffNormMagn; + + avgPause = 0.0; + avgMagn = inst->sumMagn; + // compute average quantities + for (i = 0; i < inst->magnLen; i++) + { + //conservative smooth noise spectrum from pause frames + avgPause += inst->magnAvgPause[i]; + } + avgPause = avgPause / ((float)inst->magnLen); + avgMagn = avgMagn / ((float)inst->magnLen); + + covMagnPause = 0.0; + varPause = 0.0; + varMagn = 0.0; + // compute variance and covariance quantities + for (i = 0; i < inst->magnLen; i++) + { + covMagnPause += (magnIn[i] - avgMagn) * (inst->magnAvgPause[i] - avgPause); + varPause += (inst->magnAvgPause[i] - avgPause) * (inst->magnAvgPause[i] - avgPause); + varMagn += (magnIn[i] - avgMagn) * (magnIn[i] - avgMagn); + } + covMagnPause = covMagnPause / ((float)inst->magnLen); + varPause = varPause / ((float)inst->magnLen); + varMagn = varMagn / ((float)inst->magnLen); + // update of average magnitude spectrum + inst->featureData[6] += inst->signalEnergy; + + avgDiffNormMagn = varMagn - (covMagnPause * covMagnPause) / (varPause + (float)0.0001); + // normalize and compute time-avg update of difference feature + avgDiffNormMagn = (float)(avgDiffNormMagn / (inst->featureData[5] + (float)0.0001)); + inst->featureData[4] += SPECT_DIFF_TAVG * (avgDiffNormMagn - inst->featureData[4]); +} + 
+// Compute speech/noise probability +// speech/noise probability is returned in: probSpeechFinal +//magn is the input magnitude spectrum +//noise is the noise spectrum +//snrLocPrior is the prior snr for each freq. +//snr loc_post is the post snr for each freq. +void WebRtcNs_SpeechNoiseProb(NSinst_t *inst, float *probSpeechFinal, float *snrLocPrior, + float *snrLocPost) +{ + int i, sgnMap; + float invLrt, gainPrior, indPrior; + float logLrtTimeAvgKsum, besselTmp; + float indicator0, indicator1, indicator2; + float tmpFloat1, tmpFloat2; + float weightIndPrior0, weightIndPrior1, weightIndPrior2; + float threshPrior0, threshPrior1, threshPrior2; + float widthPrior, widthPrior0, widthPrior1, widthPrior2; + + widthPrior0 = WIDTH_PR_MAP; + widthPrior1 = (float)2.0 * WIDTH_PR_MAP; //width for pause region: + // lower range, so increase width in tanh map + widthPrior2 = (float)2.0 * WIDTH_PR_MAP; //for spectral-difference measure + + //threshold parameters for features + threshPrior0 = inst->priorModelPars[0]; + threshPrior1 = inst->priorModelPars[1]; + threshPrior2 = inst->priorModelPars[3]; + + //sign for flatness feature + sgnMap = (int)(inst->priorModelPars[2]); + + //weight parameters for features + weightIndPrior0 = inst->priorModelPars[4]; + weightIndPrior1 = inst->priorModelPars[5]; + weightIndPrior2 = inst->priorModelPars[6]; + + // compute feature based on average LR factor + // this is the average over all frequencies of the smooth log lrt + logLrtTimeAvgKsum = 0.0; + for (i = 0; i < inst->magnLen; i++) + { + tmpFloat1 = (float)1.0 + (float)2.0 * snrLocPrior[i]; + tmpFloat2 = (float)2.0 * snrLocPrior[i] / (tmpFloat1 + (float)0.0001); + besselTmp = (snrLocPost[i] + (float)1.0) * tmpFloat2; + inst->logLrtTimeAvg[i] += LRT_TAVG * (besselTmp - (float)log(tmpFloat1) + - inst->logLrtTimeAvg[i]); + logLrtTimeAvgKsum += inst->logLrtTimeAvg[i]; + } + logLrtTimeAvgKsum = (float)logLrtTimeAvgKsum / (inst->magnLen); + inst->featureData[3] = logLrtTimeAvgKsum; + // done 
with computation of LR factor + + // + //compute the indicator functions + // + + // average lrt feature + widthPrior = widthPrior0; + //use larger width in tanh map for pause regions + if (logLrtTimeAvgKsum < threshPrior0) + { + widthPrior = widthPrior1; + } + // compute indicator function: sigmoid map + indicator0 = (float)0.5 * ((float)tanh(widthPrior * (logLrtTimeAvgKsum - threshPrior0)) + + (float)1.0); + + //spectral flatness feature + tmpFloat1 = inst->featureData[0]; + widthPrior = widthPrior0; + //use larger width in tanh map for pause regions + if (sgnMap == 1 && (tmpFloat1 > threshPrior1)) + { + widthPrior = widthPrior1; + } + if (sgnMap == -1 && (tmpFloat1 < threshPrior1)) + { + widthPrior = widthPrior1; + } + // compute indicator function: sigmoid map + indicator1 = (float)0.5 * ((float)tanh( + (float)sgnMap * widthPrior * (threshPrior1 + - tmpFloat1)) + (float)1.0); + + //for template spectrum-difference + tmpFloat1 = inst->featureData[4]; + widthPrior = widthPrior0; + //use larger width in tanh map for pause regions + if (tmpFloat1 < threshPrior2) + { + widthPrior = widthPrior2; + } + // compute indicator function: sigmoid map + indicator2 = (float)0.5 * ((float)tanh(widthPrior * (tmpFloat1 - threshPrior2)) + + (float)1.0); + + //combine the indicator function with the feature weights + indPrior = weightIndPrior0 * indicator0 + weightIndPrior1 * indicator1 + weightIndPrior2 + * indicator2; + // done with computing indicator function + + //compute the prior probability + inst->priorSpeechProb += PRIOR_UPDATE * (indPrior - inst->priorSpeechProb); + // make sure probabilities are within range: keep floor to 0.01 + if (inst->priorSpeechProb > 1.0) + { + inst->priorSpeechProb = (float)1.0; + } + if (inst->priorSpeechProb < 0.01) + { + inst->priorSpeechProb = (float)0.01; + } + + //final speech probability: combine prior model with LR factor: + gainPrior = ((float)1.0 - inst->priorSpeechProb) / (inst->priorSpeechProb + (float)0.0001); + for (i = 0; i < 
inst->magnLen; i++) + { + invLrt = (float)exp(-inst->logLrtTimeAvg[i]); + invLrt = (float)gainPrior * invLrt; + probSpeechFinal[i] = (float)1.0 / ((float)1.0 + invLrt); + } +} + +int WebRtcNs_ProcessCore(NSinst_t *inst, + short *speechFrame, + short *speechFrameHB, + short *outFrame, + short *outFrameHB) +{ + // main routine for noise reduction + + int flagHB = 0; + int i; + const int kStartBand = 5; // Skip first frequency bins during estimation. + int updateParsFlag; + + float energy1, energy2, gain, factor, factor1, factor2; + float signalEnergy, sumMagn; + float snrPrior, currentEstimateStsa; + float tmpFloat1, tmpFloat2, tmpFloat3, probSpeech, probNonSpeech; + float gammaNoiseTmp, gammaNoiseOld; + float noiseUpdateTmp, fTmp, dTmp; + float fin[BLOCKL_MAX], fout[BLOCKL_MAX]; + float winData[ANAL_BLOCKL_MAX]; + float magn[HALF_ANAL_BLOCKL], noise[HALF_ANAL_BLOCKL]; + float theFilter[HALF_ANAL_BLOCKL], theFilterTmp[HALF_ANAL_BLOCKL]; + float snrLocPost[HALF_ANAL_BLOCKL], snrLocPrior[HALF_ANAL_BLOCKL]; + float probSpeechFinal[HALF_ANAL_BLOCKL], previousEstimateStsa[HALF_ANAL_BLOCKL]; + float real[ANAL_BLOCKL_MAX], imag[HALF_ANAL_BLOCKL]; + // Variables during startup + float sum_log_i = 0.0; + float sum_log_i_square = 0.0; + float sum_log_magn = 0.0; + float sum_log_i_log_magn = 0.0; + float parametric_noise = 0.0; + float parametric_exp = 0.0; + float parametric_num = 0.0; + + // SWB variables + int deltaBweHB = 1; + int deltaGainHB = 1; + float decayBweHB = 1.0; + float gainMapParHB = 1.0; + float gainTimeDomainHB = 1.0; + float avgProbSpeechHB, avgProbSpeechHBTmp, avgFilterGainHB, gainModHB; + + // Check that initiation has been done + if (inst->initFlag != 1) + { + return (-1); + } + // Check for valid pointers based on sampling rate + if (inst->fs == 32000) + { + if (speechFrameHB == NULL) + { + return -1; + } + flagHB = 1; + // range for averaging low band quantities for H band gain + deltaBweHB = (int)inst->magnLen / 4; + deltaGainHB = deltaBweHB; + } + // + 
updateParsFlag = inst->modelUpdatePars[0]; + // + + //for LB do all processing + // convert to float + for (i = 0; i < inst->blockLen10ms; i++) + { + fin[i] = (float)speechFrame[i]; + } + // update analysis buffer for L band + memcpy(inst->dataBuf, inst->dataBuf + inst->blockLen10ms, + sizeof(float) * (inst->anaLen - inst->blockLen10ms)); + memcpy(inst->dataBuf + inst->anaLen - inst->blockLen10ms, fin, + sizeof(float) * inst->blockLen10ms); + + if (flagHB == 1) + { + // convert to float + for (i = 0; i < inst->blockLen10ms; i++) + { + fin[i] = (float)speechFrameHB[i]; + } + // update analysis buffer for H band + memcpy(inst->dataBufHB, inst->dataBufHB + inst->blockLen10ms, + sizeof(float) * (inst->anaLen - inst->blockLen10ms)); + memcpy(inst->dataBufHB + inst->anaLen - inst->blockLen10ms, fin, + sizeof(float) * inst->blockLen10ms); + } + + // check if processing needed + if (inst->outLen == 0) + { + // windowing + energy1 = 0.0; + for (i = 0; i < inst->anaLen; i++) + { + winData[i] = inst->window[i] * inst->dataBuf[i]; + energy1 += winData[i] * winData[i]; + } + if (energy1 == 0.0) + { + // synthesize the special case of zero input + // we want to avoid updating statistics in this case: + // Updating feature statistics when we have zeros only will cause thresholds to + // move towards zero signal situations. This in turn has the effect that once the + // signal is "turned on" (non-zero values) everything will be treated as speech + // and there is no noise suppression effect. Depending on the duration of the + // inactive signal it takes a considerable amount of time for the system to learn + // what is noise and what is speech. 
+ + // read out fully processed segment + for (i = inst->windShift; i < inst->blockLen + inst->windShift; i++) + { + fout[i - inst->windShift] = inst->syntBuf[i]; + } + // update synthesis buffer + memcpy(inst->syntBuf, inst->syntBuf + inst->blockLen, + sizeof(float) * (inst->anaLen - inst->blockLen)); + memset(inst->syntBuf + inst->anaLen - inst->blockLen, 0, + sizeof(float) * inst->blockLen); + + // out buffer + inst->outLen = inst->blockLen - inst->blockLen10ms; + if (inst->blockLen > inst->blockLen10ms) + { + for (i = 0; i < inst->outLen; i++) + { + inst->outBuf[i] = fout[i + inst->blockLen10ms]; + } + } + // convert to short + for (i = 0; i < inst->blockLen10ms; i++) + { + dTmp = fout[i]; + if (dTmp < WEBRTC_SPL_WORD16_MIN) + { + dTmp = WEBRTC_SPL_WORD16_MIN; + } + else if (dTmp > WEBRTC_SPL_WORD16_MAX) + { + dTmp = WEBRTC_SPL_WORD16_MAX; + } + outFrame[i] = (short)dTmp; + } + + // for time-domain gain of HB + if (flagHB == 1) + { + for (i = 0; i < inst->blockLen10ms; i++) + { + dTmp = inst->dataBufHB[i]; + if (dTmp < WEBRTC_SPL_WORD16_MIN) + { + dTmp = WEBRTC_SPL_WORD16_MIN; + } + else if (dTmp > WEBRTC_SPL_WORD16_MAX) + { + dTmp = WEBRTC_SPL_WORD16_MAX; + } + outFrameHB[i] = (short)dTmp; + } + } // end of H band gain computation + // + return 0; + } + + // + inst->blockInd++; // Update the block index only when we process a block. 
+ // FFT + rdft(inst->anaLen, 1, winData, inst->ip, inst->wfft); + + imag[0] = 0; + real[0] = winData[0]; + magn[0] = (float)(fabs(real[0]) + 1.0f); + imag[inst->magnLen - 1] = 0; + real[inst->magnLen - 1] = winData[1]; + magn[inst->magnLen - 1] = (float)(fabs(real[inst->magnLen - 1]) + 1.0f); + signalEnergy = (float)(real[0] * real[0]) + (float)(real[inst->magnLen - 1] + * real[inst->magnLen - 1]); + sumMagn = magn[0] + magn[inst->magnLen - 1]; + if (inst->blockInd < END_STARTUP_SHORT) + { + inst->initMagnEst[0] += magn[0]; + inst->initMagnEst[inst->magnLen - 1] += magn[inst->magnLen - 1]; + tmpFloat2 = log((float)(inst->magnLen - 1)); + sum_log_i = tmpFloat2; + sum_log_i_square = tmpFloat2 * tmpFloat2; + tmpFloat1 = log(magn[inst->magnLen - 1]); + sum_log_magn = tmpFloat1; + sum_log_i_log_magn = tmpFloat2 * tmpFloat1; + } + for (i = 1; i < inst->magnLen - 1; i++) + { + real[i] = winData[2 * i]; + imag[i] = winData[2 * i + 1]; + // magnitude spectrum + fTmp = real[i] * real[i]; + fTmp += imag[i] * imag[i]; + signalEnergy += fTmp; + magn[i] = ((float)sqrt(fTmp)) + 1.0f; + sumMagn += magn[i]; + if (inst->blockInd < END_STARTUP_SHORT) + { + inst->initMagnEst[i] += magn[i]; + if (i >= kStartBand) + { + tmpFloat2 = log((float)i); + sum_log_i += tmpFloat2; + sum_log_i_square += tmpFloat2 * tmpFloat2; + tmpFloat1 = log(magn[i]); + sum_log_magn += tmpFloat1; + sum_log_i_log_magn += tmpFloat2 * tmpFloat1; + } + } + } + signalEnergy = signalEnergy / ((float)inst->magnLen); + inst->signalEnergy = signalEnergy; + inst->sumMagn = sumMagn; + + //compute spectral flatness on input spectrum + WebRtcNs_ComputeSpectralFlatness(inst, magn); + // quantile noise estimate + WebRtcNs_NoiseEstimation(inst, magn, noise); + //compute simplified noise model during startup + if (inst->blockInd < END_STARTUP_SHORT) + { + // Estimate White noise + inst->whiteNoiseLevel += sumMagn / ((float)inst->magnLen) * inst->overdrive; + // Estimate Pink noise parameters + tmpFloat1 = sum_log_i_square * 
((float)(inst->magnLen - kStartBand)); + tmpFloat1 -= (sum_log_i * sum_log_i); + tmpFloat2 = (sum_log_i_square * sum_log_magn - sum_log_i * sum_log_i_log_magn); + tmpFloat3 = tmpFloat2 / tmpFloat1; + // Constrain the estimated spectrum to be positive + if (tmpFloat3 < 0.0f) + { + tmpFloat3 = 0.0f; + } + inst->pinkNoiseNumerator += tmpFloat3; + tmpFloat2 = (sum_log_i * sum_log_magn); + tmpFloat2 -= ((float)(inst->magnLen - kStartBand)) * sum_log_i_log_magn; + tmpFloat3 = tmpFloat2 / tmpFloat1; + // Constrain the pink noise power to be in the interval [0, 1]; + if (tmpFloat3 < 0.0f) + { + tmpFloat3 = 0.0f; + } + if (tmpFloat3 > 1.0f) + { + tmpFloat3 = 1.0f; + } + inst->pinkNoiseExp += tmpFloat3; + + // Calculate frequency independent parts of parametric noise estimate. + if (inst->pinkNoiseExp == 0.0f) + { + // Use white noise estimate + parametric_noise = inst->whiteNoiseLevel; + } + else + { + // Use pink noise estimate + parametric_num = exp(inst->pinkNoiseNumerator / (float)(inst->blockInd + 1)); + parametric_num *= (float)(inst->blockInd + 1); + parametric_exp = inst->pinkNoiseExp / (float)(inst->blockInd + 1); + parametric_noise = parametric_num / pow((float)kStartBand, parametric_exp); + } + for (i = 0; i < inst->magnLen; i++) + { + // Estimate the background noise using the white and pink noise parameters + if ((inst->pinkNoiseExp > 0.0f) && (i >= kStartBand)) + { + // Use pink noise estimate + parametric_noise = parametric_num / pow((float)i, parametric_exp); + } + theFilterTmp[i] = (inst->initMagnEst[i] - inst->overdrive * parametric_noise); + theFilterTmp[i] /= (inst->initMagnEst[i] + (float)0.0001); + // Weight quantile noise with modeled noise + noise[i] *= (inst->blockInd); + tmpFloat2 = parametric_noise * (END_STARTUP_SHORT - inst->blockInd); + noise[i] += (tmpFloat2 / (float)(inst->blockInd + 1)); + noise[i] /= END_STARTUP_SHORT; + } + } + //compute average signal during END_STARTUP_LONG time: + // used to normalize spectral difference measure + if 
(inst->blockInd < END_STARTUP_LONG) + { + inst->featureData[5] *= inst->blockInd; + inst->featureData[5] += signalEnergy; + inst->featureData[5] /= (inst->blockInd + 1); + } + +#ifdef PROCESS_FLOW_0 + if (inst->blockInd > END_STARTUP_LONG) + { + //option: average the quantile noise: for check with AEC2 + for (i = 0; i < inst->magnLen; i++) + { + noise[i] = (float)0.6 * inst->noisePrev[i] + (float)0.4 * noise[i]; + } + for (i = 0; i < inst->magnLen; i++) + { + // Wiener with over sub-substraction: + theFilter[i] = (magn[i] - inst->overdrive * noise[i]) / (magn[i] + (float)0.0001); + } + } +#else + //start processing at frames == converged+1 + // + // STEP 1: compute prior and post snr based on quantile noise est + // + + // compute DD estimate of prior SNR: needed for new method + for (i = 0; i < inst->magnLen; i++) + { + // post snr + snrLocPost[i] = (float)0.0; + if (magn[i] > noise[i]) + { + snrLocPost[i] = magn[i] / (noise[i] + (float)0.0001) - (float)1.0; + } + // previous post snr + // previous estimate: based on previous frame with gain filter + previousEstimateStsa[i] = inst->magnPrev[i] / (inst->noisePrev[i] + (float)0.0001) + * (inst->smooth[i]); + // DD estimate is sum of two terms: current estimate and previous estimate + // directed decision update of snrPrior + snrLocPrior[i] = DD_PR_SNR * previousEstimateStsa[i] + ((float)1.0 - DD_PR_SNR) + * snrLocPost[i]; + // post and prior snr needed for step 2 + } // end of loop over freqs +#ifdef PROCESS_FLOW_1 + for (i = 0; i < inst->magnLen; i++) + { + // gain filter + tmpFloat1 = inst->overdrive + snrLocPrior[i]; + tmpFloat2 = (float)snrLocPrior[i] / tmpFloat1; + theFilter[i] = (float)tmpFloat2; + } // end of loop over freqs +#endif + // done with step 1: dd computation of prior and post snr + + // + //STEP 2: compute speech/noise likelihood + // +#ifdef PROCESS_FLOW_2 + // compute difference of input spectrum with learned/estimated noise spectrum + WebRtcNs_ComputeSpectralDifference(inst, magn); + // compute 
histograms for parameter decisions (thresholds and weights for features) + // parameters are extracted once every window time (=inst->modelUpdatePars[1]) + if (updateParsFlag >= 1) + { + // counter update + inst->modelUpdatePars[3]--; + // update histogram + if (inst->modelUpdatePars[3] > 0) + { + WebRtcNs_FeatureParameterExtraction(inst, 0); + } + // compute model parameters + if (inst->modelUpdatePars[3] == 0) + { + WebRtcNs_FeatureParameterExtraction(inst, 1); + inst->modelUpdatePars[3] = inst->modelUpdatePars[1]; + // if wish to update only once, set flag to zero + if (updateParsFlag == 1) + { + inst->modelUpdatePars[0] = 0; + } + else + { + // update every window: + // get normalization for spectral difference for next window estimate + inst->featureData[6] = inst->featureData[6] + / ((float)inst->modelUpdatePars[1]); + inst->featureData[5] = (float)0.5 * (inst->featureData[6] + + inst->featureData[5]); + inst->featureData[6] = (float)0.0; + } + } + } + // compute speech/noise probability + WebRtcNs_SpeechNoiseProb(inst, probSpeechFinal, snrLocPrior, snrLocPost); + // time-avg parameter for noise update + gammaNoiseTmp = NOISE_UPDATE; + for (i = 0; i < inst->magnLen; i++) + { + probSpeech = probSpeechFinal[i]; + probNonSpeech = (float)1.0 - probSpeech; + // temporary noise update: + // use it for speech frames if update value is less than previous + noiseUpdateTmp = gammaNoiseTmp * inst->noisePrev[i] + ((float)1.0 - gammaNoiseTmp) + * (probNonSpeech * magn[i] + probSpeech * inst->noisePrev[i]); + // + // time-constant based on speech/noise state + gammaNoiseOld = gammaNoiseTmp; + gammaNoiseTmp = NOISE_UPDATE; + // increase gamma (i.e., less noise update) for frame likely to be speech + if (probSpeech > PROB_RANGE) + { + gammaNoiseTmp = SPEECH_UPDATE; + } + // conservative noise update + if (probSpeech < PROB_RANGE) + { + inst->magnAvgPause[i] += GAMMA_PAUSE * (magn[i] - inst->magnAvgPause[i]); + } + // noise update + if (gammaNoiseTmp == gammaNoiseOld) + { + 
noise[i] = noiseUpdateTmp; + } + else + { + noise[i] = gammaNoiseTmp * inst->noisePrev[i] + ((float)1.0 - gammaNoiseTmp) + * (probNonSpeech * magn[i] + probSpeech * inst->noisePrev[i]); + // allow for noise update downwards: + // if noise update decreases the noise, it is safe, so allow it to happen + if (noiseUpdateTmp < noise[i]) + { + noise[i] = noiseUpdateTmp; + } + } + } // end of freq loop + // done with step 2: noise update + + // + // STEP 3: compute dd update of prior snr and post snr based on new noise estimate + // + for (i = 0; i < inst->magnLen; i++) + { + // post and prior snr + currentEstimateStsa = (float)0.0; + if (magn[i] > noise[i]) + { + currentEstimateStsa = magn[i] / (noise[i] + (float)0.0001) - (float)1.0; + } + // DD estimate is sume of two terms: current estimate and previous estimate + // directed decision update of snrPrior + snrPrior = DD_PR_SNR * previousEstimateStsa[i] + ((float)1.0 - DD_PR_SNR) + * currentEstimateStsa; + // gain filter + tmpFloat1 = inst->overdrive + snrPrior; + tmpFloat2 = (float)snrPrior / tmpFloat1; + theFilter[i] = (float)tmpFloat2; + } // end of loop over freqs + // done with step3 +#endif +#endif + + for (i = 0; i < inst->magnLen; i++) + { + // flooring bottom + if (theFilter[i] < inst->denoiseBound) + { + theFilter[i] = inst->denoiseBound; + } + // flooring top + if (theFilter[i] > (float)1.0) + { + theFilter[i] = 1.0; + } + if (inst->blockInd < END_STARTUP_SHORT) + { + // flooring bottom + if (theFilterTmp[i] < inst->denoiseBound) + { + theFilterTmp[i] = inst->denoiseBound; + } + // flooring top + if (theFilterTmp[i] > (float)1.0) + { + theFilterTmp[i] = 1.0; + } + // Weight the two suppression filters + theFilter[i] *= (inst->blockInd); + theFilterTmp[i] *= (END_STARTUP_SHORT - inst->blockInd); + theFilter[i] += theFilterTmp[i]; + theFilter[i] /= (END_STARTUP_SHORT); + } + // smoothing +#ifdef PROCESS_FLOW_0 + inst->smooth[i] *= SMOOTH; // value set to 0.7 in define.h file + inst->smooth[i] += ((float)1.0 - 
SMOOTH) * theFilter[i]; +#else + inst->smooth[i] = theFilter[i]; +#endif + real[i] *= inst->smooth[i]; + imag[i] *= inst->smooth[i]; + } + // keep track of noise and magn spectrum for next frame + for (i = 0; i < inst->magnLen; i++) + { + inst->noisePrev[i] = noise[i]; + inst->magnPrev[i] = magn[i]; + } + // back to time domain + winData[0] = real[0]; + winData[1] = real[inst->magnLen - 1]; + for (i = 1; i < inst->magnLen - 1; i++) + { + winData[2 * i] = real[i]; + winData[2 * i + 1] = imag[i]; + } + rdft(inst->anaLen, -1, winData, inst->ip, inst->wfft); + + for (i = 0; i < inst->anaLen; i++) + { + real[i] = 2.0f * winData[i] / inst->anaLen; // fft scaling + } + + //scale factor: only do it after END_STARTUP_LONG time + factor = (float)1.0; + if (inst->gainmap == 1 && inst->blockInd > END_STARTUP_LONG) + { + factor1 = (float)1.0; + factor2 = (float)1.0; + + energy2 = 0.0; + for (i = 0; i < inst->anaLen;i++) + { + energy2 += (float)real[i] * (float)real[i]; + } + gain = (float)sqrt(energy2 / (energy1 + (float)1.0)); + +#ifdef PROCESS_FLOW_2 + // scaling for new version + if (gain > B_LIM) + { + factor1 = (float)1.0 + (float)1.3 * (gain - B_LIM); + if (gain * factor1 > (float)1.0) + { + factor1 = (float)1.0 / gain; + } + } + if (gain < B_LIM) + { + //don't reduce scale too much for pause regions: + // attenuation here should be controlled by flooring + if (gain <= inst->denoiseBound) + { + gain = inst->denoiseBound; + } + factor2 = (float)1.0 - (float)0.3 * (B_LIM - gain); + } + //combine both scales with speech/noise prob: + // note prior (priorSpeechProb) is not frequency dependent + factor = inst->priorSpeechProb * factor1 + ((float)1.0 - inst->priorSpeechProb) + * factor2; +#else + if (gain > B_LIM) + { + factor = (float)1.0 + (float)1.3 * (gain - B_LIM); + } + else + { + factor = (float)1.0 + (float)2.0 * (gain - B_LIM); + } + if (gain * factor > (float)1.0) + { + factor = (float)1.0 / gain; + } +#endif + } // out of inst->gainmap==1 + + // synthesis + for (i = 
0; i < inst->anaLen; i++) + { + inst->syntBuf[i] += factor * inst->window[i] * (float)real[i]; + } + // read out fully processed segment + for (i = inst->windShift; i < inst->blockLen + inst->windShift; i++) + { + fout[i - inst->windShift] = inst->syntBuf[i]; + } + // update synthesis buffer + memcpy(inst->syntBuf, inst->syntBuf + inst->blockLen, + sizeof(float) * (inst->anaLen - inst->blockLen)); + memset(inst->syntBuf + inst->anaLen - inst->blockLen, 0, + sizeof(float) * inst->blockLen); + + // out buffer + inst->outLen = inst->blockLen - inst->blockLen10ms; + if (inst->blockLen > inst->blockLen10ms) + { + for (i = 0; i < inst->outLen; i++) + { + inst->outBuf[i] = fout[i + inst->blockLen10ms]; + } + } + } // end of if out.len==0 + else + { + for (i = 0; i < inst->blockLen10ms; i++) + { + fout[i] = inst->outBuf[i]; + } + memcpy(inst->outBuf, inst->outBuf + inst->blockLen10ms, + sizeof(float) * (inst->outLen - inst->blockLen10ms)); + memset(inst->outBuf + inst->outLen - inst->blockLen10ms, 0, + sizeof(float) * inst->blockLen10ms); + inst->outLen -= inst->blockLen10ms; + } + + // convert to short + for (i = 0; i < inst->blockLen10ms; i++) + { + dTmp = fout[i]; + if (dTmp < WEBRTC_SPL_WORD16_MIN) + { + dTmp = WEBRTC_SPL_WORD16_MIN; + } + else if (dTmp > WEBRTC_SPL_WORD16_MAX) + { + dTmp = WEBRTC_SPL_WORD16_MAX; + } + outFrame[i] = (short)dTmp; + } + + // for time-domain gain of HB + if (flagHB == 1) + { + for (i = 0; i < inst->magnLen; i++) + { + inst->speechProbHB[i] = probSpeechFinal[i]; + } + if (inst->blockInd > END_STARTUP_LONG) + { + // average speech prob from low band + // avg over second half (i.e., 4->8kHz) of freq. spectrum + avgProbSpeechHB = 0.0; + for (i = inst->magnLen - deltaBweHB - 1; i < inst->magnLen - 1; i++) + { + avgProbSpeechHB += inst->speechProbHB[i]; + } + avgProbSpeechHB = avgProbSpeechHB / ((float)deltaBweHB); + // average filter gain from low band + // average over second half (i.e., 4->8kHz) of freq. 
spectrum + avgFilterGainHB = 0.0; + for (i = inst->magnLen - deltaGainHB - 1; i < inst->magnLen - 1; i++) + { + avgFilterGainHB += inst->smooth[i]; + } + avgFilterGainHB = avgFilterGainHB / ((float)(deltaGainHB)); + avgProbSpeechHBTmp = (float)2.0 * avgProbSpeechHB - (float)1.0; + // gain based on speech prob: + gainModHB = (float)0.5 * ((float)1.0 + (float)tanh(gainMapParHB * avgProbSpeechHBTmp)); + //combine gain with low band gain + gainTimeDomainHB = (float)0.5 * gainModHB + (float)0.5 * avgFilterGainHB; + if (avgProbSpeechHB >= (float)0.5) + { + gainTimeDomainHB = (float)0.25 * gainModHB + (float)0.75 * avgFilterGainHB; + } + gainTimeDomainHB = gainTimeDomainHB * decayBweHB; + } // end of converged + //make sure gain is within flooring range + // flooring bottom + if (gainTimeDomainHB < inst->denoiseBound) + { + gainTimeDomainHB = inst->denoiseBound; + } + // flooring top + if (gainTimeDomainHB > (float)1.0) + { + gainTimeDomainHB = 1.0; + } + //apply gain + for (i = 0; i < inst->blockLen10ms; i++) + { + dTmp = gainTimeDomainHB * inst->dataBufHB[i]; + if (dTmp < WEBRTC_SPL_WORD16_MIN) + { + dTmp = WEBRTC_SPL_WORD16_MIN; + } + else if (dTmp > WEBRTC_SPL_WORD16_MAX) + { + dTmp = WEBRTC_SPL_WORD16_MAX; + } + outFrameHB[i] = (short)dTmp; + } + } // end of H band gain computation + // + + return 0; +} diff --git a/src/modules/audio_processing/ns/main/source/ns_core.h b/src/modules/audio_processing/ns/main/source/ns_core.h new file mode 100644 index 0000000000..f72e22bf1c --- /dev/null +++ b/src/modules/audio_processing/ns/main/source/ns_core.h @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NS_CORE_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NS_CORE_H_

#include "defines.h"

// Tuning constants used when extracting the feature thresholds for the
// prior speech/noise model from the feature histograms.
typedef struct NSParaExtract_t_ {

    //bin size of histogram
    float binSizeLrt;
    float binSizeSpecFlat;
    float binSizeSpecDiff;
    //range of histogram over which lrt threshold is computed
    float rangeAvgHistLrt;
    //scale parameters: multiply dominant peaks of the histograms by scale factor to obtain
    //thresholds for prior model
    float factor1ModelPars; //for lrt and spectral difference
    float factor2ModelPars; //for spectral_flatness: used when noise is flatter than speech
    //peak limit for spectral flatness (varies between 0 and 1)
    float thresPosSpecFlat;
    //limit on spacing of two highest peaks in histogram: spacing determined by bin size
    float limitPeakSpacingSpecFlat;
    float limitPeakSpacingSpecDiff;
    //limit on relevance of second peak:
    float limitPeakWeightsSpecFlat;
    float limitPeakWeightsSpecDiff;
    //limit on fluctuation of lrt feature
    float thresFluctLrt;
    //limit on the max and min values for the feature thresholds
    float maxLrt;
    float minLrt;
    float maxSpecFlat;
    float minSpecFlat;
    float maxSpecDiff;
    float minSpecDiff;
    //criteria of weight of histogram peak to accept/reject feature
    int thresWeightSpecFlat;
    int thresWeightSpecDiff;

} NSParaExtract_t;

// State of one floating-point noise suppression instance.
// The notes on individual fields below describe how the synthesis/output part
// of WebRtcNs_ProcessCore uses them; other uses may exist elsewhere.
typedef struct NSinst_t_ {

    WebRtc_UWord32 fs; // sampling frequency passed to WebRtcNs_InitCore
    int blockLen; // samples read out of syntBuf per synthesized block
    int blockLen10ms; // samples written to the output frame per ProcessCore call
                      // (presumably 10 ms of audio, going by the name)
    int windShift; // offset in syntBuf where the read-out segment starts
    int outLen; // number of samples currently pending in outBuf
    int anaLen; // length of the (inverse) FFT / synthesis window
    int magnLen; // number of frequency bins in the per-bin arrays
    int aggrMode;
    const float* window; // window multiplied onto the time signal at synthesis
    float dataBuf[ANAL_BLOCKL_MAX];
    float syntBuf[ANAL_BLOCKL_MAX]; // overlap-add buffer, shifted by blockLen per block
    float outBuf[3 * BLOCKL_MAX]; // samples beyond blockLen10ms kept for later calls

    int initFlag;
    // parameters for quantile noise estimation
    float density[SIMULT * HALF_ANAL_BLOCKL];
    float lquantile[SIMULT * HALF_ANAL_BLOCKL];
    float quantile[HALF_ANAL_BLOCKL];
    int counter[SIMULT];
    int updates;
    // parameters for Wiener filter
    float smooth[HALF_ANAL_BLOCKL]; // per-bin gain multiplied onto the spectrum
    float overdrive;
    float denoiseBound; // lower bound (floor) applied to gains
    int gainmap; // energy-based output scaling is computed only when this is 1
    // fft work arrays.
    int ip[IP_LENGTH];
    float wfft[W_LENGTH];

    // parameters for new method: some not needed, will reduce/cleanup later
    WebRtc_Word32 blockInd; //frame index counter
    int modelUpdatePars[4]; //parameters for updating or estimating
    // thresholds/weights for prior model
    float priorModelPars[7]; //parameters for prior model
    float noisePrev[HALF_ANAL_BLOCKL]; //noise spectrum from previous frame
    float magnPrev[HALF_ANAL_BLOCKL]; //magnitude spectrum of previous frame
    float logLrtTimeAvg[HALF_ANAL_BLOCKL]; //log lrt factor with time-smoothing
    float priorSpeechProb; //prior speech/noise probability
    float featureData[7]; //data for features
    float magnAvgPause[HALF_ANAL_BLOCKL]; //conservative noise spectrum estimate
    float signalEnergy; //energy of magn
    float sumMagn; //sum of magn
    float whiteNoiseLevel; //initial noise estimate
    float initMagnEst[HALF_ANAL_BLOCKL]; //initial magnitude spectrum estimate
    float pinkNoiseNumerator; //pink noise parameter: numerator
    float pinkNoiseExp; //pink noise parameter: power of freq
    NSParaExtract_t featureExtractionParams; //parameters for feature extraction
    //histograms for parameter estimation
    int histLrt[HIST_PAR_EST];
    int histSpecFlat[HIST_PAR_EST];
    int histSpecDiff[HIST_PAR_EST];
    //quantities for high band estimate
    float speechProbHB[HALF_ANAL_BLOCKL]; //final speech/noise prob: prior + LRT
    float dataBufHB[ANAL_BLOCKL_MAX]; //buffering data for HB

} NSinst_t;


#ifdef __cplusplus
extern "C" {
#endif

/****************************************************************************
 * WebRtcNs_InitCore(...)
 *
 * This function initializes a noise suppression instance
 *
 * Input:
 *      - inst          : Instance that should be initialized
 *      - fs            : Sampling frequency
 *
 * Output:
 *      - inst          : Initialized instance
 *
 * Return value         :  0 - Ok
 *                        -1 - Error
 */
int WebRtcNs_InitCore(NSinst_t *inst, WebRtc_UWord32 fs);

/****************************************************************************
 * WebRtcNs_set_policy_core(...)
 *
 * This changes the aggressiveness of the noise suppression method.
 *
 * Input:
 *      - inst          : Instance whose aggressiveness should be changed
 *      - mode          : 0: Mild (6 dB), 1: Medium (10 dB), 2: Aggressive (15 dB)
 *
 * Output:
 *      - inst          : Updated instance
 *
 * Return value         :  0 - Ok
 *                        -1 - Error
 */
int WebRtcNs_set_policy_core(NSinst_t *inst, int mode);

/****************************************************************************
 * WebRtcNs_ProcessCore
 *
 * Do noise suppression.
 *
 * Input:
 *      - inst          : Noise suppression instance to process with
 *      - inFrameLow    : Input speech frame for lower band
 *      - inFrameHigh   : Input speech frame for higher band
 *
 * Output:
 *      - inst          : Updated instance
 *      - outFrameLow   : Output speech frame for lower band
 *      - outFrameHigh  : Output speech frame for higher band
 *
 * Return value         :  0 - OK
 *                        -1 - Error
 */


int WebRtcNs_ProcessCore(NSinst_t *inst,
                         short *inFrameLow,
                         short *inFrameHigh,
                         short *outFrameLow,
                         short *outFrameHigh);


#ifdef __cplusplus
}
#endif
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NS_CORE_H_
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "noise_suppression_x.h" + +#include <assert.h> +#include <math.h> +#include <string.h> +#include <stdlib.h> + +#include "nsx_core.h" + +// Skip first frequency bins during estimation. (0 <= value < 64) +static const int kStartBand = 5; + +// Rounding +static const WebRtc_Word16 kRoundTable[16] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, + 2048, 4096, 8192, 16384}; + +// Constants to compensate for shifting signal log(2^shifts). +static const WebRtc_Word16 kLogTable[9] = {0, 177, 355, 532, 710, 887, 1065, 1242, 1420}; + +static const WebRtc_Word16 kCounterDiv[201] = {32767, 16384, 10923, 8192, 6554, 5461, 4681, + 4096, 3641, 3277, 2979, 2731, 2521, 2341, 2185, 2048, 1928, 1820, 1725, 1638, 1560, + 1489, 1425, 1365, 1311, 1260, 1214, 1170, 1130, 1092, 1057, 1024, 993, 964, 936, 910, + 886, 862, 840, 819, 799, 780, 762, 745, 728, 712, 697, 683, 669, 655, 643, 630, 618, + 607, 596, 585, 575, 565, 555, 546, 537, 529, 520, 512, 504, 496, 489, 482, 475, 468, + 462, 455, 449, 443, 437, 431, 426, 420, 415, 410, 405, 400, 395, 390, 386, 381, 377, + 372, 368, 364, 360, 356, 352, 349, 345, 341, 338, 334, 331, 328, 324, 321, 318, 315, + 312, 309, 306, 303, 301, 298, 295, 293, 290, 287, 285, 282, 280, 278, 275, 273, 271, + 269, 266, 264, 262, 260, 258, 256, 254, 252, 250, 248, 246, 245, 243, 241, 239, 237, + 236, 234, 232, 231, 229, 228, 226, 224, 223, 221, 220, 218, 217, 216, 214, 213, 211, + 210, 209, 207, 206, 205, 204, 202, 201, 200, 199, 197, 196, 195, 194, 193, 192, 191, + 189, 188, 187, 186, 185, 184, 183, 182, 181, 180, 179, 178, 177, 176, 175, 174, 173, + 172, 172, 171, 170, 169, 168, 167, 166, 165, 165, 164, 163}; + 
+static const WebRtc_Word16 kLogTableFrac[256] = { + 0, 1, 3, 4, 6, 7, 9, 10, 11, 13, 14, 16, 17, 18, 20, 21, + 22, 24, 25, 26, 28, 29, 30, 32, 33, 34, 36, 37, 38, 40, 41, 42, + 44, 45, 46, 47, 49, 50, 51, 52, 54, 55, 56, 57, 59, 60, 61, 62, + 63, 65, 66, 67, 68, 69, 71, 72, 73, 74, 75, 77, 78, 79, 80, 81, + 82, 84, 85, 86, 87, 88, 89, 90, 92, 93, 94, 95, 96, 97, 98, 99, + 100, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 116, 117, + 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, + 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, + 150, 151, 152, 153, 154, 155, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, + 165, 166, 167, 168, 169, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 178, + 179, 180, 181, 182, 183, 184, 185, 185, 186, 187, 188, 189, 190, 191, 192, 192, + 193, 194, 195, 196, 197, 198, 198, 199, 200, 201, 202, 203, 203, 204, 205, 206, + 207, 208, 208, 209, 210, 211, 212, 212, 213, 214, 215, 216, 216, 217, 218, 219, + 220, 220, 221, 222, 223, 224, 224, 225, 226, 227, 228, 228, 229, 230, 231, 231, + 232, 233, 234, 234, 235, 236, 237, 238, 238, 239, 240, 241, 241, 242, 243, 244, + 244, 245, 246, 247, 247, 248, 249, 249, 250, 251, 252, 252, 253, 254, 255, 255 +}; + +static const WebRtc_Word16 kPowTableFrac[1024] = { + 0, 1, 1, 2, 3, 3, 4, 5, + 6, 6, 7, 8, 8, 9, 10, 10, + 11, 12, 13, 13, 14, 15, 15, 16, + 17, 17, 18, 19, 20, 20, 21, 22, + 22, 23, 24, 25, 25, 26, 27, 27, + 28, 29, 30, 30, 31, 32, 32, 33, + 34, 35, 35, 36, 37, 37, 38, 39, + 40, 40, 41, 42, 42, 43, 44, 45, + 45, 46, 47, 48, 48, 49, 50, 50, + 51, 52, 53, 53, 54, 55, 56, 56, + 57, 58, 58, 59, 60, 61, 61, 62, + 63, 64, 64, 65, 66, 67, 67, 68, + 69, 69, 70, 71, 72, 72, 73, 74, + 75, 75, 76, 77, 78, 78, 79, 80, + 81, 81, 82, 83, 84, 84, 85, 86, + 87, 87, 88, 89, 90, 90, 91, 92, + 93, 93, 94, 95, 96, 96, 97, 98, + 99, 100, 100, 101, 102, 103, 103, 104, + 105, 106, 106, 107, 108, 109, 109, 110, + 111, 112, 113, 
113, 114, 115, 116, 116, + 117, 118, 119, 119, 120, 121, 122, 123, + 123, 124, 125, 126, 126, 127, 128, 129, + 130, 130, 131, 132, 133, 133, 134, 135, + 136, 137, 137, 138, 139, 140, 141, 141, + 142, 143, 144, 144, 145, 146, 147, 148, + 148, 149, 150, 151, 152, 152, 153, 154, + 155, 156, 156, 157, 158, 159, 160, 160, + 161, 162, 163, 164, 164, 165, 166, 167, + 168, 168, 169, 170, 171, 172, 173, 173, + 174, 175, 176, 177, 177, 178, 179, 180, + 181, 181, 182, 183, 184, 185, 186, 186, + 187, 188, 189, 190, 190, 191, 192, 193, + 194, 195, 195, 196, 197, 198, 199, 200, + 200, 201, 202, 203, 204, 205, 205, 206, + 207, 208, 209, 210, 210, 211, 212, 213, + 214, 215, 215, 216, 217, 218, 219, 220, + 220, 221, 222, 223, 224, 225, 225, 226, + 227, 228, 229, 230, 231, 231, 232, 233, + 234, 235, 236, 237, 237, 238, 239, 240, + 241, 242, 243, 243, 244, 245, 246, 247, + 248, 249, 249, 250, 251, 252, 253, 254, + 255, 255, 256, 257, 258, 259, 260, 261, + 262, 262, 263, 264, 265, 266, 267, 268, + 268, 269, 270, 271, 272, 273, 274, 275, + 276, 276, 277, 278, 279, 280, 281, 282, + 283, 283, 284, 285, 286, 287, 288, 289, + 290, 291, 291, 292, 293, 294, 295, 296, + 297, 298, 299, 299, 300, 301, 302, 303, + 304, 305, 306, 307, 308, 308, 309, 310, + 311, 312, 313, 314, 315, 316, 317, 318, + 318, 319, 320, 321, 322, 323, 324, 325, + 326, 327, 328, 328, 329, 330, 331, 332, + 333, 334, 335, 336, 337, 338, 339, 339, + 340, 341, 342, 343, 344, 345, 346, 347, + 348, 349, 350, 351, 352, 352, 353, 354, + 355, 356, 357, 358, 359, 360, 361, 362, + 363, 364, 365, 366, 367, 367, 368, 369, + 370, 371, 372, 373, 374, 375, 376, 377, + 378, 379, 380, 381, 382, 383, 384, 385, + 385, 386, 387, 388, 389, 390, 391, 392, + 393, 394, 395, 396, 397, 398, 399, 400, + 401, 402, 403, 404, 405, 406, 407, 408, + 409, 410, 410, 411, 412, 413, 414, 415, + 416, 417, 418, 419, 420, 421, 422, 423, + 424, 425, 426, 427, 428, 429, 430, 431, + 432, 433, 434, 435, 436, 437, 438, 439, + 440, 441, 442, 443, 444, 445, 446, 447, 
+ 448, 449, 450, 451, 452, 453, 454, 455, + 456, 457, 458, 459, 460, 461, 462, 463, + 464, 465, 466, 467, 468, 469, 470, 471, + 472, 473, 474, 475, 476, 477, 478, 479, + 480, 481, 482, 483, 484, 485, 486, 487, + 488, 489, 490, 491, 492, 493, 494, 495, + 496, 498, 499, 500, 501, 502, 503, 504, + 505, 506, 507, 508, 509, 510, 511, 512, + 513, 514, 515, 516, 517, 518, 519, 520, + 521, 522, 523, 525, 526, 527, 528, 529, + 530, 531, 532, 533, 534, 535, 536, 537, + 538, 539, 540, 541, 542, 544, 545, 546, + 547, 548, 549, 550, 551, 552, 553, 554, + 555, 556, 557, 558, 560, 561, 562, 563, + 564, 565, 566, 567, 568, 569, 570, 571, + 572, 574, 575, 576, 577, 578, 579, 580, + 581, 582, 583, 584, 585, 587, 588, 589, + 590, 591, 592, 593, 594, 595, 596, 597, + 599, 600, 601, 602, 603, 604, 605, 606, + 607, 608, 610, 611, 612, 613, 614, 615, + 616, 617, 618, 620, 621, 622, 623, 624, + 625, 626, 627, 628, 630, 631, 632, 633, + 634, 635, 636, 637, 639, 640, 641, 642, + 643, 644, 645, 646, 648, 649, 650, 651, + 652, 653, 654, 656, 657, 658, 659, 660, + 661, 662, 664, 665, 666, 667, 668, 669, + 670, 672, 673, 674, 675, 676, 677, 678, + 680, 681, 682, 683, 684, 685, 687, 688, + 689, 690, 691, 692, 693, 695, 696, 697, + 698, 699, 700, 702, 703, 704, 705, 706, + 708, 709, 710, 711, 712, 713, 715, 716, + 717, 718, 719, 720, 722, 723, 724, 725, + 726, 728, 729, 730, 731, 732, 733, 735, + 736, 737, 738, 739, 741, 742, 743, 744, + 745, 747, 748, 749, 750, 751, 753, 754, + 755, 756, 757, 759, 760, 761, 762, 763, + 765, 766, 767, 768, 770, 771, 772, 773, + 774, 776, 777, 778, 779, 780, 782, 783, + 784, 785, 787, 788, 789, 790, 792, 793, + 794, 795, 796, 798, 799, 800, 801, 803, + 804, 805, 806, 808, 809, 810, 811, 813, + 814, 815, 816, 818, 819, 820, 821, 823, + 824, 825, 826, 828, 829, 830, 831, 833, + 834, 835, 836, 838, 839, 840, 841, 843, + 844, 845, 846, 848, 849, 850, 851, 853, + 854, 855, 857, 858, 859, 860, 862, 863, + 864, 866, 867, 868, 869, 871, 872, 873, + 874, 876, 877, 878, 
880, 881, 882, 883, + 885, 886, 887, 889, 890, 891, 893, 894, + 895, 896, 898, 899, 900, 902, 903, 904, + 906, 907, 908, 909, 911, 912, 913, 915, + 916, 917, 919, 920, 921, 923, 924, 925, + 927, 928, 929, 931, 932, 933, 935, 936, + 937, 938, 940, 941, 942, 944, 945, 946, + 948, 949, 950, 952, 953, 955, 956, 957, + 959, 960, 961, 963, 964, 965, 967, 968, + 969, 971, 972, 973, 975, 976, 977, 979, + 980, 981, 983, 984, 986, 987, 988, 990, + 991, 992, 994, 995, 996, 998, 999, 1001, + 1002, 1003, 1005, 1006, 1007, 1009, 1010, 1012, + 1013, 1014, 1016, 1017, 1018, 1020, 1021, 1023 +}; + +static const WebRtc_Word16 kIndicatorTable[17] = {0, 2017, 3809, 5227, 6258, 6963, 7424, 7718, + 7901, 8014, 8084, 8126, 8152, 8168, 8177, 8183, 8187}; + +// hybrib Hanning & flat window +static const WebRtc_Word16 kBlocks80w128x[128] = { + 0, 536, 1072, 1606, 2139, 2669, 3196, 3720, 4240, 4756, 5266, + 5771, 6270, 6762, 7246, 7723, 8192, 8652, 9102, 9543, 9974, 10394, + 10803, 11200, 11585, 11958, 12318, 12665, 12998, 13318, 13623, 13913, 14189, + 14449, 14694, 14924, 15137, 15334, 15515, 15679, 15826, 15956, 16069, 16165, + 16244, 16305, 16349, 16375, 16384, 16384, 16384, 16384, 16384, 16384, 16384, + 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, + 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, + 16384, 16384, 16384, 16384, 16375, 16349, 16305, 16244, 16165, 16069, 15956, + 15826, 15679, 15515, 15334, 15137, 14924, 14694, 14449, 14189, 13913, 13623, + 13318, 12998, 12665, 12318, 11958, 11585, 11200, 10803, 10394, 9974, 9543, + 9102, 8652, 8192, 7723, 7246, 6762, 6270, 5771, 5266, 4756, 4240, + 3720, 3196, 2669, 2139, 1606, 1072, 536 +}; + +// hybrib Hanning & flat window +static const WebRtc_Word16 kBlocks160w256x[256] = { + 0, 268, 536, 804, 1072, 1339, 1606, 1872, + 2139, 2404, 2669, 2933, 3196, 3459, 3720, 3981, + 4240, 4499, 4756, 5012, 5266, 5520, 5771, 6021, + 6270, 6517, 6762, 7005, 7246, 7486, 7723, 7959, + 8192, 
8423, 8652, 8878, 9102, 9324, 9543, 9760, + 9974, 10185, 10394, 10600, 10803, 11003, 11200, 11394, +11585, 11773, 11958, 12140, 12318, 12493, 12665, 12833, +12998, 13160, 13318, 13472, 13623, 13770, 13913, 14053, +14189, 14321, 14449, 14574, 14694, 14811, 14924, 15032, +15137, 15237, 15334, 15426, 15515, 15599, 15679, 15754, +15826, 15893, 15956, 16015, 16069, 16119, 16165, 16207, +16244, 16277, 16305, 16329, 16349, 16364, 16375, 16382, +16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, +16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, +16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, +16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, +16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, +16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, +16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, +16384, 16384, 16384, 16384, 16384, 16384, 16384, 16384, +16384, 16382, 16375, 16364, 16349, 16329, 16305, 16277, +16244, 16207, 16165, 16119, 16069, 16015, 15956, 15893, +15826, 15754, 15679, 15599, 15515, 15426, 15334, 15237, +15137, 15032, 14924, 14811, 14694, 14574, 14449, 14321, +14189, 14053, 13913, 13770, 13623, 13472, 13318, 13160, +12998, 12833, 12665, 12493, 12318, 12140, 11958, 11773, +11585, 11394, 11200, 11003, 10803, 10600, 10394, 10185, + 9974, 9760, 9543, 9324, 9102, 8878, 8652, 8423, + 8192, 7959, 7723, 7486, 7246, 7005, 6762, 6517, + 6270, 6021, 5771, 5520, 5266, 5012, 4756, 4499, + 4240, 3981, 3720, 3459, 3196, 2933, 2669, 2404, + 2139, 1872, 1606, 1339, 1072, 804, 536, 268 +}; + +// Gain factor table: Input value in Q8 and output value in Q13 +static const WebRtc_Word16 kFactor1Table[257] = { + 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 
8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8233, 8274, 8315, 8355, 8396, 8436, 8475, 8515, 8554, 8592, 8631, 8669, + 8707, 8745, 8783, 8820, 8857, 8894, 8931, 8967, 9003, 9039, 9075, 9111, 9146, 9181, + 9216, 9251, 9286, 9320, 9354, 9388, 9422, 9456, 9489, 9523, 9556, 9589, 9622, 9655, + 9687, 9719, 9752, 9784, 9816, 9848, 9879, 9911, 9942, 9973, 10004, 10035, 10066, + 10097, 10128, 10158, 10188, 10218, 10249, 10279, 10308, 10338, 10368, 10397, 10426, + 10456, 10485, 10514, 10543, 10572, 10600, 10629, 10657, 10686, 10714, 10742, 10770, + 10798, 10826, 10854, 10882, 10847, 10810, 10774, 10737, 10701, 10666, 10631, 10596, + 10562, 10527, 10494, 10460, 10427, 10394, 10362, 10329, 10297, 10266, 10235, 10203, + 10173, 10142, 10112, 10082, 10052, 10023, 9994, 9965, 9936, 9908, 9879, 9851, 9824, + 9796, 9769, 9742, 9715, 9689, 9662, 9636, 9610, 9584, 9559, 9534, 9508, 9484, 9459, + 9434, 9410, 9386, 9362, 9338, 9314, 9291, 9268, 9245, 9222, 9199, 9176, 9154, 9132, + 9110, 9088, 9066, 9044, 9023, 9002, 8980, 8959, 8939, 8918, 8897, 8877, 8857, 8836, + 8816, 8796, 8777, 8757, 8738, 8718, 8699, 8680, 8661, 8642, 8623, 8605, 8586, 8568, + 8550, 8532, 8514, 8496, 8478, 8460, 8443, 8425, 8408, 8391, 8373, 8356, 8339, 8323, + 8306, 8289, 8273, 8256, 8240, 8224, 8208, 8192 +}; + +// Gain factor table: Input value in Q8 and output value in Q13 +static const WebRtc_Word16 kFactor2Aggressiveness1[257] = { + 7577, 7577, 7577, 7577, 7577, 7577, + 7577, 7577, 7577, 7577, 7577, 7577, 7577, 7577, 7577, 7577, 7577, 7596, 7614, 7632, + 7650, 7667, 7683, 7699, 7715, 7731, 7746, 7761, 7775, 7790, 7804, 7818, 7832, 7845, + 7858, 7871, 7884, 7897, 7910, 7922, 7934, 7946, 7958, 7970, 7982, 7993, 8004, 8016, + 8027, 8038, 8049, 8060, 8070, 8081, 8091, 8102, 8112, 8122, 8132, 8143, 8152, 8162, + 8172, 8182, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 
8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192 +}; + +// Gain factor table: Input value in Q8 and output value in Q13 +static const WebRtc_Word16 kFactor2Aggressiveness2[257] = { + 7270, 7270, 7270, 7270, 7270, 7306, + 7339, 7369, 7397, 7424, 7448, 7472, 7495, 7517, 7537, 7558, 7577, 7596, 7614, 7632, + 7650, 7667, 7683, 7699, 7715, 7731, 7746, 7761, 7775, 7790, 7804, 7818, 7832, 7845, + 7858, 7871, 7884, 7897, 7910, 7922, 7934, 7946, 7958, 7970, 7982, 7993, 8004, 8016, + 8027, 8038, 8049, 8060, 8070, 8081, 8091, 8102, 8112, 8122, 8132, 8143, 8152, 8162, + 8172, 8182, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 
8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192 +}; + +// Gain factor table: Input value in Q8 and output value in Q13 +static const WebRtc_Word16 kFactor2Aggressiveness3[257] = { + 7184, 7184, 7184, 7229, 7270, 7306, + 7339, 7369, 7397, 7424, 7448, 7472, 7495, 7517, 7537, 7558, 7577, 7596, 7614, 7632, + 7650, 7667, 7683, 7699, 7715, 7731, 7746, 7761, 7775, 7790, 7804, 7818, 7832, 7845, + 7858, 7871, 7884, 7897, 7910, 7922, 7934, 7946, 7958, 7970, 7982, 7993, 8004, 8016, + 8027, 8038, 8049, 8060, 8070, 8081, 8091, 8102, 8112, 8122, 8132, 8143, 8152, 8162, + 8172, 8182, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 
8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192 +}; + +// sum of log2(i) from table index to inst->anaLen2 in Q5 +// Note that the first table value is invalid, since log2(0) = -infinity +static const WebRtc_Word16 kSumLogIndex[66] = { + 0, 22917, 22917, 22885, 22834, 22770, 22696, 22613, + 22524, 22428, 22326, 22220, 22109, 21994, 21876, 21754, + 21629, 21501, 21370, 21237, 21101, 20963, 20822, 20679, + 20535, 20388, 20239, 20089, 19937, 19783, 19628, 19470, + 19312, 19152, 18991, 18828, 18664, 18498, 18331, 18164, + 17994, 17824, 17653, 17480, 17306, 17132, 16956, 16779, + 16602, 16423, 16243, 16063, 15881, 15699, 15515, 15331, + 15146, 14960, 14774, 14586, 14398, 14209, 14019, 13829, + 13637, 13445 +}; + +// sum of log2(i)^2 from table index to inst->anaLen2 in Q2 +// Note that the first table value is invalid, since log2(0) = -infinity +static const WebRtc_Word16 kSumSquareLogIndex[66] = { + 0, 16959, 16959, 16955, 16945, 16929, 16908, 16881, + 16850, 16814, 16773, 16729, 16681, 16630, 16575, 16517, + 16456, 16392, 16325, 16256, 16184, 16109, 16032, 15952, + 15870, 15786, 15700, 15612, 15521, 15429, 15334, 15238, + 15140, 15040, 14938, 14834, 14729, 14622, 14514, 14404, + 14292, 14179, 14064, 13947, 13830, 13710, 13590, 13468, + 13344, 13220, 13094, 12966, 12837, 12707, 12576, 12444, + 12310, 12175, 12039, 11902, 11763, 11624, 11483, 11341, + 11198, 11054 +}; + +// log2(table index) in Q12 +// Note that the first table value is invalid, since log2(0) = -infinity +static const WebRtc_Word16 kLogIndex[129] = { + 0, 0, 4096, 6492, 8192, 9511, 10588, 11499, + 12288, 12984, 13607, 
14170, 14684, 15157, 15595, 16003, + 16384, 16742, 17080, 17400, 17703, 17991, 18266, 18529, + 18780, 19021, 19253, 19476, 19691, 19898, 20099, 20292, + 20480, 20662, 20838, 21010, 21176, 21338, 21496, 21649, + 21799, 21945, 22087, 22226, 22362, 22495, 22625, 22752, + 22876, 22998, 23117, 23234, 23349, 23462, 23572, 23680, + 23787, 23892, 23994, 24095, 24195, 24292, 24388, 24483, + 24576, 24668, 24758, 24847, 24934, 25021, 25106, 25189, + 25272, 25354, 25434, 25513, 25592, 25669, 25745, 25820, + 25895, 25968, 26041, 26112, 26183, 26253, 26322, 26390, + 26458, 26525, 26591, 26656, 26721, 26784, 26848, 26910, + 26972, 27033, 27094, 27154, 27213, 27272, 27330, 27388, + 27445, 27502, 27558, 27613, 27668, 27722, 27776, 27830, + 27883, 27935, 27988, 28039, 28090, 28141, 28191, 28241, + 28291, 28340, 28388, 28437, 28484, 28532, 28579, 28626, + 28672 +}; + +// determinant of estimation matrix in Q0 corresponding to the log2 tables above +// Note that the first table value is invalid, since log2(0) = -infinity +static const WebRtc_Word16 kDeterminantEstMatrix[66] = { + 0, 29814, 25574, 22640, 20351, 18469, 16873, 15491, + 14277, 13199, 12233, 11362, 10571, 9851, 9192, 8587, + 8030, 7515, 7038, 6596, 6186, 5804, 5448, 5115, + 4805, 4514, 4242, 3988, 3749, 3524, 3314, 3116, + 2930, 2755, 2590, 2435, 2289, 2152, 2022, 1900, + 1785, 1677, 1575, 1478, 1388, 1302, 1221, 1145, + 1073, 1005, 942, 881, 825, 771, 721, 674, + 629, 587, 547, 510, 475, 442, 411, 382, + 355, 330 +}; + +void WebRtcNsx_UpdateNoiseEstimate(NsxInst_t *inst, int offset) +{ + WebRtc_Word32 tmp32no1 = 0; + WebRtc_Word32 tmp32no2 = 0; + + WebRtc_Word16 tmp16no1 = 0; + WebRtc_Word16 tmp16no2 = 0; + WebRtc_Word16 exp2Const = 11819; // Q13 + + int i = 0; + + tmp16no2 = WebRtcSpl_MaxValueW16(inst->noiseEstLogQuantile + offset, inst->magnLen); + inst->qNoise = 14 + - (int)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(exp2Const, tmp16no2, 21); + for (i = 0; i < inst->magnLen; i++) + { + // 
inst->quantile[i]=exp(inst->lquantile[offset+i]); + // in Q21 + tmp32no2 = WEBRTC_SPL_MUL_16_16(exp2Const, inst->noiseEstLogQuantile[offset + i]); + tmp32no1 = (0x00200000 | (tmp32no2 & 0x001FFFFF)); + tmp16no1 = -(WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32no2, 21); + tmp16no1 += 21;// shift 21 to get result in Q0 + tmp16no1 -= (WebRtc_Word16)inst->qNoise; //shift to get result in Q(qNoise) + if (tmp16no1 > 0) + { + inst->noiseEstQuantile[i] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32no1 + + kRoundTable[tmp16no1], tmp16no1); + } + else + { + inst->noiseEstQuantile[i] = (WebRtc_Word16)WEBRTC_SPL_LSHIFT_W32(tmp32no1, + -tmp16no1); + } + } +} + +void WebRtcNsx_CalcParametricNoiseEstimate(NsxInst_t *inst, + WebRtc_Word16 pink_noise_exp_avg, + WebRtc_Word32 pink_noise_num_avg, + int freq_index, + WebRtc_UWord32 *noise_estimate, + WebRtc_UWord32 *noise_estimate_avg) +{ + WebRtc_Word32 tmp32no1 = 0; + WebRtc_Word32 tmp32no2 = 0; + + WebRtc_Word16 int_part = 0; + WebRtc_Word16 frac_part = 0; + + // Use pink noise estimate + // noise_estimate = 2^(pinkNoiseNumerator + pinkNoiseExp * log2(j)) + assert(freq_index > 0); + tmp32no2 = WEBRTC_SPL_MUL_16_16(pink_noise_exp_avg, kLogIndex[freq_index]); // Q26 + tmp32no2 = WEBRTC_SPL_RSHIFT_W32(tmp32no2, 15); // Q11 + tmp32no1 = pink_noise_num_avg - tmp32no2; // Q11 + + // Calculate output: 2^tmp32no1 + // Output in Q(minNorm-stages) + tmp32no1 += WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)(inst->minNorm - inst->stages), 11); + if (tmp32no1 > 0) + { + int_part = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32no1, 11); + frac_part = (WebRtc_Word16)(tmp32no1 & 0x000007ff); // Q11 + // Piecewise linear approximation of 'b' in + // 2^(int_part+frac_part) = 2^int_part * (1 + b) + // 'b' is given in Q11 and below stored in frac_part. 
+ if (WEBRTC_SPL_RSHIFT_W32(frac_part, 10)) + { + // Upper fractional part + tmp32no2 = WEBRTC_SPL_MUL_32_16(2048 - frac_part, 1244); // Q21 + tmp32no2 = 2048 - WEBRTC_SPL_RSHIFT_W32(tmp32no2, 10); + } + else + { + // Lower fractional part + tmp32no2 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_32_16(frac_part, 804), 10); + } + // Shift fractional part to Q(minNorm-stages) + tmp32no2 = WEBRTC_SPL_SHIFT_W32(tmp32no2, int_part - 11); + *noise_estimate_avg = WEBRTC_SPL_LSHIFT_U32(1, int_part) + (WebRtc_UWord32)tmp32no2; + // Scale up to initMagnEst, which is not block averaged + *noise_estimate = (*noise_estimate_avg) * (WebRtc_UWord32)(inst->blockIndex + 1); + } +} + +// Initialize state +WebRtc_Word32 WebRtcNsx_InitCore(NsxInst_t *inst, WebRtc_UWord32 fs) +{ + int i; + + //check for valid pointer + if (inst == NULL) + { + return -1; + } + // + + // Initialization of struct + if (fs == 8000 || fs == 16000 || fs == 32000) + { + inst->fs = fs; + } else + { + return -1; + } + + if (fs == 8000) + { + inst->blockLen10ms = 80; + inst->anaLen = 128; + inst->stages = 7; + inst->window = kBlocks80w128x; + inst->thresholdLogLrt = 131072; //default threshold for LRT feature + inst->maxLrt = 0x0040000; + inst->minLrt = 52429; + } else if (fs == 16000) + { + inst->blockLen10ms = 160; + inst->anaLen = 256; + inst->stages = 8; + inst->window = kBlocks160w256x; + inst->thresholdLogLrt = 212644; //default threshold for LRT feature + inst->maxLrt = 0x0080000; + inst->minLrt = 104858; + } else if (fs == 32000) + { + inst->blockLen10ms = 160; + inst->anaLen = 256; + inst->stages = 8; + inst->window = kBlocks160w256x; + inst->thresholdLogLrt = 212644; //default threshold for LRT feature + inst->maxLrt = 0x0080000; + inst->minLrt = 104858; + } + inst->anaLen2 = WEBRTC_SPL_RSHIFT_W16(inst->anaLen, 1); + inst->magnLen = inst->anaLen2 + 1; + + WebRtcSpl_ZerosArrayW16(inst->analysisBuffer, ANAL_BLOCKL_MAX); + WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer, ANAL_BLOCKL_MAX); + + // for HB processing 
+ WebRtcSpl_ZerosArrayW16(inst->dataBufHBFX, ANAL_BLOCKL_MAX); + // for quantile noise estimation + WebRtcSpl_ZerosArrayW16(inst->noiseEstQuantile, HALF_ANAL_BLOCKL); + for (i = 0; i < SIMULT * HALF_ANAL_BLOCKL; i++) + { + inst->noiseEstLogQuantile[i] = 2048; // Q8 + inst->noiseEstDensity[i] = 153; // Q9 + } + for (i = 0; i < SIMULT; i++) + { + inst->noiseEstCounter[i] = (WebRtc_Word16)(END_STARTUP_LONG * (i + 1)) / SIMULT; + } + + // Initialize suppression filter with ones + WebRtcSpl_MemSetW16((WebRtc_Word16*)inst->noiseSupFilter, 16384, HALF_ANAL_BLOCKL); + + // Set the aggressiveness: default + inst->aggrMode = 0; + + //initialize variables for new method + inst->priorNonSpeechProb = 8192; // Q14(0.5) prior probability for speech/noise + for (i = 0; i < HALF_ANAL_BLOCKL; i++) + { + inst->prevMagnU16[i] = 0; + inst->prevNoiseU32[i] = 0; //previous noise-spectrum + inst->logLrtTimeAvgW32[i] = 0; //smooth LR ratio + inst->avgMagnPause[i] = 0; //conservative noise spectrum estimate + inst->initMagnEst[i] = 0; //initial average magnitude spectrum + } + + //feature quantities + inst->thresholdSpecDiff = 50; //threshold for difference feature: determined on-line + inst->thresholdSpecFlat = 20480; //threshold for flatness: determined on-line + inst->featureLogLrt = inst->thresholdLogLrt; //average LRT factor (= threshold) + inst->featureSpecFlat = inst->thresholdSpecFlat; //spectral flatness (= threshold) + inst->featureSpecDiff = inst->thresholdSpecDiff; //spectral difference (= threshold) + inst->weightLogLrt = 6; //default weighting par for LRT feature + inst->weightSpecFlat = 0; //default weighting par for spectral flatness feature + inst->weightSpecDiff = 0; //default weighting par for spectral difference feature + + inst->curAvgMagnEnergy = 0; //window time-average of input magnitude spectrum + inst->timeAvgMagnEnergy = 0; //normalization for spectral difference + inst->timeAvgMagnEnergyTmp = 0; //normalization for spectral difference + + //histogram quantities: 
used to estimate/update thresholds for features + WebRtcSpl_ZerosArrayW16(inst->histLrt, HIST_PAR_EST); + WebRtcSpl_ZerosArrayW16(inst->histSpecDiff, HIST_PAR_EST); + WebRtcSpl_ZerosArrayW16(inst->histSpecFlat, HIST_PAR_EST); + + inst->blockIndex = -1; //frame counter + + //inst->modelUpdate = 500; //window for update + inst->modelUpdate = (1 << STAT_UPDATES); //window for update + inst->cntThresUpdate = 0; //counter feature thresholds updates + + inst->sumMagn = 0; + inst->magnEnergy = 0; + inst->prevQMagn = 0; + inst->qNoise = 0; + inst->prevQNoise = 0; + + inst->energyIn = 0; + inst->scaleEnergyIn = 0; + + inst->whiteNoiseLevel = 0; + inst->pinkNoiseNumerator = 0; + inst->pinkNoiseExp = 0; + inst->minNorm = 15; // Start with full scale + inst->zeroInputSignal = 0; + + //default mode + WebRtcNsx_set_policy_core(inst, 0); + +#ifdef NS_FILEDEBUG + inst->infile=fopen("indebug.pcm","wb"); + inst->outfile=fopen("outdebug.pcm","wb"); + inst->file1=fopen("file1.pcm","wb"); + inst->file2=fopen("file2.pcm","wb"); + inst->file3=fopen("file3.pcm","wb"); + inst->file4=fopen("file4.pcm","wb"); + inst->file5=fopen("file5.pcm","wb"); +#endif + + inst->initFlag = 1; + + return 0; +} + +int WebRtcNsx_set_policy_core(NsxInst_t *inst, int mode) +{ + // allow for modes:0,1,2,3 + if (mode < 0 || mode > 3) + { + return -1; + } + + inst->aggrMode = mode; + if (mode == 0) + { + inst->overdrive = 256; // Q8(1.0) + inst->denoiseBound = 8192; // Q14(0.5) + inst->gainMap = 0; // No gain compensation + } else if (mode == 1) + { + inst->overdrive = 256; // Q8(1.0) + inst->denoiseBound = 4096; // Q14(0.25) + inst->factor2Table = kFactor2Aggressiveness1; + inst->gainMap = 1; + } else if (mode == 2) + { + inst->overdrive = 282; // ~= Q8(1.1) + inst->denoiseBound = 2048; // Q14(0.125) + inst->factor2Table = kFactor2Aggressiveness2; + inst->gainMap = 1; + } else if (mode == 3) + { + inst->overdrive = 320; // Q8(1.25) + inst->denoiseBound = 1475; // ~= Q14(0.09) + inst->factor2Table = 
kFactor2Aggressiveness3; + inst->gainMap = 1; + } + return 0; +} + +void WebRtcNsx_NoiseEstimation(NsxInst_t *inst, WebRtc_UWord16 *magn, WebRtc_UWord32 *noise, + WebRtc_Word16 *qNoise) +{ + WebRtc_Word32 numerator; + + WebRtc_Word16 lmagn[HALF_ANAL_BLOCKL], counter, countDiv, countProd, delta, zeros, frac; + WebRtc_Word16 log2, tabind, logval, tmp16, tmp16no1, tmp16no2; + WebRtc_Word16 log2Const = 22713; // Q15 + WebRtc_Word16 widthFactor = 21845; + + int i, s, offset; + + numerator = FACTOR_Q16; + + tabind = inst->stages - inst->normData; + if (tabind < 0) + { + logval = -kLogTable[-tabind]; + } else + { + logval = kLogTable[tabind]; + } + + // lmagn(i)=log(magn(i))=log(2)*log2(magn(i)) + // magn is in Q(-stages), and the real lmagn values are: + // real_lmagn(i)=log(magn(i)*2^stages)=log(magn(i))+log(2^stages) + // lmagn in Q8 + for (i = 0; i < inst->magnLen; i++) + { + if (magn[i]) + { + zeros = WebRtcSpl_NormU32((WebRtc_UWord32)magn[i]); + frac = (WebRtc_Word16)((((WebRtc_UWord32)magn[i] << zeros) & 0x7FFFFFFF) >> 23); + // log2(magn(i)) + log2 = (WebRtc_Word16)(((31 - zeros) << 8) + kLogTableFrac[frac]); + // log2(magn(i))*log(2) + lmagn[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(log2, log2Const, 15); + // + log(2^stages) + lmagn[i] += logval; + } else + { + lmagn[i] = logval;//0; + } + } + + // loop over simultaneous estimates + for (s = 0; s < SIMULT; s++) + { + offset = s * inst->magnLen; + + // Get counter values from state + counter = inst->noiseEstCounter[s]; + countDiv = kCounterDiv[counter]; + countProd = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16(counter, countDiv); + + // quant_est(...) 
+ for (i = 0; i < inst->magnLen; i++) + { + // compute delta + if (inst->noiseEstDensity[offset + i] > 512) + { + delta = WebRtcSpl_DivW32W16ResW16(numerator, + inst->noiseEstDensity[offset + i]); + } else + { + delta = FACTOR_Q7; + } + + // update log quantile estimate + tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(delta, countDiv, 14); + if (lmagn[i] > inst->noiseEstLogQuantile[offset + i]) + { + // +=QUANTILE*delta/(inst->counter[s]+1) QUANTILE=0.25, =1 in Q2 + // CounterDiv=1/inst->counter[s] in Q15 + tmp16 += 2; + tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 2); + inst->noiseEstLogQuantile[offset + i] += tmp16no1; + } else + { + tmp16 += 1; + tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 1); + // *(1-QUANTILE), in Q2 QUANTILE=0.25, 1-0.25=0.75=3 in Q2 + tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, 3, 1); + inst->noiseEstLogQuantile[offset + i] -= tmp16no2; + } + + // update density estimate + if (WEBRTC_SPL_ABS_W16(lmagn[i] - inst->noiseEstLogQuantile[offset + i]) + < WIDTH_Q8) + { + tmp16no1 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( + inst->noiseEstDensity[offset + i], countProd, 15); + tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(widthFactor, + countDiv, 15); + inst->noiseEstDensity[offset + i] = tmp16no1 + tmp16no2; + } + } // end loop over magnitude spectrum + + if (counter >= END_STARTUP_LONG) + { + inst->noiseEstCounter[s] = 0; + if (inst->blockIndex >= END_STARTUP_LONG) + { + WebRtcNsx_UpdateNoiseEstimate(inst, offset); + } + } + inst->noiseEstCounter[s]++; + + } // end loop over simultaneous estimates + + // Sequentially update the noise during startup + if (inst->blockIndex < END_STARTUP_LONG) + { + WebRtcNsx_UpdateNoiseEstimate(inst, offset); + } + + for (i = 0; i < inst->magnLen; i++) + { + noise[i] = (WebRtc_UWord32)(inst->noiseEstQuantile[i]); // Q(qNoise) + } + (*qNoise) = (WebRtc_Word16)inst->qNoise; +} + +// Extract thresholds for feature parameters +// histograms are computed over some window_size 
(given by window_pars) +// thresholds and weights are extracted every window +// flag 0 means update histogram only, flag 1 means compute the thresholds/weights +// threshold and weights are returned in: inst->priorModelPars +void WebRtcNsx_FeatureParameterExtraction(NsxInst_t *inst, int flag) +{ + WebRtc_UWord32 tmpU32; + WebRtc_UWord32 histIndex; + WebRtc_UWord32 posPeak1SpecFlatFX, posPeak2SpecFlatFX; + WebRtc_UWord32 posPeak1SpecDiffFX, posPeak2SpecDiffFX; + + WebRtc_Word32 tmp32; + WebRtc_Word32 fluctLrtFX, thresFluctLrtFX; + WebRtc_Word32 avgHistLrtFX, avgSquareHistLrtFX, avgHistLrtComplFX; + + WebRtc_Word16 j; + WebRtc_Word16 numHistLrt; + + int i; + int useFeatureSpecFlat, useFeatureSpecDiff, featureSum; + int maxPeak1, maxPeak2; + int weightPeak1SpecFlat, weightPeak2SpecFlat; + int weightPeak1SpecDiff, weightPeak2SpecDiff; + + //update histograms + if (!flag) + { + // LRT + // Type casting to UWord32 is safe since negative values will not be wrapped to larger + // values than HIST_PAR_EST + histIndex = (WebRtc_UWord32)(inst->featureLogLrt); + if (histIndex < HIST_PAR_EST) + { + inst->histLrt[histIndex]++; + } + // Spectral flatness + // (inst->featureSpecFlat*20)>>10 = (inst->featureSpecFlat*5)>>8 + histIndex = WEBRTC_SPL_RSHIFT_U32(inst->featureSpecFlat * 5, 8); + if (histIndex < HIST_PAR_EST) + { + inst->histSpecFlat[histIndex]++; + } + // Spectral difference + histIndex = HIST_PAR_EST; + if (inst->timeAvgMagnEnergy) + { + // Guard against division by zero + // If timeAvgMagnEnergy == 0 we have no normalizing statistics and therefore can't + // update the histogram + histIndex = WEBRTC_SPL_UDIV((inst->featureSpecDiff * 5) >> inst->stages, + inst->timeAvgMagnEnergy); + } + if (histIndex < HIST_PAR_EST) + { + inst->histSpecDiff[histIndex]++; + } + } + + // extract parameters for speech/noise probability + if (flag) + { + useFeatureSpecDiff = 1; + //for LRT feature: + // compute the average over inst->featureExtractionParams.rangeAvgHistLrt + avgHistLrtFX = 
0; + avgSquareHistLrtFX = 0; + numHistLrt = 0; + for (i = 0; i < BIN_SIZE_LRT; i++) + { + j = (2 * i + 1); + tmp32 = WEBRTC_SPL_MUL_16_16(inst->histLrt[i], j); + avgHistLrtFX += tmp32; + numHistLrt += inst->histLrt[i]; + avgSquareHistLrtFX += WEBRTC_SPL_MUL_32_16(tmp32, j); + } + avgHistLrtComplFX = avgHistLrtFX; + for (; i < HIST_PAR_EST; i++) + { + j = (2 * i + 1); + tmp32 = WEBRTC_SPL_MUL_16_16(inst->histLrt[i], j); + avgHistLrtComplFX += tmp32; + avgSquareHistLrtFX += WEBRTC_SPL_MUL_32_16(tmp32, j); + } + fluctLrtFX = WEBRTC_SPL_MUL(avgSquareHistLrtFX, numHistLrt); + fluctLrtFX -= WEBRTC_SPL_MUL(avgHistLrtFX, avgHistLrtComplFX); + thresFluctLrtFX = THRES_FLUCT_LRT * numHistLrt; + // get threshold for LRT feature: + tmpU32 = (FACTOR_1_LRT_DIFF * (WebRtc_UWord32)avgHistLrtFX); + if ((fluctLrtFX < thresFluctLrtFX) || (numHistLrt == 0) || (tmpU32 + > (WebRtc_UWord32)(100 * numHistLrt))) + { + inst->thresholdLogLrt = inst->maxLrt; //very low fluctuation, so likely noise + } else + { + tmp32 = (WebRtc_Word32)((tmpU32 << (9 + inst->stages)) / numHistLrt / 25); + // check if value is within min/max range + inst->thresholdLogLrt = WEBRTC_SPL_SAT(inst->maxLrt, tmp32, inst->minLrt); + } + if (fluctLrtFX < thresFluctLrtFX) + { + // Do not use difference feature if fluctuation of LRT feature is very low: + // most likely just noise state + useFeatureSpecDiff = 0; + } + + // for spectral flatness and spectral difference: compute the main peaks of histogram + maxPeak1 = 0; + maxPeak2 = 0; + posPeak1SpecFlatFX = 0; + posPeak2SpecFlatFX = 0; + weightPeak1SpecFlat = 0; + weightPeak2SpecFlat = 0; + + // peaks for flatness + for (i = 0; i < HIST_PAR_EST; i++) + { + if (inst->histSpecFlat[i] > maxPeak1) + { + // Found new "first" peak + maxPeak2 = maxPeak1; + weightPeak2SpecFlat = weightPeak1SpecFlat; + posPeak2SpecFlatFX = posPeak1SpecFlatFX; + + maxPeak1 = inst->histSpecFlat[i]; + weightPeak1SpecFlat = inst->histSpecFlat[i]; + posPeak1SpecFlatFX = (WebRtc_UWord32)(2 * i + 1); + } 
else if (inst->histSpecFlat[i] > maxPeak2) + { + // Found new "second" peak + maxPeak2 = inst->histSpecFlat[i]; + weightPeak2SpecFlat = inst->histSpecFlat[i]; + posPeak2SpecFlatFX = (WebRtc_UWord32)(2 * i + 1); + } + } + + // for spectral flatness feature + useFeatureSpecFlat = 1; + // merge the two peaks if they are close + if ((posPeak1SpecFlatFX - posPeak2SpecFlatFX < LIM_PEAK_SPACE_FLAT_DIFF) + && (weightPeak2SpecFlat * LIM_PEAK_WEIGHT_FLAT_DIFF > weightPeak1SpecFlat)) + { + weightPeak1SpecFlat += weightPeak2SpecFlat; + posPeak1SpecFlatFX = (posPeak1SpecFlatFX + posPeak2SpecFlatFX) >> 1; + } + //reject if weight of peaks is not large enough, or peak value too small + if (weightPeak1SpecFlat < THRES_WEIGHT_FLAT_DIFF || posPeak1SpecFlatFX + < THRES_PEAK_FLAT) + { + useFeatureSpecFlat = 0; + } else // if selected, get the threshold + { + // compute the threshold and check if value is within min/max range + inst->thresholdSpecFlat = WEBRTC_SPL_SAT(MAX_FLAT_Q10, FACTOR_2_FLAT_Q10 + * posPeak1SpecFlatFX, MIN_FLAT_Q10); //Q10 + } + // done with flatness feature + + if (useFeatureSpecDiff) + { + //compute two peaks for spectral difference + maxPeak1 = 0; + maxPeak2 = 0; + posPeak1SpecDiffFX = 0; + posPeak2SpecDiffFX = 0; + weightPeak1SpecDiff = 0; + weightPeak2SpecDiff = 0; + // peaks for spectral difference + for (i = 0; i < HIST_PAR_EST; i++) + { + if (inst->histSpecDiff[i] > maxPeak1) + { + // Found new "first" peak + maxPeak2 = maxPeak1; + weightPeak2SpecDiff = weightPeak1SpecDiff; + posPeak2SpecDiffFX = posPeak1SpecDiffFX; + + maxPeak1 = inst->histSpecDiff[i]; + weightPeak1SpecDiff = inst->histSpecDiff[i]; + posPeak1SpecDiffFX = (WebRtc_UWord32)(2 * i + 1); + } else if (inst->histSpecDiff[i] > maxPeak2) + { + // Found new "second" peak + maxPeak2 = inst->histSpecDiff[i]; + weightPeak2SpecDiff = inst->histSpecDiff[i]; + posPeak2SpecDiffFX = (WebRtc_UWord32)(2 * i + 1); + } + } + + // merge the two peaks if they are close + if ((posPeak1SpecDiffFX - 
posPeak2SpecDiffFX < LIM_PEAK_SPACE_FLAT_DIFF) + && (weightPeak2SpecDiff * LIM_PEAK_WEIGHT_FLAT_DIFF > weightPeak1SpecDiff)) + { + weightPeak1SpecDiff += weightPeak2SpecDiff; + posPeak1SpecDiffFX = (posPeak1SpecDiffFX + posPeak2SpecDiffFX) >> 1; + } + // get the threshold value and check if value is within min/max range + inst->thresholdSpecDiff = WEBRTC_SPL_SAT(MAX_DIFF, FACTOR_1_LRT_DIFF + * posPeak1SpecDiffFX, MIN_DIFF); //5x bigger + //reject if weight of peaks is not large enough + if (weightPeak1SpecDiff < THRES_WEIGHT_FLAT_DIFF) + { + useFeatureSpecDiff = 0; + } + // done with spectral difference feature + } + + // select the weights between the features + // inst->priorModelPars[4] is weight for LRT: always selected + featureSum = 6 / (1 + useFeatureSpecFlat + useFeatureSpecDiff); + inst->weightLogLrt = featureSum; + inst->weightSpecFlat = useFeatureSpecFlat * featureSum; + inst->weightSpecDiff = useFeatureSpecDiff * featureSum; + + // set histograms to zero for next update + WebRtcSpl_ZerosArrayW16(inst->histLrt, HIST_PAR_EST); + WebRtcSpl_ZerosArrayW16(inst->histSpecDiff, HIST_PAR_EST); + WebRtcSpl_ZerosArrayW16(inst->histSpecFlat, HIST_PAR_EST); + } // end of flag == 1 +} + + +// Compute spectral flatness on input spectrum +// magn is the magnitude spectrum +// spectral flatness is returned in inst->featureSpecFlat +void WebRtcNsx_ComputeSpectralFlatness(NsxInst_t *inst, WebRtc_UWord16 *magn) +{ + WebRtc_UWord32 tmpU32; + WebRtc_UWord32 avgSpectralFlatnessNum, avgSpectralFlatnessDen; + + WebRtc_Word32 tmp32; + WebRtc_Word32 currentSpectralFlatness, logCurSpectralFlatness; + + WebRtc_Word16 zeros, frac, intPart; + + int i; + + // for flatness + avgSpectralFlatnessNum = 0; + avgSpectralFlatnessDen = inst->sumMagn - (WebRtc_UWord32)magn[0]; // Q(normData-stages) + + // compute log of ratio of the geometric to arithmetic mean: check for log(0) case + // flatness = exp( sum(log(magn[i]))/N - log(sum(magn[i])/N) ) + // = exp( sum(log(magn[i]))/N ) * N / 
sum(magn[i]) + // = 2^( sum(log2(magn[i]))/N - (log2(sum(magn[i])) - log2(N)) ) [This is used] + for (i = 1; i < inst->magnLen; i++) + { + // First bin is excluded from spectrum measures. Number of bins is now a power of 2 + if (magn[i]) + { + zeros = WebRtcSpl_NormU32((WebRtc_UWord32)magn[i]); + frac = (WebRtc_Word16)(((WebRtc_UWord32)((WebRtc_UWord32)(magn[i]) << zeros) + & 0x7FFFFFFF) >> 23); + // log2(magn(i)) + tmpU32 = (WebRtc_UWord32)(((31 - zeros) << 8) + kLogTableFrac[frac]); // Q8 + avgSpectralFlatnessNum += tmpU32; // Q8 + } else + { + //if at least one frequency component is zero, treat separately + tmpU32 = WEBRTC_SPL_UMUL_32_16(inst->featureSpecFlat, SPECT_FLAT_TAVG_Q14); // Q24 + inst->featureSpecFlat -= WEBRTC_SPL_RSHIFT_U32(tmpU32, 14); // Q10 + return; + } + } + //ratio and inverse log: check for case of log(0) + zeros = WebRtcSpl_NormU32(avgSpectralFlatnessDen); + frac = (WebRtc_Word16)(((avgSpectralFlatnessDen << zeros) & 0x7FFFFFFF) >> 23); + // log2(avgSpectralFlatnessDen) + tmp32 = (WebRtc_Word32)(((31 - zeros) << 8) + kLogTableFrac[frac]); // Q8 + logCurSpectralFlatness = (WebRtc_Word32)avgSpectralFlatnessNum; + logCurSpectralFlatness += ((WebRtc_Word32)(inst->stages - 1) << (inst->stages + 7)); // Q(8+stages-1) + logCurSpectralFlatness -= (tmp32 << (inst->stages - 1)); + logCurSpectralFlatness = WEBRTC_SPL_LSHIFT_W32(logCurSpectralFlatness, 10 - inst->stages); // Q17 + tmp32 = (WebRtc_Word32)(0x00020000 | (WEBRTC_SPL_ABS_W32(logCurSpectralFlatness) + & 0x0001FFFF)); //Q17 + intPart = -(WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(logCurSpectralFlatness, 17); + intPart += 7; // Shift 7 to get the output in Q10 (from Q17 = -17+10) + if (intPart > 0) + { + currentSpectralFlatness = WEBRTC_SPL_RSHIFT_W32(tmp32, intPart); + } else + { + currentSpectralFlatness = WEBRTC_SPL_LSHIFT_W32(tmp32, -intPart); + } + + //time average update of spectral flatness feature + tmp32 = currentSpectralFlatness - (WebRtc_Word32)inst->featureSpecFlat; // Q10 + tmp32 = 
WEBRTC_SPL_MUL_32_16(SPECT_FLAT_TAVG_Q14, tmp32); // Q24 + inst->featureSpecFlat = (WebRtc_UWord32)((WebRtc_Word32)inst->featureSpecFlat + + WEBRTC_SPL_RSHIFT_W32(tmp32, 14)); // Q10 + // done with flatness feature +} + + +// Compute the difference measure between input spectrum and a template/learned noise spectrum +// magn_tmp is the input spectrum +// the reference/template spectrum is inst->magn_avg_pause[i] +// returns (normalized) spectral difference in inst->featureSpecDiff +void WebRtcNsx_ComputeSpectralDifference(NsxInst_t *inst, WebRtc_UWord16 *magnIn) +{ + // This is to be calculated: + // avgDiffNormMagn = var(magnIn) - cov(magnIn, magnAvgPause)^2 / var(magnAvgPause) + + WebRtc_UWord32 tmpU32no1, tmpU32no2; + WebRtc_UWord32 varMagnUFX, varPauseUFX, avgDiffNormMagnUFX; + + WebRtc_Word32 tmp32no1, tmp32no2; + WebRtc_Word32 avgPauseFX, avgMagnFX, covMagnPauseFX; + WebRtc_Word32 maxPause, minPause; + + WebRtc_Word16 tmp16no1; + + int i, norm32, nShifts; + + avgPauseFX = 0; + maxPause = 0; + minPause = inst->avgMagnPause[0]; // Q(prevQMagn) + // compute average quantities + for (i = 0; i < inst->magnLen; i++) + { + // Compute mean of magn_pause + avgPauseFX += inst->avgMagnPause[i]; // in Q(prevQMagn) + maxPause = WEBRTC_SPL_MAX(maxPause, inst->avgMagnPause[i]); + minPause = WEBRTC_SPL_MIN(minPause, inst->avgMagnPause[i]); + } + // normalize by replacing div of "inst->magnLen" with "inst->stages-1" shifts + avgPauseFX = WEBRTC_SPL_RSHIFT_W32(avgPauseFX, inst->stages - 1); + avgMagnFX = (WebRtc_Word32)WEBRTC_SPL_RSHIFT_U32(inst->sumMagn, inst->stages - 1); + // Largest possible deviation in magnPause for (co)var calculations + tmp32no1 = WEBRTC_SPL_MAX(maxPause - avgPauseFX, avgPauseFX - minPause); + // Get number of shifts to make sure we don't get wrap around in varPause + nShifts = WEBRTC_SPL_MAX(0, 10 + inst->stages - WebRtcSpl_NormW32(tmp32no1)); + + varMagnUFX = 0; + varPauseUFX = 0; + covMagnPauseFX = 0; + for (i = 0; i < inst->magnLen; i++) + { + // 
Compute var and cov of magn and magn_pause + tmp16no1 = (WebRtc_Word16)((WebRtc_Word32)magnIn[i] - avgMagnFX); + tmp32no2 = inst->avgMagnPause[i] - avgPauseFX; + varMagnUFX += (WebRtc_UWord32)WEBRTC_SPL_MUL_16_16(tmp16no1, tmp16no1); // Q(2*qMagn) + tmp32no1 = WEBRTC_SPL_MUL_32_16(tmp32no2, tmp16no1); // Q(prevQMagn+qMagn) + covMagnPauseFX += tmp32no1; // Q(prevQMagn+qMagn) + tmp32no1 = WEBRTC_SPL_RSHIFT_W32(tmp32no2, nShifts); // Q(prevQMagn-minPause) + varPauseUFX += (WebRtc_UWord32)WEBRTC_SPL_MUL(tmp32no1, tmp32no1); // Q(2*(prevQMagn-minPause)) + } + //update of average magnitude spectrum: Q(-2*stages) and averaging replaced by shifts + inst->curAvgMagnEnergy += WEBRTC_SPL_RSHIFT_U32(inst->magnEnergy, 2 * inst->normData + + inst->stages - 1); + + avgDiffNormMagnUFX = varMagnUFX; // Q(2*qMagn) + if ((varPauseUFX) && (covMagnPauseFX)) + { + tmpU32no1 = (WebRtc_UWord32)WEBRTC_SPL_ABS_W32(covMagnPauseFX); // Q(prevQMagn+qMagn) + norm32 = WebRtcSpl_NormU32(tmpU32no1) - 16; + if (norm32 > 0) + { + tmpU32no1 = WEBRTC_SPL_LSHIFT_U32(tmpU32no1, norm32); // Q(prevQMagn+qMagn+norm32) + } else + { + tmpU32no1 = WEBRTC_SPL_RSHIFT_U32(tmpU32no1, -norm32); // Q(prevQMagn+qMagn+norm32) + } + tmpU32no2 = WEBRTC_SPL_UMUL(tmpU32no1, tmpU32no1); // Q(2*(prevQMagn+qMagn-norm32)) + + nShifts += norm32; + nShifts <<= 1; + if (nShifts < 0) + { + varPauseUFX >>= (-nShifts); // Q(2*(qMagn+norm32+minPause)) + nShifts = 0; + } + tmpU32no1 = WEBRTC_SPL_UDIV(tmpU32no2, varPauseUFX); // Q(2*(qMagn+norm32-16+minPause)) + tmpU32no1 = WEBRTC_SPL_RSHIFT_U32(tmpU32no1, nShifts); + + avgDiffNormMagnUFX -= WEBRTC_SPL_MIN(avgDiffNormMagnUFX, tmpU32no1); // Q(2*qMagn) + } + //normalize and compute time average update of difference feature + tmpU32no1 = WEBRTC_SPL_RSHIFT_U32(avgDiffNormMagnUFX, 2 * inst->normData); + if (inst->featureSpecDiff > tmpU32no1) + { + tmpU32no2 = WEBRTC_SPL_UMUL_32_16(inst->featureSpecDiff - tmpU32no1, + SPECT_DIFF_TAVG_Q8); // Q(8-2*stages) + inst->featureSpecDiff -= 
WEBRTC_SPL_RSHIFT_U32(tmpU32no2, 8); // Q(-2*stages) + } else + { + tmpU32no2 = WEBRTC_SPL_UMUL_32_16(tmpU32no1 - inst->featureSpecDiff, + SPECT_DIFF_TAVG_Q8); // Q(8-2*stages) + inst->featureSpecDiff += WEBRTC_SPL_RSHIFT_U32(tmpU32no2, 8); // Q(-2*stages) + } +} + +// Compute speech/noise probability +// speech/noise probability is returned in: probSpeechFinal +//snrLocPrior is the prior SNR for each frequency (in Q11) +//snrLocPost is the post SNR for each frequency (in Q11) +void WebRtcNsx_SpeechNoiseProb(NsxInst_t *inst, WebRtc_UWord16 *nonSpeechProbFinal, + WebRtc_UWord32 *priorLocSnr, WebRtc_UWord32 *postLocSnr) +{ + WebRtc_UWord32 zeros, num, den, tmpU32no1, tmpU32no2, tmpU32no3; + + WebRtc_Word32 invLrtFX, indPriorFX, tmp32, tmp32no1, tmp32no2, besselTmpFX32; + WebRtc_Word32 frac32, logTmp; + WebRtc_Word32 logLrtTimeAvgKsumFX; + + WebRtc_Word16 indPriorFX16; + WebRtc_Word16 tmp16, tmp16no1, tmp16no2, tmpIndFX, tableIndex, frac, intPart; + + int i, normTmp, normTmp2, nShifts; + + // compute feature based on average LR factor + // this is the average over all frequencies of the smooth log LRT + logLrtTimeAvgKsumFX = 0; + for (i = 0; i < inst->magnLen; i++) + { + besselTmpFX32 = (WebRtc_Word32)postLocSnr[i]; // Q11 + normTmp = WebRtcSpl_NormU32(postLocSnr[i]); + num = WEBRTC_SPL_LSHIFT_U32(postLocSnr[i], normTmp); // Q(11+normTmp) + if (normTmp > 10) + { + den = WEBRTC_SPL_LSHIFT_U32(priorLocSnr[i], normTmp - 11); // Q(normTmp) + } else + { + den = WEBRTC_SPL_RSHIFT_U32(priorLocSnr[i], 11 - normTmp); // Q(normTmp) + } + besselTmpFX32 -= WEBRTC_SPL_UDIV(num, den); // Q11 + + // inst->logLrtTimeAvg[i] += LRT_TAVG * (besselTmp - log(snrLocPrior) - inst->logLrtTimeAvg[i]); + // Here, LRT_TAVG = 0.5 + zeros = WebRtcSpl_NormU32(priorLocSnr[i]); + frac32 = (WebRtc_Word32)(((priorLocSnr[i] << zeros) & 0x7FFFFFFF) >> 19); + tmp32 = WEBRTC_SPL_MUL(frac32, frac32); + tmp32 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL(tmp32, -43), 19); + tmp32 += 
WEBRTC_SPL_MUL_16_16_RSFT((WebRtc_Word16)frac32, 5412, 12); + frac32 = tmp32 + 37; + // tmp32 = log2(priorLocSnr[i]) + tmp32 = (WebRtc_Word32)(((31 - zeros) << 12) + frac32) - (11 << 12); // Q12 + logTmp = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_32_16(tmp32, 178), 8); // log2(priorLocSnr[i])*log(2) + tmp32no1 = WEBRTC_SPL_RSHIFT_W32(logTmp + inst->logLrtTimeAvgW32[i], 1); // Q12 + inst->logLrtTimeAvgW32[i] += (besselTmpFX32 - tmp32no1); // Q12 + + logLrtTimeAvgKsumFX += inst->logLrtTimeAvgW32[i]; // Q12 + } + inst->featureLogLrt = WEBRTC_SPL_RSHIFT_W32(logLrtTimeAvgKsumFX * 5, inst->stages + 10); // 5 = BIN_SIZE_LRT / 2 + // done with computation of LR factor + + // + //compute the indicator functions + // + + // average LRT feature + // FLOAT code + // indicator0 = 0.5 * (tanh(widthPrior * (logLrtTimeAvgKsum - threshPrior0)) + 1.0); + tmpIndFX = 16384; // Q14(1.0) + tmp32no1 = logLrtTimeAvgKsumFX - inst->thresholdLogLrt; // Q12 + nShifts = 7 - inst->stages; // WIDTH_PR_MAP_SHIFT - inst->stages + 5; + //use larger width in tanh map for pause regions + if (tmp32no1 < 0) + { + tmpIndFX = 0; + tmp32no1 = -tmp32no1; + //widthPrior = widthPrior * 2.0; + nShifts++; + } + tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, nShifts); // Q14 + // compute indicator function: sigmoid map + tableIndex = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32no1, 14); + if ((tableIndex < 16) && (tableIndex >= 0)) + { + tmp16no2 = kIndicatorTable[tableIndex]; + tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex]; + frac = (WebRtc_Word16)(tmp32no1 & 0x00003fff); // Q14 + tmp16no2 += (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, frac, 14); + if (tmpIndFX == 0) + { + tmpIndFX = 8192 - tmp16no2; // Q14 + } else + { + tmpIndFX = 8192 + tmp16no2; // Q14 + } + } + indPriorFX = WEBRTC_SPL_MUL_16_16(inst->weightLogLrt, tmpIndFX); // 6*Q14 + + //spectral flatness feature + if (inst->weightSpecFlat) + { + tmpU32no1 = WEBRTC_SPL_UMUL(inst->featureSpecFlat, 400); // Q10 + tmpIndFX = 16384; 
// Q14(1.0) + //use larger width in tanh map for pause regions + tmpU32no2 = inst->thresholdSpecFlat - tmpU32no1; //Q10 + nShifts = 4; + if (inst->thresholdSpecFlat < tmpU32no1) + { + tmpIndFX = 0; + tmpU32no2 = tmpU32no1 - inst->thresholdSpecFlat; + //widthPrior = widthPrior * 2.0; + nShifts++; + } + tmp32no1 = (WebRtc_Word32)WebRtcSpl_DivU32U16(WEBRTC_SPL_LSHIFT_U32(tmpU32no2, + nShifts), 25); //Q14 + tmpU32no1 = WebRtcSpl_DivU32U16(WEBRTC_SPL_LSHIFT_U32(tmpU32no2, nShifts), 25); //Q14 + // compute indicator function: sigmoid map + // FLOAT code + // indicator1 = 0.5 * (tanh(sgnMap * widthPrior * (threshPrior1 - tmpFloat1)) + 1.0); + tableIndex = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 14); + if (tableIndex < 16) + { + tmp16no2 = kIndicatorTable[tableIndex]; + tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex]; + frac = (WebRtc_Word16)(tmpU32no1 & 0x00003fff); // Q14 + tmp16no2 += (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, frac, 14); + if (tmpIndFX) + { + tmpIndFX = 8192 + tmp16no2; // Q14 + } else + { + tmpIndFX = 8192 - tmp16no2; // Q14 + } + } + indPriorFX += WEBRTC_SPL_MUL_16_16(inst->weightSpecFlat, tmpIndFX); // 6*Q14 + } + + //for template spectral-difference + if (inst->weightSpecDiff) + { + tmpU32no1 = 0; + if (inst->featureSpecDiff) + { + normTmp = WEBRTC_SPL_MIN(20 - inst->stages, + WebRtcSpl_NormU32(inst->featureSpecDiff)); + tmpU32no1 = WEBRTC_SPL_LSHIFT_U32(inst->featureSpecDiff, normTmp); // Q(normTmp-2*stages) + tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(inst->timeAvgMagnEnergy, 20 - inst->stages + - normTmp); + if (tmpU32no2) + { + tmpU32no1 = WEBRTC_SPL_UDIV(tmpU32no1, tmpU32no2); // Q14?? 
Q(20 - inst->stages) + } else + { + tmpU32no1 = (WebRtc_UWord32)(0x7fffffff); + } + } + tmpU32no3 = WEBRTC_SPL_UDIV(WEBRTC_SPL_LSHIFT_U32(inst->thresholdSpecDiff, 17), 25); + tmpU32no2 = tmpU32no1 - tmpU32no3; + nShifts = 1; + tmpIndFX = 16384; // Q14(1.0) + //use larger width in tanh map for pause regions + if (tmpU32no2 & 0x80000000) + { + tmpIndFX = 0; + tmpU32no2 = tmpU32no3 - tmpU32no1; + //widthPrior = widthPrior * 2.0; + nShifts--; + } + tmpU32no1 = WEBRTC_SPL_RSHIFT_U32(tmpU32no2, nShifts); + // compute indicator function: sigmoid map + /* FLOAT code + indicator2 = 0.5 * (tanh(widthPrior * (tmpFloat1 - threshPrior2)) + 1.0); + */ + tableIndex = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 14); + if (tableIndex < 16) + { + tmp16no2 = kIndicatorTable[tableIndex]; + tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex]; + frac = (WebRtc_Word16)(tmpU32no1 & 0x00003fff); // Q14 + tmp16no2 += (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(tmp16no1, frac, + 14); + if (tmpIndFX) + { + tmpIndFX = 8192 + tmp16no2; + } else + { + tmpIndFX = 8192 - tmp16no2; + } + } + indPriorFX += WEBRTC_SPL_MUL_16_16(inst->weightSpecDiff, tmpIndFX); // 6*Q14 + } + + //combine the indicator function with the feature weights + // FLOAT code + // indPrior = 1 - (weightIndPrior0 * indicator0 + weightIndPrior1 * indicator1 + weightIndPrior2 * indicator2); + indPriorFX16 = WebRtcSpl_DivW32W16ResW16(98307 - indPriorFX, 6); // Q14 + // done with computing indicator function + + //compute the prior probability + // FLOAT code + // inst->priorNonSpeechProb += PRIOR_UPDATE * (indPriorNonSpeech - inst->priorNonSpeechProb); + tmp16 = indPriorFX16 - inst->priorNonSpeechProb; // Q14 + inst->priorNonSpeechProb += (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(PRIOR_UPDATE_Q14, + tmp16, 14); // Q14 + + //final speech probability: combine prior model with LR factor: + for (i = 0; i < inst->magnLen; i++) + { + // FLOAT code + // invLrt = exp(inst->logLrtTimeAvg[i]); + // invLrt = 
inst->priorSpeechProb * invLrt; + // nonSpeechProbFinal[i] = (1.0 - inst->priorSpeechProb) / (1.0 - inst->priorSpeechProb + invLrt); + // invLrt = (1.0 - inst->priorNonSpeechProb) * invLrt; + // nonSpeechProbFinal[i] = inst->priorNonSpeechProb / (inst->priorNonSpeechProb + invLrt); + nonSpeechProbFinal[i] = 0; // Q8 + if ((inst->logLrtTimeAvgW32[i] < 65300) && (inst->priorNonSpeechProb > 0)) + { + tmp32no1 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL(inst->logLrtTimeAvgW32[i], 23637), + 14); // Q12 + intPart = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32no1, 12); + if (intPart < -8) + { + intPart = -8; + } + frac = (WebRtc_Word16)(tmp32no1 & 0x00000fff); // Q12 + // Quadratic approximation of 2^frac + tmp32no2 = WEBRTC_SPL_RSHIFT_W32(frac * frac * 44, 19); // Q12 + tmp32no2 += WEBRTC_SPL_MUL_16_16_RSFT(frac, 84, 7); // Q12 + invLrtFX = WEBRTC_SPL_LSHIFT_W32(1, 8 + intPart) + + WEBRTC_SPL_SHIFT_W32(tmp32no2, intPart - 4); // Q8 + + normTmp = WebRtcSpl_NormW32(invLrtFX); + normTmp2 = WebRtcSpl_NormW16((16384 - inst->priorNonSpeechProb)); + if (normTmp + normTmp2 < 15) + { + invLrtFX = WEBRTC_SPL_RSHIFT_W32(invLrtFX, 15 - normTmp2 - normTmp); // Q(normTmp+normTmp2-7) + tmp32no1 = WEBRTC_SPL_MUL_32_16(invLrtFX, (16384 - inst->priorNonSpeechProb)); // Q(normTmp+normTmp2+7) + invLrtFX = WEBRTC_SPL_SHIFT_W32(tmp32no1, 7 - normTmp - normTmp2); // Q14 + } else + { + tmp32no1 = WEBRTC_SPL_MUL_32_16(invLrtFX, (16384 - inst->priorNonSpeechProb)); // Q22 + invLrtFX = WEBRTC_SPL_RSHIFT_W32(tmp32no1, 8); // Q14 + } + + tmp32no1 = WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)inst->priorNonSpeechProb, 8); // Q22 + nonSpeechProbFinal[i] = (WebRtc_UWord16)WEBRTC_SPL_DIV(tmp32no1, + (WebRtc_Word32)inst->priorNonSpeechProb + + invLrtFX); // Q8 + if (7 - normTmp - normTmp2 > 0) + { + nonSpeechProbFinal[i] = 0; // Q8 + } + } + } +} + +// Transform input (speechFrame) to frequency domain magnitude (magnU16) +void WebRtcNsx_DataAnalysis(NsxInst_t *inst, short *speechFrame, WebRtc_UWord16 *magnU16) +{ + + 
WebRtc_UWord32 tmpU32no1, tmpU32no2; + + WebRtc_Word32 tmp_1_w32 = 0; + WebRtc_Word32 tmp_2_w32 = 0; + WebRtc_Word32 sum_log_magn = 0; + WebRtc_Word32 sum_log_i_log_magn = 0; + + WebRtc_UWord16 sum_log_magn_u16 = 0; + WebRtc_UWord16 tmp_u16 = 0; + + WebRtc_Word16 sum_log_i = 0; + WebRtc_Word16 sum_log_i_square = 0; + WebRtc_Word16 frac = 0; + WebRtc_Word16 log2 = 0; + WebRtc_Word16 matrix_determinant = 0; + WebRtc_Word16 winData[ANAL_BLOCKL_MAX], maxWinData; + WebRtc_Word16 realImag[ANAL_BLOCKL_MAX << 1]; + + int i, j; + int outCFFT; + int zeros; + int net_norm = 0; + int right_shifts_in_magnU16 = 0; + int right_shifts_in_initMagnEst = 0; + + // For lower band do all processing + // update analysis buffer for L band + WEBRTC_SPL_MEMCPY_W16(inst->analysisBuffer, inst->analysisBuffer + inst->blockLen10ms, + inst->anaLen - inst->blockLen10ms); + WEBRTC_SPL_MEMCPY_W16(inst->analysisBuffer + inst->anaLen - inst->blockLen10ms, + speechFrame, inst->blockLen10ms); + + // Window data before FFT + for (i = 0; i < inst->anaLen; i++) + { + winData[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(inst->window[i], + inst->analysisBuffer[i], + 14); // Q0 + } + // Get input energy + inst->energyIn = WebRtcSpl_Energy(winData, (int)inst->anaLen, &(inst->scaleEnergyIn)); + + // Reset zero input flag + inst->zeroInputSignal = 0; + // Acquire norm for winData + maxWinData = WebRtcSpl_MaxAbsValueW16(winData, inst->anaLen); + inst->normData = WebRtcSpl_NormW16(maxWinData); + if (maxWinData == 0) + { + // Treat zero input separately. 
+ inst->zeroInputSignal = 1; + return; + } + + // Determine the net normalization in the frequency domain + net_norm = inst->stages - inst->normData; + // Track lowest normalization factor and use it to prevent wrap around in shifting + right_shifts_in_magnU16 = inst->normData - inst->minNorm; + right_shifts_in_initMagnEst = WEBRTC_SPL_MAX(-right_shifts_in_magnU16, 0); + inst->minNorm -= right_shifts_in_initMagnEst; + right_shifts_in_magnU16 = WEBRTC_SPL_MAX(right_shifts_in_magnU16, 0); + + // create realImag as winData interleaved with zeros (= imag. part), normalize it + for (i = 0; i < inst->anaLen; i++) + { + j = WEBRTC_SPL_LSHIFT_W16(i, 1); + realImag[j] = WEBRTC_SPL_LSHIFT_W16(winData[i], inst->normData); // Q(normData) + realImag[j + 1] = 0; // Insert zeros in imaginary part + } + + // bit-reverse position of elements in array and FFT the array + WebRtcSpl_ComplexBitReverse(realImag, inst->stages); // Q(normData-stages) + outCFFT = WebRtcSpl_ComplexFFT(realImag, inst->stages, 1); + + inst->imag[0] = 0; // Q(normData-stages) + inst->imag[inst->anaLen2] = 0; + inst->real[0] = realImag[0]; // Q(normData-stages) + inst->real[inst->anaLen2] = realImag[inst->anaLen]; + // Q(2*(normData-stages)) + inst->magnEnergy = (WebRtc_UWord32)WEBRTC_SPL_MUL_16_16(inst->real[0], inst->real[0]); + inst->magnEnergy += (WebRtc_UWord32)WEBRTC_SPL_MUL_16_16(inst->real[inst->anaLen2], + inst->real[inst->anaLen2]); + magnU16[0] = (WebRtc_UWord16)WEBRTC_SPL_ABS_W16(inst->real[0]); // Q(normData-stages) + magnU16[inst->anaLen2] = (WebRtc_UWord16)WEBRTC_SPL_ABS_W16(inst->real[inst->anaLen2]); + inst->sumMagn = (WebRtc_UWord32)magnU16[0]; // Q(normData-stages) + inst->sumMagn += (WebRtc_UWord32)magnU16[inst->anaLen2]; + + // Gather information during startup for noise parameter estimation + if (inst->blockIndex < END_STARTUP_SHORT) + { + // Switch initMagnEst to Q(minNorm-stages) + inst->initMagnEst[0] = WEBRTC_SPL_RSHIFT_U32(inst->initMagnEst[0], + right_shifts_in_initMagnEst); + 
inst->initMagnEst[inst->anaLen2] = + WEBRTC_SPL_RSHIFT_U32(inst->initMagnEst[inst->anaLen2], + right_shifts_in_initMagnEst); // Q(minNorm-stages) + + // Shift magnU16 to same domain as initMagnEst + tmpU32no1 = WEBRTC_SPL_RSHIFT_W32((WebRtc_UWord32)magnU16[0], + right_shifts_in_magnU16); // Q(minNorm-stages) + tmpU32no2 = WEBRTC_SPL_RSHIFT_W32((WebRtc_UWord32)magnU16[inst->anaLen2], + right_shifts_in_magnU16); // Q(minNorm-stages) + + // Update initMagnEst + inst->initMagnEst[0] += tmpU32no1; // Q(minNorm-stages) + inst->initMagnEst[inst->anaLen2] += tmpU32no2; // Q(minNorm-stages) + + log2 = 0; + if (magnU16[inst->anaLen2]) + { + // Calculate log2(magnU16[inst->anaLen2]) + zeros = WebRtcSpl_NormU32((WebRtc_UWord32)magnU16[inst->anaLen2]); + frac = (WebRtc_Word16)((((WebRtc_UWord32)magnU16[inst->anaLen2] << zeros) & + 0x7FFFFFFF) >> 23); // Q8 + // log2(magnU16(i)) in Q8 + log2 = (WebRtc_Word16)(((31 - zeros) << 8) + kLogTableFrac[frac]); + } + + sum_log_magn = (WebRtc_Word32)log2; // Q8 + // sum_log_i_log_magn in Q17 + sum_log_i_log_magn = (WEBRTC_SPL_MUL_16_16(kLogIndex[inst->anaLen2], log2) >> 3); + } + + for (i = 1; i < inst->anaLen2; i++) + { + j = WEBRTC_SPL_LSHIFT_W16(i, 1); + inst->real[i] = realImag[j]; + inst->imag[i] = -realImag[j + 1]; + // magnitude spectrum + // energy in Q(2*(normData-stages)) + tmpU32no1 = (WebRtc_UWord32)WEBRTC_SPL_MUL_16_16(realImag[j], realImag[j]); + tmpU32no1 += (WebRtc_UWord32)WEBRTC_SPL_MUL_16_16(realImag[j + 1], realImag[j + 1]); + inst->magnEnergy += tmpU32no1; // Q(2*(normData-stages)) + + magnU16[i] = (WebRtc_UWord16)WebRtcSpl_Sqrt(tmpU32no1); // Q(normData-stages) + inst->sumMagn += (WebRtc_UWord32)magnU16[i]; // Q(normData-stages) + if (inst->blockIndex < END_STARTUP_SHORT) + { + // Switch initMagnEst to Q(minNorm-stages) + inst->initMagnEst[i] = WEBRTC_SPL_RSHIFT_U32(inst->initMagnEst[i], + right_shifts_in_initMagnEst); + + // Shift magnU16 to same domain as initMagnEst, i.e., Q(minNorm-stages) + tmpU32no1 = 
WEBRTC_SPL_RSHIFT_W32((WebRtc_UWord32)magnU16[i], + right_shifts_in_magnU16); + // Update initMagnEst + inst->initMagnEst[i] += tmpU32no1; // Q(minNorm-stages) + + if (i >= kStartBand) + { + // For pink noise estimation. Collect data neglecting lower frequency band + log2 = 0; + if (magnU16[i]) + { + zeros = WebRtcSpl_NormU32((WebRtc_UWord32)magnU16[i]); + frac = (WebRtc_Word16)((((WebRtc_UWord32)magnU16[i] << zeros) & + 0x7FFFFFFF) >> 23); + // log2(magnU16(i)) in Q8 + log2 = (WebRtc_Word16)(((31 - zeros) << 8) + kLogTableFrac[frac]); + } + sum_log_magn += (WebRtc_Word32)log2; // Q8 + // sum_log_i_log_magn in Q17 + sum_log_i_log_magn += (WEBRTC_SPL_MUL_16_16(kLogIndex[i], log2) >> 3); + } + } + } + + //compute simplified noise model during startup + if (inst->blockIndex < END_STARTUP_SHORT) + { + // Estimate White noise + // Switch whiteNoiseLevel to Q(minNorm-stages) + inst->whiteNoiseLevel = WEBRTC_SPL_RSHIFT_U32(inst->whiteNoiseLevel, + right_shifts_in_initMagnEst); + + // Update the average magnitude spectrum, used as noise estimate. + tmpU32no1 = WEBRTC_SPL_UMUL_32_16(inst->sumMagn, inst->overdrive); + tmpU32no1 = WEBRTC_SPL_RSHIFT_U32(tmpU32no1, inst->stages + 8); + + // Replacing division above with 'stages' shifts + // Shift to same Q-domain as whiteNoiseLevel + tmpU32no1 = WEBRTC_SPL_RSHIFT_U32(tmpU32no1, right_shifts_in_magnU16); + // This operation is safe from wrap around as long as END_STARTUP_SHORT < 128 + assert(END_STARTUP_SHORT < 128); + inst->whiteNoiseLevel += tmpU32no1; // Q(minNorm-stages) + + // Estimate Pink noise parameters + // Denominator used in both parameter estimates. 
+ // The value is only dependent on the size of the frequency band (kStartBand) + // and to reduce computational complexity stored in a table (kDeterminantEstMatrix[]) + matrix_determinant = kDeterminantEstMatrix[kStartBand]; // Q0 + sum_log_i = kSumLogIndex[kStartBand]; // Q5 + sum_log_i_square = kSumSquareLogIndex[kStartBand]; // Q2 + if (inst->fs == 8000) + { + // Adjust values to shorter blocks in narrow band. + tmp_1_w32 = (WebRtc_Word32)matrix_determinant; + tmp_1_w32 += WEBRTC_SPL_MUL_16_16_RSFT(kSumLogIndex[65], sum_log_i, 9); + tmp_1_w32 -= WEBRTC_SPL_MUL_16_16_RSFT(kSumLogIndex[65], kSumLogIndex[65], 10); + tmp_1_w32 -= WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)sum_log_i_square, 4); + tmp_1_w32 -= WEBRTC_SPL_MUL_16_16_RSFT((WebRtc_Word16)(inst->magnLen + - kStartBand), kSumSquareLogIndex[65], 2); + matrix_determinant = (WebRtc_Word16)tmp_1_w32; + sum_log_i -= kSumLogIndex[65]; // Q5 + sum_log_i_square -= kSumSquareLogIndex[65]; // Q2 + } + + // Necessary number of shifts to fit sum_log_magn in a word16 + zeros = 16 - WebRtcSpl_NormW32(sum_log_magn); + if (zeros < 0) + { + zeros = 0; + } + tmp_1_w32 = WEBRTC_SPL_LSHIFT_W32(sum_log_magn, 1); // Q9 + sum_log_magn_u16 = (WebRtc_UWord16)WEBRTC_SPL_RSHIFT_W32(tmp_1_w32, zeros);//Q(9-zeros) + + // Calculate and update pinkNoiseNumerator. Result in Q11. 
+ tmp_2_w32 = WEBRTC_SPL_MUL_16_U16(sum_log_i_square, sum_log_magn_u16); // Q(11-zeros) + tmpU32no1 = WEBRTC_SPL_RSHIFT_U32((WebRtc_UWord32)sum_log_i_log_magn, 12); // Q5 + + // Shift the largest value of sum_log_i and tmp32no3 before multiplication + tmp_u16 = WEBRTC_SPL_LSHIFT_U16((WebRtc_UWord16)sum_log_i, 1); // Q6 + if ((WebRtc_UWord32)sum_log_i > tmpU32no1) + { + tmp_u16 = WEBRTC_SPL_RSHIFT_U16(tmp_u16, zeros); + } + else + { + tmpU32no1 = WEBRTC_SPL_RSHIFT_U32(tmpU32no1, zeros); + } + tmp_2_w32 -= (WebRtc_Word32)WEBRTC_SPL_UMUL_32_16(tmpU32no1, tmp_u16); // Q(11-zeros) + matrix_determinant = WEBRTC_SPL_RSHIFT_W16(matrix_determinant, zeros); // Q(-zeros) + tmp_2_w32 = WebRtcSpl_DivW32W16(tmp_2_w32, matrix_determinant); // Q11 + tmp_2_w32 += WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)net_norm, 11); // Q11 + if (tmp_2_w32 < 0) + { + tmp_2_w32 = 0; + } + inst->pinkNoiseNumerator += tmp_2_w32; // Q11 + + // Calculate and update pinkNoiseExp. Result in Q14. + tmp_2_w32 = WEBRTC_SPL_MUL_16_U16(sum_log_i, sum_log_magn_u16); // Q(14-zeros) + tmp_1_w32 = WEBRTC_SPL_RSHIFT_W32(sum_log_i_log_magn, 3 + zeros); + tmp_1_w32 = WEBRTC_SPL_MUL((WebRtc_Word32)(inst->magnLen - kStartBand), + tmp_1_w32); + tmp_2_w32 -= tmp_1_w32; // Q(14-zeros) + if (tmp_2_w32 > 0) + { + // If the exponential parameter is negative force it to zero, which means a + // flat spectrum. 
+ tmp_1_w32 = WebRtcSpl_DivW32W16(tmp_2_w32, matrix_determinant); // Q14 + inst->pinkNoiseExp += WEBRTC_SPL_SAT(16384, tmp_1_w32, 0); // Q14 + } + } +} + +void WebRtcNsx_DataSynthesis(NsxInst_t *inst, short *outFrame) +{ + WebRtc_Word32 tmp32no1; + WebRtc_Word32 energyOut; + + WebRtc_Word16 realImag[ANAL_BLOCKL_MAX << 1]; + WebRtc_Word16 tmp16no1, tmp16no2; + WebRtc_Word16 energyRatio; + WebRtc_Word16 gainFactor, gainFactor1, gainFactor2; + + int i, j; + int outCIFFT; + int scaleEnergyOut = 0; + + if (inst->zeroInputSignal) + { + // synthesize the special case of zero input + // read out fully processed segment + for (i = 0; i < inst->blockLen10ms; i++) + { + outFrame[i] = inst->synthesisBuffer[i]; // Q0 + } + // update synthesis buffer + WEBRTC_SPL_MEMCPY_W16(inst->synthesisBuffer, + inst->synthesisBuffer + inst->blockLen10ms, + inst->anaLen - inst->blockLen10ms); + WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer + inst->anaLen - inst->blockLen10ms, + inst->blockLen10ms); + return; + } + // Filter the data in the frequency domain + for (i = 0; i < inst->magnLen; i++) + { + inst->real[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(inst->real[i], + (WebRtc_Word16)(inst->noiseSupFilter[i]), 14); // Q(normData-stages) + inst->imag[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(inst->imag[i], + (WebRtc_Word16)(inst->noiseSupFilter[i]), 14); // Q(normData-stages) + } + // back to time domain + // Create spectrum + realImag[0] = inst->real[0]; + realImag[1] = -inst->imag[0]; + for (i = 1; i < inst->anaLen2; i++) + { + j = WEBRTC_SPL_LSHIFT_W16(i, 1); + tmp16no1 = (inst->anaLen << 1) - j; + realImag[j] = inst->real[i]; + realImag[j + 1] = -inst->imag[i]; + realImag[tmp16no1] = inst->real[i]; + realImag[tmp16no1 + 1] = inst->imag[i]; + } + realImag[inst->anaLen] = inst->real[inst->anaLen2]; + realImag[inst->anaLen + 1] = -inst->imag[inst->anaLen2]; + + // bit-reverse position of elements in array and IFFT it + WebRtcSpl_ComplexBitReverse(realImag, inst->stages); + outCIFFT = 
WebRtcSpl_ComplexIFFT(realImag, inst->stages, 1); + + for (i = 0; i < inst->anaLen; i++) + { + j = WEBRTC_SPL_LSHIFT_W16(i, 1); + tmp32no1 = WEBRTC_SPL_SHIFT_W32((WebRtc_Word32)realImag[j], outCIFFT - inst->normData); + inst->real[i] = (WebRtc_Word16)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, tmp32no1, + WEBRTC_SPL_WORD16_MIN); + } + + //scale factor: only do it after END_STARTUP_LONG time + gainFactor = 8192; // 8192 = Q13(1.0) + if (inst->gainMap == 1 && + inst->blockIndex > END_STARTUP_LONG && + inst->energyIn > 0) + { + energyOut = WebRtcSpl_Energy(inst->real, (int)inst->anaLen, &scaleEnergyOut); // Q(-scaleEnergyOut) + if (scaleEnergyOut == 0 && !(energyOut & 0x7f800000)) + { + energyOut = WEBRTC_SPL_SHIFT_W32(energyOut, 8 + scaleEnergyOut + - inst->scaleEnergyIn); + } else + { + inst->energyIn = WEBRTC_SPL_RSHIFT_W32(inst->energyIn, 8 + scaleEnergyOut + - inst->scaleEnergyIn); // Q(-8-scaleEnergyOut) + } + + assert(inst->energyIn > 0); + energyRatio = (WebRtc_Word16)WEBRTC_SPL_DIV(energyOut + + WEBRTC_SPL_RSHIFT_W32(inst->energyIn, 1), inst->energyIn); // Q8 + + // // original FLOAT code + // if (gain > blim) { + // factor1=1.0+1.3*(gain-blim); + // if (gain*factor1 > 1.0) { // FLOAT + // factor1 = 1.0/gain; // FLOAT + // } + // } + // else { + // factor1=1.0; // FLOAT + // } + // + // if (gain > blim) { + // factor2=1.0; //FLOAT + // } + // else { + // //don't reduce scale too much for pause regions: attenuation here should be controlled by flooring + // factor2=1.0-0.3*(blim-gain); // FLOAT + // if (gain <= inst->denoiseBound) { + // factor2=1.0-0.3*(blim-inst->denoiseBound); // FLOAT + // } + // } + + // all done in lookup tables now + gainFactor1 = kFactor1Table[energyRatio]; // Q8 + gainFactor2 = inst->factor2Table[energyRatio]; // Q8 + + //combine both scales with speech/noise prob: note prior (priorSpeechProb) is not frequency dependent + + // factor = inst->priorSpeechProb*factor1 + (1.0-inst->priorSpeechProb)*factor2; // original code + tmp16no1 = 
(WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(16384 - inst->priorNonSpeechProb, + gainFactor1, 14); // Q13 16384 = Q14(1.0) + tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(inst->priorNonSpeechProb, + gainFactor2, 14); // Q13; + gainFactor = tmp16no1 + tmp16no2; // Q13 + } // out of flag_gain_map==1 + + // synthesis + for (i = 0; i < inst->anaLen; i++) + { + tmp16no1 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(inst->window[i], + inst->real[i], 14); // Q0, window in Q14 + tmp32no1 = WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(tmp16no1, gainFactor, 13); // Q0 + // Down shift with rounding + tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, tmp32no1, + WEBRTC_SPL_WORD16_MIN); // Q0 + inst->synthesisBuffer[i] = WEBRTC_SPL_ADD_SAT_W16(inst->synthesisBuffer[i], tmp16no2); // Q0 + } + + // read out fully processed segment + for (i = 0; i < inst->blockLen10ms; i++) + { + outFrame[i] = inst->synthesisBuffer[i]; // Q0 + } + // update synthesis buffer + WEBRTC_SPL_MEMCPY_W16(inst->synthesisBuffer, inst->synthesisBuffer + inst->blockLen10ms, + inst->anaLen - inst->blockLen10ms); + WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer + inst->anaLen - inst->blockLen10ms, + inst->blockLen10ms); +} + +int WebRtcNsx_ProcessCore(NsxInst_t *inst, short *speechFrame, short *speechFrameHB, + short *outFrame, short *outFrameHB) +{ + // main routine for noise suppression + + WebRtc_UWord32 tmpU32no1, tmpU32no2, tmpU32no3; + WebRtc_UWord32 satMax, maxNoiseU32; + WebRtc_UWord32 tmpMagnU32, tmpNoiseU32; + WebRtc_UWord32 nearMagnEst; + WebRtc_UWord32 noiseUpdateU32; + WebRtc_UWord32 noiseU32[HALF_ANAL_BLOCKL]; + WebRtc_UWord32 postLocSnr[HALF_ANAL_BLOCKL]; + WebRtc_UWord32 priorLocSnr[HALF_ANAL_BLOCKL]; + WebRtc_UWord32 prevNearSnr[HALF_ANAL_BLOCKL]; + WebRtc_UWord32 curNearSnr; + WebRtc_UWord32 priorSnr; + WebRtc_UWord32 noise_estimate = 0; + WebRtc_UWord32 noise_estimate_avg = 0; + WebRtc_UWord32 numerator = 0; + + WebRtc_Word32 tmp32no1, tmp32no2; + WebRtc_Word32 
pink_noise_num_avg = 0; + + WebRtc_UWord16 tmpU16no1; + WebRtc_UWord16 magnU16[HALF_ANAL_BLOCKL]; + WebRtc_UWord16 prevNoiseU16[HALF_ANAL_BLOCKL]; + WebRtc_UWord16 nonSpeechProbFinal[HALF_ANAL_BLOCKL]; + WebRtc_UWord16 gammaNoise, prevGammaNoise; + WebRtc_UWord16 noiseSupFilterTmp[HALF_ANAL_BLOCKL]; + + WebRtc_Word16 qMagn, qNoise; + WebRtc_Word16 avgProbSpeechHB, gainModHB, avgFilterGainHB, gainTimeDomainHB; + WebRtc_Word16 tmp16no1; + WebRtc_Word16 int_part = 0; + WebRtc_Word16 frac_part = 0; + WebRtc_Word16 pink_noise_exp_avg = 0; + + int i; + int nShifts, postShifts; + int norm32no1, norm32no2; + int flag, sign; + int q_domain_to_use = 0; + +#ifdef NS_FILEDEBUG + fwrite(spframe, sizeof(short), inst->blockLen10ms, inst->infile); +#endif + + // Check that initialization has been done + if (inst->initFlag != 1) + { + return -1; + } + // Check for valid pointers based on sampling rate + if ((inst->fs == 32000) && (speechFrameHB == NULL)) + { + return -1; + } + + // Store speechFrame and transform to frequency domain + WebRtcNsx_DataAnalysis(inst, speechFrame, magnU16); + + if (inst->zeroInputSignal) + { + WebRtcNsx_DataSynthesis(inst, outFrame); + + if (inst->fs == 32000) + { + // update analysis buffer for H band + // append new data to buffer FX + WEBRTC_SPL_MEMCPY_W16(inst->dataBufHBFX, inst->dataBufHBFX + inst->blockLen10ms, + inst->anaLen - inst->blockLen10ms); + WEBRTC_SPL_MEMCPY_W16(inst->dataBufHBFX + inst->anaLen - inst->blockLen10ms, + speechFrameHB, inst->blockLen10ms); + for (i = 0; i < inst->blockLen10ms; i++) + { + outFrameHB[i] = inst->dataBufHBFX[i]; // Q0 + } + } // end of H band gain computation + return 0; + } + + // Update block index when we have something to process + inst->blockIndex++; + // + + // Norm of magn + qMagn = inst->normData - inst->stages; + + // Compute spectral flatness on input spectrum + WebRtcNsx_ComputeSpectralFlatness(inst, magnU16); + + // quantile noise estimate + WebRtcNsx_NoiseEstimation(inst, magnU16, noiseU32, 
&qNoise); + + //noise estimate from previous frame + for (i = 0; i < inst->magnLen; i++) + { + prevNoiseU16[i] = (WebRtc_UWord16)WEBRTC_SPL_RSHIFT_U32(inst->prevNoiseU32[i], 11); // Q(prevQNoise) + } + + if (inst->blockIndex < END_STARTUP_SHORT) + { + // Noise Q-domain to be used later; see description at end of section. + q_domain_to_use = WEBRTC_SPL_MIN((int)qNoise, inst->minNorm - inst->stages); + + // Calculate frequency independent parts in parametric noise estimate and calculate + // the estimate for the lower frequency band (same values for all frequency bins) + if (inst->pinkNoiseExp) + { + pink_noise_exp_avg = (WebRtc_Word16)WebRtcSpl_DivW32W16(inst->pinkNoiseExp, + (WebRtc_Word16)(inst->blockIndex + 1)); // Q14 + pink_noise_num_avg = WebRtcSpl_DivW32W16(inst->pinkNoiseNumerator, + (WebRtc_Word16)(inst->blockIndex + 1)); // Q11 + WebRtcNsx_CalcParametricNoiseEstimate(inst, + pink_noise_exp_avg, + pink_noise_num_avg, + kStartBand, + &noise_estimate, + &noise_estimate_avg); + } + else + { + // Use white noise estimate if we have poor pink noise parameter estimates + noise_estimate = inst->whiteNoiseLevel; // Q(minNorm-stages) + noise_estimate_avg = noise_estimate / (inst->blockIndex + 1); // Q(minNorm-stages) + } + for (i = 0; i < inst->magnLen; i++) + { + // Estimate the background noise using the pink noise parameters if permitted + if ((inst->pinkNoiseExp) && (i >= kStartBand)) + { + // Reset noise_estimate + noise_estimate = 0; + noise_estimate_avg = 0; + // Calculate the parametric noise estimate for current frequency bin + WebRtcNsx_CalcParametricNoiseEstimate(inst, + pink_noise_exp_avg, + pink_noise_num_avg, + i, + &noise_estimate, + &noise_estimate_avg); + } + // Calculate parametric Wiener filter + noiseSupFilterTmp[i] = inst->denoiseBound; + if (inst->initMagnEst[i]) + { + // numerator = (initMagnEst - noise_estimate * overdrive) + // Result in Q(8+minNorm-stages) + tmpU32no1 = WEBRTC_SPL_UMUL_32_16(noise_estimate, inst->overdrive); + numerator = 
WEBRTC_SPL_LSHIFT_U32(inst->initMagnEst[i], 8); + if (numerator > tmpU32no1) + { + // Suppression filter coefficient larger than zero, so calculate. + numerator -= tmpU32no1; + + // Determine number of left shifts in numerator for best accuracy after + // division + nShifts = WebRtcSpl_NormU32(numerator); + nShifts = WEBRTC_SPL_SAT(6, nShifts, 0); + + // Shift numerator to Q(nShifts+8+minNorm-stages) + numerator = WEBRTC_SPL_LSHIFT_U32(numerator, nShifts); + + // Shift denominator to Q(nShifts-6+minNorm-stages) + tmpU32no1 = WEBRTC_SPL_RSHIFT_U32(inst->initMagnEst[i], 6 - nShifts); + tmpU32no2 = WEBRTC_SPL_UDIV(numerator, tmpU32no1); // Q14 + noiseSupFilterTmp[i] = (WebRtc_UWord16)WEBRTC_SPL_SAT(16384, tmpU32no2, + (WebRtc_UWord32)(inst->denoiseBound)); // Q14 + } + } + // Weight quantile noise 'noiseU32' with modeled noise 'noise_estimate_avg' + // 'noiseU32 is in Q(qNoise) and 'noise_estimate' in Q(minNorm-stages) + // To guarantee that we do not get wrap around when shifting to the same domain + // we use the lowest one. Furthermore, we need to save 6 bits for the weighting. + // 'noise_estimate_avg' can handle this operation by construction, but 'noiseU32' + // may not. 
+ + // Shift 'noiseU32' to 'q_domain_to_use' + tmpU32no1 = WEBRTC_SPL_RSHIFT_U32(noiseU32[i], (int)qNoise - q_domain_to_use); + // Shift 'noise_estimate_avg' to 'q_domain_to_use' + tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(noise_estimate_avg, inst->minNorm - inst->stages + - q_domain_to_use); + // Make a simple check to see if we have enough room for weighting 'tmpU32no1' + // without wrap around + nShifts = 0; + if (tmpU32no1 & 0xfc000000) { + tmpU32no1 = WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 6); + tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(tmpU32no2, 6); + nShifts = 6; + } + // Add them together and divide by startup length + noiseU32[i] = WebRtcSpl_DivU32U16(tmpU32no1 + tmpU32no2, END_STARTUP_SHORT); + // Shift back if necessary + noiseU32[i] = WEBRTC_SPL_LSHIFT_U32(noiseU32[i], nShifts); + } + // Update new Q-domain for 'noiseU32' + qNoise = q_domain_to_use; + } + // compute average signal during END_STARTUP_LONG time: + // used to normalize spectral difference measure + if (inst->blockIndex < END_STARTUP_LONG) + { + // substituting division with shift ending up in Q(-2*stages) + inst->timeAvgMagnEnergyTmp + += WEBRTC_SPL_RSHIFT_U32(inst->magnEnergy, + 2 * inst->normData + inst->stages - 1); + inst->timeAvgMagnEnergy = WebRtcSpl_DivU32U16(inst->timeAvgMagnEnergyTmp, + inst->blockIndex + 1); + } + + //start processing at frames == converged+1 + // STEP 1: compute prior and post SNR based on quantile noise estimates + + // compute direct decision (DD) estimate of prior SNR: needed for new method + satMax = (WebRtc_UWord32)1048575;// Largest possible value without getting overflow despite shifting 12 steps + postShifts = 6 + qMagn - qNoise; + nShifts = 5 - inst->prevQMagn + inst->prevQNoise; + for (i = 0; i < inst->magnLen; i++) + { + // FLOAT: + // post SNR + // postLocSnr[i] = 0.0; + // if (magn[i] > noise[i]) + // { + // postLocSnr[i] = magn[i] / (noise[i] + 0.0001); + // } + // // previous post SNR + // // previous estimate: based on previous frame with gain filter (smooth is 
previous filter) + // + // prevNearSnr[i] = inst->prevMagnU16[i] / (inst->noisePrev[i] + 0.0001) * (inst->smooth[i]); + // + // // DD estimate is sum of two terms: current estimate and previous estimate + // // directed decision update of priorSnr (or we actually store [2*priorSnr+1]) + // + // priorLocSnr[i] = DD_PR_SNR * prevNearSnr[i] + (1.0 - DD_PR_SNR) * (postLocSnr[i] - 1.0); + + // calculate post SNR: output in Q11 + postLocSnr[i] = 2048; // 1.0 in Q11 + tmpU32no1 = WEBRTC_SPL_LSHIFT_U32((WebRtc_UWord32)magnU16[i], 6); // Q(6+qMagn) + if (postShifts < 0) + { + tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(noiseU32[i], -postShifts); // Q(6+qMagn) + } else + { + tmpU32no2 = WEBRTC_SPL_LSHIFT_U32(noiseU32[i], postShifts); // Q(6+qMagn) + } + if (tmpU32no1 > tmpU32no2) + { + // Current magnitude larger than noise + tmpU32no1 = WEBRTC_SPL_LSHIFT_U32(tmpU32no1, 11); // Q(17+qMagn) + if (tmpU32no2) + { + tmpU32no1 = WEBRTC_SPL_UDIV(tmpU32no1, tmpU32no2); // Q11 + postLocSnr[i] = WEBRTC_SPL_MIN(satMax, tmpU32no1); // Q11 + } else + { + postLocSnr[i] = satMax; + } + } + + // calculate prevNearSnr[i] and save for later instead of recalculating it later + nearMagnEst = WEBRTC_SPL_UMUL_16_16(inst->prevMagnU16[i], inst->noiseSupFilter[i]); // Q(prevQMagn+14) + tmpU32no1 = WEBRTC_SPL_LSHIFT_U32(nearMagnEst, 3); // Q(prevQMagn+17) + tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(inst->prevNoiseU32[i], nShifts); // Q(prevQMagn+6) + + if (tmpU32no2) + { + tmpU32no1 = WEBRTC_SPL_DIV(tmpU32no1, tmpU32no2); // Q11 + tmpU32no1 = WEBRTC_SPL_MIN(satMax, tmpU32no1); // Q11 + } else + { + tmpU32no1 = satMax; // Q11 + } + prevNearSnr[i] = tmpU32no1; // Q11 + + //directed decision update of priorSnr + tmpU32no1 = WEBRTC_SPL_UMUL_32_16(prevNearSnr[i], DD_PR_SNR_Q11); // Q22 + tmpU32no2 = WEBRTC_SPL_UMUL_32_16(postLocSnr[i] - 2048, ONE_MINUS_DD_PR_SNR_Q11); // Q22 + priorSnr = tmpU32no1 + tmpU32no2 + 512; // Q22 (added 512 for rounding) + // priorLocSnr = 1 + 2*priorSnr + priorLocSnr[i] = 2048 + 
WEBRTC_SPL_RSHIFT_U32(priorSnr, 10); // Q11 + } // end of loop over frequencies + // done with step 1: DD computation of prior and post SNR + + // STEP 2: compute speech/noise likelihood + + //compute difference of input spectrum with learned/estimated noise spectrum + WebRtcNsx_ComputeSpectralDifference(inst, magnU16); + //compute histograms for determination of parameters (thresholds and weights for features) + //parameters are extracted once every window time (=inst->modelUpdate) + //counter update + inst->cntThresUpdate++; + flag = (int)(inst->cntThresUpdate == inst->modelUpdate); + //update histogram + WebRtcNsx_FeatureParameterExtraction(inst, flag); + //compute model parameters + if (flag) + { + inst->cntThresUpdate = 0; // Reset counter + //update every window: + // get normalization for spectral difference for next window estimate + + // Shift to Q(-2*stages) + inst->curAvgMagnEnergy = WEBRTC_SPL_RSHIFT_U32(inst->curAvgMagnEnergy, STAT_UPDATES); + + tmpU32no1 = (inst->curAvgMagnEnergy + inst->timeAvgMagnEnergy + 1) >> 1; //Q(-2*stages) + // Update featureSpecDiff + if ((tmpU32no1 != inst->timeAvgMagnEnergy) && (inst->featureSpecDiff)) + { + norm32no1 = 0; + tmpU32no3 = tmpU32no1; + while (0xFFFF0000 & tmpU32no3) + { + tmpU32no3 >>= 1; + norm32no1++; + } + tmpU32no2 = inst->featureSpecDiff; + while (0xFFFF0000 & tmpU32no2) + { + tmpU32no2 >>= 1; + norm32no1++; + } + tmpU32no3 = WEBRTC_SPL_UMUL(tmpU32no3, tmpU32no2); + tmpU32no3 = WEBRTC_SPL_UDIV(tmpU32no3, inst->timeAvgMagnEnergy); + if (WebRtcSpl_NormU32(tmpU32no3) < norm32no1) + { + inst->featureSpecDiff = 0x007FFFFF; + } else + { + inst->featureSpecDiff = WEBRTC_SPL_MIN(0x007FFFFF, + WEBRTC_SPL_LSHIFT_U32(tmpU32no3, norm32no1)); + } + } + + inst->timeAvgMagnEnergy = tmpU32no1; // Q(-2*stages) + inst->curAvgMagnEnergy = 0; + } + + //compute speech/noise probability + WebRtcNsx_SpeechNoiseProb(inst, nonSpeechProbFinal, priorLocSnr, postLocSnr); + + //time-avg parameter for noise update + gammaNoise = 
NOISE_UPDATE_Q8; // Q8 + + maxNoiseU32 = 0; + postShifts = inst->prevQNoise - qMagn; + nShifts = inst->prevQMagn - qMagn; + for (i = 0; i < inst->magnLen; i++) + { + // temporary noise update: use it for speech frames if update value is less than previous + // the formula has been rewritten into: + // noiseUpdate = noisePrev[i] + (1 - gammaNoise) * nonSpeechProb * (magn[i] - noisePrev[i]) + + if (postShifts < 0) + { + tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(magnU16[i], -postShifts); // Q(prevQNoise) + } else + { + tmpU32no2 = WEBRTC_SPL_LSHIFT_U32(magnU16[i], postShifts); // Q(prevQNoise) + } + if (prevNoiseU16[i] > tmpU32no2) + { + sign = -1; + tmpU32no1 = prevNoiseU16[i] - tmpU32no2; + } else + { + sign = 1; + tmpU32no1 = tmpU32no2 - prevNoiseU16[i]; + } + noiseUpdateU32 = inst->prevNoiseU32[i]; // Q(prevQNoise+11) + tmpU32no3 = 0; + if ((tmpU32no1) && (nonSpeechProbFinal[i])) + { + // This value will be used later, if gammaNoise changes + tmpU32no3 = WEBRTC_SPL_UMUL_32_16(tmpU32no1, nonSpeechProbFinal[i]); // Q(prevQNoise+8) + if (0x7c000000 & tmpU32no3) + { + // Shifting required before multiplication + tmpU32no2 + = WEBRTC_SPL_UMUL_32_16(WEBRTC_SPL_RSHIFT_U32(tmpU32no3, 5), gammaNoise); // Q(prevQNoise+11) + } else + { + // We can do shifting after multiplication + tmpU32no2 + = WEBRTC_SPL_RSHIFT_U32(WEBRTC_SPL_UMUL_32_16(tmpU32no3, gammaNoise), 5); // Q(prevQNoise+11) + } + if (sign > 0) + { + noiseUpdateU32 += tmpU32no2; // Q(prevQNoise+11) + } else + { + // This operation is safe. 
We can never get wrap around, since worst + // case scenario means magnU16 = 0 + noiseUpdateU32 -= tmpU32no2; // Q(prevQNoise+11) + } + } + + //increase gamma (i.e., less noise update) for frame likely to be speech + prevGammaNoise = gammaNoise; + gammaNoise = NOISE_UPDATE_Q8; + //time-constant based on speech/noise state + //increase gamma (i.e., less noise update) for frames likely to be speech + if (nonSpeechProbFinal[i] < ONE_MINUS_PROB_RANGE_Q8) + { + gammaNoise = GAMMA_NOISE_TRANS_AND_SPEECH_Q8; + } + + if (prevGammaNoise != gammaNoise) + { + // new noise update + // this line is the same as above, only that the result is stored in a different variable and the gammaNoise + // has changed + // + // noiseUpdate = noisePrev[i] + (1 - gammaNoise) * nonSpeechProb * (magn[i] - noisePrev[i]) + + if (0x7c000000 & tmpU32no3) + { + // Shifting required before multiplication + tmpU32no2 + = WEBRTC_SPL_UMUL_32_16(WEBRTC_SPL_RSHIFT_U32(tmpU32no3, 5), gammaNoise); // Q(prevQNoise+11) + } else + { + // We can do shifting after multiplication + tmpU32no2 + = WEBRTC_SPL_RSHIFT_U32(WEBRTC_SPL_UMUL_32_16(tmpU32no3, gammaNoise), 5); // Q(prevQNoise+11) + } + if (sign > 0) + { + tmpU32no1 = inst->prevNoiseU32[i] + tmpU32no2; // Q(prevQNoise+11) + } else + { + tmpU32no1 = inst->prevNoiseU32[i] - tmpU32no2; // Q(prevQNoise+11) + } + if (noiseUpdateU32 > tmpU32no1) + { + noiseUpdateU32 = tmpU32no1; // Q(prevQNoise+11) + } + } + noiseU32[i] = noiseUpdateU32; // Q(prevQNoise+11) + if (noiseUpdateU32 > maxNoiseU32) + { + maxNoiseU32 = noiseUpdateU32; + } + + // conservative noise update + // // original FLOAT code + // if (prob_speech < PROB_RANGE) { + // inst->avgMagnPause[i] = inst->avgMagnPause[i] + (1.0 - gamma_pause)*(magn[i] - inst->avgMagnPause[i]); + // } + + tmp32no2 = WEBRTC_SPL_SHIFT_W32(inst->avgMagnPause[i], -nShifts); + if (nonSpeechProbFinal[i] > ONE_MINUS_PROB_RANGE_Q8) + { + if (nShifts < 0) + { + tmp32no1 = (WebRtc_Word32)magnU16[i] - tmp32no2; // Q(qMagn) + tmp32no1 
= WEBRTC_SPL_MUL_32_16(tmp32no1, ONE_MINUS_GAMMA_PAUSE_Q8); // Q(8+prevQMagn+nShifts) + tmp32no1 = WEBRTC_SPL_RSHIFT_W32(tmp32no1 + 128, 8); // Q(qMagn) + } else + { + tmp32no1 = WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)magnU16[i], nShifts) + - inst->avgMagnPause[i]; // Q(qMagn+nShifts) + tmp32no1 = WEBRTC_SPL_MUL_32_16(tmp32no1, ONE_MINUS_GAMMA_PAUSE_Q8); // Q(8+prevQMagn+nShifts) + tmp32no1 = WEBRTC_SPL_RSHIFT_W32(tmp32no1 + (128 << nShifts), 8 + nShifts); // Q(qMagn) + } + tmp32no2 += tmp32no1; // Q(qMagn) + } + inst->avgMagnPause[i] = tmp32no2; + } // end of frequency loop + + norm32no1 = WebRtcSpl_NormU32(maxNoiseU32); + qNoise = inst->prevQNoise + norm32no1 - 5; + // done with step 2: noise update + + // STEP 3: compute dd update of prior snr and post snr based on new noise estimate + nShifts = inst->prevQNoise + 11 - qMagn; + for (i = 0; i < inst->magnLen; i++) + { + // FLOAT code + // // post and prior SNR + // curNearSnr = 0.0; + // if (magn[i] > noise[i]) + // { + // curNearSnr = magn[i] / (noise[i] + 0.0001) - 1.0; + // } + // // DD estimate is sum of two terms: current estimate and previous estimate + // // directed decision update of snrPrior + // snrPrior = DD_PR_SNR * prevNearSnr[i] + (1.0 - DD_PR_SNR) * curNearSnr; + // // gain filter + // tmpFloat1 = inst->overdrive + snrPrior; + // tmpFloat2 = snrPrior / tmpFloat1; + // theFilter[i] = tmpFloat2; + + // calculate curNearSnr again, this is necessary because a new noise estimate has been made since then. 
for the original + curNearSnr = 0; // Q11 + if (nShifts < 0) + { + // This case is equivalent with magn < noise which implies curNearSnr = 0; + tmpMagnU32 = (WebRtc_UWord32)magnU16[i]; // Q(qMagn) + tmpNoiseU32 = WEBRTC_SPL_LSHIFT_U32(noiseU32[i], -nShifts); // Q(qMagn) + } else if (nShifts > 17) + { + tmpMagnU32 = WEBRTC_SPL_LSHIFT_U32(magnU16[i], 17); // Q(qMagn+17) + tmpNoiseU32 = WEBRTC_SPL_RSHIFT_U32(noiseU32[i], nShifts - 17); // Q(qMagn+17) + } else + { + tmpMagnU32 = WEBRTC_SPL_LSHIFT_U32((WebRtc_UWord32)magnU16[i], nShifts); // Q(qNoise_prev+11) + tmpNoiseU32 = noiseU32[i]; // Q(qNoise_prev+11) + } + if (tmpMagnU32 > tmpNoiseU32) + { + tmpU32no1 = tmpMagnU32 - tmpNoiseU32; // Q(qCur) + norm32no2 = WEBRTC_SPL_MIN(11, WebRtcSpl_NormU32(tmpU32no1)); + tmpU32no1 = WEBRTC_SPL_LSHIFT_U32(tmpU32no1, norm32no2); // Q(qCur+norm32no2) + tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(tmpNoiseU32, 11 - norm32no2); // Q(qCur+norm32no2-11) + if (tmpU32no2) + { + tmpU32no1 = WEBRTC_SPL_UDIV(tmpU32no1, tmpU32no2); // Q11 + } + curNearSnr = WEBRTC_SPL_MIN(satMax, tmpU32no1); // Q11 + } + + //directed decision update of priorSnr + // FLOAT + // priorSnr = DD_PR_SNR * prevNearSnr + (1.0-DD_PR_SNR) * curNearSnr; + + tmpU32no1 = WEBRTC_SPL_UMUL_32_16(prevNearSnr[i], DD_PR_SNR_Q11); // Q22 + tmpU32no2 = WEBRTC_SPL_UMUL_32_16(curNearSnr, ONE_MINUS_DD_PR_SNR_Q11); // Q22 + priorSnr = tmpU32no1 + tmpU32no2; // Q22 + + //gain filter + tmpU32no1 = (WebRtc_UWord32)(inst->overdrive) + + WEBRTC_SPL_RSHIFT_U32(priorSnr + 8192, 14); // Q8 + tmpU16no1 = (WebRtc_UWord16)WEBRTC_SPL_UDIV(priorSnr + (tmpU32no1 >> 1), tmpU32no1); // Q14 + inst->noiseSupFilter[i] = WEBRTC_SPL_SAT(16384, tmpU16no1, inst->denoiseBound); // 16384 = Q14(1.0) // Q14 + + // Weight in the parametric Wiener filter during startup + if (inst->blockIndex < END_STARTUP_SHORT) + { + // Weight the two suppression filters + tmpU32no1 = WEBRTC_SPL_UMUL_16_16(inst->noiseSupFilter[i], + (WebRtc_UWord16)inst->blockIndex); + tmpU32no2 = 
WEBRTC_SPL_UMUL_16_16(noiseSupFilterTmp[i], + (WebRtc_UWord16)(END_STARTUP_SHORT + - inst->blockIndex)); + tmpU32no1 += tmpU32no2; + inst->noiseSupFilter[i] = (WebRtc_UWord16)WebRtcSpl_DivU32U16(tmpU32no1, + END_STARTUP_SHORT); + } + } // end of loop over frequencies + //done with step3 + + // save noise and magnitude spectrum for next frame + inst->prevQNoise = qNoise; + inst->prevQMagn = qMagn; + if (norm32no1 > 5) + { + for (i = 0; i < inst->magnLen; i++) + { + inst->prevNoiseU32[i] = WEBRTC_SPL_LSHIFT_U32(noiseU32[i], norm32no1 - 5); // Q(qNoise+11) + inst->prevMagnU16[i] = magnU16[i]; // Q(qMagn) + } + } else + { + for (i = 0; i < inst->magnLen; i++) + { + inst->prevNoiseU32[i] = WEBRTC_SPL_RSHIFT_U32(noiseU32[i], 5 - norm32no1); // Q(qNoise+11) + inst->prevMagnU16[i] = magnU16[i]; // Q(qMagn) + } + } + + WebRtcNsx_DataSynthesis(inst, outFrame); +#ifdef NS_FILEDEBUG + fwrite(outframe, sizeof(short), inst->blockLen10ms, inst->outfile); +#endif + + //for H band: + // only update data buffer, then apply time-domain gain is applied derived from L band + if (inst->fs == 32000) + { + // update analysis buffer for H band + // append new data to buffer FX + WEBRTC_SPL_MEMCPY_W16(inst->dataBufHBFX, inst->dataBufHBFX + inst->blockLen10ms, inst->anaLen - inst->blockLen10ms); + WEBRTC_SPL_MEMCPY_W16(inst->dataBufHBFX + inst->anaLen - inst->blockLen10ms, speechFrameHB, inst->blockLen10ms); + // range for averaging low band quantities for H band gain + + gainTimeDomainHB = 16384; // 16384 = Q14(1.0) + //average speech prob from low band + //average filter gain from low band + //avg over second half (i.e., 4->8kHz) of freq. 
spectrum + tmpU32no1 = 0; // Q12 + tmpU16no1 = 0; // Q8 + for (i = inst->anaLen2 - (inst->anaLen2 >> 2); i < inst->anaLen2; i++) + { + tmpU16no1 += nonSpeechProbFinal[i]; // Q8 + tmpU32no1 += (WebRtc_UWord32)(inst->noiseSupFilter[i]); // Q14 + } + avgProbSpeechHB = (WebRtc_Word16)(4096 + - WEBRTC_SPL_RSHIFT_U16(tmpU16no1, inst->stages - 7)); // Q12 + avgFilterGainHB = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_U32(tmpU32no1, inst->stages - 3); // Q14 + + // // original FLOAT code + // // gain based on speech probability: + // avg_prob_speech_tt=(float)2.0*avg_prob_speech-(float)1.0; + // gain_mod=(float)0.5*((float)1.0+(float)tanh(avg_prob_speech_tt)); // between 0 and 1 + + // gain based on speech probability: + // original expression: "0.5 * (1 + tanh(2x-1))" + // avgProbSpeechHB has been anyway saturated to a value between 0 and 1 so the other cases don't have to be dealt with + // avgProbSpeechHB and gainModHB are in Q12, 3607 = Q12(0.880615234375) which is a zero point of + // |0.5 * (1 + tanh(2x-1)) - x| - |0.5 * (1 + tanh(2x-1)) - 0.880615234375| meaning that from that point the error of approximating + // the expression with f(x) = x would be greater than the error of approximating the expression with f(x) = 0.880615234375 + // error: "|0.5 * (1 + tanh(2x-1)) - x| from x=0 to 0.880615234375" -> http://www.wolframalpha.com/input/?i=|0.5+*+(1+%2B+tanh(2x-1))+-+x|+from+x%3D0+to+0.880615234375 + // and: "|0.5 * (1 + tanh(2x-1)) - 0.880615234375| from x=0.880615234375 to 1" -> http://www.wolframalpha.com/input/?i=+|0.5+*+(1+%2B+tanh(2x-1))+-+0.880615234375|+from+x%3D0.880615234375+to+1 + gainModHB = WEBRTC_SPL_MIN(avgProbSpeechHB, 3607); + + // // original FLOAT code + // //combine gain with low band gain + // if (avg_prob_speech < (float)0.5) { + // gain_time_domain_HB=(float)0.5*gain_mod+(float)0.5*avg_filter_gain; + // } + // else { + // gain_time_domain_HB=(float)0.25*gain_mod+(float)0.75*avg_filter_gain; + // } + + + //combine gain with low band gain + if 
(avgProbSpeechHB < 2048) + { // 2048 = Q12(0.5) + // the next two lines in float are "gain_time_domain = 0.5 * gain_mod + 0.5 * avg_filter_gain"; Q2(0.5) = 2 equals one left shift + gainTimeDomainHB = (gainModHB << 1) + (avgFilterGainHB >> 1); // Q14 + } else + { + // "gain_time_domain = 0.25 * gain_mod + 0.75 * agv_filter_gain;" + gainTimeDomainHB = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(3, avgFilterGainHB, 2); // 3 = Q2(0.75); Q14 + gainTimeDomainHB += gainModHB; // Q14 + } + //make sure gain is within flooring range + gainTimeDomainHB + = WEBRTC_SPL_SAT(16384, gainTimeDomainHB, (WebRtc_Word16)(inst->denoiseBound)); // 16384 = Q14(1.0) + + + //apply gain + for (i = 0; i < inst->blockLen10ms; i++) + { + outFrameHB[i] + = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(gainTimeDomainHB, inst->dataBufHBFX[i], 14); // Q0 + } + } // end of H band gain computation + + return 0; +} diff --git a/src/modules/audio_processing/ns/main/source/nsx_core.h b/src/modules/audio_processing/ns/main/source/nsx_core.h new file mode 100644 index 0000000000..2e74303505 --- /dev/null +++ b/src/modules/audio_processing/ns/main/source/nsx_core.h @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_CORE_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_CORE_H_ + +#include "typedefs.h" +#include "signal_processing_library.h" + +#include "nsx_defines.h" + +#ifdef NS_FILEDEBUG +#include <stdio.h> +#endif + +// State of one fixed-point noise suppression instance, shared by the functions declared below +typedef struct NsxInst_t_ +{ + WebRtc_UWord32 fs; //sampling frequency; WebRtcNsx_ProcessCore runs its H band path when fs == 32000 + + const WebRtc_Word16* window; + WebRtc_Word16 analysisBuffer[ANAL_BLOCKL_MAX]; + WebRtc_Word16 synthesisBuffer[ANAL_BLOCKL_MAX]; + WebRtc_UWord16 noiseSupFilter[HALF_ANAL_BLOCKL]; /* Q14 suppression gain per frequency bin, 16384 = 1.0 */ + WebRtc_UWord16 overdrive; /* Q8 */ + WebRtc_UWord16 denoiseBound; /* Q14 */ + const WebRtc_Word16* factor2Table; + WebRtc_Word16 noiseEstLogQuantile[SIMULT * HALF_ANAL_BLOCKL]; + WebRtc_Word16 noiseEstDensity[SIMULT * HALF_ANAL_BLOCKL]; + WebRtc_Word16 noiseEstCounter[SIMULT]; + WebRtc_Word16 noiseEstQuantile[HALF_ANAL_BLOCKL]; + + WebRtc_Word16 anaLen; + int anaLen2; + int magnLen; //number of frequency bins looped over per frame in WebRtcNsx_ProcessCore + int aggrMode; + int stages; + int initFlag; + int gainMap; + + WebRtc_Word32 maxLrt; + WebRtc_Word32 minLrt; + WebRtc_Word32 logLrtTimeAvgW32[HALF_ANAL_BLOCKL]; //log lrt factor with time-smoothing in Q8 + WebRtc_Word32 featureLogLrt; + WebRtc_Word32 thresholdLogLrt; + WebRtc_Word16 weightLogLrt; + + WebRtc_UWord32 featureSpecDiff; + WebRtc_UWord32 thresholdSpecDiff; + WebRtc_Word16 weightSpecDiff; + + WebRtc_UWord32 featureSpecFlat; + WebRtc_UWord32 thresholdSpecFlat; + WebRtc_Word16 weightSpecFlat; + + WebRtc_Word32 avgMagnPause[HALF_ANAL_BLOCKL]; //conservative estimate of noise spectrum + WebRtc_UWord32 magnEnergy; + WebRtc_UWord32 sumMagn; + WebRtc_UWord32 curAvgMagnEnergy; //reset to 0 at every model update + WebRtc_UWord32 timeAvgMagnEnergy; //Q(-2*stages) + WebRtc_UWord32 timeAvgMagnEnergyTmp; + + WebRtc_UWord32 whiteNoiseLevel; //initial noise estimate + WebRtc_UWord32 initMagnEst[HALF_ANAL_BLOCKL];//initial magnitude spectrum estimate + WebRtc_Word32 pinkNoiseNumerator; //pink noise parameter: numerator + WebRtc_Word32 pinkNoiseExp; //pink noise parameter: power of freq + int minNorm; //smallest normalization
factor + int zeroInputSignal; //zero input signal flag + + WebRtc_UWord32 prevNoiseU32[HALF_ANAL_BLOCKL]; //noise spectrum from previous frame, Q(prevQNoise+11) + WebRtc_UWord16 prevMagnU16[HALF_ANAL_BLOCKL]; //magnitude spectrum from previous frame, Q(prevQMagn) + WebRtc_Word16 priorNonSpeechProb; //prior speech/noise probability // Q14 + + int blockIndex; //frame index counter + int modelUpdate; //parameter for updating or estimating thresholds/weights for prior model + int cntThresUpdate; //incremented once per processed frame; reset to 0 when it reaches modelUpdate + + //histograms for parameter estimation + WebRtc_Word16 histLrt[HIST_PAR_EST]; + WebRtc_Word16 histSpecFlat[HIST_PAR_EST]; + WebRtc_Word16 histSpecDiff[HIST_PAR_EST]; + + //quantities for high band estimate + WebRtc_Word16 dataBufHBFX[ANAL_BLOCKL_MAX]; /* Q0 */ + + int qNoise; + int prevQNoise; //qNoise stored at the end of the previous frame + int prevQMagn; //qMagn stored at the end of the previous frame + int blockLen10ms; //frame length in samples used by the H band copy and gain loops + + WebRtc_Word16 real[ANAL_BLOCKL_MAX]; + WebRtc_Word16 imag[ANAL_BLOCKL_MAX]; + WebRtc_Word32 energyIn; + int scaleEnergyIn; + int normData; + +} NsxInst_t; + +#ifdef __cplusplus +extern "C" +{ +#endif + +/**************************************************************************** + * WebRtcNsx_InitCore(...) + * + * This function initializes a noise suppression instance + * + * Input: + * - inst : Instance that should be initialized + * - fs : Sampling frequency + * + * Output: + * - inst : Initialized instance + * + * Return value : 0 - Ok + * -1 - Error + */ +WebRtc_Word32 WebRtcNsx_InitCore(NsxInst_t *inst, WebRtc_UWord32 fs); + +/**************************************************************************** + * WebRtcNsx_set_policy_core(...) + * + * This changes the aggressiveness of the noise suppression method.
+ * + * Input: + * - inst : Instance whose suppression policy should be changed + * - mode : 0: Mild (6 dB), 1: Medium (10 dB), 2: Aggressive (15 dB) + * + * Output: + * - inst : Updated instance + * + * Return value : 0 - Ok + * -1 - Error + */ +int WebRtcNsx_set_policy_core(NsxInst_t *inst, int mode); + +/**************************************************************************** + * WebRtcNsx_ProcessCore + * + * Do noise suppression. + * + * Input: + * - inst : Noise suppression instance (presumably already initialized with WebRtcNsx_InitCore - TODO confirm) + * - inFrameLow : Input speech frame for lower band + * - inFrameHigh : Input speech frame for higher band + * + * Output: + * - inst : Updated instance + * - outFrameLow : Output speech frame for lower band + * - outFrameHigh : Output speech frame for higher band + * + * Return value : 0 - OK + * -1 - Error + */ +int WebRtcNsx_ProcessCore(NsxInst_t *inst, short *inFrameLow, short *inFrameHigh, + short *outFrameLow, short *outFrameHigh); + +#ifdef __cplusplus +} +#endif + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_CORE_H_ diff --git a/src/modules/audio_processing/ns/main/source/nsx_defines.h b/src/modules/audio_processing/ns/main/source/nsx_defines.h new file mode 100644 index 0000000000..58796b9a3f --- /dev/null +++ b/src/modules/audio_processing/ns/main/source/nsx_defines.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_DEFINES_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_DEFINES_H_ + +#define ANAL_BLOCKL_MAX 256 // max analysis block length +#define HALF_ANAL_BLOCKL 129 // half max analysis block length + 1 +#define SIMULT 3 +#define END_STARTUP_LONG 200 +#define END_STARTUP_SHORT 50 // WebRtcNsx_ProcessCore blends the two suppression filters while blockIndex < END_STARTUP_SHORT +#define FACTOR_Q16 (WebRtc_Word32)2621440 // 40 in Q16 +#define FACTOR_Q7 (WebRtc_Word16)5120 // 40 in Q7 +#define WIDTH_Q8 3 // 0.01 in Q8 (or 25 ) +//PARAMETERS FOR NEW METHOD +#define DD_PR_SNR_Q11 2007 // ~= Q11(0.98) DD update of prior SNR +#define ONE_MINUS_DD_PR_SNR_Q11 41 // 41 = 2048 - 2007 ~= Q11(0.02), i.e. 1 - DD_PR_SNR; DD update of prior SNR +#define SPECT_FLAT_TAVG_Q14 4915 // Q14(0.30) tavg parameter for spectral flatness measure +#define SPECT_DIFF_TAVG_Q8 77 // Q8(0.30) tavg parameter; presumably for the spectral difference measure (comment used to say flatness) - TODO verify +#define PRIOR_UPDATE_Q14 1638 // Q14(0.1) update parameter of prior model +#define NOISE_UPDATE_Q8 26 // 26 ~= Q8(0.1) update parameter for noise +// probability threshold for noise state in speech/noise likelihood +#define ONE_MINUS_PROB_RANGE_Q8 205 // 205 ~= Q8(0.8) +#define HIST_PAR_EST 1000 // histogram size for estimation of parameters +//FEATURE EXTRACTION CONFIG +//bin size of histogram +#define BIN_SIZE_LRT 10 +//scale parameters: multiply dominant peaks of the histograms by scale factor to obtain +// thresholds for prior model +#define FACTOR_1_LRT_DIFF 6 //for LRT and spectral difference (5 times bigger) +//for spectral_flatness: used when noise is flatter than speech (10 times bigger) +#define FACTOR_2_FLAT_Q10 922 +//peak limit for spectral flatness (varies between 0 and 1) +#define THRES_PEAK_FLAT 24 // * 2 * BIN_SIZE_FLAT_FX +//limit on spacing of two highest peaks in histogram: spacing determined by bin size +#define LIM_PEAK_SPACE_FLAT_DIFF 4 // * 2 * BIN_SIZE_DIFF_FX +//limit on relevance of second peak: +#define LIM_PEAK_WEIGHT_FLAT_DIFF 2 +#define THRES_FLUCT_LRT 10240 //=20 * inst->modelUpdate; fluctuation limit of LRT
feat. +//limit on the max and min values for the feature thresholds +#define MAX_FLAT_Q10 38912 // * 2 * BIN_SIZE_FLAT_FX +#define MIN_FLAT_Q10 4096 // * 2 * BIN_SIZE_FLAT_FX +#define MAX_DIFF 100 // * 2 * BIN_SIZE_DIFF_FX +#define MIN_DIFF 16 // * 2 * BIN_SIZE_DIFF_FX +//criteria of weight of histogram peak to accept/reject feature +#define THRES_WEIGHT_FLAT_DIFF 154//(int)(0.3*(inst->modelUpdate)) for flatness and difference +// +#define STAT_UPDATES 9 // Update every 512 = 1 << 9 blocks; also the right shift applied to curAvgMagnEnergy at each update +#define ONE_MINUS_GAMMA_PAUSE_Q8 13 // ~= Q8(0.05) update for conservative noise estimate +#define GAMMA_NOISE_TRANS_AND_SPEECH_Q8 3 // ~= Q8(0.01) update for transition and speech region (used when nonSpeechProb < ONE_MINUS_PROB_RANGE_Q8) +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_DEFINES_H_ diff --git a/src/modules/audio_processing/ns/main/source/windows_private.h b/src/modules/audio_processing/ns/main/source/windows_private.h new file mode 100644 index 0000000000..8f9006ed72 --- /dev/null +++ b/src/modules/audio_processing/ns/main/source/windows_private.h @@ -0,0 +1,573 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_WINDOWS_PRIVATE_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_WINDOWS_PRIVATE_H_ + +// Hanning window for 4ms 16kHz (128 coefficients; the tabulated values match sin(pi*n/128)) +static const float kHanning64w128[128] = { +0.00000000000000f, 0.02454122852291f, 0.04906767432742f, +0.07356456359967f, 0.09801714032956f, 0.12241067519922f, +0.14673047445536f, 0.17096188876030f, 0.19509032201613f, +0.21910124015687f, 0.24298017990326f, 0.26671275747490f, +0.29028467725446f, 0.31368174039889f, 0.33688985339222f, +0.35989503653499f, 0.38268343236509f, 0.40524131400499f, +0.42755509343028f, 0.44961132965461f, 0.47139673682600f, +0.49289819222978f, 0.51410274419322f, 0.53499761988710f, +0.55557023301960f, 0.57580819141785f, 0.59569930449243f, +0.61523159058063f, 0.63439328416365f, 0.65317284295378f, +0.67155895484702f, 0.68954054473707f, 0.70710678118655f, +0.72424708295147f, 0.74095112535496f, 0.75720884650648f, +0.77301045336274f, 0.78834642762661f, 0.80320753148064f, +0.81758481315158f, 0.83146961230255f, 0.84485356524971f, +0.85772861000027f, 0.87008699110871f, 0.88192126434835f, +0.89322430119552f, 0.90398929312344f, 0.91420975570353f, +0.92387953251129f, 0.93299279883474f, 0.94154406518302f, +0.94952818059304f, 0.95694033573221f, 0.96377606579544f, +0.97003125319454f, 0.97570213003853f, 0.98078528040323f, +0.98527764238894f, 0.98917650996478f, 0.99247953459871f, +0.99518472667220f, 0.99729045667869f, 0.99879545620517f, +0.99969881869620f, 1.00000000000000f, +0.99969881869620f, 0.99879545620517f, 0.99729045667869f, +0.99518472667220f, 0.99247953459871f, 0.98917650996478f, +0.98527764238894f, 0.98078528040323f, 0.97570213003853f, +0.97003125319454f, 0.96377606579544f, 0.95694033573221f, +0.94952818059304f, 0.94154406518302f, 0.93299279883474f, +0.92387953251129f, 0.91420975570353f, 0.90398929312344f, +0.89322430119552f, 0.88192126434835f, 0.87008699110871f, +0.85772861000027f, 0.84485356524971f, 0.83146961230255f, +0.81758481315158f, 0.80320753148064f,
0.78834642762661f, +0.77301045336274f, 0.75720884650648f, 0.74095112535496f, +0.72424708295147f, 0.70710678118655f, 0.68954054473707f, +0.67155895484702f, 0.65317284295378f, 0.63439328416365f, +0.61523159058063f, 0.59569930449243f, 0.57580819141785f, +0.55557023301960f, 0.53499761988710f, 0.51410274419322f, +0.49289819222978f, 0.47139673682600f, 0.44961132965461f, +0.42755509343028f, 0.40524131400499f, 0.38268343236509f, +0.35989503653499f, 0.33688985339222f, 0.31368174039889f, +0.29028467725446f, 0.26671275747490f, 0.24298017990326f, +0.21910124015687f, 0.19509032201613f, 0.17096188876030f, +0.14673047445536f, 0.12241067519922f, 0.09801714032956f, +0.07356456359967f, 0.04906767432742f, 0.02454122852291f +}; + + + +// hybrid Hanning & flat window +static const float kBlocks80w128[128] = { +(float)0.00000000, (float)0.03271908, (float)0.06540313, (float)0.09801714, (float)0.13052619, +(float)0.16289547, (float)0.19509032, (float)0.22707626, (float)0.25881905, (float)0.29028468, +(float)0.32143947, (float)0.35225005, (float)0.38268343, (float)0.41270703, (float)0.44228869, +(float)0.47139674, (float)0.50000000, (float)0.52806785, (float)0.55557023, (float)0.58247770, +(float)0.60876143, (float)0.63439328, (float)0.65934582, (float)0.68359230, (float)0.70710678, +(float)0.72986407, (float)0.75183981, (float)0.77301045, (float)0.79335334, (float)0.81284668, +(float)0.83146961, (float)0.84920218, (float)0.86602540, (float)0.88192126, (float)0.89687274, +(float)0.91086382, (float)0.92387953, (float)0.93590593, (float)0.94693013, (float)0.95694034, +(float)0.96592583, (float)0.97387698, (float)0.98078528, (float)0.98664333, (float)0.99144486, +(float)0.99518473, (float)0.99785892, (float)0.99946459, (float)1.00000000, (float)1.00000000, +(float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, +(float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, +(float)1.00000000, (float)1.00000000,
(float)1.00000000, (float)1.00000000, (float)1.00000000, +(float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, +(float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, +(float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, +(float)1.00000000, (float)0.99946459, (float)0.99785892, (float)0.99518473, (float)0.99144486, +(float)0.98664333, (float)0.98078528, (float)0.97387698, (float)0.96592583, (float)0.95694034, +(float)0.94693013, (float)0.93590593, (float)0.92387953, (float)0.91086382, (float)0.89687274, +(float)0.88192126, (float)0.86602540, (float)0.84920218, (float)0.83146961, (float)0.81284668, +(float)0.79335334, (float)0.77301045, (float)0.75183981, (float)0.72986407, (float)0.70710678, +(float)0.68359230, (float)0.65934582, (float)0.63439328, (float)0.60876143, (float)0.58247770, +(float)0.55557023, (float)0.52806785, (float)0.50000000, (float)0.47139674, (float)0.44228869, +(float)0.41270703, (float)0.38268343, (float)0.35225005, (float)0.32143947, (float)0.29028468, +(float)0.25881905, (float)0.22707626, (float)0.19509032, (float)0.16289547, (float)0.13052619, +(float)0.09801714, (float)0.06540313, (float)0.03271908 +}; + +// hybrid Hanning & flat window +static const float kBlocks160w256[256] = { +(float)0.00000000, (float)0.01636173, (float)0.03271908, (float)0.04906767, (float)0.06540313, +(float)0.08172107, (float)0.09801714, (float)0.11428696, (float)0.13052619, (float)0.14673047, +(float)0.16289547, (float)0.17901686, (float)0.19509032, (float)0.21111155, (float)0.22707626, +(float)0.24298018, (float)0.25881905, (float)0.27458862, (float)0.29028468, (float)0.30590302, +(float)0.32143947, (float)0.33688985, (float)0.35225005, (float)0.36751594, (float)0.38268343, +(float)0.39774847, (float)0.41270703, (float)0.42755509, (float)0.44228869, (float)0.45690388, +(float)0.47139674, (float)0.48576339, (float)0.50000000,
(float)0.51410274, (float)0.52806785, +(float)0.54189158, (float)0.55557023, (float)0.56910015, (float)0.58247770, (float)0.59569930, +(float)0.60876143, (float)0.62166057, (float)0.63439328, (float)0.64695615, (float)0.65934582, +(float)0.67155895, (float)0.68359230, (float)0.69544264, (float)0.70710678, (float)0.71858162, +(float)0.72986407, (float)0.74095113, (float)0.75183981, (float)0.76252720, (float)0.77301045, +(float)0.78328675, (float)0.79335334, (float)0.80320753, (float)0.81284668, (float)0.82226822, +(float)0.83146961, (float)0.84044840, (float)0.84920218, (float)0.85772861, (float)0.86602540, +(float)0.87409034, (float)0.88192126, (float)0.88951608, (float)0.89687274, (float)0.90398929, +(float)0.91086382, (float)0.91749450, (float)0.92387953, (float)0.93001722, (float)0.93590593, +(float)0.94154407, (float)0.94693013, (float)0.95206268, (float)0.95694034, (float)0.96156180, +(float)0.96592583, (float)0.97003125, (float)0.97387698, (float)0.97746197, (float)0.98078528, +(float)0.98384601, (float)0.98664333, (float)0.98917651, (float)0.99144486, (float)0.99344778, +(float)0.99518473, (float)0.99665524, (float)0.99785892, (float)0.99879546, (float)0.99946459, +(float)0.99986614, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, +(float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, +(float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, +(float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, +(float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, +(float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, +(float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, +(float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, +(float)1.00000000, (float)1.00000000, 
(float)1.00000000, (float)1.00000000, (float)1.00000000, +(float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, +(float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, +(float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, +(float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, +(float)1.00000000, (float)0.99986614, (float)0.99946459, (float)0.99879546, (float)0.99785892, +(float)0.99665524, (float)0.99518473, (float)0.99344778, (float)0.99144486, (float)0.98917651, +(float)0.98664333, (float)0.98384601, (float)0.98078528, (float)0.97746197, (float)0.97387698, +(float)0.97003125, (float)0.96592583, (float)0.96156180, (float)0.95694034, (float)0.95206268, +(float)0.94693013, (float)0.94154407, (float)0.93590593, (float)0.93001722, (float)0.92387953, +(float)0.91749450, (float)0.91086382, (float)0.90398929, (float)0.89687274, (float)0.88951608, +(float)0.88192126, (float)0.87409034, (float)0.86602540, (float)0.85772861, (float)0.84920218, +(float)0.84044840, (float)0.83146961, (float)0.82226822, (float)0.81284668, (float)0.80320753, +(float)0.79335334, (float)0.78328675, (float)0.77301045, (float)0.76252720, (float)0.75183981, +(float)0.74095113, (float)0.72986407, (float)0.71858162, (float)0.70710678, (float)0.69544264, +(float)0.68359230, (float)0.67155895, (float)0.65934582, (float)0.64695615, (float)0.63439328, +(float)0.62166057, (float)0.60876143, (float)0.59569930, (float)0.58247770, (float)0.56910015, +(float)0.55557023, (float)0.54189158, (float)0.52806785, (float)0.51410274, (float)0.50000000, +(float)0.48576339, (float)0.47139674, (float)0.45690388, (float)0.44228869, (float)0.42755509, +(float)0.41270703, (float)0.39774847, (float)0.38268343, (float)0.36751594, (float)0.35225005, +(float)0.33688985, (float)0.32143947, (float)0.30590302, (float)0.29028468, (float)0.27458862, +(float)0.25881905, 
(float)0.24298018, (float)0.22707626, (float)0.21111155, (float)0.19509032, +(float)0.17901686, (float)0.16289547, (float)0.14673047, (float)0.13052619, (float)0.11428696, +(float)0.09801714, (float)0.08172107, (float)0.06540313, (float)0.04906767, (float)0.03271908, +(float)0.01636173 +}; + +// hybrid Hanning & flat window: for 20ms +static const float kBlocks320w512[512] = { +(float)0.00000000, (float)0.00818114, (float)0.01636173, (float)0.02454123, (float)0.03271908, +(float)0.04089475, (float)0.04906767, (float)0.05723732, (float)0.06540313, (float)0.07356456, +(float)0.08172107, (float)0.08987211, (float)0.09801714, (float)0.10615561, (float)0.11428696, +(float)0.12241068, (float)0.13052619, (float)0.13863297, (float)0.14673047, (float)0.15481816, +(float)0.16289547, (float)0.17096189, (float)0.17901686, (float)0.18705985, (float)0.19509032, +(float)0.20310773, (float)0.21111155, (float)0.21910124, (float)0.22707626, (float)0.23503609, +(float)0.24298018, (float)0.25090801, (float)0.25881905, (float)0.26671276, (float)0.27458862, +(float)0.28244610, (float)0.29028468, (float)0.29810383, (float)0.30590302, (float)0.31368174, +(float)0.32143947, (float)0.32917568, (float)0.33688985, (float)0.34458148, (float)0.35225005, +(float)0.35989504, (float)0.36751594, (float)0.37511224, (float)0.38268343, (float)0.39022901, +(float)0.39774847, (float)0.40524131, (float)0.41270703, (float)0.42014512, (float)0.42755509, +(float)0.43493645, (float)0.44228869, (float)0.44961133, (float)0.45690388, (float)0.46416584, +(float)0.47139674, (float)0.47859608, (float)0.48576339, (float)0.49289819, (float)0.50000000, +(float)0.50706834, (float)0.51410274, (float)0.52110274, (float)0.52806785, (float)0.53499762, +(float)0.54189158, (float)0.54874927, (float)0.55557023, (float)0.56235401, (float)0.56910015, +(float)0.57580819, (float)0.58247770, (float)0.58910822, (float)0.59569930, (float)0.60225052, +(float)0.60876143, (float)0.61523159, (float)0.62166057, (float)0.62804795,
(float)0.63439328, +(float)0.64069616, (float)0.64695615, (float)0.65317284, (float)0.65934582, (float)0.66547466, +(float)0.67155895, (float)0.67759830, (float)0.68359230, (float)0.68954054, (float)0.69544264, +(float)0.70129818, (float)0.70710678, (float)0.71286806, (float)0.71858162, (float)0.72424708, +(float)0.72986407, (float)0.73543221, (float)0.74095113, (float)0.74642045, (float)0.75183981, +(float)0.75720885, (float)0.76252720, (float)0.76779452, (float)0.77301045, (float)0.77817464, +(float)0.78328675, (float)0.78834643, (float)0.79335334, (float)0.79830715, (float)0.80320753, +(float)0.80805415, (float)0.81284668, (float)0.81758481, (float)0.82226822, (float)0.82689659, +(float)0.83146961, (float)0.83598698, (float)0.84044840, (float)0.84485357, (float)0.84920218, +(float)0.85349396, (float)0.85772861, (float)0.86190585, (float)0.86602540, (float)0.87008699, +(float)0.87409034, (float)0.87803519, (float)0.88192126, (float)0.88574831, (float)0.88951608, +(float)0.89322430, (float)0.89687274, (float)0.90046115, (float)0.90398929, (float)0.90745693, +(float)0.91086382, (float)0.91420976, (float)0.91749450, (float)0.92071783, (float)0.92387953, +(float)0.92697940, (float)0.93001722, (float)0.93299280, (float)0.93590593, (float)0.93875641, +(float)0.94154407, (float)0.94426870, (float)0.94693013, (float)0.94952818, (float)0.95206268, +(float)0.95453345, (float)0.95694034, (float)0.95928317, (float)0.96156180, (float)0.96377607, +(float)0.96592583, (float)0.96801094, (float)0.97003125, (float)0.97198664, (float)0.97387698, +(float)0.97570213, (float)0.97746197, (float)0.97915640, (float)0.98078528, (float)0.98234852, +(float)0.98384601, (float)0.98527764, (float)0.98664333, (float)0.98794298, (float)0.98917651, +(float)0.99034383, (float)0.99144486, (float)0.99247953, (float)0.99344778, (float)0.99434953, +(float)0.99518473, (float)0.99595331, (float)0.99665524, (float)0.99729046, (float)0.99785892, +(float)0.99836060, (float)0.99879546, (float)0.99916346, 
(float)0.99946459, (float)0.99969882, +(float)0.99986614, (float)0.99996653, (float)1.00000000, (float)1.00000000, (float)1.00000000, +(float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, +(float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, +(float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, +(float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, +(float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, +(float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, +(float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, +(float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, +(float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, +(float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, +(float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, +(float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, +(float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, +(float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, +(float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, +(float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, +(float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, +(float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, +(float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, +(float)1.00000000, (float)1.00000000, 
(float)1.00000000, (float)1.00000000, (float)1.00000000, +(float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, +(float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, +(float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, +(float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, +(float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, (float)1.00000000, +(float)1.00000000, (float)0.99996653, (float)0.99986614, (float)0.99969882, (float)0.99946459, +(float)0.99916346, (float)0.99879546, (float)0.99836060, (float)0.99785892, (float)0.99729046, +(float)0.99665524, (float)0.99595331, (float)0.99518473, (float)0.99434953, (float)0.99344778, +(float)0.99247953, (float)0.99144486, (float)0.99034383, (float)0.98917651, (float)0.98794298, +(float)0.98664333, (float)0.98527764, (float)0.98384601, (float)0.98234852, (float)0.98078528, +(float)0.97915640, (float)0.97746197, (float)0.97570213, (float)0.97387698, (float)0.97198664, +(float)0.97003125, (float)0.96801094, (float)0.96592583, (float)0.96377607, (float)0.96156180, +(float)0.95928317, (float)0.95694034, (float)0.95453345, (float)0.95206268, (float)0.94952818, +(float)0.94693013, (float)0.94426870, (float)0.94154407, (float)0.93875641, (float)0.93590593, +(float)0.93299280, (float)0.93001722, (float)0.92697940, (float)0.92387953, (float)0.92071783, +(float)0.91749450, (float)0.91420976, (float)0.91086382, (float)0.90745693, (float)0.90398929, +(float)0.90046115, (float)0.89687274, (float)0.89322430, (float)0.88951608, (float)0.88574831, +(float)0.88192126, (float)0.87803519, (float)0.87409034, (float)0.87008699, (float)0.86602540, +(float)0.86190585, (float)0.85772861, (float)0.85349396, (float)0.84920218, (float)0.84485357, +(float)0.84044840, (float)0.83598698, (float)0.83146961, (float)0.82689659, (float)0.82226822, +(float)0.81758481, 
(float)0.81284668, (float)0.80805415, (float)0.80320753, (float)0.79830715, +(float)0.79335334, (float)0.78834643, (float)0.78328675, (float)0.77817464, (float)0.77301045, +(float)0.76779452, (float)0.76252720, (float)0.75720885, (float)0.75183981, (float)0.74642045, +(float)0.74095113, (float)0.73543221, (float)0.72986407, (float)0.72424708, (float)0.71858162, +(float)0.71286806, (float)0.70710678, (float)0.70129818, (float)0.69544264, (float)0.68954054, +(float)0.68359230, (float)0.67759830, (float)0.67155895, (float)0.66547466, (float)0.65934582, +(float)0.65317284, (float)0.64695615, (float)0.64069616, (float)0.63439328, (float)0.62804795, +(float)0.62166057, (float)0.61523159, (float)0.60876143, (float)0.60225052, (float)0.59569930, +(float)0.58910822, (float)0.58247770, (float)0.57580819, (float)0.56910015, (float)0.56235401, +(float)0.55557023, (float)0.54874927, (float)0.54189158, (float)0.53499762, (float)0.52806785, +(float)0.52110274, (float)0.51410274, (float)0.50706834, (float)0.50000000, (float)0.49289819, +(float)0.48576339, (float)0.47859608, (float)0.47139674, (float)0.46416584, (float)0.45690388, +(float)0.44961133, (float)0.44228869, (float)0.43493645, (float)0.42755509, (float)0.42014512, +(float)0.41270703, (float)0.40524131, (float)0.39774847, (float)0.39022901, (float)0.38268343, +(float)0.37511224, (float)0.36751594, (float)0.35989504, (float)0.35225005, (float)0.34458148, +(float)0.33688985, (float)0.32917568, (float)0.32143947, (float)0.31368174, (float)0.30590302, +(float)0.29810383, (float)0.29028468, (float)0.28244610, (float)0.27458862, (float)0.26671276, +(float)0.25881905, (float)0.25090801, (float)0.24298018, (float)0.23503609, (float)0.22707626, +(float)0.21910124, (float)0.21111155, (float)0.20310773, (float)0.19509032, (float)0.18705985, +(float)0.17901686, (float)0.17096189, (float)0.16289547, (float)0.15481816, (float)0.14673047, +(float)0.13863297, (float)0.13052619, (float)0.12241068, (float)0.11428696, (float)0.10615561, 
+(float)0.09801714, (float)0.08987211, (float)0.08172107, (float)0.07356456, (float)0.06540313, +(float)0.05723732, (float)0.04906767, (float)0.04089475, (float)0.03271908, (float)0.02454123, +(float)0.01636173, (float)0.00818114 +}; + + +// Hanning window: for 15ms at 16kHz with symmetric zeros +static const float kBlocks240w512[512] = { +(float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, +(float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, +(float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, +(float)0.00000000, (float)0.00000000, (float)0.00654494, (float)0.01308960, (float)0.01963369, +(float)0.02617695, (float)0.03271908, (float)0.03925982, (float)0.04579887, (float)0.05233596, +(float)0.05887080, (float)0.06540313, (float)0.07193266, (float)0.07845910, (float)0.08498218, +(float)0.09150162, (float)0.09801714, (float)0.10452846, (float)0.11103531, (float)0.11753740, +(float)0.12403446, (float)0.13052620, (float)0.13701233, (float)0.14349262, (float)0.14996676, +(float)0.15643448, (float)0.16289547, (float)0.16934951, (float)0.17579629, (float)0.18223552, +(float)0.18866697, (float)0.19509032, (float)0.20150533, (float)0.20791170, (float)0.21430916, +(float)0.22069745, (float)0.22707628, (float)0.23344538, (float)0.23980446, (float)0.24615330, +(float)0.25249159, (float)0.25881904, (float)0.26513544, (float)0.27144045, (float)0.27773386, +(float)0.28401536, (float)0.29028466, (float)0.29654160, (float)0.30278578, (float)0.30901700, +(float)0.31523499, (float)0.32143945, (float)0.32763019, (float)0.33380687, (float)0.33996925, +(float)0.34611708, (float)0.35225007, (float)0.35836795, (float)0.36447051, (float)0.37055743, +(float)0.37662852, (float)0.38268346, (float)0.38872197, (float)0.39474389, (float)0.40074885, +(float)0.40673664, (float)0.41270703, (float)0.41865975, (float)0.42459452, (float)0.43051112, +(float)0.43640924, 
(float)0.44228873, (float)0.44814920, (float)0.45399052, (float)0.45981237, +(float)0.46561453, (float)0.47139674, (float)0.47715878, (float)0.48290035, (float)0.48862126, +(float)0.49432120, (float)0.50000000, (float)0.50565743, (float)0.51129311, (float)0.51690692, +(float)0.52249855, (float)0.52806789, (float)0.53361452, (float)0.53913832, (float)0.54463905, +(float)0.55011642, (float)0.55557024, (float)0.56100029, (float)0.56640625, (float)0.57178795, +(float)0.57714522, (float)0.58247769, (float)0.58778524, (float)0.59306765, (float)0.59832460, +(float)0.60355598, (float)0.60876143, (float)0.61394083, (float)0.61909395, (float)0.62422055, +(float)0.62932038, (float)0.63439333, (float)0.63943899, (float)0.64445734, (float)0.64944810, +(float)0.65441096, (float)0.65934587, (float)0.66425246, (float)0.66913062, (float)0.67398012, +(float)0.67880076, (float)0.68359232, (float)0.68835455, (float)0.69308740, (float)0.69779050, +(float)0.70246369, (float)0.70710677, (float)0.71171963, (float)0.71630198, (float)0.72085363, +(float)0.72537440, (float)0.72986406, (float)0.73432255, (float)0.73874950, (float)0.74314487, +(float)0.74750835, (float)0.75183982, (float)0.75613910, (float)0.76040596, (float)0.76464027, +(float)0.76884186, (float)0.77301043, (float)0.77714598, (float)0.78124821, (float)0.78531694, +(float)0.78935206, (float)0.79335338, (float)0.79732066, (float)0.80125386, (float)0.80515265, +(float)0.80901700, (float)0.81284672, (float)0.81664157, (float)0.82040149, (float)0.82412618, +(float)0.82781565, (float)0.83146966, (float)0.83508795, (float)0.83867061, (float)0.84221727, +(float)0.84572780, (float)0.84920216, (float)0.85264021, (float)0.85604161, (float)0.85940641, +(float)0.86273444, (float)0.86602545, (float)0.86927933, (float)0.87249607, (float)0.87567532, +(float)0.87881714, (float)0.88192129, (float)0.88498765, (float)0.88801610, (float)0.89100653, +(float)0.89395881, (float)0.89687276, (float)0.89974827, (float)0.90258533, (float)0.90538365, 
+(float)0.90814316, (float)0.91086388, (float)0.91354549, (float)0.91618794, (float)0.91879123, +(float)0.92135513, (float)0.92387950, (float)0.92636442, (float)0.92880958, (float)0.93121493, +(float)0.93358046, (float)0.93590593, (float)0.93819135, (float)0.94043654, (float)0.94264150, +(float)0.94480604, (float)0.94693011, (float)0.94901365, (float)0.95105654, (float)0.95305866, +(float)0.95501995, (float)0.95694035, (float)0.95881975, (float)0.96065807, (float)0.96245527, +(float)0.96421117, (float)0.96592581, (float)0.96759909, (float)0.96923089, (float)0.97082120, +(float)0.97236991, (float)0.97387701, (float)0.97534233, (float)0.97676587, (float)0.97814763, +(float)0.97948742, (float)0.98078531, (float)0.98204112, (float)0.98325491, (float)0.98442656, +(float)0.98555607, (float)0.98664331, (float)0.98768836, (float)0.98869103, (float)0.98965138, +(float)0.99056935, (float)0.99144489, (float)0.99227792, (float)0.99306846, (float)0.99381649, +(float)0.99452192, (float)0.99518472, (float)0.99580491, (float)0.99638247, (float)0.99691731, +(float)0.99740952, (float)0.99785894, (float)0.99826562, (float)0.99862951, (float)0.99895066, +(float)0.99922901, (float)0.99946457, (float)0.99965733, (float)0.99980724, (float)0.99991435, +(float)0.99997860, (float)1.00000000, (float)0.99997860, (float)0.99991435, (float)0.99980724, +(float)0.99965733, (float)0.99946457, (float)0.99922901, (float)0.99895066, (float)0.99862951, +(float)0.99826562, (float)0.99785894, (float)0.99740946, (float)0.99691731, (float)0.99638247, +(float)0.99580491, (float)0.99518472, (float)0.99452192, (float)0.99381644, (float)0.99306846, +(float)0.99227792, (float)0.99144489, (float)0.99056935, (float)0.98965138, (float)0.98869103, +(float)0.98768836, (float)0.98664331, (float)0.98555607, (float)0.98442656, (float)0.98325491, +(float)0.98204112, (float)0.98078525, (float)0.97948742, (float)0.97814757, (float)0.97676587, +(float)0.97534227, (float)0.97387695, (float)0.97236991, (float)0.97082120, 
(float)0.96923089, +(float)0.96759909, (float)0.96592581, (float)0.96421117, (float)0.96245521, (float)0.96065807, +(float)0.95881969, (float)0.95694029, (float)0.95501995, (float)0.95305860, (float)0.95105648, +(float)0.94901365, (float)0.94693011, (float)0.94480604, (float)0.94264150, (float)0.94043654, +(float)0.93819129, (float)0.93590593, (float)0.93358046, (float)0.93121493, (float)0.92880952, +(float)0.92636436, (float)0.92387950, (float)0.92135507, (float)0.91879123, (float)0.91618794, +(float)0.91354543, (float)0.91086382, (float)0.90814310, (float)0.90538365, (float)0.90258527, +(float)0.89974827, (float)0.89687276, (float)0.89395875, (float)0.89100647, (float)0.88801610, +(float)0.88498759, (float)0.88192123, (float)0.87881714, (float)0.87567532, (float)0.87249595, +(float)0.86927933, (float)0.86602539, (float)0.86273432, (float)0.85940641, (float)0.85604161, +(float)0.85264009, (float)0.84920216, (float)0.84572780, (float)0.84221715, (float)0.83867055, +(float)0.83508795, (float)0.83146954, (float)0.82781565, (float)0.82412612, (float)0.82040137, +(float)0.81664157, (float)0.81284660, (float)0.80901700, (float)0.80515265, (float)0.80125374, +(float)0.79732066, (float)0.79335332, (float)0.78935200, (float)0.78531694, (float)0.78124815, +(float)0.77714586, (float)0.77301049, (float)0.76884180, (float)0.76464021, (float)0.76040596, +(float)0.75613904, (float)0.75183970, (float)0.74750835, (float)0.74314481, (float)0.73874938, +(float)0.73432249, (float)0.72986400, (float)0.72537428, (float)0.72085363, (float)0.71630186, +(float)0.71171951, (float)0.70710677, (float)0.70246363, (float)0.69779032, (float)0.69308734, +(float)0.68835449, (float)0.68359220, (float)0.67880070, (float)0.67398006, (float)0.66913044, +(float)0.66425240, (float)0.65934575, (float)0.65441096, (float)0.64944804, (float)0.64445722, +(float)0.63943905, (float)0.63439327, (float)0.62932026, (float)0.62422055, (float)0.61909389, +(float)0.61394072, (float)0.60876143, (float)0.60355592, 
(float)0.59832448, (float)0.59306765, +(float)0.58778518, (float)0.58247757, (float)0.57714522, (float)0.57178789, (float)0.56640613, +(float)0.56100023, (float)0.55557019, (float)0.55011630, (float)0.54463905, (float)0.53913826, +(float)0.53361434, (float)0.52806783, (float)0.52249849, (float)0.51690674, (float)0.51129305, +(float)0.50565726, (float)0.50000006, (float)0.49432117, (float)0.48862115, (float)0.48290038, +(float)0.47715873, (float)0.47139663, (float)0.46561456, (float)0.45981231, (float)0.45399037, +(float)0.44814920, (float)0.44228864, (float)0.43640912, (float)0.43051112, (float)0.42459446, +(float)0.41865960, (float)0.41270703, (float)0.40673658, (float)0.40074870, (float)0.39474386, +(float)0.38872188, (float)0.38268328, (float)0.37662849, (float)0.37055734, (float)0.36447033, +(float)0.35836792, (float)0.35224995, (float)0.34611690, (float)0.33996922, (float)0.33380675, +(float)0.32763001, (float)0.32143945, (float)0.31523487, (float)0.30901679, (float)0.30278572, +(float)0.29654145, (float)0.29028472, (float)0.28401530, (float)0.27773371, (float)0.27144048, +(float)0.26513538, (float)0.25881892, (float)0.25249159, (float)0.24615324, (float)0.23980433, +(float)0.23344538, (float)0.22707619, (float)0.22069728, (float)0.21430916, (float)0.20791161, +(float)0.20150517, (float)0.19509031, (float)0.18866688, (float)0.18223536, (float)0.17579627, +(float)0.16934940, (float)0.16289529, (float)0.15643445, (float)0.14996666, (float)0.14349243, +(float)0.13701232, (float)0.13052608, (float)0.12403426, (float)0.11753736, (float)0.11103519, +(float)0.10452849, (float)0.09801710, (float)0.09150149, (float)0.08498220, (float)0.07845904, +(float)0.07193252, (float)0.06540315, (float)0.05887074, (float)0.05233581, (float)0.04579888, +(float)0.03925974, (float)0.03271893, (float)0.02617695, (float)0.01963361, (float)0.01308943, +(float)0.00654493, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, +(float)0.00000000, (float)0.00000000, 
(float)0.00000000, (float)0.00000000, (float)0.00000000, +(float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, +(float)0.00000000, (float)0.00000000 +}; + + +// Hanning window: for 30ms with 1024 fft with symmetric zeros at 16kHz +static const float kBlocks480w1024[1024] = { +(float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, +(float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, +(float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, +(float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, +(float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, +(float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, +(float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00327249, (float)0.00654494, +(float)0.00981732, (float)0.01308960, (float)0.01636173, (float)0.01963369, (float)0.02290544, +(float)0.02617695, (float)0.02944817, (float)0.03271908, (float)0.03598964, (float)0.03925982, +(float)0.04252957, (float)0.04579887, (float)0.04906768, (float)0.05233596, (float)0.05560368, +(float)0.05887080, (float)0.06213730, (float)0.06540313, (float)0.06866825, (float)0.07193266, +(float)0.07519628, (float)0.07845910, (float)0.08172107, (float)0.08498218, (float)0.08824237, +(float)0.09150162, (float)0.09475989, (float)0.09801714, (float)0.10127335, (float)0.10452846, +(float)0.10778246, (float)0.11103531, (float)0.11428697, (float)0.11753740, (float)0.12078657, +(float)0.12403446, (float)0.12728101, (float)0.13052620, (float)0.13376999, (float)0.13701233, +(float)0.14025325, (float)0.14349262, (float)0.14673047, (float)0.14996676, (float)0.15320145, +(float)0.15643448, (float)0.15966582, (float)0.16289547, (float)0.16612339, (float)0.16934951, +(float)0.17257382, (float)0.17579629, 
(float)0.17901687, (float)0.18223552, (float)0.18545224, +(float)0.18866697, (float)0.19187967, (float)0.19509032, (float)0.19829889, (float)0.20150533, +(float)0.20470962, (float)0.20791170, (float)0.21111156, (float)0.21430916, (float)0.21750447, +(float)0.22069745, (float)0.22388805, (float)0.22707628, (float)0.23026206, (float)0.23344538, +(float)0.23662618, (float)0.23980446, (float)0.24298020, (float)0.24615330, (float)0.24932377, +(float)0.25249159, (float)0.25565669, (float)0.25881904, (float)0.26197866, (float)0.26513544, +(float)0.26828939, (float)0.27144045, (float)0.27458861, (float)0.27773386, (float)0.28087610, +(float)0.28401536, (float)0.28715158, (float)0.29028466, (float)0.29341471, (float)0.29654160, +(float)0.29966527, (float)0.30278578, (float)0.30590302, (float)0.30901700, (float)0.31212768, +(float)0.31523499, (float)0.31833893, (float)0.32143945, (float)0.32453656, (float)0.32763019, +(float)0.33072028, (float)0.33380687, (float)0.33688986, (float)0.33996925, (float)0.34304500, +(float)0.34611708, (float)0.34918544, (float)0.35225007, (float)0.35531089, (float)0.35836795, +(float)0.36142117, (float)0.36447051, (float)0.36751595, (float)0.37055743, (float)0.37359497, +(float)0.37662852, (float)0.37965801, (float)0.38268346, (float)0.38570479, (float)0.38872197, +(float)0.39173502, (float)0.39474389, (float)0.39774847, (float)0.40074885, (float)0.40374491, +(float)0.40673664, (float)0.40972406, (float)0.41270703, (float)0.41568562, (float)0.41865975, +(float)0.42162940, (float)0.42459452, (float)0.42755508, (float)0.43051112, (float)0.43346250, +(float)0.43640924, (float)0.43935132, (float)0.44228873, (float)0.44522133, (float)0.44814920, +(float)0.45107228, (float)0.45399052, (float)0.45690390, (float)0.45981237, (float)0.46271592, +(float)0.46561453, (float)0.46850815, (float)0.47139674, (float)0.47428030, (float)0.47715878, +(float)0.48003215, (float)0.48290035, (float)0.48576337, (float)0.48862126, (float)0.49147385, +(float)0.49432120, 
(float)0.49716330, (float)0.50000000, (float)0.50283140, (float)0.50565743, +(float)0.50847799, (float)0.51129311, (float)0.51410276, (float)0.51690692, (float)0.51970553, +(float)0.52249855, (float)0.52528602, (float)0.52806789, (float)0.53084403, (float)0.53361452, +(float)0.53637928, (float)0.53913832, (float)0.54189163, (float)0.54463905, (float)0.54738063, +(float)0.55011642, (float)0.55284631, (float)0.55557024, (float)0.55828828, (float)0.56100029, +(float)0.56370628, (float)0.56640625, (float)0.56910014, (float)0.57178795, (float)0.57446963, +(float)0.57714522, (float)0.57981455, (float)0.58247769, (float)0.58513463, (float)0.58778524, +(float)0.59042960, (float)0.59306765, (float)0.59569931, (float)0.59832460, (float)0.60094351, +(float)0.60355598, (float)0.60616195, (float)0.60876143, (float)0.61135441, (float)0.61394083, +(float)0.61652070, (float)0.61909395, (float)0.62166059, (float)0.62422055, (float)0.62677383, +(float)0.62932038, (float)0.63186020, (float)0.63439333, (float)0.63691956, (float)0.63943899, +(float)0.64195162, (float)0.64445734, (float)0.64695615, (float)0.64944810, (float)0.65193301, +(float)0.65441096, (float)0.65688187, (float)0.65934587, (float)0.66180271, (float)0.66425246, +(float)0.66669512, (float)0.66913062, (float)0.67155898, (float)0.67398012, (float)0.67639405, +(float)0.67880076, (float)0.68120021, (float)0.68359232, (float)0.68597710, (float)0.68835455, +(float)0.69072467, (float)0.69308740, (float)0.69544262, (float)0.69779050, (float)0.70013082, +(float)0.70246369, (float)0.70478904, (float)0.70710677, (float)0.70941699, (float)0.71171963, +(float)0.71401459, (float)0.71630198, (float)0.71858168, (float)0.72085363, (float)0.72311789, +(float)0.72537440, (float)0.72762316, (float)0.72986406, (float)0.73209721, (float)0.73432255, +(float)0.73653996, (float)0.73874950, (float)0.74095118, (float)0.74314487, (float)0.74533057, +(float)0.74750835, (float)0.74967808, (float)0.75183982, (float)0.75399351, (float)0.75613910, 
+(float)0.75827658, (float)0.76040596, (float)0.76252723, (float)0.76464027, (float)0.76674515, +(float)0.76884186, (float)0.77093029, (float)0.77301043, (float)0.77508241, (float)0.77714598, +(float)0.77920127, (float)0.78124821, (float)0.78328675, (float)0.78531694, (float)0.78733873, +(float)0.78935206, (float)0.79135692, (float)0.79335338, (float)0.79534125, (float)0.79732066, +(float)0.79929149, (float)0.80125386, (float)0.80320752, (float)0.80515265, (float)0.80708915, +(float)0.80901700, (float)0.81093621, (float)0.81284672, (float)0.81474853, (float)0.81664157, +(float)0.81852591, (float)0.82040149, (float)0.82226825, (float)0.82412618, (float)0.82597536, +(float)0.82781565, (float)0.82964706, (float)0.83146966, (float)0.83328325, (float)0.83508795, +(float)0.83688378, (float)0.83867061, (float)0.84044838, (float)0.84221727, (float)0.84397703, +(float)0.84572780, (float)0.84746957, (float)0.84920216, (float)0.85092574, (float)0.85264021, +(float)0.85434544, (float)0.85604161, (float)0.85772866, (float)0.85940641, (float)0.86107504, +(float)0.86273444, (float)0.86438453, (float)0.86602545, (float)0.86765707, (float)0.86927933, +(float)0.87089235, (float)0.87249607, (float)0.87409031, (float)0.87567532, (float)0.87725097, +(float)0.87881714, (float)0.88037390, (float)0.88192129, (float)0.88345921, (float)0.88498765, +(float)0.88650668, (float)0.88801610, (float)0.88951612, (float)0.89100653, (float)0.89248741, +(float)0.89395881, (float)0.89542055, (float)0.89687276, (float)0.89831537, (float)0.89974827, +(float)0.90117162, (float)0.90258533, (float)0.90398932, (float)0.90538365, (float)0.90676826, +(float)0.90814316, (float)0.90950841, (float)0.91086388, (float)0.91220951, (float)0.91354549, +(float)0.91487163, (float)0.91618794, (float)0.91749454, (float)0.91879123, (float)0.92007810, +(float)0.92135513, (float)0.92262226, (float)0.92387950, (float)0.92512691, (float)0.92636442, +(float)0.92759192, (float)0.92880958, (float)0.93001723, (float)0.93121493, 
(float)0.93240267, +(float)0.93358046, (float)0.93474817, (float)0.93590593, (float)0.93705362, (float)0.93819135, +(float)0.93931901, (float)0.94043654, (float)0.94154406, (float)0.94264150, (float)0.94372880, +(float)0.94480604, (float)0.94587320, (float)0.94693011, (float)0.94797695, (float)0.94901365, +(float)0.95004016, (float)0.95105654, (float)0.95206273, (float)0.95305866, (float)0.95404440, +(float)0.95501995, (float)0.95598525, (float)0.95694035, (float)0.95788521, (float)0.95881975, +(float)0.95974404, (float)0.96065807, (float)0.96156180, (float)0.96245527, (float)0.96333838, +(float)0.96421117, (float)0.96507370, (float)0.96592581, (float)0.96676767, (float)0.96759909, +(float)0.96842021, (float)0.96923089, (float)0.97003126, (float)0.97082120, (float)0.97160077, +(float)0.97236991, (float)0.97312868, (float)0.97387701, (float)0.97461486, (float)0.97534233, +(float)0.97605932, (float)0.97676587, (float)0.97746199, (float)0.97814763, (float)0.97882277, +(float)0.97948742, (float)0.98014158, (float)0.98078531, (float)0.98141843, (float)0.98204112, +(float)0.98265332, (float)0.98325491, (float)0.98384601, (float)0.98442656, (float)0.98499662, +(float)0.98555607, (float)0.98610497, (float)0.98664331, (float)0.98717111, (float)0.98768836, +(float)0.98819500, (float)0.98869103, (float)0.98917651, (float)0.98965138, (float)0.99011570, +(float)0.99056935, (float)0.99101239, (float)0.99144489, (float)0.99186671, (float)0.99227792, +(float)0.99267852, (float)0.99306846, (float)0.99344778, (float)0.99381649, (float)0.99417448, +(float)0.99452192, (float)0.99485862, (float)0.99518472, (float)0.99550015, (float)0.99580491, +(float)0.99609905, (float)0.99638247, (float)0.99665523, (float)0.99691731, (float)0.99716878, +(float)0.99740952, (float)0.99763954, (float)0.99785894, (float)0.99806762, (float)0.99826562, +(float)0.99845290, (float)0.99862951, (float)0.99879545, (float)0.99895066, (float)0.99909520, +(float)0.99922901, (float)0.99935216, (float)0.99946457, 
(float)0.99956632, (float)0.99965733, +(float)0.99973762, (float)0.99980724, (float)0.99986613, (float)0.99991435, (float)0.99995178, +(float)0.99997860, (float)0.99999464, (float)1.00000000, (float)0.99999464, (float)0.99997860, +(float)0.99995178, (float)0.99991435, (float)0.99986613, (float)0.99980724, (float)0.99973762, +(float)0.99965733, (float)0.99956632, (float)0.99946457, (float)0.99935216, (float)0.99922901, +(float)0.99909520, (float)0.99895066, (float)0.99879545, (float)0.99862951, (float)0.99845290, +(float)0.99826562, (float)0.99806762, (float)0.99785894, (float)0.99763954, (float)0.99740946, +(float)0.99716872, (float)0.99691731, (float)0.99665523, (float)0.99638247, (float)0.99609905, +(float)0.99580491, (float)0.99550015, (float)0.99518472, (float)0.99485862, (float)0.99452192, +(float)0.99417448, (float)0.99381644, (float)0.99344778, (float)0.99306846, (float)0.99267852, +(float)0.99227792, (float)0.99186671, (float)0.99144489, (float)0.99101239, (float)0.99056935, +(float)0.99011564, (float)0.98965138, (float)0.98917651, (float)0.98869103, (float)0.98819494, +(float)0.98768836, (float)0.98717111, (float)0.98664331, (float)0.98610497, (float)0.98555607, +(float)0.98499656, (float)0.98442656, (float)0.98384601, (float)0.98325491, (float)0.98265326, +(float)0.98204112, (float)0.98141843, (float)0.98078525, (float)0.98014158, (float)0.97948742, +(float)0.97882277, (float)0.97814757, (float)0.97746193, (float)0.97676587, (float)0.97605932, +(float)0.97534227, (float)0.97461486, (float)0.97387695, (float)0.97312862, (float)0.97236991, +(float)0.97160077, (float)0.97082120, (float)0.97003126, (float)0.96923089, (float)0.96842015, +(float)0.96759909, (float)0.96676761, (float)0.96592581, (float)0.96507365, (float)0.96421117, +(float)0.96333838, (float)0.96245521, (float)0.96156180, (float)0.96065807, (float)0.95974404, +(float)0.95881969, (float)0.95788515, (float)0.95694029, (float)0.95598525, (float)0.95501995, +(float)0.95404440, (float)0.95305860, 
(float)0.95206267, (float)0.95105648, (float)0.95004016, +(float)0.94901365, (float)0.94797695, (float)0.94693011, (float)0.94587314, (float)0.94480604, +(float)0.94372880, (float)0.94264150, (float)0.94154406, (float)0.94043654, (float)0.93931895, +(float)0.93819129, (float)0.93705362, (float)0.93590593, (float)0.93474817, (float)0.93358046, +(float)0.93240267, (float)0.93121493, (float)0.93001723, (float)0.92880952, (float)0.92759192, +(float)0.92636436, (float)0.92512691, (float)0.92387950, (float)0.92262226, (float)0.92135507, +(float)0.92007804, (float)0.91879123, (float)0.91749448, (float)0.91618794, (float)0.91487157, +(float)0.91354543, (float)0.91220951, (float)0.91086382, (float)0.90950835, (float)0.90814310, +(float)0.90676820, (float)0.90538365, (float)0.90398932, (float)0.90258527, (float)0.90117157, +(float)0.89974827, (float)0.89831525, (float)0.89687276, (float)0.89542055, (float)0.89395875, +(float)0.89248741, (float)0.89100647, (float)0.88951600, (float)0.88801610, (float)0.88650662, +(float)0.88498759, (float)0.88345915, (float)0.88192123, (float)0.88037384, (float)0.87881714, +(float)0.87725091, (float)0.87567532, (float)0.87409031, (float)0.87249595, (float)0.87089223, +(float)0.86927933, (float)0.86765701, (float)0.86602539, (float)0.86438447, (float)0.86273432, +(float)0.86107504, (float)0.85940641, (float)0.85772860, (float)0.85604161, (float)0.85434544, +(float)0.85264009, (float)0.85092574, (float)0.84920216, (float)0.84746951, (float)0.84572780, +(float)0.84397697, (float)0.84221715, (float)0.84044844, (float)0.83867055, (float)0.83688372, +(float)0.83508795, (float)0.83328319, (float)0.83146954, (float)0.82964706, (float)0.82781565, +(float)0.82597530, (float)0.82412612, (float)0.82226813, (float)0.82040137, (float)0.81852591, +(float)0.81664157, (float)0.81474847, (float)0.81284660, (float)0.81093609, (float)0.80901700, +(float)0.80708915, (float)0.80515265, (float)0.80320752, (float)0.80125374, (float)0.79929143, +(float)0.79732066, 
(float)0.79534125, (float)0.79335332, (float)0.79135686, (float)0.78935200, +(float)0.78733861, (float)0.78531694, (float)0.78328675, (float)0.78124815, (float)0.77920121, +(float)0.77714586, (float)0.77508223, (float)0.77301049, (float)0.77093029, (float)0.76884180, +(float)0.76674509, (float)0.76464021, (float)0.76252711, (float)0.76040596, (float)0.75827658, +(float)0.75613904, (float)0.75399339, (float)0.75183970, (float)0.74967796, (float)0.74750835, +(float)0.74533057, (float)0.74314481, (float)0.74095106, (float)0.73874938, (float)0.73653996, +(float)0.73432249, (float)0.73209721, (float)0.72986400, (float)0.72762305, (float)0.72537428, +(float)0.72311789, (float)0.72085363, (float)0.71858162, (float)0.71630186, (float)0.71401453, +(float)0.71171951, (float)0.70941705, (float)0.70710677, (float)0.70478898, (float)0.70246363, +(float)0.70013070, (float)0.69779032, (float)0.69544268, (float)0.69308734, (float)0.69072461, +(float)0.68835449, (float)0.68597704, (float)0.68359220, (float)0.68120021, (float)0.67880070, +(float)0.67639399, (float)0.67398006, (float)0.67155886, (float)0.66913044, (float)0.66669512, +(float)0.66425240, (float)0.66180259, (float)0.65934575, (float)0.65688181, (float)0.65441096, +(float)0.65193301, (float)0.64944804, (float)0.64695609, (float)0.64445722, (float)0.64195150, +(float)0.63943905, (float)0.63691956, (float)0.63439327, (float)0.63186014, (float)0.62932026, +(float)0.62677372, (float)0.62422055, (float)0.62166059, (float)0.61909389, (float)0.61652064, +(float)0.61394072, (float)0.61135429, (float)0.60876143, (float)0.60616189, (float)0.60355592, +(float)0.60094339, (float)0.59832448, (float)0.59569913, (float)0.59306765, (float)0.59042960, +(float)0.58778518, (float)0.58513451, (float)0.58247757, (float)0.57981461, (float)0.57714522, +(float)0.57446963, (float)0.57178789, (float)0.56910002, (float)0.56640613, (float)0.56370628, +(float)0.56100023, (float)0.55828822, (float)0.55557019, (float)0.55284619, (float)0.55011630, 
+(float)0.54738069, (float)0.54463905, (float)0.54189152, (float)0.53913826, (float)0.53637916, +(float)0.53361434, (float)0.53084403, (float)0.52806783, (float)0.52528596, (float)0.52249849, +(float)0.51970541, (float)0.51690674, (float)0.51410276, (float)0.51129305, (float)0.50847787, +(float)0.50565726, (float)0.50283122, (float)0.50000006, (float)0.49716327, (float)0.49432117, +(float)0.49147379, (float)0.48862115, (float)0.48576325, (float)0.48290038, (float)0.48003212, +(float)0.47715873, (float)0.47428021, (float)0.47139663, (float)0.46850798, (float)0.46561456, +(float)0.46271589, (float)0.45981231, (float)0.45690379, (float)0.45399037, (float)0.45107210, +(float)0.44814920, (float)0.44522130, (float)0.44228864, (float)0.43935123, (float)0.43640912, +(float)0.43346232, (float)0.43051112, (float)0.42755505, (float)0.42459446, (float)0.42162928, +(float)0.41865960, (float)0.41568545, (float)0.41270703, (float)0.40972400, (float)0.40673658, +(float)0.40374479, (float)0.40074870, (float)0.39774850, (float)0.39474386, (float)0.39173496, +(float)0.38872188, (float)0.38570464, (float)0.38268328, (float)0.37965804, (float)0.37662849, +(float)0.37359491, (float)0.37055734, (float)0.36751580, (float)0.36447033, (float)0.36142117, +(float)0.35836792, (float)0.35531086, (float)0.35224995, (float)0.34918529, (float)0.34611690, +(float)0.34304500, (float)0.33996922, (float)0.33688980, (float)0.33380675, (float)0.33072016, +(float)0.32763001, (float)0.32453656, (float)0.32143945, (float)0.31833887, (float)0.31523487, +(float)0.31212750, (float)0.30901679, (float)0.30590302, (float)0.30278572, (float)0.29966521, +(float)0.29654145, (float)0.29341453, (float)0.29028472, (float)0.28715155, (float)0.28401530, +(float)0.28087601, (float)0.27773371, (float)0.27458847, (float)0.27144048, (float)0.26828936, +(float)0.26513538, (float)0.26197854, (float)0.25881892, (float)0.25565651, (float)0.25249159, +(float)0.24932374, (float)0.24615324, (float)0.24298008, (float)0.23980433, 
(float)0.23662600, +(float)0.23344538, (float)0.23026201, (float)0.22707619, (float)0.22388794, (float)0.22069728, +(float)0.21750426, (float)0.21430916, (float)0.21111152, (float)0.20791161, (float)0.20470949, +(float)0.20150517, (float)0.19829892, (float)0.19509031, (float)0.19187963, (float)0.18866688, +(float)0.18545210, (float)0.18223536, (float)0.17901689, (float)0.17579627, (float)0.17257376, +(float)0.16934940, (float)0.16612324, (float)0.16289529, (float)0.15966584, (float)0.15643445, +(float)0.15320137, (float)0.14996666, (float)0.14673033, (float)0.14349243, (float)0.14025325, +(float)0.13701232, (float)0.13376991, (float)0.13052608, (float)0.12728085, (float)0.12403426, +(float)0.12078657, (float)0.11753736, (float)0.11428688, (float)0.11103519, (float)0.10778230, +(float)0.10452849, (float)0.10127334, (float)0.09801710, (float)0.09475980, (float)0.09150149, +(float)0.08824220, (float)0.08498220, (float)0.08172106, (float)0.07845904, (float)0.07519618, +(float)0.07193252, (float)0.06866808, (float)0.06540315, (float)0.06213728, (float)0.05887074, +(float)0.05560357, (float)0.05233581, (float)0.04906749, (float)0.04579888, (float)0.04252954, +(float)0.03925974, (float)0.03598953, (float)0.03271893, (float)0.02944798, (float)0.02617695, +(float)0.02290541, (float)0.01963361, (float)0.01636161, (float)0.01308943, (float)0.00981712, +(float)0.00654493, (float)0.00327244, (float)0.00000000, (float)0.00000000, (float)0.00000000, +(float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, +(float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, +(float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, +(float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, +(float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, (float)0.00000000, +(float)0.00000000, (float)0.00000000, (float)0.00000000, 
(float)0.00000000}; + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_WINDOWS_PRIVATE_H_ diff --git a/src/modules/audio_processing/utility/Android.mk b/src/modules/audio_processing/utility/Android.mk new file mode 100644 index 0000000000..7e758cea29 --- /dev/null +++ b/src/modules/audio_processing/utility/Android.mk @@ -0,0 +1,49 @@ +# Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +LOCAL_PATH := $(call my-dir) + +include $(CLEAR_VARS) + +LOCAL_ARM_MODE := arm +LOCAL_MODULE_CLASS := STATIC_LIBRARIES +LOCAL_MODULE := libwebrtc_apm_utility +LOCAL_MODULE_TAGS := optional +LOCAL_GENERATED_SOURCES := +LOCAL_SRC_FILES := fft4g.c \ + ring_buffer.c + +# Flags passed to both C and C++ files. +MY_CFLAGS := +MY_CFLAGS_C := +MY_DEFS := '-DNO_TCMALLOC' \ + '-DNO_HEAPCHECKER' \ + '-DWEBRTC_TARGET_PC' \ + '-DWEBRTC_LINUX' \ + '-DWEBRTC_THREAD_RR' \ + '-DWEBRTC_ANDROID' \ + '-DANDROID' +LOCAL_CFLAGS := $(MY_CFLAGS_C) $(MY_CFLAGS) $(MY_DEFS) + +# Include paths placed before CFLAGS/CPPFLAGS +LOCAL_C_INCLUDES := \ + $(LOCAL_PATH) + +# Flags passed to only C++ (and not C) files. 
+LOCAL_CPPFLAGS := +LOCAL_LDFLAGS := + +LOCAL_STATIC_LIBRARIES := + +LOCAL_SHARED_LIBRARIES := libcutils \ + libdl \ + libstlport +LOCAL_ADDITIONAL_DEPENDENCIES := + +include external/stlport/libstlport.mk +include $(BUILD_STATIC_LIBRARY) diff --git a/src/modules/audio_processing/utility/fft4g.c b/src/modules/audio_processing/utility/fft4g.c new file mode 100644 index 0000000000..9a84368c41 --- /dev/null +++ b/src/modules/audio_processing/utility/fft4g.c @@ -0,0 +1,1356 @@ +/* + * http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html + * Copyright Takuya OOURA, 1996-2001 + * + * You may use, copy, modify and distribute this code for any purpose (include + * commercial use) and without fee. Please refer to this package when you modify + * this code. + * + * Changes: + * Trivial type modifications by the WebRTC authors. + */ + +/* +Fast Fourier/Cosine/Sine Transform + dimension :one + data length :power of 2 + decimation :frequency + radix :4, 2 + data :inplace + table :use +functions + cdft: Complex Discrete Fourier Transform + rdft: Real Discrete Fourier Transform + ddct: Discrete Cosine Transform + ddst: Discrete Sine Transform + dfct: Cosine Transform of RDFT (Real Symmetric DFT) + dfst: Sine Transform of RDFT (Real Anti-symmetric DFT) +function prototypes + void cdft(int, int, float *, int *, float *); + void rdft(int, int, float *, int *, float *); + void ddct(int, int, float *, int *, float *); + void ddst(int, int, float *, int *, float *); + void dfct(int, float *, float *, int *, float *); + void dfst(int, float *, float *, int *, float *); + + +-------- Complex DFT (Discrete Fourier Transform) -------- + [definition] + <case1> + X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k<n + <case2> + X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k<n + (notes: sum_j=0^n-1 is a summation from j=0 to n-1) + [usage] + <case1> + ip[0] = 0; // first time only + cdft(2*n, 1, a, ip, w); + <case2> + ip[0] = 0; // first time only + cdft(2*n, -1, a, ip, w); + [parameters] + 2*n 
:data length (int) + n >= 1, n = power of 2 + a[0...2*n-1] :input/output data (float *) + input data + a[2*j] = Re(x[j]), + a[2*j+1] = Im(x[j]), 0<=j<n + output data + a[2*k] = Re(X[k]), + a[2*k+1] = Im(X[k]), 0<=k<n + ip[0...*] :work area for bit reversal (int *) + length of ip >= 2+sqrt(n) + strictly, + length of ip >= + 2+(1<<(int)(log(n+0.5)/log(2))/2). + ip[0],ip[1] are pointers of the cos/sin table. + w[0...n/2-1] :cos/sin table (float *) + w[],ip[] are initialized if ip[0] == 0. + [remark] + Inverse of + cdft(2*n, -1, a, ip, w); + is + cdft(2*n, 1, a, ip, w); + for (j = 0; j <= 2 * n - 1; j++) { + a[j] *= 1.0 / n; + } + . + + +-------- Real DFT / Inverse of Real DFT -------- + [definition] + <case1> RDFT + R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2 + I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0<k<n/2 + <case2> IRDFT (excluding scale) + a[k] = (R[0] + R[n/2]*cos(pi*k))/2 + + sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) + + sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k<n + [usage] + <case1> + ip[0] = 0; // first time only + rdft(n, 1, a, ip, w); + <case2> + ip[0] = 0; // first time only + rdft(n, -1, a, ip, w); + [parameters] + n :data length (int) + n >= 2, n = power of 2 + a[0...n-1] :input/output data (float *) + <case1> + output data + a[2*k] = R[k], 0<=k<n/2 + a[2*k+1] = I[k], 0<k<n/2 + a[1] = R[n/2] + <case2> + input data + a[2*j] = R[j], 0<=j<n/2 + a[2*j+1] = I[j], 0<j<n/2 + a[1] = R[n/2] + ip[0...*] :work area for bit reversal (int *) + length of ip >= 2+sqrt(n/2) + strictly, + length of ip >= + 2+(1<<(int)(log(n/2+0.5)/log(2))/2). + ip[0],ip[1] are pointers of the cos/sin table. + w[0...n/2-1] :cos/sin table (float *) + w[],ip[] are initialized if ip[0] == 0. + [remark] + Inverse of + rdft(n, 1, a, ip, w); + is + rdft(n, -1, a, ip, w); + for (j = 0; j <= n - 1; j++) { + a[j] *= 2.0 / n; + } + . 
+ + +-------- DCT (Discrete Cosine Transform) / Inverse of DCT -------- + [definition] + <case1> IDCT (excluding scale) + C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k<n + <case2> DCT + C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k<n + [usage] + <case1> + ip[0] = 0; // first time only + ddct(n, 1, a, ip, w); + <case2> + ip[0] = 0; // first time only + ddct(n, -1, a, ip, w); + [parameters] + n :data length (int) + n >= 2, n = power of 2 + a[0...n-1] :input/output data (float *) + output data + a[k] = C[k], 0<=k<n + ip[0...*] :work area for bit reversal (int *) + length of ip >= 2+sqrt(n/2) + strictly, + length of ip >= + 2+(1<<(int)(log(n/2+0.5)/log(2))/2). + ip[0],ip[1] are pointers of the cos/sin table. + w[0...n*5/4-1] :cos/sin table (float *) + w[],ip[] are initialized if ip[0] == 0. + [remark] + Inverse of + ddct(n, -1, a, ip, w); + is + a[0] *= 0.5; + ddct(n, 1, a, ip, w); + for (j = 0; j <= n - 1; j++) { + a[j] *= 2.0 / n; + } + . + + +-------- DST (Discrete Sine Transform) / Inverse of DST -------- + [definition] + <case1> IDST (excluding scale) + S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k<n + <case2> DST + S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0<k<=n + [usage] + <case1> + ip[0] = 0; // first time only + ddst(n, 1, a, ip, w); + <case2> + ip[0] = 0; // first time only + ddst(n, -1, a, ip, w); + [parameters] + n :data length (int) + n >= 2, n = power of 2 + a[0...n-1] :input/output data (float *) + <case1> + input data + a[j] = A[j], 0<j<n + a[0] = A[n] + output data + a[k] = S[k], 0<=k<n + <case2> + output data + a[k] = S[k], 0<k<n + a[0] = S[n] + ip[0...*] :work area for bit reversal (int *) + length of ip >= 2+sqrt(n/2) + strictly, + length of ip >= + 2+(1<<(int)(log(n/2+0.5)/log(2))/2). + ip[0],ip[1] are pointers of the cos/sin table. + w[0...n*5/4-1] :cos/sin table (float *) + w[],ip[] are initialized if ip[0] == 0. 
+ [remark] + Inverse of + ddst(n, -1, a, ip, w); + is + a[0] *= 0.5; + ddst(n, 1, a, ip, w); + for (j = 0; j <= n - 1; j++) { + a[j] *= 2.0 / n; + } + . + + +-------- Cosine Transform of RDFT (Real Symmetric DFT) -------- + [definition] + C[k] = sum_j=0^n a[j]*cos(pi*j*k/n), 0<=k<=n + [usage] + ip[0] = 0; // first time only + dfct(n, a, t, ip, w); + [parameters] + n :data length - 1 (int) + n >= 2, n = power of 2 + a[0...n] :input/output data (float *) + output data + a[k] = C[k], 0<=k<=n + t[0...n/2] :work area (float *) + ip[0...*] :work area for bit reversal (int *) + length of ip >= 2+sqrt(n/4) + strictly, + length of ip >= + 2+(1<<(int)(log(n/4+0.5)/log(2))/2). + ip[0],ip[1] are pointers of the cos/sin table. + w[0...n*5/8-1] :cos/sin table (float *) + w[],ip[] are initialized if ip[0] == 0. + [remark] + Inverse of + a[0] *= 0.5; + a[n] *= 0.5; + dfct(n, a, t, ip, w); + is + a[0] *= 0.5; + a[n] *= 0.5; + dfct(n, a, t, ip, w); + for (j = 0; j <= n; j++) { + a[j] *= 2.0 / n; + } + . + + +-------- Sine Transform of RDFT (Real Anti-symmetric DFT) -------- + [definition] + S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0<k<n + [usage] + ip[0] = 0; // first time only + dfst(n, a, t, ip, w); + [parameters] + n :data length + 1 (int) + n >= 2, n = power of 2 + a[0...n-1] :input/output data (float *) + output data + a[k] = S[k], 0<k<n + (a[0] is used for work area) + t[0...n/2-1] :work area (float *) + ip[0...*] :work area for bit reversal (int *) + length of ip >= 2+sqrt(n/4) + strictly, + length of ip >= + 2+(1<<(int)(log(n/4+0.5)/log(2))/2). + ip[0],ip[1] are pointers of the cos/sin table. + w[0...n*5/8-1] :cos/sin table (float *) + w[],ip[] are initialized if ip[0] == 0. + [remark] + Inverse of + dfst(n, a, t, ip, w); + is + dfst(n, a, t, ip, w); + for (j = 1; j <= n - 1; j++) { + a[j] *= 2.0 / n; + } + . + + +Appendix : + The cos/sin table is recalculated when the larger table required. + w[] and ip[] are compatible with all routines. 
+*/ + +void cdft(int n, int isgn, float *a, int *ip, float *w) +{ + void makewt(int nw, int *ip, float *w); + void bitrv2(int n, int *ip, float *a); + void bitrv2conj(int n, int *ip, float *a); + void cftfsub(int n, float *a, float *w); + void cftbsub(int n, float *a, float *w); + + if (n > (ip[0] << 2)) { + makewt(n >> 2, ip, w); + } + if (n > 4) { + if (isgn >= 0) { + bitrv2(n, ip + 2, a); + cftfsub(n, a, w); + } else { + bitrv2conj(n, ip + 2, a); + cftbsub(n, a, w); + } + } else if (n == 4) { + cftfsub(n, a, w); + } +} + + +void rdft(int n, int isgn, float *a, int *ip, float *w) +{ + void makewt(int nw, int *ip, float *w); + void makect(int nc, int *ip, float *c); + void bitrv2(int n, int *ip, float *a); + void cftfsub(int n, float *a, float *w); + void cftbsub(int n, float *a, float *w); + void rftfsub(int n, float *a, int nc, float *c); + void rftbsub(int n, float *a, int nc, float *c); + int nw, nc; + float xi; + + nw = ip[0]; + if (n > (nw << 2)) { + nw = n >> 2; + makewt(nw, ip, w); + } + nc = ip[1]; + if (n > (nc << 2)) { + nc = n >> 2; + makect(nc, ip, w + nw); + } + if (isgn >= 0) { + if (n > 4) { + bitrv2(n, ip + 2, a); + cftfsub(n, a, w); + rftfsub(n, a, nc, w + nw); + } else if (n == 4) { + cftfsub(n, a, w); + } + xi = a[0] - a[1]; + a[0] += a[1]; + a[1] = xi; + } else { + a[1] = 0.5f * (a[0] - a[1]); + a[0] -= a[1]; + if (n > 4) { + rftbsub(n, a, nc, w + nw); + bitrv2(n, ip + 2, a); + cftbsub(n, a, w); + } else if (n == 4) { + cftfsub(n, a, w); + } + } +} + + +void ddct(int n, int isgn, float *a, int *ip, float *w) +{ + void makewt(int nw, int *ip, float *w); + void makect(int nc, int *ip, float *c); + void bitrv2(int n, int *ip, float *a); + void cftfsub(int n, float *a, float *w); + void cftbsub(int n, float *a, float *w); + void rftfsub(int n, float *a, int nc, float *c); + void rftbsub(int n, float *a, int nc, float *c); + void dctsub(int n, float *a, int nc, float *c); + int j, nw, nc; + float xr; + + nw = ip[0]; + if (n > (nw << 2)) { + nw = n 
>> 2; + makewt(nw, ip, w); + } + nc = ip[1]; + if (n > nc) { + nc = n; + makect(nc, ip, w + nw); + } + if (isgn < 0) { + xr = a[n - 1]; + for (j = n - 2; j >= 2; j -= 2) { + a[j + 1] = a[j] - a[j - 1]; + a[j] += a[j - 1]; + } + a[1] = a[0] - xr; + a[0] += xr; + if (n > 4) { + rftbsub(n, a, nc, w + nw); + bitrv2(n, ip + 2, a); + cftbsub(n, a, w); + } else if (n == 4) { + cftfsub(n, a, w); + } + } + dctsub(n, a, nc, w + nw); + if (isgn >= 0) { + if (n > 4) { + bitrv2(n, ip + 2, a); + cftfsub(n, a, w); + rftfsub(n, a, nc, w + nw); + } else if (n == 4) { + cftfsub(n, a, w); + } + xr = a[0] - a[1]; + a[0] += a[1]; + for (j = 2; j < n; j += 2) { + a[j - 1] = a[j] - a[j + 1]; + a[j] += a[j + 1]; + } + a[n - 1] = xr; + } +} + + +void ddst(int n, int isgn, float *a, int *ip, float *w) +{ + void makewt(int nw, int *ip, float *w); + void makect(int nc, int *ip, float *c); + void bitrv2(int n, int *ip, float *a); + void cftfsub(int n, float *a, float *w); + void cftbsub(int n, float *a, float *w); + void rftfsub(int n, float *a, int nc, float *c); + void rftbsub(int n, float *a, int nc, float *c); + void dstsub(int n, float *a, int nc, float *c); + int j, nw, nc; + float xr; + + nw = ip[0]; + if (n > (nw << 2)) { + nw = n >> 2; + makewt(nw, ip, w); + } + nc = ip[1]; + if (n > nc) { + nc = n; + makect(nc, ip, w + nw); + } + if (isgn < 0) { + xr = a[n - 1]; + for (j = n - 2; j >= 2; j -= 2) { + a[j + 1] = -a[j] - a[j - 1]; + a[j] -= a[j - 1]; + } + a[1] = a[0] + xr; + a[0] -= xr; + if (n > 4) { + rftbsub(n, a, nc, w + nw); + bitrv2(n, ip + 2, a); + cftbsub(n, a, w); + } else if (n == 4) { + cftfsub(n, a, w); + } + } + dstsub(n, a, nc, w + nw); + if (isgn >= 0) { + if (n > 4) { + bitrv2(n, ip + 2, a); + cftfsub(n, a, w); + rftfsub(n, a, nc, w + nw); + } else if (n == 4) { + cftfsub(n, a, w); + } + xr = a[0] - a[1]; + a[0] += a[1]; + for (j = 2; j < n; j += 2) { + a[j - 1] = -a[j] - a[j + 1]; + a[j] -= a[j + 1]; + } + a[n - 1] = -xr; + } +} + + +void dfct(int n, float *a, float 
*t, int *ip, float *w) +{ + void makewt(int nw, int *ip, float *w); + void makect(int nc, int *ip, float *c); + void bitrv2(int n, int *ip, float *a); + void cftfsub(int n, float *a, float *w); + void rftfsub(int n, float *a, int nc, float *c); + void dctsub(int n, float *a, int nc, float *c); + int j, k, l, m, mh, nw, nc; + float xr, xi, yr, yi; + + nw = ip[0]; + if (n > (nw << 3)) { + nw = n >> 3; + makewt(nw, ip, w); + } + nc = ip[1]; + if (n > (nc << 1)) { + nc = n >> 1; + makect(nc, ip, w + nw); + } + m = n >> 1; + yi = a[m]; + xi = a[0] + a[n]; + a[0] -= a[n]; + t[0] = xi - yi; + t[m] = xi + yi; + if (n > 2) { + mh = m >> 1; + for (j = 1; j < mh; j++) { + k = m - j; + xr = a[j] - a[n - j]; + xi = a[j] + a[n - j]; + yr = a[k] - a[n - k]; + yi = a[k] + a[n - k]; + a[j] = xr; + a[k] = yr; + t[j] = xi - yi; + t[k] = xi + yi; + } + t[mh] = a[mh] + a[n - mh]; + a[mh] -= a[n - mh]; + dctsub(m, a, nc, w + nw); + if (m > 4) { + bitrv2(m, ip + 2, a); + cftfsub(m, a, w); + rftfsub(m, a, nc, w + nw); + } else if (m == 4) { + cftfsub(m, a, w); + } + a[n - 1] = a[0] - a[1]; + a[1] = a[0] + a[1]; + for (j = m - 2; j >= 2; j -= 2) { + a[2 * j + 1] = a[j] + a[j + 1]; + a[2 * j - 1] = a[j] - a[j + 1]; + } + l = 2; + m = mh; + while (m >= 2) { + dctsub(m, t, nc, w + nw); + if (m > 4) { + bitrv2(m, ip + 2, t); + cftfsub(m, t, w); + rftfsub(m, t, nc, w + nw); + } else if (m == 4) { + cftfsub(m, t, w); + } + a[n - l] = t[0] - t[1]; + a[l] = t[0] + t[1]; + k = 0; + for (j = 2; j < m; j += 2) { + k += l << 2; + a[k - l] = t[j] - t[j + 1]; + a[k + l] = t[j] + t[j + 1]; + } + l <<= 1; + mh = m >> 1; + for (j = 0; j < mh; j++) { + k = m - j; + t[j] = t[m + k] - t[m + j]; + t[k] = t[m + k] + t[m + j]; + } + t[mh] = t[m + mh]; + m = mh; + } + a[l] = t[0]; + a[n] = t[2] - t[1]; + a[0] = t[2] + t[1]; + } else { + a[1] = a[0]; + a[2] = t[0]; + a[0] = t[1]; + } +} + + +void dfst(int n, float *a, float *t, int *ip, float *w) +{ + void makewt(int nw, int *ip, float *w); + void makect(int nc, 
int *ip, float *c); + void bitrv2(int n, int *ip, float *a); + void cftfsub(int n, float *a, float *w); + void rftfsub(int n, float *a, int nc, float *c); + void dstsub(int n, float *a, int nc, float *c); + int j, k, l, m, mh, nw, nc; + float xr, xi, yr, yi; + + nw = ip[0]; + if (n > (nw << 3)) { + nw = n >> 3; + makewt(nw, ip, w); + } + nc = ip[1]; + if (n > (nc << 1)) { + nc = n >> 1; + makect(nc, ip, w + nw); + } + if (n > 2) { + m = n >> 1; + mh = m >> 1; + for (j = 1; j < mh; j++) { + k = m - j; + xr = a[j] + a[n - j]; + xi = a[j] - a[n - j]; + yr = a[k] + a[n - k]; + yi = a[k] - a[n - k]; + a[j] = xr; + a[k] = yr; + t[j] = xi + yi; + t[k] = xi - yi; + } + t[0] = a[mh] - a[n - mh]; + a[mh] += a[n - mh]; + a[0] = a[m]; + dstsub(m, a, nc, w + nw); + if (m > 4) { + bitrv2(m, ip + 2, a); + cftfsub(m, a, w); + rftfsub(m, a, nc, w + nw); + } else if (m == 4) { + cftfsub(m, a, w); + } + a[n - 1] = a[1] - a[0]; + a[1] = a[0] + a[1]; + for (j = m - 2; j >= 2; j -= 2) { + a[2 * j + 1] = a[j] - a[j + 1]; + a[2 * j - 1] = -a[j] - a[j + 1]; + } + l = 2; + m = mh; + while (m >= 2) { + dstsub(m, t, nc, w + nw); + if (m > 4) { + bitrv2(m, ip + 2, t); + cftfsub(m, t, w); + rftfsub(m, t, nc, w + nw); + } else if (m == 4) { + cftfsub(m, t, w); + } + a[n - l] = t[1] - t[0]; + a[l] = t[0] + t[1]; + k = 0; + for (j = 2; j < m; j += 2) { + k += l << 2; + a[k - l] = -t[j] - t[j + 1]; + a[k + l] = t[j] - t[j + 1]; + } + l <<= 1; + mh = m >> 1; + for (j = 1; j < mh; j++) { + k = m - j; + t[j] = t[m + k] + t[m + j]; + t[k] = t[m + k] - t[m + j]; + } + t[0] = t[m + mh]; + m = mh; + } + a[l] = t[0]; + } + a[0] = 0; +} + + +/* -------- initializing routines -------- */ + + +#include <math.h> + +void makewt(int nw, int *ip, float *w) +{ + void bitrv2(int n, int *ip, float *a); + int j, nwh; + float delta, x, y; + + ip[0] = nw; + ip[1] = 1; + if (nw > 2) { + nwh = nw >> 1; + delta = (float)atan(1.0f) / nwh; + w[0] = 1; + w[1] = 0; + w[nwh] = (float)cos(delta * nwh); + w[nwh + 1] = w[nwh]; + 
if (nwh > 2) { + for (j = 2; j < nwh; j += 2) { + x = (float)cos(delta * j); + y = (float)sin(delta * j); + w[j] = x; + w[j + 1] = y; + w[nw - j] = y; + w[nw - j + 1] = x; + } + bitrv2(nw, ip + 2, w); + } + } +} + + +void makect(int nc, int *ip, float *c) +{ + int j, nch; + float delta; + + ip[1] = nc; + if (nc > 1) { + nch = nc >> 1; + delta = (float)atan(1.0f) / nch; + c[0] = (float)cos(delta * nch); + c[nch] = 0.5f * c[0]; + for (j = 1; j < nch; j++) { + c[j] = 0.5f * (float)cos(delta * j); + c[nc - j] = 0.5f * (float)sin(delta * j); + } + } +} + + +/* -------- child routines -------- */ + + +void bitrv2(int n, int *ip, float *a) +{ + int j, j1, k, k1, l, m, m2; + float xr, xi, yr, yi; + + ip[0] = 0; + l = n; + m = 1; + while ((m << 3) < l) { + l >>= 1; + for (j = 0; j < m; j++) { + ip[m + j] = ip[j] + l; + } + m <<= 1; + } + m2 = 2 * m; + if ((m << 3) == l) { + for (k = 0; k < m; k++) { + for (j = 0; j < k; j++) { + j1 = 2 * j + ip[k]; + k1 = 2 * k + ip[j]; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += m2; + k1 += 2 * m2; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += m2; + k1 -= m2; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += m2; + k1 += 2 * m2; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + } + j1 = 2 * k + m2 + ip[k]; + k1 = j1 + m2; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + } + } else { + for (k = 1; k < m; k++) { + for (j = 0; j < k; j++) { + j1 = 2 * j + ip[k]; + k1 = 2 * k + ip[j]; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += m2; + 
k1 += m2; + xr = a[j1]; + xi = a[j1 + 1]; + yr = a[k1]; + yi = a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + } + } + } +} + + +void bitrv2conj(int n, int *ip, float *a) +{ + int j, j1, k, k1, l, m, m2; + float xr, xi, yr, yi; + + ip[0] = 0; + l = n; + m = 1; + while ((m << 3) < l) { + l >>= 1; + for (j = 0; j < m; j++) { + ip[m + j] = ip[j] + l; + } + m <<= 1; + } + m2 = 2 * m; + if ((m << 3) == l) { + for (k = 0; k < m; k++) { + for (j = 0; j < k; j++) { + j1 = 2 * j + ip[k]; + k1 = 2 * k + ip[j]; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += m2; + k1 += 2 * m2; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += m2; + k1 -= m2; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += m2; + k1 += 2 * m2; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + } + k1 = 2 * k + ip[k]; + a[k1 + 1] = -a[k1 + 1]; + j1 = k1 + m2; + k1 = j1 + m2; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + k1 += m2; + a[k1 + 1] = -a[k1 + 1]; + } + } else { + a[1] = -a[1]; + a[m2 + 1] = -a[m2 + 1]; + for (k = 1; k < m; k++) { + for (j = 0; j < k; j++) { + j1 = 2 * j + ip[k]; + k1 = 2 * k + ip[j]; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + j1 += m2; + k1 += m2; + xr = a[j1]; + xi = -a[j1 + 1]; + yr = a[k1]; + yi = -a[k1 + 1]; + a[j1] = yr; + a[j1 + 1] = yi; + a[k1] = xr; + a[k1 + 1] = xi; + } + k1 = 2 * k + ip[k]; + a[k1 + 1] = -a[k1 + 1]; + a[k1 + m2 + 1] = -a[k1 + m2 + 1]; + } + } +} + + +void cftfsub(int n, float *a, float *w) +{ + void cft1st(int n, 
float *a, float *w); + void cftmdl(int n, int l, float *a, float *w); + int j, j1, j2, j3, l; + float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + + l = 2; + if (n > 8) { + cft1st(n, a, w); + l = 8; + while ((l << 2) < n) { + cftmdl(n, l, a, w); + l <<= 2; + } + } + if ((l << 2) == n) { + for (j = 0; j < l; j += 2) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + x0r = a[j] + a[j1]; + x0i = a[j + 1] + a[j1 + 1]; + x1r = a[j] - a[j1]; + x1i = a[j + 1] - a[j1 + 1]; + x2r = a[j2] + a[j3]; + x2i = a[j2 + 1] + a[j3 + 1]; + x3r = a[j2] - a[j3]; + x3i = a[j2 + 1] - a[j3 + 1]; + a[j] = x0r + x2r; + a[j + 1] = x0i + x2i; + a[j2] = x0r - x2r; + a[j2 + 1] = x0i - x2i; + a[j1] = x1r - x3i; + a[j1 + 1] = x1i + x3r; + a[j3] = x1r + x3i; + a[j3 + 1] = x1i - x3r; + } + } else { + for (j = 0; j < l; j += 2) { + j1 = j + l; + x0r = a[j] - a[j1]; + x0i = a[j + 1] - a[j1 + 1]; + a[j] += a[j1]; + a[j + 1] += a[j1 + 1]; + a[j1] = x0r; + a[j1 + 1] = x0i; + } + } +} + + +void cftbsub(int n, float *a, float *w) +{ + void cft1st(int n, float *a, float *w); + void cftmdl(int n, int l, float *a, float *w); + int j, j1, j2, j3, l; + float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + + l = 2; + if (n > 8) { + cft1st(n, a, w); + l = 8; + while ((l << 2) < n) { + cftmdl(n, l, a, w); + l <<= 2; + } + } + if ((l << 2) == n) { + for (j = 0; j < l; j += 2) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + x0r = a[j] + a[j1]; + x0i = -a[j + 1] - a[j1 + 1]; + x1r = a[j] - a[j1]; + x1i = -a[j + 1] + a[j1 + 1]; + x2r = a[j2] + a[j3]; + x2i = a[j2 + 1] + a[j3 + 1]; + x3r = a[j2] - a[j3]; + x3i = a[j2 + 1] - a[j3 + 1]; + a[j] = x0r + x2r; + a[j + 1] = x0i - x2i; + a[j2] = x0r - x2r; + a[j2 + 1] = x0i + x2i; + a[j1] = x1r - x3i; + a[j1 + 1] = x1i - x3r; + a[j3] = x1r + x3i; + a[j3 + 1] = x1i + x3r; + } + } else { + for (j = 0; j < l; j += 2) { + j1 = j + l; + x0r = a[j] - a[j1]; + x0i = -a[j + 1] + a[j1 + 1]; + a[j] += a[j1]; + a[j + 1] = -a[j + 1] - a[j1 + 1]; + a[j1] = x0r; + a[j1 + 1] = x0i; + } + } +} + + +void 
cft1st(int n, float *a, float *w) +{ + int j, k1, k2; + float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; + float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + + x0r = a[0] + a[2]; + x0i = a[1] + a[3]; + x1r = a[0] - a[2]; + x1i = a[1] - a[3]; + x2r = a[4] + a[6]; + x2i = a[5] + a[7]; + x3r = a[4] - a[6]; + x3i = a[5] - a[7]; + a[0] = x0r + x2r; + a[1] = x0i + x2i; + a[4] = x0r - x2r; + a[5] = x0i - x2i; + a[2] = x1r - x3i; + a[3] = x1i + x3r; + a[6] = x1r + x3i; + a[7] = x1i - x3r; + wk1r = w[2]; + x0r = a[8] + a[10]; + x0i = a[9] + a[11]; + x1r = a[8] - a[10]; + x1i = a[9] - a[11]; + x2r = a[12] + a[14]; + x2i = a[13] + a[15]; + x3r = a[12] - a[14]; + x3i = a[13] - a[15]; + a[8] = x0r + x2r; + a[9] = x0i + x2i; + a[12] = x2i - x0i; + a[13] = x0r - x2r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[10] = wk1r * (x0r - x0i); + a[11] = wk1r * (x0r + x0i); + x0r = x3i + x1r; + x0i = x3r - x1i; + a[14] = wk1r * (x0i - x0r); + a[15] = wk1r * (x0i + x0r); + k1 = 0; + for (j = 16; j < n; j += 16) { + k1 += 2; + k2 = 2 * k1; + wk2r = w[k1]; + wk2i = w[k1 + 1]; + wk1r = w[k2]; + wk1i = w[k2 + 1]; + wk3r = wk1r - 2 * wk2i * wk1i; + wk3i = 2 * wk2i * wk1r - wk1i; + x0r = a[j] + a[j + 2]; + x0i = a[j + 1] + a[j + 3]; + x1r = a[j] - a[j + 2]; + x1i = a[j + 1] - a[j + 3]; + x2r = a[j + 4] + a[j + 6]; + x2i = a[j + 5] + a[j + 7]; + x3r = a[j + 4] - a[j + 6]; + x3i = a[j + 5] - a[j + 7]; + a[j] = x0r + x2r; + a[j + 1] = x0i + x2i; + x0r -= x2r; + x0i -= x2i; + a[j + 4] = wk2r * x0r - wk2i * x0i; + a[j + 5] = wk2r * x0i + wk2i * x0r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j + 2] = wk1r * x0r - wk1i * x0i; + a[j + 3] = wk1r * x0i + wk1i * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j + 6] = wk3r * x0r - wk3i * x0i; + a[j + 7] = wk3r * x0i + wk3i * x0r; + wk1r = w[k2 + 2]; + wk1i = w[k2 + 3]; + wk3r = wk1r - 2 * wk2r * wk1i; + wk3i = 2 * wk2r * wk1r - wk1i; + x0r = a[j + 8] + a[j + 10]; + x0i = a[j + 9] + a[j + 11]; + x1r = a[j + 8] - a[j + 10]; + x1i = a[j + 9] - a[j + 11]; + x2r = a[j + 12] 
+ a[j + 14]; + x2i = a[j + 13] + a[j + 15]; + x3r = a[j + 12] - a[j + 14]; + x3i = a[j + 13] - a[j + 15]; + a[j + 8] = x0r + x2r; + a[j + 9] = x0i + x2i; + x0r -= x2r; + x0i -= x2i; + a[j + 12] = -wk2i * x0r - wk2r * x0i; + a[j + 13] = -wk2i * x0i + wk2r * x0r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j + 10] = wk1r * x0r - wk1i * x0i; + a[j + 11] = wk1r * x0i + wk1i * x0r; + x0r = x1r + x3i; + x0i = x1i - x3r; + a[j + 14] = wk3r * x0r - wk3i * x0i; + a[j + 15] = wk3r * x0i + wk3i * x0r; + } +} + + +void cftmdl(int n, int l, float *a, float *w) +{ + int j, j1, j2, j3, k, k1, k2, m, m2; + float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; + float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + + m = l << 2; + for (j = 0; j < l; j += 2) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + x0r = a[j] + a[j1]; + x0i = a[j + 1] + a[j1 + 1]; + x1r = a[j] - a[j1]; + x1i = a[j + 1] - a[j1 + 1]; + x2r = a[j2] + a[j3]; + x2i = a[j2 + 1] + a[j3 + 1]; + x3r = a[j2] - a[j3]; + x3i = a[j2 + 1] - a[j3 + 1]; + a[j] = x0r + x2r; + a[j + 1] = x0i + x2i; + a[j2] = x0r - x2r; + a[j2 + 1] = x0i - x2i; + a[j1] = x1r - x3i; + a[j1 + 1] = x1i + x3r; + a[j3] = x1r + x3i; + a[j3 + 1] = x1i - x3r; + } + wk1r = w[2]; + for (j = m; j < l + m; j += 2) { + j1 = j + l; + j2 = j1 + l; + j3 = j2 + l; + x0r = a[j] + a[j1]; + x0i = a[j + 1] + a[j1 + 1]; + x1r = a[j] - a[j1]; + x1i = a[j + 1] - a[j1 + 1]; + x2r = a[j2] + a[j3]; + x2i = a[j2 + 1] + a[j3 + 1]; + x3r = a[j2] - a[j3]; + x3i = a[j2 + 1] - a[j3 + 1]; + a[j] = x0r + x2r; + a[j + 1] = x0i + x2i; + a[j2] = x2i - x0i; + a[j2 + 1] = x0r - x2r; + x0r = x1r - x3i; + x0i = x1i + x3r; + a[j1] = wk1r * (x0r - x0i); + a[j1 + 1] = wk1r * (x0r + x0i); + x0r = x3i + x1r; + x0i = x3r - x1i; + a[j3] = wk1r * (x0i - x0r); + a[j3 + 1] = wk1r * (x0i + x0r); + } + k1 = 0; + m2 = 2 * m; + for (k = m2; k < n; k += m2) { + k1 += 2; + k2 = 2 * k1; + wk2r = w[k1]; + wk2i = w[k1 + 1]; + wk1r = w[k2]; + wk1i = w[k2 + 1]; + wk3r = wk1r - 2 * wk2i * wk1i; + wk3i = 2 * wk2i * wk1r - wk1i; 
/*
 * rftfsub: in-place butterfly over the interleaved (re, im) pairs of a[].
 *
 * For every even j in (0, n/2) the pair at a[j], a[j+1] is combined with its
 * mirror pair at a[n-j], a[n-j+1], using coefficients taken from the table c
 * (c[kk] and 0.5 - c[nc - kk], with kk advancing by 2*nc/(n/2) per step).
 * NOTE(review): presumably the post-processing step of the forward real FFT
 * (rdft) -- confirm against the caller.
 *
 *   n  - number of floats in a[] that take part (pairs up to index n-1)
 *   a  - data, modified in place
 *   nc - index of the last usable entry of c (c[0..nc] is read)
 *   c  - coefficient table, read only
 */
void rftfsub(int n, float *a, int nc, float *c)
{
    int j, k, kk, ks, m;
    float wkr, wki, xr, xi, yr, yi;

    m = n >> 1;
    /* The stride is only used when the loop below runs (m > 2).  Computing it
     * unconditionally divides by zero for n < 2. */
    ks = (m > 2) ? 2 * nc / m : 0;
    kk = 0;
    for (j = 2; j < m; j += 2) {
        k = n - j;
        kk += ks;
        wkr = 0.5f - c[nc - kk];
        wki = c[kk];
        xr = a[j] - a[k];
        xi = a[j + 1] + a[k + 1];
        yr = wkr * xr - wki * xi;
        yi = wkr * xi + wki * xr;
        a[j] -= yr;
        a[j + 1] -= yi;
        a[k] += yr;
        a[k + 1] -= yi;
    }
}


/*
 * rftbsub: counterpart of rftfsub with the opposite rotation sign.
 *
 * Negates a[1] and a[n/2 + 1], and for every even j in (0, n/2) combines the
 * pair at a[j], a[j+1] with its mirror pair at a[n-j], a[n-j+1]; the
 * imaginary slots are additionally reflected (yi - a[...]).
 * NOTE(review): presumably the pre-processing step of the inverse real FFT --
 * confirm against the caller.
 *
 * Parameters are the same as for rftfsub.
 */
void rftbsub(int n, float *a, int nc, float *c)
{
    int j, k, kk, ks, m;
    float wkr, wki, xr, xi, yr, yi;

    a[1] = -a[1];
    m = n >> 1;
    /* Only divide when the loop will actually use the stride (m > 2); this
     * avoids a division by zero for n < 2. */
    ks = (m > 2) ? 2 * nc / m : 0;
    kk = 0;
    for (j = 2; j < m; j += 2) {
        k = n - j;
        kk += ks;
        wkr = 0.5f - c[nc - kk];
        wki = c[kk];
        xr = a[j] - a[k];
        xi = a[j + 1] + a[k + 1];
        yr = wkr * xr + wki * xi;
        yi = wkr * xi - wki * xr;
        a[j] -= yr;
        a[j + 1] = yi - a[j + 1];
        a[k] += yr;
        a[k + 1] = yi - a[k + 1];
    }
    a[m + 1] = -a[m + 1];
}


/*
 * dctsub: in-place rotation of the mirrored elements a[j] / a[n-j] for
 * j in [1, n/2), using the difference and sum of c[kk] and c[nc - kk] as
 * the two rotation coefficients, followed by scaling a[n/2] by c[0].
 * NOTE(review): the name suggests a DCT helper -- confirm against the caller.
 *
 *   n  - number of floats in a[] that take part
 *   a  - data, modified in place
 *   nc - index of the last usable entry of c (c[0..nc] is read)
 *   c  - coefficient table, read only
 */
void dctsub(int n, float *a, int nc, float *c)
{
    int j, k, kk, ks, m;
    float wkr, wki, xr;

    m = n >> 1;
    /* The loop runs only for m > 1 (n >= 4); skipping the division otherwise
     * avoids dividing by zero when n == 0. */
    ks = (m > 1) ? nc / n : 0;
    kk = 0;
    for (j = 1; j < m; j++) {
        k = n - j;
        kk += ks;
        wkr = c[kk] - c[nc - kk];
        wki = c[kk] + c[nc - kk];
        xr = wki * a[j] - wkr * a[k];
        a[j] = wkr * a[j] + wki * a[k];
        a[k] = xr;
    }
    a[m] *= c[0];
}


/*
 * dstsub: same as dctsub with the roles of a[j] and a[n-j] exchanged.
 * NOTE(review): the name suggests a DST helper -- confirm against the caller.
 *
 * Parameters are the same as for dctsub.
 */
void dstsub(int n, float *a, int nc, float *c)
{
    int j, k, kk, ks, m;
    float wkr, wki, xr;

    m = n >> 1;
    /* See dctsub: the stride is needed only when the loop runs. */
    ks = (m > 1) ? nc / n : 0;
    kk = 0;
    for (j = 1; j < m; j++) {
        k = n - j;
        kk += ks;
        wkr = c[kk] - c[nc - kk];
        wki = c[kk] + c[nc - kk];
        xr = wki * a[k] - wkr * a[j];
        a[k] = wkr * a[k] + wki * a[j];
        a[j] = xr;
    }
    a[m] *= c[0];
}
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_FFT4G_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_FFT4G_H_ + +void rdft(int, int, float *, int *, float *); +void cdft(int, int, float *, int *, float *); + +#endif + diff --git a/src/modules/audio_processing/utility/ring_buffer.c b/src/modules/audio_processing/utility/ring_buffer.c new file mode 100644 index 0000000000..ea2e3544be --- /dev/null +++ b/src/modules/audio_processing/utility/ring_buffer.c @@ -0,0 +1,239 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * Provides a generic ring buffer that can be written to and read from with + * arbitrarily sized blocks. The AEC uses this for several different tasks. 
/*
 * Provides a generic ring buffer that can be written to and read from with
 * arbitrarily sized blocks. The AEC uses this for several different tasks.
 */

#include <stdlib.h>
#include <string.h>

/*
 * Interface of the AEC generic buffer (contents of ring_buffer.h).
 */
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_RING_BUFFER_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_RING_BUFFER_H_

// Determines buffer datatype
typedef short bufdata_t;

// Unless otherwise specified, functions return 0 on success and -1 on error.
// Every function returns -1 when handed a NULL buffer instance.
int WebRtcApm_CreateBuffer(void **bufInst, int size);
int WebRtcApm_InitBuffer(void *bufInst);
int WebRtcApm_FreeBuffer(void *bufInst);

// Returns number of samples read
int WebRtcApm_ReadBuffer(void *bufInst, bufdata_t *data, int size);

// Returns number of samples written
int WebRtcApm_WriteBuffer(void *bufInst, const bufdata_t *data, int size);

// Returns number of samples flushed
int WebRtcApm_FlushBuffer(void *bufInst, int size);

// Returns number of samples stuffed
int WebRtcApm_StuffBuffer(void *bufInst, int size);

// Returns number of samples in buffer
int WebRtcApm_get_buffer_size(const void *bufInst);

#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_RING_BUFFER_H_

/*
 * Implementation (contents of ring_buffer.c).
 */

// Ring buffer state. readPos and writePos are element indices into data.
typedef struct {
    int readPos;
    int writePos;
    int size;        // capacity in elements
    char rwWrap;     // SAME_WRAP or DIFF_WRAP
    bufdata_t *data;
} buf_t;

// SAME_WRAP: the writer has not wrapped past the end since the reader last
// did, so the stored samples are data[readPos .. writePos).
// DIFF_WRAP: the writer has wrapped to the start and the reader has not, so
// the stored samples are data[readPos .. size) followed by data[0 .. writePos).
enum {SAME_WRAP, DIFF_WRAP};

// Allocates a buffer holding up to |size| elements and stores its handle in
// *bufInst. The new buffer is empty and zero-filled, so it is usable even
// before WebRtcApm_InitBuffer() is called.
// Returns 0 on success. Returns -1 if bufInst is NULL, size is negative or an
// allocation fails; on allocation failure *bufInst is set to NULL so the
// caller never holds a handle to freed memory.
int WebRtcApm_CreateBuffer(void **bufInst, int size)
{
    buf_t *buf = NULL;

    if (bufInst == NULL) {
        return -1;
    }

    if (size < 0) {
        return -1;
    }

    buf = malloc(sizeof *buf);
    if (buf == NULL) {
        *bufInst = NULL;
        return -1;
    }

    buf->data = calloc((size_t)size, sizeof *buf->data);
    if (buf->data == NULL) {
        free(buf);
        // Do not leave a dangling handle behind.
        *bufInst = NULL;
        return -1;
    }

    buf->size = size;
    buf->readPos = 0;
    buf->writePos = 0;
    buf->rwWrap = SAME_WRAP;

    *bufInst = buf;
    return 0;
}

// Resets the buffer to the empty state and zeroes its storage.
int WebRtcApm_InitBuffer(void *bufInst)
{
    buf_t *buf = (buf_t*)bufInst;

    if (buf == NULL) {
        return -1;
    }

    buf->readPos = 0;
    buf->writePos = 0;
    buf->rwWrap = SAME_WRAP;

    // Initialize buffer to zeros
    memset(buf->data, 0, sizeof(bufdata_t)*buf->size);

    return 0;
}

// Releases the buffer and its storage.
int WebRtcApm_FreeBuffer(void *bufInst)
{
    buf_t *buf = (buf_t*)bufInst;

    if (buf == NULL) {
        return -1;
    }

    free(buf->data);
    free(buf);

    return 0;
}

// Copies up to |size| elements into |data| and advances the read position.
// Returns the number of elements copied, which is less than |size| when the
// buffer holds fewer elements. Returns -1 on a NULL argument or when |size|
// is not in [1, capacity].
int WebRtcApm_ReadBuffer(void *bufInst, bufdata_t *data, int size)
{
    buf_t *buf = (buf_t*)bufInst;
    int n = 0, margin = 0;

    if (buf == NULL || data == NULL) {
        return -1;
    }

    if (size <= 0 || size > buf->size) {
        return -1;
    }

    n = size;
    if (buf->rwWrap == DIFF_WRAP) {
        // Elements available before the read position reaches the end.
        margin = buf->size - buf->readPos;
        if (n > margin) {
            // Drain the tail, wrap, and continue from the start below.
            buf->rwWrap = SAME_WRAP;
            memcpy(data, buf->data + buf->readPos,
                sizeof(bufdata_t)*margin);
            buf->readPos = 0;
            n = size - margin;
        }
        else {
            memcpy(data, buf->data + buf->readPos,
                sizeof(bufdata_t)*n);
            buf->readPos += n;
            return n;
        }
    }

    if (buf->rwWrap == SAME_WRAP) {
        // Never read past the write position.
        margin = buf->writePos - buf->readPos;
        if (margin > n) {
            margin = n;
        }
        memcpy(data + size - n, buf->data + buf->readPos,
            sizeof(bufdata_t)*margin);
        buf->readPos += margin;
        n -= margin;
    }

    return size - n;
}

// Copies up to |size| elements from |data| into the buffer and advances the
// write position. Returns the number of elements stored, which is less than
// |size| when the buffer fills up. Returns -1 on a NULL argument or when
// |size| is not in [0, capacity].
int WebRtcApm_WriteBuffer(void *bufInst, const bufdata_t *data, int size)
{
    buf_t *buf = (buf_t*)bufInst;
    int n = 0, margin = 0;

    if (buf == NULL) {
        return -1;
    }

    if (size < 0 || size > buf->size) {
        return -1;
    }

    if (size == 0) {
        // Nothing to copy; |data| is not touched.
        return 0;
    }

    if (data == NULL) {
        return -1;
    }

    n = size;
    if (buf->rwWrap == SAME_WRAP) {
        // Free elements before the write position reaches the end.
        margin = buf->size - buf->writePos;
        if (n > margin) {
            // Fill the tail, wrap, and continue from the start below.
            buf->rwWrap = DIFF_WRAP;
            memcpy(buf->data + buf->writePos, data,
                sizeof(bufdata_t)*margin);
            buf->writePos = 0;
            n = size - margin;
        }
        else {
            memcpy(buf->data + buf->writePos, data,
                sizeof(bufdata_t)*n);
            buf->writePos += n;
            return n;
        }
    }

    if (buf->rwWrap == DIFF_WRAP) {
        // Never write past the read position.
        margin = buf->readPos - buf->writePos;
        if (margin > n) {
            margin = n;
        }
        memcpy(buf->data + buf->writePos, data + size - n,
            sizeof(bufdata_t)*margin);
        buf->writePos += margin;
        n -= margin;
    }

    return size - n;
}

// Discards up to |size| elements by advancing the read position without
// copying. Returns the number of elements discarded, or -1 on a NULL buffer
// or when |size| is not in [1, capacity].
int WebRtcApm_FlushBuffer(void *bufInst, int size)
{
    buf_t *buf = (buf_t*)bufInst;
    int n = 0, margin = 0;

    if (buf == NULL) {
        return -1;
    }

    if (size <= 0 || size > buf->size) {
        return -1;
    }

    n = size;
    if (buf->rwWrap == DIFF_WRAP) {
        margin = buf->size - buf->readPos;
        if (n > margin) {
            buf->rwWrap = SAME_WRAP;
            buf->readPos = 0;
            n = size - margin;
        }
        else {
            buf->readPos += n;
            return n;
        }
    }

    if (buf->rwWrap == SAME_WRAP) {
        margin = buf->writePos - buf->readPos;
        if (margin > n) {
            margin = n;
        }
        buf->readPos += margin;
        n -= margin;
    }

    return size - n;
}

// Moves the read position backwards by up to |size| elements, so that
// previously stored elements become readable again. Returns the number of
// elements stuffed, or -1 on a NULL buffer or when |size| is not in
// [1, capacity].
int WebRtcApm_StuffBuffer(void *bufInst, int size)
{
    buf_t *buf = (buf_t*)bufInst;
    int n = 0, margin = 0;

    if (buf == NULL) {
        return -1;
    }

    if (size <= 0 || size > buf->size) {
        return -1;
    }

    n = size;
    if (buf->rwWrap == SAME_WRAP) {
        margin = buf->readPos;
        if (n > margin) {
            // Step back past index 0 onto the last element; that takes
            // margin + 1 steps in total.
            buf->rwWrap = DIFF_WRAP;
            buf->readPos = buf->size - 1;
            n -= margin + 1;
        }
        else {
            buf->readPos -= n;
            return n;
        }
    }

    if (buf->rwWrap == DIFF_WRAP) {
        margin = buf->readPos - buf->writePos;
        if (margin > n) {
            margin = n;
        }
        buf->readPos -= margin;
        n -= margin;
    }

    return size - n;
}

// Returns the number of elements currently stored, or -1 on a NULL buffer.
int WebRtcApm_get_buffer_size(const void *bufInst)
{
    const buf_t *buf = (const buf_t*)bufInst;

    if (buf == NULL) {
        return -1;
    }

    if (buf->rwWrap == SAME_WRAP) {
        return buf->writePos - buf->readPos;
    }

    return buf->size - buf->readPos + buf->writePos;
}
WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +{ + 'includes': [ + '../../../common_settings.gypi', + ], + 'targets': [ + { + 'target_name': 'apm_util', + 'type': '<(library)', + 'direct_dependent_settings': { + 'include_dirs': [ + '.', + ], + }, + 'sources': [ + 'ring_buffer.c', + 'ring_buffer.h', + 'fft4g.c', + 'fft4g.h', + ], + }, + ], +} + +# Local Variables: +# tab-width:2 +# indent-tabs-mode:nil +# End: +# vim: set expandtab tabstop=2 shiftwidth=2: |