diff options
Diffstat (limited to 'celt/mdct.c')
-rw-r--r-- | celt/mdct.c | 113 |
1 files changed, 46 insertions, 67 deletions
diff --git a/celt/mdct.c b/celt/mdct.c index 16a36c6..90a214a 100644 --- a/celt/mdct.c +++ b/celt/mdct.c @@ -109,12 +109,14 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar int N, N2, N4; kiss_twiddle_scalar sine; VARDECL(kiss_fft_scalar, f); + VARDECL(kiss_fft_scalar, f2); SAVE_STACK; N = l->n; N >>= shift; N2 = N>>1; N4 = N>>2; ALLOC(f, N2, kiss_fft_scalar); + ALLOC(f2, N2, kiss_fft_scalar); /* sin(x) ~= x here */ #ifdef FIXED_POINT sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N; @@ -131,7 +133,7 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar kiss_fft_scalar * OPUS_RESTRICT yp = f; const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1); const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1; - for(i=0;i<(overlap>>2);i++) + for(i=0;i<((overlap+3)>>2);i++) { /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/ *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2); @@ -143,7 +145,7 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar } wp1 = window; wp2 = window+overlap-1; - for(;i<N4-(overlap>>2);i++) + for(;i<N4-((overlap+3)>>2);i++) { /* Real part arranged as a-bR, Imag part arranged as -c-dR */ *yp++ = *xp2; @@ -180,12 +182,12 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar } /* N/4 complex FFT, down-scales by 4/N */ - opus_fft(l->kfft[shift], (kiss_fft_cpx *)f, (kiss_fft_cpx *)in); + opus_fft(l->kfft[shift], (kiss_fft_cpx *)f, (kiss_fft_cpx *)f2); /* Post-rotate */ { /* Temp pointers to make it really clear to the compiler what we're doing */ - const kiss_fft_scalar * OPUS_RESTRICT fp = in; + const kiss_fft_scalar * OPUS_RESTRICT fp = f2; kiss_fft_scalar * OPUS_RESTRICT yp1 = out; kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1); const kiss_twiddle_scalar *t = &l->trig[0]; @@ -212,14 +214,12 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scala int i; int N, N2, N4; kiss_twiddle_scalar sine; - VARDECL(kiss_fft_scalar, f); VARDECL(kiss_fft_scalar, f2); SAVE_STACK; N = l->n; N >>= shift; N2 = N>>1; N4 = N>>2; - ALLOC(f, N2, kiss_fft_scalar); ALLOC(f2, N2, kiss_fft_scalar); /* sin(x) ~= x here */ #ifdef FIXED_POINT @@ -249,81 +249,60 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scala } /* Inverse N/4 complex FFT. This one should *not* downscale even in fixed-point */ - opus_ifft(l->kfft[shift], (kiss_fft_cpx *)f2, (kiss_fft_cpx *)f); + opus_ifft(l->kfft[shift], (kiss_fft_cpx *)f2, (kiss_fft_cpx *)(out+(overlap>>1))); - /* Post-rotate */ + /* Post-rotate and de-shuffle from both ends of the buffer at once to make + it in-place. */ { - kiss_fft_scalar * OPUS_RESTRICT fp = f; + kiss_fft_scalar * OPUS_RESTRICT yp0 = out+(overlap>>1); + kiss_fft_scalar * OPUS_RESTRICT yp1 = out+(overlap>>1)+N2-2; const kiss_twiddle_scalar *t = &l->trig[0]; - - for(i=0;i<N4;i++) + /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the + middle pair will be computed twice. */ + for(i=0;i<(N4+1)>>1;i++) { kiss_fft_scalar re, im, yr, yi; - re = fp[0]; - im = fp[1]; + kiss_twiddle_scalar t0, t1; + re = yp0[0]; + im = yp0[1]; + t0 = t[i<<shift]; + t1 = t[(N4-i)<<shift]; /* We'd scale up by 2 here, but instead it's done when mixing the windows */ - yr = S_MUL(re,t[i<<shift]) - S_MUL(im,t[(N4-i)<<shift]); - yi = S_MUL(im,t[i<<shift]) + S_MUL(re,t[(N4-i)<<shift]); + yr = S_MUL(re,t0) - S_MUL(im,t1); + yi = S_MUL(im,t0) + S_MUL(re,t1); + re = yp1[0]; + im = yp1[1]; /* works because the cos is nearly one */ - *fp++ = yr - S_MUL(yi,sine); - *fp++ = yi + S_MUL(yr,sine); - } - } - /* De-shuffle the components for the middle of the window only */ - { - const kiss_fft_scalar * OPUS_RESTRICT fp1 = f; - const kiss_fft_scalar * OPUS_RESTRICT fp2 = f+N2-1; - kiss_fft_scalar * OPUS_RESTRICT yp = f2; - for(i = 0; i < N4; i++) - { - *yp++ =-*fp1; - *yp++ = *fp2; - fp1 += 2; - fp2 -= 2; + yp0[0] = -(yr - S_MUL(yi,sine)); + yp1[1] = yi + S_MUL(yr,sine); + + t0 = t[(N4-i-1)<<shift]; + t1 = t[(i+1)<<shift]; + /* We'd scale up by 2 here, but instead it's done when mixing the windows */ + yr = S_MUL(re,t0) - S_MUL(im,t1); + yi = S_MUL(im,t0) + S_MUL(re,t1); + /* works because the cos is nearly one */ + yp1[0] = -(yr - S_MUL(yi,sine)); + yp0[1] = yi + S_MUL(yr,sine); + yp0 += 2; + yp1 -= 2; } } - out -= (N2-overlap)>>1; + /* Mirror on both sides for TDAC */ { - kiss_fft_scalar * OPUS_RESTRICT fp1 = f2+N4-1; - kiss_fft_scalar * OPUS_RESTRICT xp1 = out+N2-1; - kiss_fft_scalar * OPUS_RESTRICT yp1 = out+N4-overlap/2; - const opus_val16 * OPUS_RESTRICT wp1 = window; - const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1; - for(i = 0; i< N4-overlap/2; i++) - { - *xp1 = *fp1; - xp1--; - fp1--; - } - for(; i < N4; i++) - { - kiss_fft_scalar x1; - x1 = *fp1--; - *yp1++ +=-MULT16_32_Q15(*wp1, x1); - *xp1-- += MULT16_32_Q15(*wp2, x1); - wp1++; - wp2--; - } - } - { - kiss_fft_scalar * OPUS_RESTRICT fp2 = f2+N4; - kiss_fft_scalar * OPUS_RESTRICT xp2 = out+N2; - kiss_fft_scalar * OPUS_RESTRICT yp2 = out+N-1-(N4-overlap/2); + kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1; + kiss_fft_scalar * OPUS_RESTRICT yp1 = out; const opus_val16 * OPUS_RESTRICT wp1 = window; const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1; - for(i = 0; i< N4-overlap/2; i++) - { - *xp2 = *fp2; - xp2++; - fp2++; - } - for(; i < N4; i++) + + for(i = 0; i < overlap/2; i++) { - kiss_fft_scalar x2; - x2 = *fp2++; - *yp2-- = MULT16_32_Q15(*wp1, x2); - *xp2++ = MULT16_32_Q15(*wp2, x2); + kiss_fft_scalar x1, x2; + x1 = *xp1; + x2 = *yp1; + *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1); + *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1); wp1++; wp2--; } |