aboutsummaryrefslogtreecommitdiff
path: root/tmv/kiss_fft_tm.h
diff options
context:
space:
mode:
Diffstat (limited to 'tmv/kiss_fft_tm.h')
-rw-r--r--tmv/kiss_fft_tm.h599
1 files changed, 599 insertions, 0 deletions
diff --git a/tmv/kiss_fft_tm.h b/tmv/kiss_fft_tm.h
new file mode 100644
index 0000000..0a69f3d
--- /dev/null
+++ b/tmv/kiss_fft_tm.h
@@ -0,0 +1,599 @@
+/* Copyright (C) 2007 Hong Zhiqian */
+/**
+ @file kiss_fft_tm.h
+ @author Hong Zhiqian
+ @brief Optimized KISS FFT butterfly routines for Speex (TriMedia version)
+*/
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ - Neither the name of the Xiph.org Foundation nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "_kiss_fft_guts_tm.h"
+
+#ifdef TM_ASM
+
+#include "profile_tm.h"
+
+#ifdef FIXED_POINT
+
+#define OVERRIDE_KFBFLY2 /* Flags kf_bfly2 as overridden; presumably the generic
+ * FFT source skips its own kf_bfly2 when this is defined -- confirm where the
+ * macro is tested.
+ *
+ * kf_bfly2 (TM_ASM + FIXED_POINT): radix-2 butterfly over m element pairs.
+ *
+ * Fout    - data array, accessed through int pointers, i.e. one 32-bit word
+ *           per element (presumably a packed 16-bit real/imag pair -- TODO
+ *           confirm against kiss_fft_cpx in the fixed-point build).
+ * fstride - amount the twiddle pointer advances per iteration, in words.
+ * st      - FFT state; st->twiddles and st->inverse are read.
+ * m       - number of butterflies; the second input starts m words past Fout.
+ *
+ * ld32x/ld32/st32d and the TM_* helpers are not defined in this file. The
+ * comments below assume TM_op(dst, a, b) writes dst = a op b on packed
+ * halfwords; verify against _kiss_fft_guts_tm.h.
+ */
+static void kf_bfly2(
+ kiss_fft_cpx *Fout,
+ const int fstride,
+ const kiss_fft_cfg st,
+ int m
+ )
+{
+ register int * restrict Fout2;
+ register int * restrict tw1 = (int*)st->twiddles;
+ register int i, j;
+ register int _inv = !st->inverse; /* nonzero when st->inverse is 0 */
+
+ Fout2 = (int*)Fout + m; /* second butterfly input: m words past Fout */
+
+ for ( i=0,j=0 ; i<m ; ++i,j+=4,tw1+=fstride ) /* i indexes the loads; j (step 4) is the st32d offset, presumably in bytes */
+ { register int tw_10, ff_10, f2_10;
+
+ ff_10 = ld32x(Fout, i);
+ f2_10 = ld32x(Fout2, i);
+ tw_10 = ld32(tw1);
+
+ if ( _inv ) /* st->inverse == 0: both inputs go through TM_SHR by 1 (presumably a per-halfword halving -- confirm) */
+ { TM_SHR(f2_10, f2_10, 1);
+ TM_SHR(ff_10, ff_10, 1);
+ }
+
+ TM_MUL(tw_10, tw_10, f2_10); /* twiddle times second input (presumably a complex product -- confirm TM_MUL) */
+ TM_SUB(f2_10, ff_10, tw_10); /* difference is written back to Fout2 below */
+ TM_ADD(ff_10, ff_10, tw_10); /* sum is written back to Fout below */
+
+ st32d(j, Fout2, f2_10);
+ st32d(j, Fout, ff_10);
+ }
+}
+
+#define OVERRIDE_KFBFLY4 /* Flags kf_bfly4 as overridden; presumably the generic
+ * FFT source skips its own kf_bfly4 when this is defined -- confirm where the
+ * macro is tested.
+ *
+ * kf_bfly4 (TM_ASM + FIXED_POINT): radix-4 butterfly over m element quads.
+ *
+ * Fout    - data array, accessed as one 32-bit word per element (presumably
+ *           a packed 16-bit real/imag pair -- TODO confirm). The four inputs
+ *           of butterfly i are at word offsets i, m+i, 2m+i and 3m+i.
+ * fstride - base twiddle step; the three twiddle pointers advance by
+ *           fstride, 2*fstride and 3*fstride words per iteration.
+ * st      - FFT state; st->twiddles and st->inverse are read.
+ * m       - number of butterflies.
+ *
+ * ld32x/ld32/st32d/funshift2 and the TM_* helpers are not defined in this
+ * file. The comments below assume TM_op(dst, a, b) writes dst = a op b on
+ * packed halfwords; verify against _kiss_fft_guts_tm.h.
+ */
+static void kf_bfly4(
+ kiss_fft_cpx *Fout,
+ const int fstride,
+ const kiss_fft_cfg st,
+ const int m
+ )
+{
+ register int * restrict tw1;
+ register int * restrict tw2;
+ register int * restrict tw3;
+ register int * restrict Fout1;
+ register int * restrict Fout2;
+ register int * restrict Fout3;
+ register int i, j;
+ register int fstride2, fstride3;
+ register int _inv = !st->inverse; /* nonzero when st->inverse is 0 */
+
+ tw3 = tw2 = tw1 = (int*)st->twiddles;
+ fstride2 = fstride << 1;
+ fstride3 = fstride * 3;
+
+ Fout1 = (int*)Fout + m;
+ Fout2 = (int*)Fout + (m << 1);
+ Fout3 = (int*)Fout + (m * 3);
+
+
+ for ( i=0,j=0 ; i<m ; ++i,j+=4,tw1+=fstride,tw2+=fstride2,tw3+=fstride3 ) /* i indexes the loads; j (step 4) is the st32d offset, presumably in bytes */
+ { register int sc0, sc1, sc2, sc3, sc4, sc5;
+ register int ff0;
+
+ sc0 = ld32x(Fout1,i);
+ sc3 = ld32(tw1);
+ sc1 = ld32x(Fout2, i);
+ sc4 = ld32(tw2);
+ sc2 = ld32x(Fout3, i);
+ sc5 = ld32(tw3);
+ ff0 = ld32x(Fout,i);
+
+ if ( _inv ) /* st->inverse == 0: add 0x00020002 then TM_SHR by 2 on all four inputs -- presumably a rounded divide-by-4 of each 16-bit half (confirm) */
+ {
+ TM_ADD(sc0, sc0, 0x00020002);
+ TM_ADD(sc1, sc1, 0x00020002);
+ TM_ADD(sc2, sc2, 0x00020002);
+ TM_ADD(ff0, ff0, 0x00020002);
+ TM_SHR(sc0, sc0, 2);
+ TM_SHR(sc1, sc1, 2);
+ TM_SHR(sc2, sc2, 2);
+ TM_SHR(ff0, ff0, 2);
+ }
+
+ TM_MUL(sc0, sc0, sc3); /* inputs 1..3 are multiplied by their twiddles; sc3..sc5 are then free for reuse */
+ TM_MUL(sc1, sc1, sc4);
+ TM_MUL(sc2, sc2, sc5);
+ TM_SUB(sc5, ff0, sc1);
+ TM_ADD(ff0, ff0, sc1);
+ TM_ADD(sc3, sc0, sc2);
+ TM_SUB(sc4, sc0, sc2);
+ TM_SUB(sc1, ff0, sc3);
+ TM_ADD(ff0, ff0, sc3);
+
+ st32d(j, Fout2, sc1);
+ st32d(j, Fout, ff0);
+
+ sc5 = funshift2(sc5, sc5); /* presumably swaps the two 16-bit halves of sc5 -- confirm funshift2 */
+
+ if ( _inv ) /* the roles of ff0 and sc1 (sum vs. difference) are exchanged between the two directions */
+ { TM_ADD(ff0, sc5, sc4);
+ TM_SUB(sc1, sc5, sc4);
+ } else
+ { TM_ADD(sc1, sc5, sc4);
+ TM_SUB(ff0, sc5, sc4);
+ }
+
+ sc0 = funshift2(sc1, ff0); /* recombines halves of sc1 and ff0 into the two remaining outputs (exact half selection depends on funshift2 -- confirm) */
+ sc2 = funshift2(ff0, sc1);
+
+ st32d(j, Fout1, sc0);
+ st32d(j, Fout3, sc2);
+ }
+}
+
+
+#define OVERRIDE_KFBFLY3 /* Flags kf_bfly3 as overridden; presumably the generic
+ * FFT source skips its own kf_bfly3 when this is defined -- confirm where the
+ * macro is tested.
+ *
+ * kf_bfly3 (TM_ASM + FIXED_POINT): radix-3 butterfly over m element triples.
+ *
+ * Fout    - data array, accessed as one 32-bit word per element (presumably
+ *           a packed 16-bit real/imag pair -- TODO confirm). The three inputs
+ *           of butterfly i are at word offsets i, m+i and 2m+i.
+ * fstride - base twiddle step; tw1 advances by fstride and tw2 by 2*fstride
+ *           words per iteration.
+ * st      - FFT state; st->twiddles and st->inverse are read.
+ * m       - number of butterflies.
+ *
+ * ld32x/ld32/st32d/pack16lsb/dspidualmul/funshift2 and the TM_* helpers are
+ * not defined in this file. The comments below assume TM_op(dst, a, b) writes
+ * dst = a op b on packed halfwords; verify against _kiss_fft_guts_tm.h.
+ */
+static void kf_bfly3(
+ kiss_fft_cpx *Fout,
+ const int fstride,
+ const kiss_fft_cfg st,
+ int m
+ )
+{
+ register int * restrict tw1;
+ register int * restrict tw2;
+ register int * restrict Fout1;
+ register int * restrict Fout2;
+ register int epi;
+ register int i, j;
+ register int fstride2;
+ register int _inv = !st->inverse; /* nonzero when st->inverse is 0 */
+
+ tw1 = tw2 = (int*)st->twiddles;
+ Fout1 = (int*)Fout + m;
+ Fout2 = (int*)Fout + (m << 1);
+ epi = tw1[fstride*m]; /* twiddle word at index fstride*m */
+ epi = pack16lsb(epi,epi); /* presumably replicates the low 16 bits into both halves -- confirm pack16lsb */
+ fstride2 = fstride << 1;
+
+ for ( i=0,j=0 ; i<m ; ++i,j+=4,tw1+=fstride,tw2+=fstride2 ) /* i indexes the loads; j (step 4) is the st32d offset, presumably in bytes */
+ { register int sc0, sc1, sc2, sc3, sc4, sc5;
+ register int ff0;
+
+ sc1 = ld32x(Fout1,i);
+ sc2 = ld32x(Fout2,i);
+ sc3 = ld32(tw1);
+ sc4 = ld32(tw2);
+ ff0 = ld32x(Fout,i);
+
+ if ( _inv ) /* st->inverse == 0: all three inputs go through TM_DIV by 3 */
+ {
+ TM_DIV(sc1, sc1, 3);
+ TM_DIV(sc2, sc2, 3);
+ TM_DIV(ff0, ff0, 3);
+ }
+
+ TM_MUL(sc1, sc1, sc3); /* inputs 1 and 2 are multiplied by their twiddles; sc3/sc4 are then reused */
+ TM_MUL(sc2, sc2, sc4);
+ TM_ADD(sc3, sc1, sc2);
+ TM_SUB(sc0, sc1, sc2);
+ TM_SHR(sc4, sc3, 1); /* sc3 shifted right by 1 (presumably half of the sum) */
+ TM_SUB(sc1, ff0, sc4);
+
+ sc0 = dspidualmul(sc0, epi); /* scales the difference by epi (presumably a saturating per-halfword multiply -- confirm) */
+ sc0 = funshift2(sc0, sc0); /* presumably swaps the two 16-bit halves -- confirm funshift2 */
+
+ TM_ADD(ff0, ff0, sc3);
+ TM_ADD(sc4, sc1, sc0);
+ TM_SUB(sc5, sc1, sc0);
+
+ sc1 = funshift2(sc4, sc5); /* outputs 1 and 2 are assembled from halves of sc4 and sc5 (exact half selection depends on funshift2 -- confirm) */
+ sc2 = funshift2(sc5, sc4);
+ sc2 = funshift2(sc2, sc2);
+
+ st32d(j, Fout1, sc1);
+ st32d(j, Fout, ff0);
+ st32d(j, Fout2, sc2);
+ }
+}
+
+
+#define OVERRIDE_KFBFLY5 /* Flags kf_bfly5 as overridden; presumably the generic
+ * FFT source skips its own kf_bfly5 when this is defined -- confirm where the
+ * macro is tested.
+ *
+ * kf_bfly5 (TM_ASM + FIXED_POINT): radix-5 butterfly over m element groups.
+ *
+ * Fout    - data array, accessed as one 32-bit word per element (presumably
+ *           a packed 16-bit real/imag pair -- TODO confirm). The five inputs
+ *           of butterfly i are at word offsets i, m+i, 2m+i, 3m+i and 4m+i.
+ * fstride - base twiddle step; tw1..tw4 advance by 1x..4x fstride words per
+ *           iteration.
+ * st      - FFT state; st->twiddles and st->inverse are read.
+ * m       - number of butterflies.
+ *
+ * ld32x/ld32/st32d/pack16msb/pack16lsb/funshift2/sex16/ifir16/sround and the
+ * TM_* helpers are not defined in this file. The comments below assume
+ * TM_op(dst, a, b) writes dst = a op b on packed halfwords; verify against
+ * _kiss_fft_guts_tm.h.
+ */
+static void kf_bfly5(
+ kiss_fft_cpx *Fout,
+ const int fstride,
+ const kiss_fft_cfg st,
+ int m
+ )
+{
+ register int * restrict tw1;
+ register int * restrict tw2;
+ register int * restrict tw3;
+ register int * restrict tw4;
+ register int * restrict Fout1;
+ register int * restrict Fout2;
+ register int * restrict Fout3;
+ register int * restrict Fout4;
+ register int fstride2, fstride3, fstride4;
+ register int i, j;
+ register int yab_msb, yab_lsb, yba_msb, yba_lsb;
+ register int _inv = !st->inverse; /* nonzero when st->inverse is 0 */
+
+
+ Fout1=(int*)Fout+m;
+ Fout2=(int*)Fout+(m<<1);
+ Fout3=(int*)Fout+(3 *m);
+ Fout4=(int*)Fout+(m<<2);
+
+ tw1 = tw2 = tw3 = tw4 = (int*)st->twiddles;
+
+ i = tw1[fstride*m]; /* i is a temporary here; it only becomes the loop counter below */
+ yab_lsb = tw1[fstride*(m<<1)]; /* twiddle word at index 2*fstride*m */
+ yab_msb = pack16msb(i, yab_lsb); /* presumably pairs the high halves of the two twiddle words -- confirm pack16msb */
+ yab_lsb = pack16lsb(i, yab_lsb); /* presumably pairs the low halves of the two twiddle words -- confirm pack16lsb */
+ yba_msb = funshift2(-sex16(yab_msb), yab_msb); /* second coefficient pair derived from yab_msb with one term negated (exact layout depends on funshift2/sex16 -- confirm) */
+ yba_lsb = funshift2(yab_lsb, yab_lsb); /* presumably yab_lsb with its halves swapped */
+
+ fstride2 = fstride << 1;
+ fstride3 = fstride * 3;
+ fstride4 = fstride << 2;
+
+ for ( i=0,j=0 ; i<m ; ++i,j+=4,tw1+=fstride,tw2+=fstride2,tw3+=fstride3,tw4+=fstride4 ) /* i indexes the loads; j (step 4) is the st32d offset, presumably in bytes */
+ { register int sc0, sc1, sc2, sc3, sc4, sc5, sc6;
+ register int sc7, sc8, sc9, sc10, sc11, sc12;
+ register int ff0, sc78_msb, sc78_lsb, sc90_msb, sc90_lsb;
+
+ sc0 = ld32x(Fout,i);
+ sc1 = ld32x(Fout1,i);
+ sc2 = ld32x(Fout2,i);
+ sc3 = ld32x(Fout3,i);
+ sc4 = ld32x(Fout4,i);
+ sc5 = ld32(tw1);
+ sc6 = ld32(tw2);
+ sc7 = ld32(tw3);
+ sc8 = ld32(tw4);
+
+ if ( _inv ) /* st->inverse == 0: all five inputs go through TM_DIV by 5 */
+ {
+ TM_DIV(sc0, sc0, 5);
+ TM_DIV(sc1, sc1, 5);
+ TM_DIV(sc2, sc2, 5);
+ TM_DIV(sc3, sc3, 5);
+ TM_DIV(sc4, sc4, 5);
+ }
+
+ ff0 = sc0; /* ff0 accumulates output 0; sc0 keeps the (scaled) input for outputs 1..4 */
+
+ TM_MUL(sc1, sc1, sc5); /* inputs 1..4 are multiplied by their twiddles; sc5..sc8 are then reused */
+ TM_MUL(sc2, sc2, sc6);
+ TM_MUL(sc3, sc3, sc7);
+ TM_MUL(sc4, sc4, sc8);
+ TM_ADD(sc7, sc1, sc4);
+ TM_SUB(sc10,sc1, sc4);
+ TM_ADD(sc8, sc2, sc3);
+ TM_SUB(sc9, sc2, sc3);
+
+ TM_ADD(ff0, ff0, sc7);
+ TM_ADD(ff0, ff0, sc8);
+ st32d(j, Fout, ff0);
+
+ sc78_msb = pack16msb(sc7,sc8); /* regroup the sums (sc7,sc8) and differences (sc10,sc9) by halfword for the ifir16 calls below */
+ sc78_lsb = pack16lsb(sc7,sc8);
+ sc90_msb = pack16msb(sc10,sc9);
+ sc90_lsb = pack16lsb(sc10,sc9);
+
+ sc5 = pack16lsb( sround(ifir16(sc78_msb,yab_lsb)), sround(ifir16(sc78_lsb,yab_lsb))); /* presumably rounded two-term dot products, repacked into one word -- confirm ifir16/sround */
+ sc6 = pack16lsb(-sround(ifir16(sc90_lsb,yab_msb)), sround(ifir16(sc90_msb,yab_msb)));
+
+ TM_ADD(sc5, sc5, sc0);
+ TM_SUB(sc1, sc5, sc6);
+ TM_ADD(sc4, sc5, sc6);
+ st32d(j, Fout1, sc1);
+ st32d(j, Fout4, sc4);
+
+ sc11 = pack16lsb( sround(ifir16(sc78_msb,yba_lsb)), sround(ifir16(sc78_lsb,yba_lsb))); /* same pattern as sc5/sc6, using the yba coefficient pair */
+ sc12 = pack16lsb(-sround(ifir16(sc90_lsb,yba_msb)), sround(ifir16(sc90_msb,yba_msb)));
+
+ TM_ADD(sc11, sc11, sc0);
+ TM_ADD(sc2, sc11, sc12);
+ TM_SUB(sc3, sc11, sc12);
+ st32d(j, Fout2, sc2);
+ st32d(j, Fout3, sc3);
+
+ }
+}
+
+
+#define OVERRIDE_KF_BFLY_GENERIC /* Flags kf_bfly_generic as overridden;
+ * presumably the generic FFT source skips its own version when this is
+ * defined -- confirm where the macro is tested.
+ *
+ * kf_bfly_generic (TM_ASM + FIXED_POINT): butterfly for an arbitrary radix p.
+ *
+ * Fout    - data array, accessed as one 32-bit word per element (presumably
+ *           a packed 16-bit real/imag pair -- TODO confirm).
+ * fstride - twiddle index step; the twiddle index grows by fstride*k per term.
+ * st      - FFT state; st->twiddles, st->nfft and st->inverse are read.
+ * m       - number of butterflies; the p inputs of butterfly i are at word
+ *           offsets i, i+m, ..., i+(p-1)*m.
+ * p       - radix.
+ *
+ * scratchbuf and nscratchbuf are not declared in this function; they must be
+ * visible wherever this header is included. ld32x/ld32/st32d, CHECKBUF and
+ * the TM_* helpers are likewise defined elsewhere. The comments below assume
+ * TM_op(dst, a, b) writes dst = a op b on packed halfwords -- verify.
+ */
+static void kf_bfly_generic(
+ kiss_fft_cpx * restrict Fout,
+ const size_t fstride,
+ const kiss_fft_cfg st,
+ int m,
+ int p
+ )
+{
+ register int _inv = !st->inverse; /* nonzero when st->inverse is 0 */
+ register int i, j, k, l;
+ register int * restrict twiddles = (int*)st->twiddles;
+ register int Norig = st->nfft;
+
+ CHECKBUF(scratchbuf,nscratchbuf,p); /* presumably ensures scratchbuf can hold p elements -- confirm CHECKBUF */
+
+ for ( i=0; i<m; ++i )
+ { register int sc10;
+
+ for ( j=0,k=i ; j<p ; ++j,k+=m ) /* gather the p inputs of butterfly i into scratchbuf[0..p-1] */
+ { register int f10;
+
+ f10 = ld32x(Fout,k);
+
+ if ( _inv ) /* st->inverse == 0: each input goes through TM_DIV by p */
+ { TM_DIV(f10, f10, p);
+ }
+
+ st32d(j<<2, scratchbuf, f10); /* offset j<<2, i.e. 4 per element (presumably bytes) */
+ }
+
+ for ( j=0,k=i,sc10=ld32(scratchbuf) ; j<p ; ++j,k+=m ) /* one output per pass; sc10 caches the first scratch word */
+ {
+ register int twidx = 0;
+ register int f10;
+
+ for ( l=1,f10 = sc10 ; l<p ; ++l ) /* f10 starts at scratch[0] and accumulates scratch[l] * twiddles[twidx] */
+ { register int tw, sc;
+
+ twidx += fstride * k;
+ if ( twidx>=Norig ) /* a single subtraction wraps the index; assumes fstride*k < Norig -- TODO confirm */
+ { twidx -= Norig;
+ }
+
+ sc = ld32x(scratchbuf,l);
+ tw = ld32x(twiddles,twidx);
+
+ TM_MUL(sc, sc, tw);
+ TM_ADD(f10, f10, sc);
+ }
+ st32d(k<<2, Fout, f10);
+ }
+ }
+}
+
+#else
+
+#define OVERRIDE_KFBFLY2
+/*
+ * kf_bfly2 (TM_ASM, non-FIXED_POINT build): radix-2 butterfly.
+ *
+ * For each of the m pairs (Fout[i], Fout[m+i]) the upper element is
+ * multiplied by the current twiddle, then the pair is replaced by the sum
+ * (lower slot) and difference (upper slot). The twiddle pointer advances
+ * by fstride entries per pair. st->inverse is not consulted here.
+ *
+ * The loop body runs before the counter is tested, so m must be >= 1.
+ */
+static void kf_bfly2(
+    kiss_fft_cpx * Fout,
+    const size_t fstride,
+    const kiss_fft_cfg st,
+    int m
+    )
+{
+    register kiss_fft_cpx * restrict upper = Fout + m;
+    register kiss_fft_cpx * restrict twiddle = st->twiddles;
+
+    do
+    {
+        register kiss_fft_cpx lo, hi, prod;
+
+        /* Work on local copies so each element is read and written once. */
+        hi = *upper;
+        lo = *Fout;
+
+        C_MUL (prod, hi, *twiddle);
+        twiddle += fstride;
+
+        C_SUB (hi, lo, prod);
+        C_ADD (lo, lo, prod);
+
+        *upper++ = hi;
+        *Fout++ = lo;
+
+        --m;
+    } while ( m != 0 );
+}
+
+#define OVERRIDE_KFBFLY4
+/*
+ * kf_bfly4 (TM_ASM, non-FIXED_POINT build): radix-4 butterfly.
+ *
+ * Processes m groups of four elements located at Fout[i], Fout[m+i],
+ * Fout[2m+i] and Fout[3m+i]. Elements 1..3 are multiplied by twiddles that
+ * advance by fstride, 2*fstride and 3*fstride entries per group. Outputs 0
+ * and 2 are direction-independent; outputs 1 and 3 exchange the sign of the
+ * cross term depending on st->inverse.
+ *
+ * The loop body runs before the counter is tested, so m must be >= 1.
+ */
+static void kf_bfly4(
+    kiss_fft_cpx * Fout,
+    const int fstride,
+    const kiss_fft_cfg st,
+    int m
+    )
+{
+    register kiss_fft_cpx * restrict twA, * restrict twB, * restrict twC;
+    register kiss_fft_cpx * restrict q1, * restrict q2, * restrict q3;
+    register int forward = !st->inverse;
+
+    twA = twB = twC = st->twiddles;
+
+    q1 = Fout + m;
+    q2 = Fout + (m << 1);
+    q3 = Fout + (m * 3);
+
+    do {
+        register kiss_fft_cpx in0;
+        register kiss_fft_cpx p1, p2, p3, sum13, diff13, diff02;
+
+        in0 = *Fout;
+
+        /* Twiddle the three upper inputs, then step the twiddle pointers. */
+        C_MUL( p1, *q1, *twA);
+        C_MUL( p2, *q2, *twB);
+        C_MUL( p3, *q3, *twC);
+
+        twA += fstride;
+        twB += (fstride << 1);
+        twC += (fstride * 3);
+
+        C_SUB( diff02, in0, p2);
+        C_ADD( in0, in0, p2);
+        C_ADD( sum13, p1, p3);
+        C_SUB( diff13, p1, p3);
+
+        /* Outputs 0 and 2 do not depend on the transform direction. */
+        C_SUB( *q2, in0, sum13);
+        C_ADD( *Fout, in0, sum13);
+
+        if ( !forward )
+        {
+            /* st->inverse is nonzero */
+            q1->r = diff02.r - diff13.i;
+            q1->i = diff02.i + diff13.r;
+            q3->r = diff02.r + diff13.i;
+            q3->i = diff02.i - diff13.r;
+        }
+        else
+        {
+            /* st->inverse is zero */
+            q1->r = diff02.r + diff13.i;
+            q1->i = diff02.i - diff13.r;
+            q3->r = diff02.r - diff13.i;
+            q3->i = diff02.i + diff13.r;
+        }
+
+        ++Fout; ++q1; ++q2; ++q3;
+
+        --m;
+    } while ( m != 0 );
+}
+
+#define OVERRIDE_KFBFLY3
+/*
+ * kf_bfly3 (TM_ASM, non-FIXED_POINT build): radix-3 butterfly.
+ *
+ * Processes m groups of three elements located at Fout[i], Fout[m+i] and
+ * Fout[2m+i]. Elements 1 and 2 are multiplied by twiddles that advance by
+ * fstride and 2*fstride entries per group.
+ *
+ * With s1, s2 the twiddled inputs, sum = s1 + s2, and
+ * d = (s1 - s2) * epi where epi = twiddles[fstride*m].i:
+ *
+ *     out0 = in0 + sum
+ *     out1 = (in0 - sum/2) + j*d
+ *     out2 = (in0 - sum/2) - j*d
+ *
+ * The loop body runs before the counter is tested, so m must be >= 1.
+ */
+static void kf_bfly3(
+    kiss_fft_cpx * Fout,
+    const int fstride,
+    const kiss_fft_cfg st,
+    int m
+    )
+{
+    register kiss_fft_cpx * restrict Fout1, * restrict Fout2;
+    register kiss_fft_cpx * restrict tw1,* restrict tw2;
+    register float epi;
+
+    tw1 = tw2 = st->twiddles;
+    epi = st->twiddles[fstride*m].i;
+    Fout1 = Fout + m;
+    Fout2 = Fout + (m << 1);
+
+    do {
+
+        register kiss_fft_cpx _fout;
+        register kiss_fft_cpx sc0, sc1, sc2, sc3;
+
+        _fout = *Fout;
+
+        C_MUL( sc1, *Fout1, *tw1);
+        C_MUL( sc2, *Fout2, *tw2);
+        C_ADD( sc3, sc1, sc2);
+        C_SUB( sc0, sc1, sc2);
+        tw1 += fstride;
+        tw2 += (fstride << 1);
+
+        /* sc1 is reused: it now holds in0 - sum/2. */
+        sc1.r = _fout.r - HALF_OF(sc3.r);
+        sc1.i = _fout.i - HALF_OF(sc3.i);
+
+        C_MULBYSCALAR(sc0, epi);
+        C_ADD(*Fout, _fout, sc3);
+
+        /* out2 = sc1 - j*sc0 */
+        Fout2->r = sc1.r + sc0.i;
+        Fout2->i = sc1.i - sc0.r;
+
+        /* out1 = sc1 + j*sc0. The real part must come from sc1.r and the
+           imaginary part from sc1.i; the previous code had them swapped. */
+        Fout1->r = sc1.r - sc0.i;
+        Fout1->i = sc1.i + sc0.r;
+
+        ++Fout; ++Fout1; ++Fout2;
+
+    } while(--m);
+}
+
+#define OVERRIDE_KFBFLY5
+/*
+ * kf_bfly5 (TM_ASM, non-FIXED_POINT build): radix-5 butterfly.
+ *
+ * Processes m groups of five elements located at Fout[u], Fout[m+u],
+ * Fout[2m+u], Fout[3m+u] and Fout[4m+u]. Elements 1..4 of group u are
+ * multiplied by twiddles tw[u*fstride], tw[2*u*fstride], tw[3*u*fstride]
+ * and tw[4*u*fstride]. The constants (yar, yai) and (ybr, ybi) are the
+ * twiddle entries at indices fstride*m and 2*fstride*m.
+ *
+ * Nothing is done when m <= 0.
+ */
+static void kf_bfly5(
+    kiss_fft_cpx * Fout,
+    const size_t fstride,
+    const kiss_fft_cfg st,
+    int m
+    )
+{
+    register kiss_fft_cpx * restrict Fout1,* restrict Fout2,* restrict Fout3,* restrict Fout4;
+    register int u;
+    register kiss_fft_cpx *tw;
+    register float yar, yai, ybr, ybi;
+
+    Fout1=Fout+m;
+    Fout2=Fout+(m<<1);
+    Fout3=Fout+(m*3);
+    Fout4=Fout+(m<<2);
+
+    tw = st->twiddles;
+    yar = tw[fstride*m].r;
+    yai = tw[fstride*m].i;
+    ybr = tw[fstride*2*m].r;
+    ybi = tw[fstride*2*m].i;
+
+    for ( u=0; u<m; ++u )
+    {
+        register kiss_fft_cpx sc0, sc1, sc2, sc3, sc4, sc5, sc6, sc7, sc8, sc9, sc10, sc11, sc12;
+
+        /* Keep the untouched input 0; Fout[0] of this group is overwritten below. */
+        sc0 = *Fout;
+
+        C_MUL( sc1,*Fout1, tw[u*fstride]);
+        C_MUL( sc2,*Fout2, tw[2*u*fstride]);
+        C_MUL( sc3,*Fout3, tw[3*u*fstride]);
+        C_MUL( sc4,*Fout4, tw[4*u*fstride]);
+
+        C_ADD( sc7, sc1, sc4);
+        C_SUB( sc10, sc1, sc4);
+        C_ADD( sc8, sc2, sc3);
+        C_SUB( sc9, sc2, sc3);
+
+        Fout->r = sc0.r + sc7.r + sc8.r;
+        Fout->i = sc0.i + sc7.i + sc8.i;
+
+        sc5.r = sc0.r + S_MUL(sc7.r,yar) + S_MUL(sc8.r,ybr);
+        sc5.i = sc0.i + S_MUL(sc7.i,yar) + S_MUL(sc8.i,ybr);
+
+        sc6.r = S_MUL(sc10.i,yai) + S_MUL(sc9.i,ybi);
+        sc6.i = -S_MUL(sc10.r,yai) - S_MUL(sc9.r,ybi);
+
+        C_SUB(*Fout1,sc5,sc6);
+        C_ADD(*Fout4,sc5,sc6);
+
+        sc11.r = sc0.r + S_MUL(sc7.r,ybr) + S_MUL(sc8.r,yar);
+        sc11.i = sc0.i + S_MUL(sc7.i,ybr) + S_MUL(sc8.i,yar);
+        sc12.r = - S_MUL(sc10.i,ybi) + S_MUL(sc9.i,yai);
+        sc12.i = S_MUL(sc10.r,ybi) - S_MUL(sc9.r,yai);
+        C_ADD(*Fout2,sc11,sc12);
+        C_SUB(*Fout3,sc11,sc12);
+
+        /* Advance all five element pointers. Fout was previously left out,
+           so every iteration reused element 0 and Fout[1..m-1] were never
+           processed. */
+        ++Fout; ++Fout1; ++Fout2; ++Fout3; ++Fout4;
+    }
+}
+
+
+#endif
+
+#endif