diff options
author | Bill Yi <byi@google.com> | 2015-06-23 13:53:11 -0700 |
---|---|---|
committer | Bill Yi <byi@google.com> | 2015-06-23 13:53:11 -0700 |
commit | 4e213d510f437769f8a28578dd4f786fb7d16c44 (patch) | |
tree | 0d5cbd5a7eee87b3dca5820d282ef618a7e25991 /dotprod_sse2.c | |
download | fec-4e213d510f437769f8a28578dd4f786fb7d16c44.tar.gz |
Initial code (nougat-mr1-arc)
Diffstat (limited to 'dotprod_sse2.c')
-rw-r--r-- | dotprod_sse2.c | 72 |
1 files changed, 72 insertions, 0 deletions
diff --git a/dotprod_sse2.c b/dotprod_sse2.c new file mode 100644 index 0000000..1fddd18 --- /dev/null +++ b/dotprod_sse2.c @@ -0,0 +1,72 @@ +/* 16-bit signed integer dot product + * SSE2 version + * Copyright 2004 Phil Karn + * May be used under the terms of the GNU Lesser General Public License (LGPL) + */ +#define _XOPEN_SOURCE 600 +#include <stdlib.h> +#include <memory.h> +#include "fec.h" + +struct dotprod { + int len; /* Number of coefficients */ + + /* On a SSE2 machine, these hold 8 copies of the coefficients, + * preshifted by 0,1,..7 words to meet all possible input data + * alignments (see Intel ap559 on MMX dot products). + */ + signed short *coeffs[8]; +}; + +long dotprod_sse2_assist(signed short *a,signed short *b,int cnt); + +/* Create and return a descriptor for use with the dot product function */ +void *initdp_sse2(signed short coeffs[],int len){ + struct dotprod *dp; + int i,j,blksize; + + if(len == 0) + return NULL; + + dp = (struct dotprod *)calloc(1,sizeof(struct dotprod)); + dp->len = len; + + /* Make 8 copies of coefficients, one for each data alignment, + * each aligned to 16-byte boundary + */ + for(i=0;i<8;i++){ + blksize = (1+(len+i-1)/8) * 8*sizeof(signed short); + posix_memalign((void **)&dp->coeffs[i],16,blksize); + memset(dp->coeffs[i],0,blksize); + for(j=0;j<len;j++) + dp->coeffs[i][j+i] = coeffs[j]; + } + return (void *)dp; +} + + +/* Free a dot product descriptor created earlier */ +void freedp_sse2(void *p){ + struct dotprod *dp = (struct dotprod *)p; + int i; + + for(i=0;i<8;i++) + if(dp->coeffs[i] != NULL) + free(dp->coeffs[i]); + free(dp); +} + +/* Compute a dot product given a descriptor and an input array + * The length is taken from the descriptor + */ +long dotprod_sse2(void *p,signed short a[]){ + struct dotprod *dp = (struct dotprod *)p; + int al; + signed short *ar; + + ar = (signed short *)((int)a & ~15); + al = a - ar; + + /* Call assembler routine to do the work, passing number of 8-word blocks */ + return 
dotprod_sse2_assist(ar,dp->coeffs[al],(dp->len+al-1)/8+1); +} |