diff options
author | Bill Yi <byi@google.com> | 2015-06-23 13:53:11 -0700 |
---|---|---|
committer | Bill Yi <byi@google.com> | 2015-06-23 13:53:11 -0700 |
commit | 4e213d510f437769f8a28578dd4f786fb7d16c44 (patch) | |
tree | 0d5cbd5a7eee87b3dca5820d282ef618a7e25991 | |
download | fec-4e213d510f437769f8a28578dd4f786fb7d16c44.tar.gz |
Initial codenougat-mr1-arc
-rw-r--r-- | INSTALL | 39 | ||||
-rw-r--r-- | README | 120 | ||||
-rw-r--r-- | ccsds.h | 5 | ||||
-rw-r--r-- | char.h | 24 | ||||
-rw-r--r-- | config.guess | 1516 | ||||
-rw-r--r-- | config.h.in | 19 | ||||
-rwxr-xr-x | config.sub | 1362 | ||||
-rwxr-xr-x | configure | 4357 | ||||
-rw-r--r-- | configure.in | 83 | ||||
-rw-r--r-- | cpu_features.s | 15 | ||||
-rw-r--r-- | cpu_mode_ppc.c | 40 | ||||
-rw-r--r-- | cpu_mode_x86.c | 33 | ||||
-rw-r--r-- | decode_rs.c | 262 | ||||
-rw-r--r-- | decode_rs.h | 298 | ||||
-rw-r--r-- | decode_rs_8.c | 24 | ||||
-rw-r--r-- | decode_rs_ccsds.c | 26 | ||||
-rw-r--r-- | decode_rs_char.c | 22 | ||||
-rw-r--r-- | decode_rs_int.c | 22 | ||||
-rw-r--r-- | dotprod.c | 94 | ||||
-rw-r--r-- | dotprod.h | 15 | ||||
-rw-r--r-- | dotprod_av.c | 93 | ||||
-rw-r--r-- | dotprod_mmx.c | 81 | ||||
-rw-r--r-- | dotprod_mmx_assist.s | 83 | ||||
-rw-r--r-- | dotprod_port.c | 58 | ||||
-rw-r--r-- | dotprod_sse2.c | 72 | ||||
-rw-r--r-- | dotprod_sse2_assist.s | 85 | ||||
-rw-r--r-- | dsp.3 | 63 | ||||
-rw-r--r-- | dtest.c | 99 | ||||
-rw-r--r-- | encode_rs.c | 52 | ||||
-rw-r--r-- | encode_rs.h | 58 | ||||
-rw-r--r-- | encode_rs_8.c | 109 | ||||
-rw-r--r-- | encode_rs_av.c | 61 | ||||
-rw-r--r-- | encode_rs_ccsds.c | 24 | ||||
-rw-r--r-- | encode_rs_char.c | 15 | ||||
-rw-r--r-- | encode_rs_int.c | 15 | ||||
-rw-r--r-- | exercise.c | 122 | ||||
-rw-r--r-- | fec.c | 66 | ||||
-rw-r--r-- | fec.h | 347 | ||||
-rw-r--r-- | fixed.h | 33 | ||||
-rw-r--r-- | gen_ccsds.c | 39 | ||||
-rw-r--r-- | gen_ccsds_tal.c | 53 | ||||
-rw-r--r-- | init_rs.c | 39 | ||||
-rw-r--r-- | init_rs.h | 106 | ||||
-rw-r--r-- | init_rs_char.c | 35 | ||||
-rw-r--r-- | init_rs_int.c | 35 | ||||
-rwxr-xr-x | install-sh | 251 | ||||
-rw-r--r-- | int.h | 22 | ||||
-rw-r--r-- | lesser.txt | 504 | ||||
-rw-r--r-- | makefile.in | 242 | ||||
-rw-r--r-- | mmxbfly27.s | 148 | ||||
-rw-r--r-- | mmxbfly29.s | 161 | ||||
-rw-r--r-- | peak_mmx_assist.s | 70 | ||||
-rw-r--r-- | peak_sse2_assist.s | 51 | ||||
-rw-r--r-- | peak_sse_assist.s | 49 | ||||
-rw-r--r-- | peaktest.c | 38 | ||||
-rw-r--r-- | peakval.c | 39 | ||||
-rw-r--r-- | peakval_av.c | 61 | ||||
-rw-r--r-- | peakval_mmx.c | 34 | ||||
-rw-r--r-- | peakval_mmx_assist.s | 70 | ||||
-rw-r--r-- | peakval_port.c | 16 | ||||
-rw-r--r-- | peakval_sse.c | 35 | ||||
-rw-r--r-- | peakval_sse2.c | 34 | ||||
-rw-r--r-- | peakval_sse2_assist.s | 51 | ||||
-rw-r--r-- | peakval_sse_assist.s | 49 | ||||
-rw-r--r-- | rs-common.h | 26 | ||||
-rw-r--r-- | rs.3 | 198 | ||||
-rw-r--r-- | rs_speedtest.c | 54 | ||||
-rw-r--r-- | rstest.c | 296 | ||||
-rw-r--r-- | sim.c | 43 | ||||
-rw-r--r-- | simd-viterbi.3 | 247 | ||||
-rw-r--r-- | sqtest.c | 42 | ||||
-rw-r--r-- | sse2bfly27.s | 202 | ||||
-rw-r--r-- | sse2bfly29.s | 245 | ||||
-rw-r--r-- | ssebfly27.s | 205 | ||||
-rw-r--r-- | ssebfly29.s | 271 | ||||
-rw-r--r-- | sumsq.c | 40 | ||||
-rw-r--r-- | sumsq_av.c | 78 | ||||
-rw-r--r-- | sumsq_mmx.c | 35 | ||||
-rw-r--r-- | sumsq_mmx_assist.s | 83 | ||||
-rw-r--r-- | sumsq_port.c | 16 | ||||
-rw-r--r-- | sumsq_sse2.c | 33 | ||||
-rw-r--r-- | sumsq_sse2_assist.s | 49 | ||||
-rw-r--r-- | sumsq_test.c | 101 | ||||
-rw-r--r-- | viterbi27.c | 161 | ||||
-rw-r--r-- | viterbi27_av.c | 210 | ||||
-rw-r--r-- | viterbi27_mmx.c | 115 | ||||
-rw-r--r-- | viterbi27_port.c | 191 | ||||
-rw-r--r-- | viterbi27_sse.c | 113 | ||||
-rw-r--r-- | viterbi27_sse2.c | 180 | ||||
-rw-r--r-- | viterbi29.c | 152 | ||||
-rw-r--r-- | viterbi29_av.c | 190 | ||||
-rw-r--r-- | viterbi29_mmx.c | 118 | ||||
-rw-r--r-- | viterbi29_port.c | 166 | ||||
-rw-r--r-- | viterbi29_sse.c | 114 | ||||
-rw-r--r-- | viterbi29_sse2.c | 119 | ||||
-rw-r--r-- | viterbi39.c | 153 | ||||
-rw-r--r-- | viterbi39_av.c | 251 | ||||
-rw-r--r-- | viterbi39_mmx.c | 185 | ||||
-rw-r--r-- | viterbi39_port.c | 168 | ||||
-rw-r--r-- | viterbi39_sse.c | 201 | ||||
-rw-r--r-- | viterbi39_sse2.c | 200 | ||||
-rw-r--r-- | viterbi615.c | 155 | ||||
-rw-r--r-- | viterbi615_av.c | 257 | ||||
-rw-r--r-- | viterbi615_mmx.c | 183 | ||||
-rw-r--r-- | viterbi615_port.c | 156 | ||||
-rw-r--r-- | viterbi615_sse.c | 201 | ||||
-rw-r--r-- | viterbi615_sse2.c | 204 | ||||
-rw-r--r-- | vtest27.c | 184 | ||||
-rw-r--r-- | vtest29.c | 185 | ||||
-rw-r--r-- | vtest39.c | 186 | ||||
-rw-r--r-- | vtest615.c | 191 |
111 files changed, 19297 insertions, 0 deletions
@@ -0,0 +1,39 @@ +INSTALLATION INSTRUCTIONS + +To build and install the libfec libraries, simply say + +./configure +make +make test (optional) +make install (as root) + +By default, "make install" puts the libfec libraries in +/usr/local/lib, the include files in /usr/local/include, and the +manual page in /usr/local/man. + +You may have an old version of the GNU assembler that cannot handle +the relatively new SSE2 mnemonics. Update your version of the GNU +"binutils" package. + +You may obtain the latest binutils package through your normal +distribution channels or from: + +http://sources.redhat.com/binutils/ + +TESTING THE FEC LIBRARY + +After running the ./configure script, optional tests can be built and +run as follows: + +make test + +"make test" tests each routine, using the SIMD versions as +appropriate, verifying correct operation and estimating Viterbi +decoding speeds. These tests should always succeed unless something is +broken. + +28 Mar 2004 +Phil Karn, karn@ka9q.net + + + @@ -0,0 +1,120 @@ +COPYRIGHT + +This package is copyright 2006 by Phil Karn, KA9Q. It may be used +under the terms of the GNU Lesser General Public License (LGPL). See +the file "lesser.txt" in this package for license details. + +INTRODUCTION + +This package provides a set of functions that implement several +popular forward error correction (FEC) algorithms and several low-level routines +useful in modems implemented with digital signal processing (DSP). + +The following routines are provided: + +1. Viterbi decoders for the following convolutional codes: + +r=1/2 k=7 ("Voyager" code, now a widely used industry standard) +r=1/2 k=9 (Used on the IS-95 CDMA forward link) +r=1/6 k=15 ("Cassini" code, used by several NASA/JPL deep space missions) + +2. Reed-Solomon encoders and decoders for any user-specified code. + +3. Optimized encoder and decoder for the CCSDS-standard (255,223) +Reed-Solomon code, with and without the CCSDS-standard "dual basis" +symbol representation. + +4. 
Compute dot product between a 16-bit buffer and a set of 16-bit +coefficients. This is the basic DSP primitive for digital filtering +and correlation. + +5. Compute sum of squares of a buffer of 16-bit signed integers. This is +useful in DSP for finding the total energy in a signal. + +6. Find peak value in a buffer of 16-bit signed integers, useful for +scaling a signal to prevent overflow. + +SIMD SUPPORT + +This package automatically makes use of various SIMD (Single +Instruction stream, Multiple Data stream) instruction sets, when +available: MMX, SSE and SSE2 on the IA-32 (Intel) architecture, and +Altivec on the PowerPC G4 and G5 used by Power Macintoshes. + +"Altivec" is a Motorola trademark; Apple calls it "Velocity Engine", +and IBM calls it "VMX". Altivec is roughly comparable to SSE2 on the +IA-32. + +Many of the SIMD versions run more than an order of +magnitude faster than their portable C versions. The available SIMD +instruction sets, if any, are determined at run time and the proper +version of each routine is automatically selected. If no SIMD +instructions are available, the portable C version is invoked by +default. On targets other than IA-32 and PPC, only the portable C +version is built. + +The SIMD-assisted versions generally produce the same results as the C +versions, with a few minor exceptions. The Viterbi decoders in C have +a very slightly greater Eb/No performance due to their use of 32-bit +path metrics. On the other hand, the SIMD versions use the +"saturating" arithmetic available in these instructions to avoid the +integer wraparounds that can occur in C when argument ranges are not +properly constrained. This applies primarily to the "dotprod" (dot +product) function. + +The MMX (MultiMedia eXtensions) instruction set was introduced on +later Pentium CPUs; it is also implemented on the Pentium II and most +AMD CPUs starting with the K6. SSE (Streaming SIMD Extensions) was +introduced in the Pentium III; AMD calls it "3D Now! 
Professional". +Intel introduced SSE2 on the Pentium 4, and it has been picked up by +later AMD CPUs. SSE support implies MMX support, while SSE2 support +implies both SSE and MMX support. + +The latest IA-32 SIMD instruction set, SSE3 (also known as "Prescott +New Instructions") was introduced in early 2004 with the latest +("Prescott") revision of the Pentium 4. Relatively little was +introduced with SSE3, and this library currently makes no use of it. + +See the various manual pages for details on how to use the library +routines. + +Copyright 2006, Phil Karn, KA9Q +karn@ka9q.net +http://www.ka9q.net/ + +This software may be used under the terms of the GNU Lesser General +Public License (LGPL); see the file lesser.txt for details. + +Revision history: +Version 1.0 released 29 May 2001 + +Version 2.0 released 3 Dec 2001: +Restructured to add support for shared libraries. + +Version 2.0.1 released 8 Dec 2001: +Includes autoconf/configure script + +Version 2.0.2 released 4 Feb 2002: +Add SIMD version override options +Test for lack of SSE2 mnemonic support in 'as' +Build only selected version + +Version 2.0.3 released 6 Feb 2002: +Fix to parityb function in parity.h + +feclib version 1.0 released November 2003 +Merged SIMD-Viterbi, RS and DSP libraries +Changed SIMD Viterbi decoder to detect SSE2/SSE/MMX at runtime rather than build time + +feclib version 2.0 (unreleased) Mar 2004 +General speedups and cleanups +Switch from 4 to 8-bit input symbols on all Viterbi decoders +Support for Altivec on PowerPC +Support for k=15 r=1/6 Cassini/Mars Pathfinder/Mars Exploration Rover/STEREO code +Changed license to GNU Lesser General Public License (LGPL) + +feclib version 2.1 June 5 2006 +Added error checking, fixed alignment bug in SSE2 versions of Viterbi decoders causing segfaults + +feclib version 2.1.1 June 6 2006 +Fix test/benchmark time measurement on Linux @@ -0,0 +1,5 @@ +typedef unsigned char data_t; +extern unsigned char Taltab[],Tal1tab[]; +#define NN 255 
+#define NROOTS 32 + @@ -0,0 +1,24 @@ +/* Stuff specific to the 8-bit symbol version of the general purpose RS codecs + * + * Copyright 2003, Phil Karn, KA9Q + * May be used under the terms of the GNU Lesser General Public License (LGPL) + */ +typedef unsigned char data_t; + +#define MODNN(x) modnn(rs,x) + +#define MM (rs->mm) +#define NN (rs->nn) +#define ALPHA_TO (rs->alpha_to) +#define INDEX_OF (rs->index_of) +#define GENPOLY (rs->genpoly) +#define NROOTS (rs->nroots) +#define FCR (rs->fcr) +#define PRIM (rs->prim) +#define IPRIM (rs->iprim) +#define PAD (rs->pad) +#define A0 (NN) + + + + diff --git a/config.guess b/config.guess new file mode 100644 index 0000000..0f0fe71 --- /dev/null +++ b/config.guess @@ -0,0 +1,1516 @@ +#! /bin/sh +# Attempt to guess a canonical system name. +# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, +# 2000, 2001, 2002, 2003, 2004, 2005, 2006 Free Software Foundation, +# Inc. + +timestamp='2007-03-06' + +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA +# 02110-1301, USA. +# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. 
+ + +# Originally written by Per Bothner <per@bothner.com>. +# Please send patches to <config-patches@gnu.org>. Submit a context +# diff and a properly formatted ChangeLog entry. +# +# This script attempts to guess a canonical system name similar to +# config.sub. If it succeeds, it prints the system name on stdout, and +# exits with 0. Otherwise, it exits with 1. +# +# The plan is that this can be called by configure scripts if you +# don't specify an explicit build system type. + +me=`echo "$0" | sed -e 's,.*/,,'` + +usage="\ +Usage: $0 [OPTION] + +Output the configuration name of the system \`$me' is run on. + +Operation modes: + -h, --help print this help, then exit + -t, --time-stamp print date of last modification, then exit + -v, --version print version number, then exit + +Report bugs and patches to <config-patches@gnu.org>." + +version="\ +GNU config.guess ($timestamp) + +Originally written by Per Bothner. +Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005 +Free Software Foundation, Inc. + +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." + +help=" +Try \`$me --help' for more information." + +# Parse command line +while test $# -gt 0 ; do + case $1 in + --time-stamp | --time* | -t ) + echo "$timestamp" ; exit ;; + --version | -v ) + echo "$version" ; exit ;; + --help | --h* | -h ) + echo "$usage"; exit ;; + -- ) # Stop option processing + shift; break ;; + - ) # Use stdin as input. + break ;; + -* ) + echo "$me: invalid option $1$help" >&2 + exit 1 ;; + * ) + break ;; + esac +done + +if test $# != 0; then + echo "$me: too many arguments$help" >&2 + exit 1 +fi + +trap 'exit 1' 1 2 15 + +# CC_FOR_BUILD -- compiler used by this script. 
Note that the use of a +# compiler to aid in system detection is discouraged as it requires +# temporary files to be created and, as you can see below, it is a +# headache to deal with in a portable fashion. + +# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still +# use `HOST_CC' if defined, but it is deprecated. + +# Portable tmp directory creation inspired by the Autoconf team. + +set_cc_for_build=' +trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ; +trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ; +: ${TMPDIR=/tmp} ; + { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } || + { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } || + { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } || + { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ; +dummy=$tmp/dummy ; +tmpfiles="$dummy.c $dummy.o $dummy.rel $dummy" ; +case $CC_FOR_BUILD,$HOST_CC,$CC in + ,,) echo "int x;" > $dummy.c ; + for c in cc gcc c89 c99 ; do + if ($c -c -o $dummy.o $dummy.c) >/dev/null 2>&1 ; then + CC_FOR_BUILD="$c"; break ; + fi ; + done ; + if test x"$CC_FOR_BUILD" = x ; then + CC_FOR_BUILD=no_compiler_found ; + fi + ;; + ,,*) CC_FOR_BUILD=$CC ;; + ,*,*) CC_FOR_BUILD=$HOST_CC ;; +esac ; set_cc_for_build= ;' + +# This is needed to find uname on a Pyramid OSx when run in the BSD universe. +# (ghazi@noc.rutgers.edu 1994-08-24) +if (test -f /.attbin/uname) >/dev/null 2>&1 ; then + PATH=$PATH:/.attbin ; export PATH +fi + +UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown +UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown +UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown +UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown + +# Note: order is significant - the case branches are not exclusive. 
+ +case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in + *:NetBSD:*:*) + # NetBSD (nbsd) targets should (where applicable) match one or + # more of the tupples: *-*-netbsdelf*, *-*-netbsdaout*, + # *-*-netbsdecoff* and *-*-netbsd*. For targets that recently + # switched to ELF, *-*-netbsd* would select the old + # object file format. This provides both forward + # compatibility and a consistent mechanism for selecting the + # object file format. + # + # Note: NetBSD doesn't particularly care about the vendor + # portion of the name. We always set it to "unknown". + sysctl="sysctl -n hw.machine_arch" + UNAME_MACHINE_ARCH=`(/sbin/$sysctl 2>/dev/null || \ + /usr/sbin/$sysctl 2>/dev/null || echo unknown)` + case "${UNAME_MACHINE_ARCH}" in + armeb) machine=armeb-unknown ;; + arm*) machine=arm-unknown ;; + sh3el) machine=shl-unknown ;; + sh3eb) machine=sh-unknown ;; + sh5el) machine=sh5le-unknown ;; + *) machine=${UNAME_MACHINE_ARCH}-unknown ;; + esac + # The Operating System including object format, if it has switched + # to ELF recently, or will in the future. + case "${UNAME_MACHINE_ARCH}" in + arm*|i386|m68k|ns32k|sh3*|sparc|vax) + eval $set_cc_for_build + if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep __ELF__ >/dev/null + then + # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout). + # Return netbsd for either. FIX? + os=netbsd + else + os=netbsdelf + fi + ;; + *) + os=netbsd + ;; + esac + # The OS release + # Debian GNU/NetBSD machines have a different userland, and + # thus, need a distinct triplet. However, they do not need + # kernel version information, so it can be replaced with a + # suitable tag, in the style of linux-gnu. 
+ case "${UNAME_VERSION}" in + Debian*) + release='-gnu' + ;; + *) + release=`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'` + ;; + esac + # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM: + # contains redundant information, the shorter form: + # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. + echo "${machine}-${os}${release}" + exit ;; + *:OpenBSD:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` + echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE} + exit ;; + *:ekkoBSD:*:*) + echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE} + exit ;; + *:SolidBSD:*:*) + echo ${UNAME_MACHINE}-unknown-solidbsd${UNAME_RELEASE} + exit ;; + macppc:MirBSD:*:*) + echo powerpc-unknown-mirbsd${UNAME_RELEASE} + exit ;; + *:MirBSD:*:*) + echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE} + exit ;; + alpha:OSF1:*:*) + case $UNAME_RELEASE in + *4.0) + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` + ;; + *5.*) + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` + ;; + esac + # According to Compaq, /usr/sbin/psrinfo has been available on + # OSF/1 and Tru64 systems produced since 1995. I hope that + # covers most systems running today. This code pipes the CPU + # types through head -n 1, so we only detect the type of CPU 0. 
+ ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1` + case "$ALPHA_CPU_TYPE" in + "EV4 (21064)") + UNAME_MACHINE="alpha" ;; + "EV4.5 (21064)") + UNAME_MACHINE="alpha" ;; + "LCA4 (21066/21068)") + UNAME_MACHINE="alpha" ;; + "EV5 (21164)") + UNAME_MACHINE="alphaev5" ;; + "EV5.6 (21164A)") + UNAME_MACHINE="alphaev56" ;; + "EV5.6 (21164PC)") + UNAME_MACHINE="alphapca56" ;; + "EV5.7 (21164PC)") + UNAME_MACHINE="alphapca57" ;; + "EV6 (21264)") + UNAME_MACHINE="alphaev6" ;; + "EV6.7 (21264A)") + UNAME_MACHINE="alphaev67" ;; + "EV6.8CB (21264C)") + UNAME_MACHINE="alphaev68" ;; + "EV6.8AL (21264B)") + UNAME_MACHINE="alphaev68" ;; + "EV6.8CX (21264D)") + UNAME_MACHINE="alphaev68" ;; + "EV6.9A (21264/EV69A)") + UNAME_MACHINE="alphaev69" ;; + "EV7 (21364)") + UNAME_MACHINE="alphaev7" ;; + "EV7.9 (21364A)") + UNAME_MACHINE="alphaev79" ;; + esac + # A Pn.n version is a patched version. + # A Vn.n version is a released version. + # A Tn.n version is a released field test version. + # A Xn.n version is an unreleased experimental baselevel. + # 1.2 uses "1.2" for uname -r. + echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` + exit ;; + Alpha\ *:Windows_NT*:*) + # How do we know it's Interix rather than the generic POSIX subsystem? + # Should we change UNAME_MACHINE based on the output of uname instead + # of the specific Alpha model? 
+ echo alpha-pc-interix + exit ;; + 21064:Windows_NT:50:3) + echo alpha-dec-winnt3.5 + exit ;; + Amiga*:UNIX_System_V:4.0:*) + echo m68k-unknown-sysv4 + exit ;; + *:[Aa]miga[Oo][Ss]:*:*) + echo ${UNAME_MACHINE}-unknown-amigaos + exit ;; + *:[Mm]orph[Oo][Ss]:*:*) + echo ${UNAME_MACHINE}-unknown-morphos + exit ;; + *:OS/390:*:*) + echo i370-ibm-openedition + exit ;; + *:z/VM:*:*) + echo s390-ibm-zvmoe + exit ;; + *:OS400:*:*) + echo powerpc-ibm-os400 + exit ;; + arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) + echo arm-acorn-riscix${UNAME_RELEASE} + exit ;; + arm:riscos:*:*|arm:RISCOS:*:*) + echo arm-unknown-riscos + exit ;; + SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*) + echo hppa1.1-hitachi-hiuxmpp + exit ;; + Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*) + # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE. + if test "`(/bin/universe) 2>/dev/null`" = att ; then + echo pyramid-pyramid-sysv3 + else + echo pyramid-pyramid-bsd + fi + exit ;; + NILE*:*:*:dcosx) + echo pyramid-pyramid-svr4 + exit ;; + DRS?6000:unix:4.0:6*) + echo sparc-icl-nx6 + exit ;; + DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*) + case `/usr/bin/uname -p` in + sparc) echo sparc-icl-nx7; exit ;; + esac ;; + sun4H:SunOS:5.*:*) + echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) + echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + i86pc:SunOS:5.*:*) + echo i386-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + sun4*:SunOS:6*:*) + # According to config.sub, this is the proper way to canonicalize + # SunOS6. Hard to guess exactly what SunOS6 will be like, but + # it's likely to be more like Solaris than SunOS4. + echo sparc-sun-solaris3`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + sun4*:SunOS:*:*) + case "`/usr/bin/arch -k`" in + Series*|S4*) + UNAME_RELEASE=`uname -v` + ;; + esac + # Japanese Language versions have a version number like `4.1.3-JL'. 
+ echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'` + exit ;; + sun3*:SunOS:*:*) + echo m68k-sun-sunos${UNAME_RELEASE} + exit ;; + sun*:*:4.2BSD:*) + UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` + test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3 + case "`/bin/arch`" in + sun3) + echo m68k-sun-sunos${UNAME_RELEASE} + ;; + sun4) + echo sparc-sun-sunos${UNAME_RELEASE} + ;; + esac + exit ;; + aushp:SunOS:*:*) + echo sparc-auspex-sunos${UNAME_RELEASE} + exit ;; + # The situation for MiNT is a little confusing. The machine name + # can be virtually everything (everything which is not + # "atarist" or "atariste" at least should have a processor + # > m68000). The system name ranges from "MiNT" over "FreeMiNT" + # to the lowercase version "mint" (or "freemint"). Finally + # the system name "TOS" denotes a system which is actually not + # MiNT. But MiNT is downward compatible to TOS, so this should + # be no problem. + atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) + echo m68k-atari-mint${UNAME_RELEASE} + exit ;; + atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) + echo m68k-atari-mint${UNAME_RELEASE} + exit ;; + *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) + echo m68k-atari-mint${UNAME_RELEASE} + exit ;; + milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) + echo m68k-milan-mint${UNAME_RELEASE} + exit ;; + hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) + echo m68k-hades-mint${UNAME_RELEASE} + exit ;; + *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) + echo m68k-unknown-mint${UNAME_RELEASE} + exit ;; + m68k:machten:*:*) + echo m68k-apple-machten${UNAME_RELEASE} + exit ;; + powerpc:machten:*:*) + echo powerpc-apple-machten${UNAME_RELEASE} + exit ;; + RISC*:Mach:*:*) + echo mips-dec-mach_bsd4.3 + exit ;; + RISC*:ULTRIX:*:*) + echo mips-dec-ultrix${UNAME_RELEASE} + exit ;; + VAX*:ULTRIX*:*:*) + echo vax-dec-ultrix${UNAME_RELEASE} + exit ;; + 2020:CLIX:*:* | 2430:CLIX:*:*) + echo 
clipper-intergraph-clix${UNAME_RELEASE} + exit ;; + mips:*:*:UMIPS | mips:*:*:RISCos) + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c +#ifdef __cplusplus +#include <stdio.h> /* for printf() prototype */ + int main (int argc, char *argv[]) { +#else + int main (argc, argv) int argc; char *argv[]; { +#endif + #if defined (host_mips) && defined (MIPSEB) + #if defined (SYSTYPE_SYSV) + printf ("mips-mips-riscos%ssysv\n", argv[1]); exit (0); + #endif + #if defined (SYSTYPE_SVR4) + printf ("mips-mips-riscos%ssvr4\n", argv[1]); exit (0); + #endif + #if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD) + printf ("mips-mips-riscos%sbsd\n", argv[1]); exit (0); + #endif + #endif + exit (-1); + } +EOF + $CC_FOR_BUILD -o $dummy $dummy.c && + dummyarg=`echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` && + SYSTEM_NAME=`$dummy $dummyarg` && + { echo "$SYSTEM_NAME"; exit; } + echo mips-mips-riscos${UNAME_RELEASE} + exit ;; + Motorola:PowerMAX_OS:*:*) + echo powerpc-motorola-powermax + exit ;; + Motorola:*:4.3:PL8-*) + echo powerpc-harris-powermax + exit ;; + Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*) + echo powerpc-harris-powermax + exit ;; + Night_Hawk:Power_UNIX:*:*) + echo powerpc-harris-powerunix + exit ;; + m88k:CX/UX:7*:*) + echo m88k-harris-cxux7 + exit ;; + m88k:*:4*:R4*) + echo m88k-motorola-sysv4 + exit ;; + m88k:*:3*:R3*) + echo m88k-motorola-sysv3 + exit ;; + AViiON:dgux:*:*) + # DG/UX returns AViiON for all architectures + UNAME_PROCESSOR=`/usr/bin/uname -p` + if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ] + then + if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \ + [ ${TARGET_BINARY_INTERFACE}x = x ] + then + echo m88k-dg-dgux${UNAME_RELEASE} + else + echo m88k-dg-dguxbcs${UNAME_RELEASE} + fi + else + echo i586-dg-dgux${UNAME_RELEASE} + fi + exit ;; + M88*:DolphinOS:*:*) # DolphinOS (SVR3) + echo m88k-dolphin-sysv3 + exit ;; + M88*:*:R3*:*) + # Delta 88k system running SVR3 + echo m88k-motorola-sysv3 + exit ;; + 
XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3) + echo m88k-tektronix-sysv3 + exit ;; + Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD) + echo m68k-tektronix-bsd + exit ;; + *:IRIX*:*:*) + echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'` + exit ;; + ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX. + echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id + exit ;; # Note that: echo "'`uname -s`'" gives 'AIX ' + i*86:AIX:*:*) + echo i386-ibm-aix + exit ;; + ia64:AIX:*:*) + if [ -x /usr/bin/oslevel ] ; then + IBM_REV=`/usr/bin/oslevel` + else + IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} + fi + echo ${UNAME_MACHINE}-ibm-aix${IBM_REV} + exit ;; + *:AIX:2:3) + if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #include <sys/systemcfg.h> + + main() + { + if (!__power_pc()) + exit(1); + puts("powerpc-ibm-aix3.2.5"); + exit(0); + } +EOF + if $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` + then + echo "$SYSTEM_NAME" + else + echo rs6000-ibm-aix3.2.5 + fi + elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then + echo rs6000-ibm-aix3.2.4 + else + echo rs6000-ibm-aix3.2 + fi + exit ;; + *:AIX:*:[45]) + IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'` + if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then + IBM_ARCH=rs6000 + else + IBM_ARCH=powerpc + fi + if [ -x /usr/bin/oslevel ] ; then + IBM_REV=`/usr/bin/oslevel` + else + IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} + fi + echo ${IBM_ARCH}-ibm-aix${IBM_REV} + exit ;; + *:AIX:*:*) + echo rs6000-ibm-aix + exit ;; + ibmrt:4.4BSD:*|romp-ibm:BSD:*) + echo romp-ibm-bsd4.4 + exit ;; + ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and + echo romp-ibm-bsd${UNAME_RELEASE} # 4.3 with uname added to + exit ;; # report: romp-ibm BSD 4.3 + *:BOSX:*:*) + echo rs6000-bull-bosx + exit ;; + DPX/2?00:B.O.S.:*:*) + echo m68k-bull-sysv3 + exit ;; + 
9000/[34]??:4.3bsd:1.*:*) + echo m68k-hp-bsd + exit ;; + hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*) + echo m68k-hp-bsd4.4 + exit ;; + 9000/[34678]??:HP-UX:*:*) + HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` + case "${UNAME_MACHINE}" in + 9000/31? ) HP_ARCH=m68000 ;; + 9000/[34]?? ) HP_ARCH=m68k ;; + 9000/[678][0-9][0-9]) + if [ -x /usr/bin/getconf ]; then + sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` + sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` + case "${sc_cpu_version}" in + 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0 + 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1 + 532) # CPU_PA_RISC2_0 + case "${sc_kernel_bits}" in + 32) HP_ARCH="hppa2.0n" ;; + 64) HP_ARCH="hppa2.0w" ;; + '') HP_ARCH="hppa2.0" ;; # HP-UX 10.20 + esac ;; + esac + fi + if [ "${HP_ARCH}" = "" ]; then + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + + #define _HPUX_SOURCE + #include <stdlib.h> + #include <unistd.h> + + int main () + { + #if defined(_SC_KERNEL_BITS) + long bits = sysconf(_SC_KERNEL_BITS); + #endif + long cpu = sysconf (_SC_CPU_VERSION); + + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1"); break; + case CPU_PA_RISC2_0: + #if defined(_SC_KERNEL_BITS) + switch (bits) + { + case 64: puts ("hppa2.0w"); break; + case 32: puts ("hppa2.0n"); break; + default: puts ("hppa2.0"); break; + } break; + #else /* !defined(_SC_KERNEL_BITS) */ + puts ("hppa2.0"); break; + #endif + default: puts ("hppa1.0"); break; + } + exit (0); + } +EOF + (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` + test -z "$HP_ARCH" && HP_ARCH=hppa + fi ;; + esac + if [ ${HP_ARCH} = "hppa2.0w" ] + then + eval $set_cc_for_build + + # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating + # 32-bit code. hppa64-hp-hpux* has the same kernel and a compiler + # generating 64-bit code. 
GNU and HP use different nomenclature: + # + # $ CC_FOR_BUILD=cc ./config.guess + # => hppa2.0w-hp-hpux11.23 + # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess + # => hppa64-hp-hpux11.23 + + if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | + grep __LP64__ >/dev/null + then + HP_ARCH="hppa2.0w" + else + HP_ARCH="hppa64" + fi + fi + echo ${HP_ARCH}-hp-hpux${HPUX_REV} + exit ;; + ia64:HP-UX:*:*) + HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` + echo ia64-hp-hpux${HPUX_REV} + exit ;; + 3050*:HI-UX:*:*) + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #include <unistd.h> + int + main () + { + long cpu = sysconf (_SC_CPU_VERSION); + /* The order matters, because CPU_IS_HP_MC68K erroneously returns + true for CPU_PA_RISC1_0. CPU_IS_PA_RISC returns correct + results, however. */ + if (CPU_IS_PA_RISC (cpu)) + { + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break; + case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break; + default: puts ("hppa-hitachi-hiuxwe2"); break; + } + } + else if (CPU_IS_HP_MC68K (cpu)) + puts ("m68k-hitachi-hiuxwe2"); + else puts ("unknown-hitachi-hiuxwe2"); + exit (0); + } +EOF + $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` && + { echo "$SYSTEM_NAME"; exit; } + echo unknown-hitachi-hiuxwe2 + exit ;; + 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* ) + echo hppa1.1-hp-bsd + exit ;; + 9000/8??:4.3bsd:*:*) + echo hppa1.0-hp-bsd + exit ;; + *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*) + echo hppa1.0-hp-mpeix + exit ;; + hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* ) + echo hppa1.1-hp-osf + exit ;; + hp8??:OSF1:*:*) + echo hppa1.0-hp-osf + exit ;; + i*86:OSF1:*:*) + if [ -x /usr/sbin/sysversion ] ; then + echo ${UNAME_MACHINE}-unknown-osf1mk + else + echo ${UNAME_MACHINE}-unknown-osf1 + fi + exit ;; + parisc*:Lites*:*:*) + echo hppa1.1-hp-lites + exit ;; + C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*) + echo c1-convex-bsd + exit ;; + 
C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*) + if getsysinfo -f scalar_acc + then echo c32-convex-bsd + else echo c2-convex-bsd + fi + exit ;; + C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*) + echo c34-convex-bsd + exit ;; + C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*) + echo c38-convex-bsd + exit ;; + C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*) + echo c4-convex-bsd + exit ;; + CRAY*Y-MP:*:*:*) + echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*[A-Z]90:*:*:*) + echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \ + | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \ + -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \ + -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*TS:*:*:*) + echo t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*T3E:*:*:*) + echo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*SV1:*:*:*) + echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + *:UNICOS/mp:*:*) + echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) + FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` + FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` + FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` + echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + exit ;; + 5000:UNIX_System_V:4.*:*) + FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` + FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'` + echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + exit ;; + i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) + echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE} + exit ;; + sparc*:BSD/OS:*:*) + echo sparc-unknown-bsdi${UNAME_RELEASE} + exit ;; + *:BSD/OS:*:*) + echo 
${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE} + exit ;; + *:FreeBSD:*:*) + case ${UNAME_MACHINE} in + pc98) + echo i386-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + amd64) + echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + *) + echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + esac + exit ;; + i*:CYGWIN*:*) + echo ${UNAME_MACHINE}-pc-cygwin + exit ;; + *:MINGW*:*) + echo ${UNAME_MACHINE}-pc-mingw32 + exit ;; + i*:windows32*:*) + # uname -m includes "-pc" on this system. + echo ${UNAME_MACHINE}-mingw32 + exit ;; + i*:PW*:*) + echo ${UNAME_MACHINE}-pc-pw32 + exit ;; + *:Interix*:[3456]*) + case ${UNAME_MACHINE} in + x86) + echo i586-pc-interix${UNAME_RELEASE} + exit ;; + EM64T | authenticamd) + echo x86_64-unknown-interix${UNAME_RELEASE} + exit ;; + esac ;; + [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*) + echo i${UNAME_MACHINE}-pc-mks + exit ;; + i*:Windows_NT*:* | Pentium*:Windows_NT*:*) + # How do we know it's Interix rather than the generic POSIX subsystem? + # It also conflicts with pre-2.0 versions of AT&T UWIN. Should we + # UNAME_MACHINE based on the output of uname instead of i386? 
+ echo i586-pc-interix + exit ;; + i*:UWIN*:*) + echo ${UNAME_MACHINE}-pc-uwin + exit ;; + amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*) + echo x86_64-unknown-cygwin + exit ;; + p*:CYGWIN*:*) + echo powerpcle-unknown-cygwin + exit ;; + prep*:SunOS:5.*:*) + echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + *:GNU:*:*) + # the GNU system + echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'` + exit ;; + *:GNU/*:*:*) + # other systems with GNU libc and userland + echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-gnu + exit ;; + i*86:Minix:*:*) + echo ${UNAME_MACHINE}-pc-minix + exit ;; + arm*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + avr32*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + cris:Linux:*:*) + echo cris-axis-linux-gnu + exit ;; + crisv32:Linux:*:*) + echo crisv32-axis-linux-gnu + exit ;; + frv:Linux:*:*) + echo frv-unknown-linux-gnu + exit ;; + ia64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + m32r*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + m68*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + mips:Linux:*:*) + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #undef CPU + #undef mips + #undef mipsel + #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) + CPU=mipsel + #else + #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) + CPU=mips + #else + CPU= + #endif + #endif +EOF + eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n ' + /^CPU/{ + s: ::g + p + }'`" + test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; } + ;; + mips64:Linux:*:*) + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #undef CPU + #undef mips64 + #undef mips64el + #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || 
defined(MIPSEL) + CPU=mips64el + #else + #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) + CPU=mips64 + #else + CPU= + #endif + #endif +EOF + eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n ' + /^CPU/{ + s: ::g + p + }'`" + test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; } + ;; + or32:Linux:*:*) + echo or32-unknown-linux-gnu + exit ;; + ppc:Linux:*:*) + echo powerpc-unknown-linux-gnu + exit ;; + ppc64:Linux:*:*) + echo powerpc64-unknown-linux-gnu + exit ;; + alpha:Linux:*:*) + case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in + EV5) UNAME_MACHINE=alphaev5 ;; + EV56) UNAME_MACHINE=alphaev56 ;; + PCA56) UNAME_MACHINE=alphapca56 ;; + PCA57) UNAME_MACHINE=alphapca56 ;; + EV6) UNAME_MACHINE=alphaev6 ;; + EV67) UNAME_MACHINE=alphaev67 ;; + EV68*) UNAME_MACHINE=alphaev68 ;; + esac + objdump --private-headers /bin/sh | grep ld.so.1 >/dev/null + if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi + echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} + exit ;; + parisc:Linux:*:* | hppa:Linux:*:*) + # Look for CPU level + case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in + PA7*) echo hppa1.1-unknown-linux-gnu ;; + PA8*) echo hppa2.0-unknown-linux-gnu ;; + *) echo hppa-unknown-linux-gnu ;; + esac + exit ;; + parisc64:Linux:*:* | hppa64:Linux:*:*) + echo hppa64-unknown-linux-gnu + exit ;; + s390:Linux:*:* | s390x:Linux:*:*) + echo ${UNAME_MACHINE}-ibm-linux + exit ;; + sh64*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + sh*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + sparc:Linux:*:* | sparc64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + vax:Linux:*:*) + echo ${UNAME_MACHINE}-dec-linux-gnu + exit ;; + x86_64:Linux:*:*) + echo x86_64-unknown-linux-gnu + exit ;; + xtensa:Linux:*:*) + echo xtensa-unknown-linux-gnu + exit ;; + i*86:Linux:*:*) + # The BFD linker knows what the default object file format is, so + # first see if it 
will tell us. cd to the root directory to prevent + # problems with other programs or directories called `ld' in the path. + # Set LC_ALL=C to ensure ld outputs messages in English. + ld_supported_targets=`cd /; LC_ALL=C ld --help 2>&1 \ + | sed -ne '/supported targets:/!d + s/[ ][ ]*/ /g + s/.*supported targets: *// + s/ .*// + p'` + case "$ld_supported_targets" in + elf32-i386) + TENTATIVE="${UNAME_MACHINE}-pc-linux-gnu" + ;; + a.out-i386-linux) + echo "${UNAME_MACHINE}-pc-linux-gnuaout" + exit ;; + coff-i386) + echo "${UNAME_MACHINE}-pc-linux-gnucoff" + exit ;; + "") + # Either a pre-BFD a.out linker (linux-gnuoldld) or + # one that does not give us useful --help. + echo "${UNAME_MACHINE}-pc-linux-gnuoldld" + exit ;; + esac + # Determine whether the default compiler is a.out or elf + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #include <features.h> + #ifdef __ELF__ + # ifdef __GLIBC__ + # if __GLIBC__ >= 2 + LIBC=gnu + # else + LIBC=gnulibc1 + # endif + # else + LIBC=gnulibc1 + # endif + #else + #if defined(__INTEL_COMPILER) || defined(__PGI) || defined(__SUNPRO_C) || defined(__SUNPRO_CC) + LIBC=gnu + #else + LIBC=gnuaout + #endif + #endif + #ifdef __dietlibc__ + LIBC=dietlibc + #endif +EOF + eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n ' + /^LIBC/{ + s: ::g + p + }'`" + test x"${LIBC}" != x && { + echo "${UNAME_MACHINE}-pc-linux-${LIBC}" + exit + } + test x"${TENTATIVE}" != x && { echo "${TENTATIVE}"; exit; } + ;; + i*86:DYNIX/ptx:4*:*) + # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. + # earlier versions are messed up and put the nodename in both + # sysname and nodename. + echo i386-sequent-sysv4 + exit ;; + i*86:UNIX_SV:4.2MP:2.*) + # Unixware is an offshoot of SVR4, but it has its own version + # number series starting with 2... + # I am not positive that other SVR4 systems won't match this, + # I just have to hope. -- rms. + # Use sysv4.2uw... so that sysv4* matches it. 
+ echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION} + exit ;; + i*86:OS/2:*:*) + # If we were able to find `uname', then EMX Unix compatibility + # is probably installed. + echo ${UNAME_MACHINE}-pc-os2-emx + exit ;; + i*86:XTS-300:*:STOP) + echo ${UNAME_MACHINE}-unknown-stop + exit ;; + i*86:atheos:*:*) + echo ${UNAME_MACHINE}-unknown-atheos + exit ;; + i*86:syllable:*:*) + echo ${UNAME_MACHINE}-pc-syllable + exit ;; + i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.0*:*) + echo i386-unknown-lynxos${UNAME_RELEASE} + exit ;; + i*86:*DOS:*:*) + echo ${UNAME_MACHINE}-pc-msdosdjgpp + exit ;; + i*86:*:4.*:* | i*86:SYSTEM_V:4.*:*) + UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'` + if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then + echo ${UNAME_MACHINE}-univel-sysv${UNAME_REL} + else + echo ${UNAME_MACHINE}-pc-sysv${UNAME_REL} + fi + exit ;; + i*86:*:5:[678]*) + # UnixWare 7.x, OpenUNIX and OpenServer 6. + case `/bin/uname -X | grep "^Machine"` in + *486*) UNAME_MACHINE=i486 ;; + *Pentium) UNAME_MACHINE=i586 ;; + *Pent*|*Celeron) UNAME_MACHINE=i686 ;; + esac + echo ${UNAME_MACHINE}-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION} + exit ;; + i*86:*:3.2:*) + if test -f /usr/options/cb.name; then + UNAME_REL=`sed -n 's/.*Version //p' </usr/options/cb.name` + echo ${UNAME_MACHINE}-pc-isc$UNAME_REL + elif /bin/uname -X 2>/dev/null >/dev/null ; then + UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')` + (/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486 + (/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \ + && UNAME_MACHINE=i586 + (/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \ + && UNAME_MACHINE=i686 + (/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \ + && UNAME_MACHINE=i686 + echo ${UNAME_MACHINE}-pc-sco$UNAME_REL + else + echo ${UNAME_MACHINE}-pc-sysv32 + fi + exit ;; + pc:*:*:*) + # Left here for compatibility: + # uname -m prints for DJGPP always 'pc', but it prints nothing about + # the 
processor, so we play safe by assuming i386. + echo i386-pc-msdosdjgpp + exit ;; + Intel:Mach:3*:*) + echo i386-pc-mach3 + exit ;; + paragon:*:*:*) + echo i860-intel-osf1 + exit ;; + i860:*:4.*:*) # i860-SVR4 + if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then + echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4 + else # Add other i860-SVR4 vendors below as they are discovered. + echo i860-unknown-sysv${UNAME_RELEASE} # Unknown i860-SVR4 + fi + exit ;; + mini*:CTIX:SYS*5:*) + # "miniframe" + echo m68010-convergent-sysv + exit ;; + mc68k:UNIX:SYSTEM5:3.51m) + echo m68k-convergent-sysv + exit ;; + M680?0:D-NIX:5.3:*) + echo m68k-diab-dnix + exit ;; + M68*:*:R3V[5678]*:*) + test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;; + 3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0) + OS_REL='' + test -r /etc/.relid \ + && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4.3${OS_REL}; exit; } + /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ + && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; + 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4; exit; } ;; + m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*) + echo m68k-unknown-lynxos${UNAME_RELEASE} + exit ;; + mc68030:UNIX_System_V:4.*:*) + echo m68k-atari-sysv4 + exit ;; + TSUNAMI:LynxOS:2.*:*) + echo sparc-unknown-lynxos${UNAME_RELEASE} + exit ;; + rs6000:LynxOS:2.*:*) + echo rs6000-unknown-lynxos${UNAME_RELEASE} + exit ;; + PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.0*:*) + echo powerpc-unknown-lynxos${UNAME_RELEASE} + exit ;; + SM[BE]S:UNIX_SV:*:*) + echo mips-dde-sysv${UNAME_RELEASE} + exit ;; + RM*:ReliantUNIX-*:*:*) + echo mips-sni-sysv4 + exit ;; + 
RM*:SINIX-*:*:*) + echo mips-sni-sysv4 + exit ;; + *:SINIX-*:*:*) + if uname -p 2>/dev/null >/dev/null ; then + UNAME_MACHINE=`(uname -p) 2>/dev/null` + echo ${UNAME_MACHINE}-sni-sysv4 + else + echo ns32k-sni-sysv + fi + exit ;; + PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort + # says <Richard.M.Bartel@ccMail.Census.GOV> + echo i586-unisys-sysv4 + exit ;; + *:UNIX_System_V:4*:FTX*) + # From Gerald Hewes <hewes@openmarket.com>. + # How about differentiating between stratus architectures? -djm + echo hppa1.1-stratus-sysv4 + exit ;; + *:*:*:FTX*) + # From seanf@swdc.stratus.com. + echo i860-stratus-sysv4 + exit ;; + i*86:VOS:*:*) + # From Paul.Green@stratus.com. + echo ${UNAME_MACHINE}-stratus-vos + exit ;; + *:VOS:*:*) + # From Paul.Green@stratus.com. + echo hppa1.1-stratus-vos + exit ;; + mc68*:A/UX:*:*) + echo m68k-apple-aux${UNAME_RELEASE} + exit ;; + news*:NEWS-OS:6*:*) + echo mips-sony-newsos6 + exit ;; + R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) + if [ -d /usr/nec ]; then + echo mips-nec-sysv${UNAME_RELEASE} + else + echo mips-unknown-sysv${UNAME_RELEASE} + fi + exit ;; + BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. + echo powerpc-be-beos + exit ;; + BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only. + echo powerpc-apple-beos + exit ;; + BePC:BeOS:*:*) # BeOS running on Intel PC compatible. 
+ echo i586-pc-beos + exit ;; + SX-4:SUPER-UX:*:*) + echo sx4-nec-superux${UNAME_RELEASE} + exit ;; + SX-5:SUPER-UX:*:*) + echo sx5-nec-superux${UNAME_RELEASE} + exit ;; + SX-6:SUPER-UX:*:*) + echo sx6-nec-superux${UNAME_RELEASE} + exit ;; + SX-7:SUPER-UX:*:*) + echo sx7-nec-superux${UNAME_RELEASE} + exit ;; + SX-8:SUPER-UX:*:*) + echo sx8-nec-superux${UNAME_RELEASE} + exit ;; + SX-8R:SUPER-UX:*:*) + echo sx8r-nec-superux${UNAME_RELEASE} + exit ;; + Power*:Rhapsody:*:*) + echo powerpc-apple-rhapsody${UNAME_RELEASE} + exit ;; + *:Rhapsody:*:*) + echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE} + exit ;; + *:Darwin:*:*) + UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown + case $UNAME_PROCESSOR in + unknown) UNAME_PROCESSOR=powerpc ;; + esac + echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE} + exit ;; + *:procnto*:*:* | *:QNX:[0123456789]*:*) + UNAME_PROCESSOR=`uname -p` + if test "$UNAME_PROCESSOR" = "x86"; then + UNAME_PROCESSOR=i386 + UNAME_MACHINE=pc + fi + echo ${UNAME_PROCESSOR}-${UNAME_MACHINE}-nto-qnx${UNAME_RELEASE} + exit ;; + *:QNX:*:4*) + echo i386-pc-qnx + exit ;; + NSE-?:NONSTOP_KERNEL:*:*) + echo nse-tandem-nsk${UNAME_RELEASE} + exit ;; + NSR-?:NONSTOP_KERNEL:*:*) + echo nsr-tandem-nsk${UNAME_RELEASE} + exit ;; + *:NonStop-UX:*:*) + echo mips-compaq-nonstopux + exit ;; + BS2000:POSIX*:*:*) + echo bs2000-siemens-sysv + exit ;; + DS/*:UNIX_System_V:*:*) + echo ${UNAME_MACHINE}-${UNAME_SYSTEM}-${UNAME_RELEASE} + exit ;; + *:Plan9:*:*) + # "uname -m" is not consistent, so use $cputype instead. 386 + # is converted to i386 for consistency with other x86 + # operating systems. 
+ if test "$cputype" = "386"; then + UNAME_MACHINE=i386 + else + UNAME_MACHINE="$cputype" + fi + echo ${UNAME_MACHINE}-unknown-plan9 + exit ;; + *:TOPS-10:*:*) + echo pdp10-unknown-tops10 + exit ;; + *:TENEX:*:*) + echo pdp10-unknown-tenex + exit ;; + KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*) + echo pdp10-dec-tops20 + exit ;; + XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*) + echo pdp10-xkl-tops20 + exit ;; + *:TOPS-20:*:*) + echo pdp10-unknown-tops20 + exit ;; + *:ITS:*:*) + echo pdp10-unknown-its + exit ;; + SEI:*:*:SEIUX) + echo mips-sei-seiux${UNAME_RELEASE} + exit ;; + *:DragonFly:*:*) + echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` + exit ;; + *:*VMS:*:*) + UNAME_MACHINE=`(uname -p) 2>/dev/null` + case "${UNAME_MACHINE}" in + A*) echo alpha-dec-vms ; exit ;; + I*) echo ia64-dec-vms ; exit ;; + V*) echo vax-dec-vms ; exit ;; + esac ;; + *:XENIX:*:SysV) + echo i386-pc-xenix + exit ;; + i*86:skyos:*:*) + echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE}` | sed -e 's/ .*$//' + exit ;; + i*86:rdos:*:*) + echo ${UNAME_MACHINE}-pc-rdos + exit ;; +esac + +#echo '(No uname command or uname output not recognized.)' 1>&2 +#echo "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" 1>&2 + +eval $set_cc_for_build +cat >$dummy.c <<EOF +#ifdef _SEQUENT_ +# include <sys/types.h> +# include <sys/utsname.h> +#endif +main () +{ +#if defined (sony) +#if defined (MIPSEB) + /* BFD wants "bsd" instead of "newsos". Perhaps BFD should be changed, + I don't know.... 
*/ + printf ("mips-sony-bsd\n"); exit (0); +#else +#include <sys/param.h> + printf ("m68k-sony-newsos%s\n", +#ifdef NEWSOS4 + "4" +#else + "" +#endif + ); exit (0); +#endif +#endif + +#if defined (__arm) && defined (__acorn) && defined (__unix) + printf ("arm-acorn-riscix\n"); exit (0); +#endif + +#if defined (hp300) && !defined (hpux) + printf ("m68k-hp-bsd\n"); exit (0); +#endif + +#if defined (NeXT) +#if !defined (__ARCHITECTURE__) +#define __ARCHITECTURE__ "m68k" +#endif + int version; + version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`; + if (version < 4) + printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version); + else + printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version); + exit (0); +#endif + +#if defined (MULTIMAX) || defined (n16) +#if defined (UMAXV) + printf ("ns32k-encore-sysv\n"); exit (0); +#else +#if defined (CMU) + printf ("ns32k-encore-mach\n"); exit (0); +#else + printf ("ns32k-encore-bsd\n"); exit (0); +#endif +#endif +#endif + +#if defined (__386BSD__) + printf ("i386-pc-bsd\n"); exit (0); +#endif + +#if defined (sequent) +#if defined (i386) + printf ("i386-sequent-dynix\n"); exit (0); +#endif +#if defined (ns32000) + printf ("ns32k-sequent-dynix\n"); exit (0); +#endif +#endif + +#if defined (_SEQUENT_) + struct utsname un; + + uname(&un); + + if (strncmp(un.version, "V2", 2) == 0) { + printf ("i386-sequent-ptx2\n"); exit (0); + } + if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? 
*/ + printf ("i386-sequent-ptx1\n"); exit (0); + } + printf ("i386-sequent-ptx\n"); exit (0); + +#endif + +#if defined (vax) +# if !defined (ultrix) +# include <sys/param.h> +# if defined (BSD) +# if BSD == 43 + printf ("vax-dec-bsd4.3\n"); exit (0); +# else +# if BSD == 199006 + printf ("vax-dec-bsd4.3reno\n"); exit (0); +# else + printf ("vax-dec-bsd\n"); exit (0); +# endif +# endif +# else + printf ("vax-dec-bsd\n"); exit (0); +# endif +# else + printf ("vax-dec-ultrix\n"); exit (0); +# endif +#endif + +#if defined (alliant) && defined (i860) + printf ("i860-alliant-bsd\n"); exit (0); +#endif + + exit (1); +} +EOF + +$CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && SYSTEM_NAME=`$dummy` && + { echo "$SYSTEM_NAME"; exit; } + +# Apollos put the system type in the environment. + +test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit; } + +# Convex versions that predate uname can use getsysinfo(1) + +if [ -x /usr/convex/getsysinfo ] +then + case `getsysinfo -f cpu_type` in + c1*) + echo c1-convex-bsd + exit ;; + c2*) + if getsysinfo -f scalar_acc + then echo c32-convex-bsd + else echo c2-convex-bsd + fi + exit ;; + c34*) + echo c34-convex-bsd + exit ;; + c38*) + echo c38-convex-bsd + exit ;; + c4*) + echo c4-convex-bsd + exit ;; + esac +fi + +cat >&2 <<EOF +$0: unable to guess system type + +This script, last modified $timestamp, has failed to recognize +the operating system you are using. It is advised that you +download the most up to date version of the config scripts from + + http://savannah.gnu.org/cgi-bin/viewcvs/*checkout*/config/config/config.guess +and + http://savannah.gnu.org/cgi-bin/viewcvs/*checkout*/config/config/config.sub + +If the version you run ($0) is already up to date, please +send the following data and any information you think might be +pertinent to <config-patches@gnu.org> in order to provide the needed +information to handle your system. 
+ +config.guess timestamp = $timestamp + +uname -m = `(uname -m) 2>/dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null` + +hostinfo = `(hostinfo) 2>/dev/null` +/bin/universe = `(/bin/universe) 2>/dev/null` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null` +/bin/arch = `(/bin/arch) 2>/dev/null` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null` + +UNAME_MACHINE = ${UNAME_MACHINE} +UNAME_RELEASE = ${UNAME_RELEASE} +UNAME_SYSTEM = ${UNAME_SYSTEM} +UNAME_VERSION = ${UNAME_VERSION} +EOF + +exit 1 + +# Local variables: +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "timestamp='" +# time-stamp-format: "%:y-%02m-%02d" +# time-stamp-end: "'" +# End: diff --git a/config.h.in b/config.h.in new file mode 100644 index 0000000..f0a5c51 --- /dev/null +++ b/config.h.in @@ -0,0 +1,19 @@ +/* config.h.in. Generated automatically from configure.in by autoheader. */ + +/* Define if you have the getopt_long function. */ +#undef HAVE_GETOPT_LONG + +/* Define if you have the <getopt.h> header file. */ +#undef HAVE_GETOPT_H + +/* Define if you have the <memory.h> header file. */ +#undef HAVE_MEMORY_H + +/* Define if you have the <stdio.h> header file. */ +#undef HAVE_STDIO_H + +/* Define if you have the <stdlib.h> header file. */ +#undef HAVE_STDLIB_H + +/* Define if you have the c library (-lc). */ +#undef HAVE_LIBC diff --git a/config.sub b/config.sub new file mode 100755 index 0000000..a06a480 --- /dev/null +++ b/config.sub @@ -0,0 +1,1362 @@ +#! /bin/sh +# Configuration validation subroutine script. +# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001 +# Free Software Foundation, Inc. 
+ +timestamp='2001-04-20' + +# This file is (in principle) common to ALL GNU software. +# The presence of a machine in this file suggests that SOME GNU software +# can handle that machine. It does not imply ALL GNU software can. +# +# This file is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place - Suite 330, +# Boston, MA 02111-1307, USA. + +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# Please send patches to <config-patches@gnu.org>. +# +# Configuration subroutine to validate and canonicalize a configuration type. +# Supply the specified configuration type as an argument. +# If it is invalid, we print an error message on stderr and exit with code 1. +# Otherwise, we print the canonical config type on stdout and succeed. + +# This file is supposed to be the same for all GNU packages +# and recognize all the CPU types, system types and aliases +# that are meaningful with *any* GNU software. +# Each package is responsible for reporting which valid configurations +# it does not support. The user should be able to distinguish +# a failure to support a valid configuration from a meaningless +# configuration. 
+ +# The goal of this file is to map all the various variations of a given +# machine specification into a single specification in the form: +# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM +# or in some cases, the newer four-part form: +# CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM +# It is wrong to echo any other type of specification. + +me=`echo "$0" | sed -e 's,.*/,,'` + +usage="\ +Usage: $0 [OPTION] CPU-MFR-OPSYS + $0 [OPTION] ALIAS + +Canonicalize a configuration name. + +Operation modes: + -h, --help print this help, then exit + -t, --time-stamp print date of last modification, then exit + -v, --version print version number, then exit + +Report bugs and patches to <config-patches@gnu.org>." + +version="\ +GNU config.sub ($timestamp) + +Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001 +Free Software Foundation, Inc. + +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." + +help=" +Try \`$me --help' for more information." + +# Parse command line +while test $# -gt 0 ; do + case $1 in + --time-stamp | --time* | -t ) + echo "$timestamp" ; exit 0 ;; + --version | -v ) + echo "$version" ; exit 0 ;; + --help | --h* | -h ) + echo "$usage"; exit 0 ;; + -- ) # Stop option processing + shift; break ;; + - ) # Use stdin as input. + break ;; + -* ) + echo "$me: invalid option $1$help" + exit 1 ;; + + *local*) + # First pass through any local machine types. + echo $1 + exit 0;; + + * ) + break ;; + esac +done + +case $# in + 0) echo "$me: missing argument$help" >&2 + exit 1;; + 1) ;; + *) echo "$me: too many arguments$help" >&2 + exit 1;; +esac + +# Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any). +# Here we must recognize all the valid KERNEL-OS combinations. 
+maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'` +case $maybe_os in + nto-qnx* | linux-gnu* | storm-chaos* | os2-emx*) + os=-$maybe_os + basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` + ;; + *) + basic_machine=`echo $1 | sed 's/-[^-]*$//'` + if [ $basic_machine != $1 ] + then os=`echo $1 | sed 's/.*-/-/'` + else os=; fi + ;; +esac + +### Let's recognize common machines as not being operating systems so +### that things like config.sub decstation-3100 work. We also +### recognize some manufacturers as not being operating systems, so we +### can provide default operating systems below. +case $os in + -sun*os*) + # Prevent following clause from handling this invalid input. + ;; + -dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \ + -att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \ + -unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \ + -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\ + -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \ + -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \ + -apple | -axis) + os= + basic_machine=$1 + ;; + -sim | -cisco | -oki | -wec | -winbond) + os= + basic_machine=$1 + ;; + -scout) + ;; + -wrs) + os=-vxworks + basic_machine=$1 + ;; + -hiux*) + os=-hiuxwe2 + ;; + -sco5) + os=-sco3.2v5 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco4) + os=-sco3.2v4 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco3.2.[4-9]*) + os=`echo $os | sed -e 's/sco3.2./sco3.2v/'` + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco3.2v[4-9]*) + # Don't forget version if it is 3.2v4 or newer. 
+ basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco*) + os=-sco3.2v2 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -udk*) + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -isc) + os=-isc2.2 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -clix*) + basic_machine=clipper-intergraph + ;; + -isc*) + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -lynx*) + os=-lynxos + ;; + -ptx*) + basic_machine=`echo $1 | sed -e 's/86-.*/86-sequent/'` + ;; + -windowsnt*) + os=`echo $os | sed -e 's/windowsnt/winnt/'` + ;; + -psos*) + os=-psos + ;; + -mint | -mint[0-9]*) + basic_machine=m68k-atari + os=-mint + ;; +esac + +# Decode aliases for certain CPU-COMPANY combinations. +case $basic_machine in + # Recognize the basic CPU types without company name. + # Some are omitted here because they have special meanings below. + tahoe | i860 | ia64 | m32r | m68k | m68000 | m88k | ns32k | arc \ + | arm | arme[lb] | arm[bl]e | armv[2345] | armv[345][lb] | strongarm | xscale \ + | pyramid | mn10200 | mn10300 | tron | a29k \ + | 580 | i960 | h8300 \ + | x86 | ppcbe | mipsbe | mipsle | shbe | shle \ + | hppa | hppa1.0 | hppa1.1 | hppa2.0 | hppa2.0w | hppa2.0n \ + | hppa64 \ + | alpha | alphaev[4-8] | alphaev56 | alphapca5[67] \ + | alphaev6[78] \ + | we32k | ns16k | clipper | i370 | sh | sh[34] \ + | powerpc | powerpcle \ + | 1750a | dsp16xx | pdp10 | pdp11 \ + | mips16 | mips64 | mipsel | mips64el \ + | mips64orion | mips64orionel | mipstx39 | mipstx39el \ + | mips64vr4300 | mips64vr4300el | mips64vr4100 | mips64vr4100el \ + | mips64vr5000 | miprs64vr5000el | mcore | s390 | s390x \ + | sparc | sparclet | sparclite | sparc64 | sparcv9 | sparcv9b \ + | v850 | c4x \ + | thumb | d10v | d30v | fr30 | avr | openrisc | tic80 \ + | pj | pjl | h8500) + basic_machine=$basic_machine-unknown + ;; + m6811 | m68hc11 | m6812 | m68hc12) + # Motorola 68HC11/12. 
+ basic_machine=$basic_machine-unknown + os=-none + ;; + m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | z8k | v70 | w65) + ;; + + # We use `pc' rather than `unknown' + # because (1) that's what they normally are, and + # (2) the word "unknown" tends to confuse beginning users. + i*86 | x86_64) + basic_machine=$basic_machine-pc + ;; + # Object if more than one company name word. + *-*-*) + echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 + exit 1 + ;; + # Recognize the basic CPU types with company name. + # FIXME: clean up the formatting here. + vax-* | tahoe-* | i*86-* | i860-* | ia64-* | m32r-* | m68k-* | m68000-* \ + | m88k-* | sparc-* | ns32k-* | fx80-* | arc-* | c[123]* \ + | arm-* | armbe-* | armle-* | armv*-* | strongarm-* | xscale-* \ + | mips-* | pyramid-* | tron-* | a29k-* | romp-* | rs6000-* \ + | power-* | none-* | 580-* | cray2-* | h8300-* | h8500-* | i960-* \ + | xmp-* | ymp-* \ + | x86-* | ppcbe-* | mipsbe-* | mipsle-* | shbe-* | shle-* \ + | hppa-* | hppa1.0-* | hppa1.1-* | hppa2.0-* | hppa2.0w-* \ + | hppa2.0n-* | hppa64-* \ + | alpha-* | alphaev[4-8]-* | alphaev56-* | alphapca5[67]-* \ + | alphaev6[78]-* \ + | we32k-* | cydra-* | ns16k-* | pn-* | np1-* | xps100-* \ + | clipper-* | orion-* \ + | sparclite-* | pdp10-* | pdp11-* | sh-* | powerpc-* | powerpcle-* \ + | sparc64-* | sparcv9-* | sparcv9b-* | sparc86x-* \ + | mips16-* | mips64-* | mipsel-* \ + | mips64el-* | mips64orion-* | mips64orionel-* \ + | mips64vr4100-* | mips64vr4100el-* | mips64vr4300-* | mips64vr4300el-* \ + | mipstx39-* | mipstx39el-* | mcore-* \ + | f30[01]-* | f700-* | s390-* | s390x-* | sv1-* | t3e-* \ + | [cjt]90-* \ + | m88110-* | m680[01234]0-* | m683?2-* | m68360-* | z8k-* | d10v-* \ + | thumb-* | v850-* | d30v-* | tic30-* | tic80-* | c30-* | fr30-* \ + | bs2000-* | tic54x-* | c54x-* | x86_64-* | pj-* | pjl-*) + ;; + # Recognize the various machine names and aliases which stand + # for a CPU type and a company and sometimes even an OS. 
+ 386bsd) + basic_machine=i386-unknown + os=-bsd + ;; + 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc) + basic_machine=m68000-att + ;; + 3b*) + basic_machine=we32k-att + ;; + a29khif) + basic_machine=a29k-amd + os=-udi + ;; + adobe68k) + basic_machine=m68010-adobe + os=-scout + ;; + alliant | fx80) + basic_machine=fx80-alliant + ;; + altos | altos3068) + basic_machine=m68k-altos + ;; + am29k) + basic_machine=a29k-none + os=-bsd + ;; + amdahl) + basic_machine=580-amdahl + os=-sysv + ;; + amiga | amiga-*) + basic_machine=m68k-unknown + ;; + amigaos | amigados) + basic_machine=m68k-unknown + os=-amigaos + ;; + amigaunix | amix) + basic_machine=m68k-unknown + os=-sysv4 + ;; + apollo68) + basic_machine=m68k-apollo + os=-sysv + ;; + apollo68bsd) + basic_machine=m68k-apollo + os=-bsd + ;; + aux) + basic_machine=m68k-apple + os=-aux + ;; + balance) + basic_machine=ns32k-sequent + os=-dynix + ;; + convex-c1) + basic_machine=c1-convex + os=-bsd + ;; + convex-c2) + basic_machine=c2-convex + os=-bsd + ;; + convex-c32) + basic_machine=c32-convex + os=-bsd + ;; + convex-c34) + basic_machine=c34-convex + os=-bsd + ;; + convex-c38) + basic_machine=c38-convex + os=-bsd + ;; + cray | ymp) + basic_machine=ymp-cray + os=-unicos + ;; + cray2) + basic_machine=cray2-cray + os=-unicos + ;; + [cjt]90) + basic_machine=${basic_machine}-cray + os=-unicos + ;; + crds | unos) + basic_machine=m68k-crds + ;; + cris | cris-* | etrax*) + basic_machine=cris-axis + ;; + da30 | da30-*) + basic_machine=m68k-da30 + ;; + decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn) + basic_machine=mips-dec + ;; + delta | 3300 | motorola-3300 | motorola-delta \ + | 3300-motorola | delta-motorola) + basic_machine=m68k-motorola + ;; + delta88) + basic_machine=m88k-motorola + os=-sysv3 + ;; + dpx20 | dpx20-*) + basic_machine=rs6000-bull + os=-bosx + ;; + dpx2* | dpx2*-bull) + basic_machine=m68k-bull + os=-sysv3 + ;; + ebmon29k) + basic_machine=a29k-amd + os=-ebmon + ;; + elxsi) + 
basic_machine=elxsi-elxsi + os=-bsd + ;; + encore | umax | mmax) + basic_machine=ns32k-encore + ;; + es1800 | OSE68k | ose68k | ose | OSE) + basic_machine=m68k-ericsson + os=-ose + ;; + fx2800) + basic_machine=i860-alliant + ;; + genix) + basic_machine=ns32k-ns + ;; + gmicro) + basic_machine=tron-gmicro + os=-sysv + ;; + go32) + basic_machine=i386-pc + os=-go32 + ;; + h3050r* | hiux*) + basic_machine=hppa1.1-hitachi + os=-hiuxwe2 + ;; + h8300hms) + basic_machine=h8300-hitachi + os=-hms + ;; + h8300xray) + basic_machine=h8300-hitachi + os=-xray + ;; + h8500hms) + basic_machine=h8500-hitachi + os=-hms + ;; + harris) + basic_machine=m88k-harris + os=-sysv3 + ;; + hp300-*) + basic_machine=m68k-hp + ;; + hp300bsd) + basic_machine=m68k-hp + os=-bsd + ;; + hp300hpux) + basic_machine=m68k-hp + os=-hpux + ;; + hp3k9[0-9][0-9] | hp9[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hp9k2[0-9][0-9] | hp9k31[0-9]) + basic_machine=m68000-hp + ;; + hp9k3[2-9][0-9]) + basic_machine=m68k-hp + ;; + hp9k6[0-9][0-9] | hp6[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hp9k7[0-79][0-9] | hp7[0-79][0-9]) + basic_machine=hppa1.1-hp + ;; + hp9k78[0-9] | hp78[0-9]) + # FIXME: really hppa2.0-hp + basic_machine=hppa1.1-hp + ;; + hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893) + # FIXME: really hppa2.0-hp + basic_machine=hppa1.1-hp + ;; + hp9k8[0-9][13679] | hp8[0-9][13679]) + basic_machine=hppa1.1-hp + ;; + hp9k8[0-9][0-9] | hp8[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hppa-next) + os=-nextstep3 + ;; + hppaosf) + basic_machine=hppa1.1-hp + os=-osf + ;; + hppro) + basic_machine=hppa1.1-hp + os=-proelf + ;; + i370-ibm* | ibm*) + basic_machine=i370-ibm + ;; +# I'm not sure what "Sysv32" means. Should this be sysv3.2? 
+ i*86v32) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-sysv32 + ;; + i*86v4*) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-sysv4 + ;; + i*86v) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-sysv + ;; + i*86sol2) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-solaris2 + ;; + i386mach) + basic_machine=i386-mach + os=-mach + ;; + i386-vsta | vsta) + basic_machine=i386-unknown + os=-vsta + ;; + iris | iris4d) + basic_machine=mips-sgi + case $os in + -irix*) + ;; + *) + os=-irix4 + ;; + esac + ;; + isi68 | isi) + basic_machine=m68k-isi + os=-sysv + ;; + m88k-omron*) + basic_machine=m88k-omron + ;; + magnum | m3230) + basic_machine=mips-mips + os=-sysv + ;; + merlin) + basic_machine=ns32k-utek + os=-sysv + ;; + mingw32) + basic_machine=i386-pc + os=-mingw32 + ;; + miniframe) + basic_machine=m68000-convergent + ;; + *mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*) + basic_machine=m68k-atari + os=-mint + ;; + mipsel*-linux*) + basic_machine=mipsel-unknown + os=-linux-gnu + ;; + mips*-linux*) + basic_machine=mips-unknown + os=-linux-gnu + ;; + mips3*-*) + basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'` + ;; + mips3*) + basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown + ;; + mmix*) + basic_machine=mmix-knuth + os=-mmixware + ;; + monitor) + basic_machine=m68k-rom68k + os=-coff + ;; + msdos) + basic_machine=i386-pc + os=-msdos + ;; + mvs) + basic_machine=i370-ibm + os=-mvs + ;; + ncr3000) + basic_machine=i486-ncr + os=-sysv4 + ;; + netbsd386) + basic_machine=i386-unknown + os=-netbsd + ;; + netwinder) + basic_machine=armv4l-rebel + os=-linux + ;; + news | news700 | news800 | news900) + basic_machine=m68k-sony + os=-newsos + ;; + news1000) + basic_machine=m68030-sony + os=-newsos + ;; + news-3600 | risc-news) + basic_machine=mips-sony + os=-newsos + ;; + necv70) + basic_machine=v70-nec + os=-sysv + ;; + next | m*-next ) + basic_machine=m68k-next + case $os in + -nextstep* ) + ;; + -ns2*) + 
os=-nextstep2 + ;; + *) + os=-nextstep3 + ;; + esac + ;; + nh3000) + basic_machine=m68k-harris + os=-cxux + ;; + nh[45]000) + basic_machine=m88k-harris + os=-cxux + ;; + nindy960) + basic_machine=i960-intel + os=-nindy + ;; + mon960) + basic_machine=i960-intel + os=-mon960 + ;; + nonstopux) + basic_machine=mips-compaq + os=-nonstopux + ;; + np1) + basic_machine=np1-gould + ;; + nsr-tandem) + basic_machine=nsr-tandem + ;; + op50n-* | op60c-*) + basic_machine=hppa1.1-oki + os=-proelf + ;; + OSE68000 | ose68000) + basic_machine=m68000-ericsson + os=-ose + ;; + os68k) + basic_machine=m68k-none + os=-os68k + ;; + pa-hitachi) + basic_machine=hppa1.1-hitachi + os=-hiuxwe2 + ;; + paragon) + basic_machine=i860-intel + os=-osf + ;; + pbd) + basic_machine=sparc-tti + ;; + pbb) + basic_machine=m68k-tti + ;; + pc532 | pc532-*) + basic_machine=ns32k-pc532 + ;; + pentium | p5 | k5 | k6 | nexgen) + basic_machine=i586-pc + ;; + pentiumpro | p6 | 6x86 | athlon) + basic_machine=i686-pc + ;; + pentiumii | pentium2) + basic_machine=i686-pc + ;; + pentium-* | p5-* | k5-* | k6-* | nexgen-*) + basic_machine=i586-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pentiumpro-* | p6-* | 6x86-* | athlon-*) + basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pentiumii-* | pentium2-*) + basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pn) + basic_machine=pn-gould + ;; + power) basic_machine=power-ibm + ;; + ppc) basic_machine=powerpc-unknown + ;; + ppc-*) basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ppcle | powerpclittle | ppc-le | powerpc-little) + basic_machine=powerpcle-unknown + ;; + ppcle-* | powerpclittle-*) + basic_machine=powerpcle-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ps2) + basic_machine=i386-ibm + ;; + pw32) + basic_machine=i586-unknown + os=-pw32 + ;; + rom68k) + basic_machine=m68k-rom68k + os=-coff + ;; + rm[46]00) + basic_machine=mips-siemens + ;; + rtpc | rtpc-*) + basic_machine=romp-ibm + ;; + sa29200) + 
basic_machine=a29k-amd + os=-udi + ;; + sequent) + basic_machine=i386-sequent + ;; + sh) + basic_machine=sh-hitachi + os=-hms + ;; + sparclite-wrs) + basic_machine=sparclite-wrs + os=-vxworks + ;; + sps7) + basic_machine=m68k-bull + os=-sysv2 + ;; + spur) + basic_machine=spur-unknown + ;; + st2000) + basic_machine=m68k-tandem + ;; + stratus) + basic_machine=i860-stratus + os=-sysv4 + ;; + sun2) + basic_machine=m68000-sun + ;; + sun2os3) + basic_machine=m68000-sun + os=-sunos3 + ;; + sun2os4) + basic_machine=m68000-sun + os=-sunos4 + ;; + sun3os3) + basic_machine=m68k-sun + os=-sunos3 + ;; + sun3os4) + basic_machine=m68k-sun + os=-sunos4 + ;; + sun4os3) + basic_machine=sparc-sun + os=-sunos3 + ;; + sun4os4) + basic_machine=sparc-sun + os=-sunos4 + ;; + sun4sol2) + basic_machine=sparc-sun + os=-solaris2 + ;; + sun3 | sun3-*) + basic_machine=m68k-sun + ;; + sun4) + basic_machine=sparc-sun + ;; + sun386 | sun386i | roadrunner) + basic_machine=i386-sun + ;; + sv1) + basic_machine=sv1-cray + os=-unicos + ;; + symmetry) + basic_machine=i386-sequent + os=-dynix + ;; + t3e) + basic_machine=t3e-cray + os=-unicos + ;; + tic54x | c54x*) + basic_machine=tic54x-unknown + os=-coff + ;; + tx39) + basic_machine=mipstx39-unknown + ;; + tx39el) + basic_machine=mipstx39el-unknown + ;; + tower | tower-32) + basic_machine=m68k-ncr + ;; + udi29k) + basic_machine=a29k-amd + os=-udi + ;; + ultra3) + basic_machine=a29k-nyu + os=-sym1 + ;; + v810 | necv810) + basic_machine=v810-nec + os=-none + ;; + vaxv) + basic_machine=vax-dec + os=-sysv + ;; + vms) + basic_machine=vax-dec + os=-vms + ;; + vpp*|vx|vx-*) + basic_machine=f301-fujitsu + ;; + vxworks960) + basic_machine=i960-wrs + os=-vxworks + ;; + vxworks68) + basic_machine=m68k-wrs + os=-vxworks + ;; + vxworks29k) + basic_machine=a29k-wrs + os=-vxworks + ;; + w65*) + basic_machine=w65-wdc + os=-none + ;; + w89k-*) + basic_machine=hppa1.1-winbond + os=-proelf + ;; + xmp) + basic_machine=xmp-cray + os=-unicos + ;; + xps | xps100) + 
basic_machine=xps100-honeywell + ;; + z8k-*-coff) + basic_machine=z8k-unknown + os=-sim + ;; + none) + basic_machine=none-none + os=-none + ;; + +# Here we handle the default manufacturer of certain CPU types. It is in +# some cases the only manufacturer, in others, it is the most popular. + w89k) + basic_machine=hppa1.1-winbond + ;; + op50n) + basic_machine=hppa1.1-oki + ;; + op60c) + basic_machine=hppa1.1-oki + ;; + mips) + if [ x$os = x-linux-gnu ]; then + basic_machine=mips-unknown + else + basic_machine=mips-mips + fi + ;; + romp) + basic_machine=romp-ibm + ;; + rs6000) + basic_machine=rs6000-ibm + ;; + vax) + basic_machine=vax-dec + ;; + pdp10) + # there are many clones, so DEC is not a safe bet + basic_machine=pdp10-unknown + ;; + pdp11) + basic_machine=pdp11-dec + ;; + we32k) + basic_machine=we32k-att + ;; + sh3 | sh4) + basic_machine=sh-unknown + ;; + sparc | sparcv9 | sparcv9b) + basic_machine=sparc-sun + ;; + cydra) + basic_machine=cydra-cydrome + ;; + orion) + basic_machine=orion-highlevel + ;; + orion105) + basic_machine=clipper-highlevel + ;; + mac | mpw | mac-mpw) + basic_machine=m68k-apple + ;; + pmac | pmac-mpw) + basic_machine=powerpc-apple + ;; + c4x*) + basic_machine=c4x-none + os=-coff + ;; + *-unknown) + # Make sure to match an already-canonicalized machine name. + ;; + *) + echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 + exit 1 + ;; +esac + +# Here we canonicalize certain aliases for manufacturers. +case $basic_machine in + *-digital*) + basic_machine=`echo $basic_machine | sed 's/digital.*/dec/'` + ;; + *-commodore*) + basic_machine=`echo $basic_machine | sed 's/commodore.*/cbm/'` + ;; + *) + ;; +esac + +# Decode manufacturer-specific aliases for certain operating systems. + +if [ x"$os" != x"" ] +then +case $os in + # First match some system type aliases + # that might get confused with valid system types. + # -solaris* is a basic system type, with this one exception. 
+ -solaris1 | -solaris1.*) + os=`echo $os | sed -e 's|solaris1|sunos4|'` + ;; + -solaris) + os=-solaris2 + ;; + -svr4*) + os=-sysv4 + ;; + -unixware*) + os=-sysv4.2uw + ;; + -gnu/linux*) + os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'` + ;; + # First accept the basic system types. + # The portable systems comes first. + # Each alternative MUST END IN A *, to match a version number. + # -sysv* is not here because it comes later, after sysvr4. + -gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \ + | -*vms* | -sco* | -esix* | -isc* | -aix* | -sunos | -sunos[34]*\ + | -hpux* | -unos* | -osf* | -luna* | -dgux* | -solaris* | -sym* \ + | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \ + | -aos* \ + | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \ + | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \ + | -hiux* | -386bsd* | -netbsd* | -openbsd* | -freebsd* | -riscix* \ + | -lynxos* | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \ + | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \ + | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \ + | -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ + | -mingw32* | -linux-gnu* | -uxpv* | -beos* | -mpeix* | -udk* \ + | -interix* | -uwin* | -rhapsody* | -darwin* | -opened* \ + | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \ + | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* | -os2*) + # Remember, each alternative MUST END IN *, to match a version number. 
+ ;; + -qnx*) + case $basic_machine in + x86-* | i*86-*) + ;; + *) + os=-nto$os + ;; + esac + ;; + -nto*) + os=-nto-qnx + ;; + -sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \ + | -windows* | -osx | -abug | -netware* | -os9* | -beos* \ + | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*) + ;; + -mac*) + os=`echo $os | sed -e 's|mac|macos|'` + ;; + -linux*) + os=`echo $os | sed -e 's|linux|linux-gnu|'` + ;; + -sunos5*) + os=`echo $os | sed -e 's|sunos5|solaris2|'` + ;; + -sunos6*) + os=`echo $os | sed -e 's|sunos6|solaris3|'` + ;; + -opened*) + os=-openedition + ;; + -wince*) + os=-wince + ;; + -osfrose*) + os=-osfrose + ;; + -osf*) + os=-osf + ;; + -utek*) + os=-bsd + ;; + -dynix*) + os=-bsd + ;; + -acis*) + os=-aos + ;; + -386bsd) + os=-bsd + ;; + -ctix* | -uts*) + os=-sysv + ;; + -ns2 ) + os=-nextstep2 + ;; + -nsk*) + os=-nsk + ;; + # Preserve the version number of sinix5. + -sinix5.*) + os=`echo $os | sed -e 's|sinix|sysv|'` + ;; + -sinix*) + os=-sysv4 + ;; + -triton*) + os=-sysv3 + ;; + -oss*) + os=-sysv3 + ;; + -svr4) + os=-sysv4 + ;; + -svr3) + os=-sysv3 + ;; + -sysvr4) + os=-sysv4 + ;; + # This must come after -sysvr4. + -sysv*) + ;; + -ose*) + os=-ose + ;; + -es1800*) + os=-ose + ;; + -xenix) + os=-xenix + ;; + -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) + os=-mint + ;; + -none) + ;; + *) + # Get rid of the `-' at the beginning of $os. + os=`echo $os | sed 's/[^-]*-//'` + echo Invalid configuration \`$1\': system \`$os\' not recognized 1>&2 + exit 1 + ;; +esac +else + +# Here we handle the default operating systems that come with various machines. +# The value should be what the vendor currently ships out the door with their +# machine or put another way, the most popular os provided with the machine. + +# Note that if you're going to try to match "-MANUFACTURER" here (say, +# "-sun"), then you have to tell the case statement up towards the top +# that MANUFACTURER isn't an operating system. 
Otherwise, code above +# will signal an error saying that MANUFACTURER isn't an operating +# system, and we'll never get to this point. + +case $basic_machine in + *-acorn) + os=-riscix1.2 + ;; + arm*-rebel) + os=-linux + ;; + arm*-semi) + os=-aout + ;; + pdp10-*) + os=-tops20 + ;; + pdp11-*) + os=-none + ;; + *-dec | vax-*) + os=-ultrix4.2 + ;; + m68*-apollo) + os=-domain + ;; + i386-sun) + os=-sunos4.0.2 + ;; + m68000-sun) + os=-sunos3 + # This also exists in the configure program, but was not the + # default. + # os=-sunos4 + ;; + m68*-cisco) + os=-aout + ;; + mips*-cisco) + os=-elf + ;; + mips*-*) + os=-elf + ;; + *-tti) # must be before sparc entry or we get the wrong os. + os=-sysv3 + ;; + sparc-* | *-sun) + os=-sunos4.1.1 + ;; + *-be) + os=-beos + ;; + *-ibm) + os=-aix + ;; + *-wec) + os=-proelf + ;; + *-winbond) + os=-proelf + ;; + *-oki) + os=-proelf + ;; + *-hp) + os=-hpux + ;; + *-hitachi) + os=-hiux + ;; + i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent) + os=-sysv + ;; + *-cbm) + os=-amigaos + ;; + *-dg) + os=-dgux + ;; + *-dolphin) + os=-sysv3 + ;; + m68k-ccur) + os=-rtu + ;; + m88k-omron*) + os=-luna + ;; + *-next ) + os=-nextstep + ;; + *-sequent) + os=-ptx + ;; + *-crds) + os=-unos + ;; + *-ns) + os=-genix + ;; + i370-*) + os=-mvs + ;; + *-next) + os=-nextstep3 + ;; + *-gould) + os=-sysv + ;; + *-highlevel) + os=-bsd + ;; + *-encore) + os=-bsd + ;; + *-sgi) + os=-irix + ;; + *-siemens) + os=-sysv4 + ;; + *-masscomp) + os=-rtu + ;; + f30[01]-fujitsu | f700-fujitsu) + os=-uxpv + ;; + *-rom68k) + os=-coff + ;; + *-*bug) + os=-coff + ;; + *-apple) + os=-macos + ;; + *-atari*) + os=-mint + ;; + *) + os=-none + ;; +esac +fi + +# Here we handle the case where we know the os, and the CPU type, but not the +# manufacturer. We pick the logical manufacturer. 
+vendor=unknown +case $basic_machine in + *-unknown) + case $os in + -riscix*) + vendor=acorn + ;; + -sunos*) + vendor=sun + ;; + -aix*) + vendor=ibm + ;; + -beos*) + vendor=be + ;; + -hpux*) + vendor=hp + ;; + -mpeix*) + vendor=hp + ;; + -hiux*) + vendor=hitachi + ;; + -unos*) + vendor=crds + ;; + -dgux*) + vendor=dg + ;; + -luna*) + vendor=omron + ;; + -genix*) + vendor=ns + ;; + -mvs* | -opened*) + vendor=ibm + ;; + -ptx*) + vendor=sequent + ;; + -vxsim* | -vxworks*) + vendor=wrs + ;; + -aux*) + vendor=apple + ;; + -hms*) + vendor=hitachi + ;; + -mpw* | -macos*) + vendor=apple + ;; + -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) + vendor=atari + ;; + esac + basic_machine=`echo $basic_machine | sed "s/unknown/$vendor/"` + ;; +esac + +echo $basic_machine$os +exit 0 + +# Local variables: +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "timestamp='" +# time-stamp-format: "%:y-%02m-%02d" +# time-stamp-end: "'" +# End: diff --git a/configure b/configure new file mode 100755 index 0000000..47015d5 --- /dev/null +++ b/configure @@ -0,0 +1,4357 @@ +#! /bin/sh +# Guess values for system-dependent variables and create Makefiles. +# Generated by GNU Autoconf 2.59. +# +# Copyright (C) 2003 Free Software Foundation, Inc. +# This configure script is free software; the Free Software Foundation +# gives unlimited permission to copy, distribute and modify it. +## --------------------- ## +## M4sh Initialization. ## +## --------------------- ## + +# Be Bourne compatible +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: + # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' +elif test -n "${BASH_VERSION+set}" && (set -o posix) >/dev/null 2>&1; then + set -o posix +fi +DUALCASE=1; export DUALCASE # for MKS sh + +# Support unset when possible. 
+if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then + as_unset=unset +else + as_unset=false +fi + + +# Work around bugs in pre-3.0 UWIN ksh. +$as_unset ENV MAIL MAILPATH +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +for as_var in \ + LANG LANGUAGE LC_ADDRESS LC_ALL LC_COLLATE LC_CTYPE LC_IDENTIFICATION \ + LC_MEASUREMENT LC_MESSAGES LC_MONETARY LC_NAME LC_NUMERIC LC_PAPER \ + LC_TELEPHONE LC_TIME +do + if (set +x; test -z "`(eval $as_var=C; export $as_var) 2>&1`"); then + eval $as_var=C; export $as_var + else + $as_unset $as_var + fi +done + +# Required to use basename. +if expr a : '\(a\)' >/dev/null 2>&1; then + as_expr=expr +else + as_expr=false +fi + +if (basename /) >/dev/null 2>&1 && test "X`basename / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + + +# Name of the executable. +as_me=`$as_basename "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)$' \| \ + . : '\(.\)' 2>/dev/null || +echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/; q; } + /^X\/\(\/\/\)$/{ s//\1/; q; } + /^X\/\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + + +# PATH needs CR, and LINENO needs CR and PATH. +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + +# The user is always right. +if test "${PATH_SEPARATOR+set}" != set; then + echo "#! /bin/sh" >conf$$.sh + echo "exit 0" >>conf$$.sh + chmod +x conf$$.sh + if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then + PATH_SEPARATOR=';' + else + PATH_SEPARATOR=: + fi + rm -f conf$$.sh +fi + + + as_lineno_1=$LINENO + as_lineno_2=$LINENO + as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null` + test "x$as_lineno_1" != "x$as_lineno_2" && + test "x$as_lineno_3" = "x$as_lineno_2" || { + # Find who we are. Look in the path if we contain no path at all + # relative or not. 
+ case $0 in + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break +done + + ;; + esac + # We did not find ourselves, most probably we were run as `sh COMMAND' + # in which case we are not to be found in the path. + if test "x$as_myself" = x; then + as_myself=$0 + fi + if test ! -f "$as_myself"; then + { echo "$as_me: error: cannot find myself; rerun with an absolute path" >&2 + { (exit 1); exit 1; }; } + fi + case $CONFIG_SHELL in + '') + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for as_base in sh bash ksh sh5; do + case $as_dir in + /*) + if ("$as_dir/$as_base" -c ' + as_lineno_1=$LINENO + as_lineno_2=$LINENO + as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null` + test "x$as_lineno_1" != "x$as_lineno_2" && + test "x$as_lineno_3" = "x$as_lineno_2" ') 2>/dev/null; then + $as_unset BASH_ENV || test "${BASH_ENV+set}" != set || { BASH_ENV=; export BASH_ENV; } + $as_unset ENV || test "${ENV+set}" != set || { ENV=; export ENV; } + CONFIG_SHELL=$as_dir/$as_base + export CONFIG_SHELL + exec "$CONFIG_SHELL" "$0" ${1+"$@"} + fi;; + esac + done +done +;; + esac + + # Create $as_me.lineno as a copy of $as_myself, but with $LINENO + # uniformly replaced by the line number. The first 'sed' inserts a + # line-number line before each line; the second 'sed' does the real + # work. The second script uses 'N' to pair each line-number line + # with the numbered line, and appends trailing '-' during + # substitution so that $LINENO is not a special case at line end. + # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the + # second 'sed' script. Blame Lee E. McMahon for sed's syntax. 
:-) + sed '=' <$as_myself | + sed ' + N + s,$,-, + : loop + s,^\(['$as_cr_digits']*\)\(.*\)[$]LINENO\([^'$as_cr_alnum'_]\),\1\2\1\3, + t loop + s,-$,, + s,^['$as_cr_digits']*\n,, + ' >$as_me.lineno && + chmod +x $as_me.lineno || + { echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2 + { (exit 1); exit 1; }; } + + # Don't try to exec as it changes $[0], causing all sort of problems + # (the dirname of $[0] is not the place where we might find the + # original and so on. Autoconf is especially sensible to this). + . ./$as_me.lineno + # Exit status is that of the last command. + exit +} + + +case `echo "testing\c"; echo 1,2,3`,`echo -n testing; echo 1,2,3` in + *c*,-n*) ECHO_N= ECHO_C=' +' ECHO_T=' ' ;; + *c*,* ) ECHO_N=-n ECHO_C= ECHO_T= ;; + *) ECHO_N= ECHO_C='\c' ECHO_T= ;; +esac + +if expr a : '\(a\)' >/dev/null 2>&1; then + as_expr=expr +else + as_expr=false +fi + +rm -f conf$$ conf$$.exe conf$$.file +echo >conf$$.file +if ln -s conf$$.file conf$$ 2>/dev/null; then + # We could just check for DJGPP; but this test a) works b) is more generic + # and c) will remain valid once DJGPP supports symlinks (DJGPP 2.04). + if test -f conf$$.exe; then + # Don't use ln at all; we don't have any links + as_ln_s='cp -p' + else + as_ln_s='ln -s' + fi +elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln +else + as_ln_s='cp -p' +fi +rm -f conf$$ conf$$.exe conf$$.file + +if mkdir -p . 2>/dev/null; then + as_mkdir_p=: +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + +as_executable_p="test -f" + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + +# IFS +# We need space, tab and new line, in precisely that order. +as_nl=' +' +IFS=" $as_nl" + +# CDPATH. +$as_unset CDPATH + + +# Name of the host. 
+# hostname on some systems (SVR3.2, Linux) returns a bogus exit status, +# so uname gets run too. +ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q` + +exec 6>&1 + +# +# Initializations. +# +ac_default_prefix=/usr/local +ac_config_libobj_dir=. +cross_compiling=no +subdirs= +MFLAGS= +MAKEFLAGS= +SHELL=${CONFIG_SHELL-/bin/sh} + +# Maximum number of lines to put in a shell here document. +# This variable seems obsolete. It should probably be removed, and +# only ac_max_sed_lines should be used. +: ${ac_max_here_lines=38} + +# Identity of this package. +PACKAGE_NAME= +PACKAGE_TARNAME= +PACKAGE_VERSION= +PACKAGE_STRING= +PACKAGE_BUGREPORT= + +ac_unique_file="viterbi27.c" +# Factoring default headers for most tests. +ac_includes_default="\ +#include <stdio.h> +#if HAVE_SYS_TYPES_H +# include <sys/types.h> +#endif +#if HAVE_SYS_STAT_H +# include <sys/stat.h> +#endif +#if STDC_HEADERS +# include <stdlib.h> +# include <stddef.h> +#else +# if HAVE_STDLIB_H +# include <stdlib.h> +# endif +#endif +#if HAVE_STRING_H +# if !STDC_HEADERS && HAVE_MEMORY_H +# include <memory.h> +# endif +# include <string.h> +#endif +#if HAVE_STRINGS_H +# include <strings.h> +#endif +#if HAVE_INTTYPES_H +# include <inttypes.h> +#else +# if HAVE_STDINT_H +# include <stdint.h> +# endif +#endif +#if HAVE_UNISTD_H +# include <unistd.h> +#endif" + +ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS SO_NAME VERSION CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT CPP EGREP build build_cpu build_vendor build_os host host_cpu host_vendor host_os target target_cpu target_vendor target_os SH_LIB REBIND MLIBS ARCH_OPTION LIBOBJS LTLIBOBJS' +ac_subst_files='' + +# Initialize some variables set by options. 
+ac_init_help= +ac_init_version=false +# The variables have the same names as the options, with +# dashes changed to underlines. +cache_file=/dev/null +exec_prefix=NONE +no_create= +no_recursion= +prefix=NONE +program_prefix=NONE +program_suffix=NONE +program_transform_name=s,x,x, +silent= +site= +srcdir= +verbose= +x_includes=NONE +x_libraries=NONE + +# Installation directory options. +# These are left unexpanded so users can "make install exec_prefix=/foo" +# and all the variables that are supposed to be based on exec_prefix +# by default will actually change. +# Use braces instead of parens because sh, perl, etc. also accept them. +bindir='${exec_prefix}/bin' +sbindir='${exec_prefix}/sbin' +libexecdir='${exec_prefix}/libexec' +datadir='${prefix}/share' +sysconfdir='${prefix}/etc' +sharedstatedir='${prefix}/com' +localstatedir='${prefix}/var' +libdir='${exec_prefix}/lib' +includedir='${prefix}/include' +oldincludedir='/usr/include' +infodir='${prefix}/info' +mandir='${prefix}/man' + +ac_prev= +for ac_option +do + # If the previous option needs an argument, assign it. + if test -n "$ac_prev"; then + eval "$ac_prev=\$ac_option" + ac_prev= + continue + fi + + ac_optarg=`expr "x$ac_option" : 'x[^=]*=\(.*\)'` + + # Accept the important Cygnus configure options, so we can diagnose typos. 
+ + case $ac_option in + + -bindir | --bindir | --bindi | --bind | --bin | --bi) + ac_prev=bindir ;; + -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) + bindir=$ac_optarg ;; + + -build | --build | --buil | --bui | --bu) + ac_prev=build_alias ;; + -build=* | --build=* | --buil=* | --bui=* | --bu=*) + build_alias=$ac_optarg ;; + + -cache-file | --cache-file | --cache-fil | --cache-fi \ + | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) + ac_prev=cache_file ;; + -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ + | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) + cache_file=$ac_optarg ;; + + --config-cache | -C) + cache_file=config.cache ;; + + -datadir | --datadir | --datadi | --datad | --data | --dat | --da) + ac_prev=datadir ;; + -datadir=* | --datadir=* | --datadi=* | --datad=* | --data=* | --dat=* \ + | --da=*) + datadir=$ac_optarg ;; + + -disable-* | --disable-*) + ac_feature=`expr "x$ac_option" : 'x-*disable-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_feature" : ".*[^-_$as_cr_alnum]" >/dev/null && + { echo "$as_me: error: invalid feature name: $ac_feature" >&2 + { (exit 1); exit 1; }; } + ac_feature=`echo $ac_feature | sed 's/-/_/g'` + eval "enable_$ac_feature=no" ;; + + -enable-* | --enable-*) + ac_feature=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` + # Reject names that are not valid shell variable names. 
+ expr "x$ac_feature" : ".*[^-_$as_cr_alnum]" >/dev/null && + { echo "$as_me: error: invalid feature name: $ac_feature" >&2 + { (exit 1); exit 1; }; } + ac_feature=`echo $ac_feature | sed 's/-/_/g'` + case $ac_option in + *=*) ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`;; + *) ac_optarg=yes ;; + esac + eval "enable_$ac_feature='$ac_optarg'" ;; + + -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ + | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ + | --exec | --exe | --ex) + ac_prev=exec_prefix ;; + -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ + | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ + | --exec=* | --exe=* | --ex=*) + exec_prefix=$ac_optarg ;; + + -gas | --gas | --ga | --g) + # Obsolete; use --with-gas. + with_gas=yes ;; + + -help | --help | --hel | --he | -h) + ac_init_help=long ;; + -help=r* | --help=r* | --hel=r* | --he=r* | -hr*) + ac_init_help=recursive ;; + -help=s* | --help=s* | --hel=s* | --he=s* | -hs*) + ac_init_help=short ;; + + -host | --host | --hos | --ho) + ac_prev=host_alias ;; + -host=* | --host=* | --hos=* | --ho=*) + host_alias=$ac_optarg ;; + + -includedir | --includedir | --includedi | --included | --include \ + | --includ | --inclu | --incl | --inc) + ac_prev=includedir ;; + -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ + | --includ=* | --inclu=* | --incl=* | --inc=*) + includedir=$ac_optarg ;; + + -infodir | --infodir | --infodi | --infod | --info | --inf) + ac_prev=infodir ;; + -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) + infodir=$ac_optarg ;; + + -libdir | --libdir | --libdi | --libd) + ac_prev=libdir ;; + -libdir=* | --libdir=* | --libdi=* | --libd=*) + libdir=$ac_optarg ;; + + -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ + | --libexe | --libex | --libe) + ac_prev=libexecdir ;; + -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ + | 
--libexe=* | --libex=* | --libe=*) + libexecdir=$ac_optarg ;; + + -localstatedir | --localstatedir | --localstatedi | --localstated \ + | --localstate | --localstat | --localsta | --localst \ + | --locals | --local | --loca | --loc | --lo) + ac_prev=localstatedir ;; + -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ + | --localstate=* | --localstat=* | --localsta=* | --localst=* \ + | --locals=* | --local=* | --loca=* | --loc=* | --lo=*) + localstatedir=$ac_optarg ;; + + -mandir | --mandir | --mandi | --mand | --man | --ma | --m) + ac_prev=mandir ;; + -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) + mandir=$ac_optarg ;; + + -nfp | --nfp | --nf) + # Obsolete; use --without-fp. + with_fp=no ;; + + -no-create | --no-create | --no-creat | --no-crea | --no-cre \ + | --no-cr | --no-c | -n) + no_create=yes ;; + + -no-recursion | --no-recursion | --no-recursio | --no-recursi \ + | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) + no_recursion=yes ;; + + -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ + | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ + | --oldin | --oldi | --old | --ol | --o) + ac_prev=oldincludedir ;; + -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ + | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ + | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) + oldincludedir=$ac_optarg ;; + + -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) + ac_prev=prefix ;; + -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) + prefix=$ac_optarg ;; + + -program-prefix | --program-prefix | --program-prefi | --program-pref \ + | --program-pre | --program-pr | --program-p) + ac_prev=program_prefix ;; + -program-prefix=* | --program-prefix=* | --program-prefi=* \ + | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) + program_prefix=$ac_optarg ;; + + 
-program-suffix | --program-suffix | --program-suffi | --program-suff \ + | --program-suf | --program-su | --program-s) + ac_prev=program_suffix ;; + -program-suffix=* | --program-suffix=* | --program-suffi=* \ + | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) + program_suffix=$ac_optarg ;; + + -program-transform-name | --program-transform-name \ + | --program-transform-nam | --program-transform-na \ + | --program-transform-n | --program-transform- \ + | --program-transform | --program-transfor \ + | --program-transfo | --program-transf \ + | --program-trans | --program-tran \ + | --progr-tra | --program-tr | --program-t) + ac_prev=program_transform_name ;; + -program-transform-name=* | --program-transform-name=* \ + | --program-transform-nam=* | --program-transform-na=* \ + | --program-transform-n=* | --program-transform-=* \ + | --program-transform=* | --program-transfor=* \ + | --program-transfo=* | --program-transf=* \ + | --program-trans=* | --program-tran=* \ + | --progr-tra=* | --program-tr=* | --program-t=*) + program_transform_name=$ac_optarg ;; + + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + silent=yes ;; + + -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) + ac_prev=sbindir ;; + -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ + | --sbi=* | --sb=*) + sbindir=$ac_optarg ;; + + -sharedstatedir | --sharedstatedir | --sharedstatedi \ + | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ + | --sharedst | --shareds | --shared | --share | --shar \ + | --sha | --sh) + ac_prev=sharedstatedir ;; + -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ + | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ + | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ + | --sha=* | --sh=*) + sharedstatedir=$ac_optarg ;; + + -site | --site | --sit) + ac_prev=site ;; + -site=* | --site=* | --sit=*) + 
site=$ac_optarg ;; + + -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) + ac_prev=srcdir ;; + -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) + srcdir=$ac_optarg ;; + + -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ + | --syscon | --sysco | --sysc | --sys | --sy) + ac_prev=sysconfdir ;; + -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ + | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) + sysconfdir=$ac_optarg ;; + + -target | --target | --targe | --targ | --tar | --ta | --t) + ac_prev=target_alias ;; + -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) + target_alias=$ac_optarg ;; + + -v | -verbose | --verbose | --verbos | --verbo | --verb) + verbose=yes ;; + + -version | --version | --versio | --versi | --vers | -V) + ac_init_version=: ;; + + -with-* | --with-*) + ac_package=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_package" : ".*[^-_$as_cr_alnum]" >/dev/null && + { echo "$as_me: error: invalid package name: $ac_package" >&2 + { (exit 1); exit 1; }; } + ac_package=`echo $ac_package| sed 's/-/_/g'` + case $ac_option in + *=*) ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`;; + *) ac_optarg=yes ;; + esac + eval "with_$ac_package='$ac_optarg'" ;; + + -without-* | --without-*) + ac_package=`expr "x$ac_option" : 'x-*without-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_package" : ".*[^-_$as_cr_alnum]" >/dev/null && + { echo "$as_me: error: invalid package name: $ac_package" >&2 + { (exit 1); exit 1; }; } + ac_package=`echo $ac_package | sed 's/-/_/g'` + eval "with_$ac_package=no" ;; + + --x) + # Obsolete; use --with-x. 
+ with_x=yes ;; + + -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ + | --x-incl | --x-inc | --x-in | --x-i) + ac_prev=x_includes ;; + -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ + | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) + x_includes=$ac_optarg ;; + + -x-libraries | --x-libraries | --x-librarie | --x-librari \ + | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) + ac_prev=x_libraries ;; + -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ + | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) + x_libraries=$ac_optarg ;; + + -*) { echo "$as_me: error: unrecognized option: $ac_option +Try \`$0 --help' for more information." >&2 + { (exit 1); exit 1; }; } + ;; + + *=*) + ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='` + # Reject names that are not valid shell variable names. + expr "x$ac_envvar" : ".*[^_$as_cr_alnum]" >/dev/null && + { echo "$as_me: error: invalid variable name: $ac_envvar" >&2 + { (exit 1); exit 1; }; } + ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` + eval "$ac_envvar='$ac_optarg'" + export $ac_envvar ;; + + *) + # FIXME: should be removed in autoconf 3.0. + echo "$as_me: WARNING: you should use --build, --host, --target" >&2 + expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && + echo "$as_me: WARNING: invalid host type: $ac_option" >&2 + : ${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option} + ;; + + esac +done + +if test -n "$ac_prev"; then + ac_option=--`echo $ac_prev | sed 's/_/-/g'` + { echo "$as_me: error: missing argument to $ac_option" >&2 + { (exit 1); exit 1; }; } +fi + +# Be sure to have absolute paths. 
+# ---------------------------------------------------------------------
+# Post-option-parsing setup.  In order, this section:
+#   1. checks that the prefix and installation directory options are absolute,
+#   2. copies the build/host/target aliases and sets cross_compiling,
+#   3. locates the source tree when --srcdir was not given,
+#   4. snapshots the precious variables (aliases, CC, CFLAGS, ...),
+#   5. prints --help / --version output and exits when one was requested,
+#   6. opens config.log on file descriptor 5 and writes its header.
+# ---------------------------------------------------------------------
+# exec_prefix and prefix may also be the literal NONE or empty; any other
+# value must start with /, \, $ or a drive-letter prefix such as C:/.
+for ac_var in exec_prefix prefix
+do
+  eval ac_val=$`echo $ac_var`
+  case $ac_val in
+    [\\/$]* | ?:[\\/]* | NONE | '' ) ;;
+    *) { echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2
+   { (exit 1); exit 1; }; };;
+  esac
+done
+
+# Be sure to have absolute paths.
+# Unlike the loop above, NONE and the empty string are rejected here.
+for ac_var in bindir sbindir libexecdir datadir sysconfdir sharedstatedir \
+              localstatedir libdir includedir oldincludedir infodir mandir
+do
+  eval ac_val=$`echo $ac_var`
+  case $ac_val in
+    [\\/$]* | ?:[\\/]* ) ;;
+    *) { echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2
+   { (exit 1); exit 1; }; };;
+  esac
+done
+
+# There might be people who depend on the old broken behavior: `$host'
+# used to hold the argument of --host etc.
+# FIXME: To remove some day.
+build=$build_alias
+host=$host_alias
+target=$target_alias
+
+# FIXME: To remove some day.
+# --host without --build gives cross_compiling=maybe (plus a warning);
+# --host different from --build gives cross_compiling=yes.
+if test "x$host_alias" != x; then
+  if test "x$build_alias" = x; then
+    cross_compiling=maybe
+    echo "$as_me: WARNING: If you wanted to set the --build type, don't use --host.
+ If a cross compiler is detected then cross compile mode will be used." >&2
+  elif test "x$build_alias" != "x$host_alias"; then
+    cross_compiling=yes
+  fi
+fi
+
+# Tool names are later looked up with a "<host_alias>-" prefix when --host is set.
+ac_tool_prefix=
+test -n "$host_alias" && ac_tool_prefix=$host_alias-
+
+# When silent=yes, file descriptor 6 is pointed at /dev/null.
+test "$silent" = yes && exec 6>/dev/null
+
+
+# Find the source files, if location was not specified.
+if test -z "$srcdir"; then
+  ac_srcdir_defaulted=yes
+  # Try the directory containing this script, then its parent.
+  # Three fallbacks compute the directory part of $0: dirname, then
+  # $as_expr, then sed.
+  ac_confdir=`(dirname "$0") 2>/dev/null ||
+$as_expr X"$0" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+ X"$0" : 'X\(//\)[^/]' \| \
+ X"$0" : 'X\(//\)$' \| \
+ X"$0" : 'X\(/\)' \| \
+ . : '\(.\)' 2>/dev/null ||
+echo X"$0" |
+ sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
+ /^X\(\/\/\)[^/].*/{ s//\1/; q; }
+ /^X\(\/\/\)$/{ s//\1/; q; }
+ /^X\(\/\).*/{ s//\1/; q; }
+ s/.*/./; q'`
+  srcdir=$ac_confdir
+  if test ! -r $srcdir/$ac_unique_file; then
+    srcdir=..
+  fi
+else
+  ac_srcdir_defaulted=no
+fi
+# Whichever way srcdir was chosen, $ac_unique_file must be readable in it.
+if test ! -r $srcdir/$ac_unique_file; then
+  if test "$ac_srcdir_defaulted" = yes; then
+    { echo "$as_me: error: cannot find sources ($ac_unique_file) in $ac_confdir or .." >&2
+   { (exit 1); exit 1; }; }
+  else
+    { echo "$as_me: error: cannot find sources ($ac_unique_file) in $srcdir" >&2
+   { (exit 1); exit 1; }; }
+  fi
+fi
+(cd $srcdir && test -r ./$ac_unique_file) 2>/dev/null ||
+  { echo "$as_me: error: sources are in $srcdir, but \`cd $srcdir' does not work" >&2
+   { (exit 1); exit 1; }; }
+# Strip trailing slashes (or backslashes) from srcdir.
+srcdir=`echo "$srcdir" | sed 's%\([^\\/]\)[\\/]*$%\1%'`
+# Snapshot of the precious variables: for each one, record whether it is set
+# and its value, under both an ac_env_* and an ac_cv_env_* name.
+# NOTE(review): the ac_cv_env_* copies are presumably replaced by cached
+# values once the cache file is sourced, which is what the later
+# "has changed since the previous run" comparison relies on -- confirm.
+ac_env_build_alias_set=${build_alias+set}
+ac_env_build_alias_value=$build_alias
+ac_cv_env_build_alias_set=${build_alias+set}
+ac_cv_env_build_alias_value=$build_alias
+ac_env_host_alias_set=${host_alias+set}
+ac_env_host_alias_value=$host_alias
+ac_cv_env_host_alias_set=${host_alias+set}
+ac_cv_env_host_alias_value=$host_alias
+ac_env_target_alias_set=${target_alias+set}
+ac_env_target_alias_value=$target_alias
+ac_cv_env_target_alias_set=${target_alias+set}
+ac_cv_env_target_alias_value=$target_alias
+ac_env_CC_set=${CC+set}
+ac_env_CC_value=$CC
+ac_cv_env_CC_set=${CC+set}
+ac_cv_env_CC_value=$CC
+ac_env_CFLAGS_set=${CFLAGS+set}
+ac_env_CFLAGS_value=$CFLAGS
+ac_cv_env_CFLAGS_set=${CFLAGS+set}
+ac_cv_env_CFLAGS_value=$CFLAGS
+ac_env_LDFLAGS_set=${LDFLAGS+set}
+ac_env_LDFLAGS_value=$LDFLAGS
+ac_cv_env_LDFLAGS_set=${LDFLAGS+set}
+ac_cv_env_LDFLAGS_value=$LDFLAGS
+ac_env_CPPFLAGS_set=${CPPFLAGS+set}
+ac_env_CPPFLAGS_value=$CPPFLAGS
+ac_cv_env_CPPFLAGS_set=${CPPFLAGS+set}
+ac_cv_env_CPPFLAGS_value=$CPPFLAGS
+ac_env_CPP_set=${CPP+set}
+ac_env_CPP_value=$CPP
+ac_cv_env_CPP_set=${CPP+set}
+ac_cv_env_CPP_value=$CPP
+
+#
+# Report the --help message.
+#
+# The first two here-documents use an unquoted delimiter, so $0 and
+# $ac_default_prefix are expanded in them; the <<\_ACEOF ones are literal.
+if test "$ac_init_help" = "long"; then
+  # Omit some internal or obsolete options to make the list less imposing.
+  # This message is too long to be a string in the A/UX 3.1 sh.
+  cat <<_ACEOF
+\`configure' configures this package to adapt to many kinds of systems.
+
+Usage: $0 [OPTION]... [VAR=VALUE]...
+
+To assign environment variables (e.g., CC, CFLAGS...), specify them as
+VAR=VALUE. See below for descriptions of some of the useful variables.
+
+Defaults for the options are specified in brackets.
+
+Configuration:
+ -h, --help display this help and exit
+ --help=short display options specific to this package
+ --help=recursive display the short help of all the included packages
+ -V, --version display version information and exit
+ -q, --quiet, --silent do not print \`checking...' messages
+ --cache-file=FILE cache test results in FILE [disabled]
+ -C, --config-cache alias for \`--cache-file=config.cache'
+ -n, --no-create do not create output files
+ --srcdir=DIR find the sources in DIR [configure dir or \`..']
+
+_ACEOF
+
+  cat <<_ACEOF
+Installation directories:
+ --prefix=PREFIX install architecture-independent files in PREFIX
+ [$ac_default_prefix]
+ --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX
+ [PREFIX]
+
+By default, \`make install' will install all the files in
+\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify
+an installation prefix other than \`$ac_default_prefix' using \`--prefix',
+for instance \`--prefix=\$HOME'.
+
+For better control, use the options below.
+
+Fine tuning of the installation directories:
+ --bindir=DIR user executables [EPREFIX/bin]
+ --sbindir=DIR system admin executables [EPREFIX/sbin]
+ --libexecdir=DIR program executables [EPREFIX/libexec]
+ --datadir=DIR read-only architecture-independent data [PREFIX/share]
+ --sysconfdir=DIR read-only single-machine data [PREFIX/etc]
+ --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com]
+ --localstatedir=DIR modifiable single-machine data [PREFIX/var]
+ --libdir=DIR object code libraries [EPREFIX/lib]
+ --includedir=DIR C header files [PREFIX/include]
+ --oldincludedir=DIR C header files for non-gcc [/usr/include]
+ --infodir=DIR info documentation [PREFIX/info]
+ --mandir=DIR man documentation [PREFIX/man]
+_ACEOF
+
+  cat <<\_ACEOF
+
+System types:
+ --build=BUILD configure for building on BUILD [guessed]
+ --host=HOST cross-compile to build programs to run on HOST [BUILD]
+ --target=TARGET configure for building compilers for TARGET [HOST]
+_ACEOF
+fi
+
+# Printed for every help mode (long, short and recursive).
+if test -n "$ac_init_help"; then
+
+  cat <<\_ACEOF
+
+Some influential environment variables:
+ CC C compiler command
+ CFLAGS C compiler flags
+ LDFLAGS linker flags, e.g. -L<lib dir> if you have libraries in a
+ nonstandard directory <lib dir>
+ CPPFLAGS C/C++ preprocessor flags, e.g. -I<include dir> if you have
+ headers in a nonstandard directory <include dir>
+ CPP C preprocessor
+
+Use these variables to override the choices made by `configure' or to help
+it to find libraries and programs with nonstandard names/locations.
+
+_ACEOF
+fi
+
+if test "$ac_init_help" = "recursive"; then
+  # If there are subdirs, report their specific --help.
+  ac_popdir=`pwd`
+  # The leading ":" entry is skipped at once by the test on the same line
+  # (presumably it is there so that the word list is never empty).
+  for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue
+    test -d $ac_dir || continue
+    ac_builddir=.
+
+if test "$ac_dir" != .; then
+  ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'`
+  # A "../" for each directory in $ac_dir_suffix.
+  ac_top_builddir=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,../,g'`
+else
+  ac_dir_suffix= ac_top_builddir=
+fi
+
+# Derive ac_srcdir / ac_top_srcdir for this subdirectory from $srcdir.
+case $srcdir in
+  .) # No --srcdir option. We are building in place.
+    ac_srcdir=.
+    if test -z "$ac_top_builddir"; then
+       ac_top_srcdir=.
+    else
+       ac_top_srcdir=`echo $ac_top_builddir | sed 's,/$,,'`
+    fi ;;
+  [\\/]* | ?:[\\/]* ) # Absolute path.
+    ac_srcdir=$srcdir$ac_dir_suffix;
+    ac_top_srcdir=$srcdir ;;
+  *) # Relative path.
+    ac_srcdir=$ac_top_builddir$srcdir$ac_dir_suffix
+    ac_top_srcdir=$ac_top_builddir$srcdir ;;
+esac
+
+# Do not use `cd foo && pwd` to compute absolute paths, because
+# the directories may not exist.
+# The four case statements below build ac_abs_builddir, ac_abs_top_builddir,
+# ac_abs_srcdir and ac_abs_top_srcdir by string concatenation only.
+case `pwd` in
+.) ac_abs_builddir="$ac_dir";;
+*)
+  case "$ac_dir" in
+  .) ac_abs_builddir=`pwd`;;
+  [\\/]* | ?:[\\/]* ) ac_abs_builddir="$ac_dir";;
+  *) ac_abs_builddir=`pwd`/"$ac_dir";;
+  esac;;
+esac
+case $ac_abs_builddir in
+.) ac_abs_top_builddir=${ac_top_builddir}.;;
+*)
+  case ${ac_top_builddir}. in
+  .) ac_abs_top_builddir=$ac_abs_builddir;;
+  [\\/]* | ?:[\\/]* ) ac_abs_top_builddir=${ac_top_builddir}.;;
+  *) ac_abs_top_builddir=$ac_abs_builddir/${ac_top_builddir}.;;
+  esac;;
+esac
+case $ac_abs_builddir in
+.) ac_abs_srcdir=$ac_srcdir;;
+*)
+  case $ac_srcdir in
+  .) ac_abs_srcdir=$ac_abs_builddir;;
+  [\\/]* | ?:[\\/]* ) ac_abs_srcdir=$ac_srcdir;;
+  *) ac_abs_srcdir=$ac_abs_builddir/$ac_srcdir;;
+  esac;;
+esac
+case $ac_abs_builddir in
+.) ac_abs_top_srcdir=$ac_top_srcdir;;
+*)
+  case $ac_top_srcdir in
+  .) ac_abs_top_srcdir=$ac_abs_builddir;;
+  [\\/]* | ?:[\\/]* ) ac_abs_top_srcdir=$ac_top_srcdir;;
+  *) ac_abs_top_srcdir=$ac_abs_builddir/$ac_top_srcdir;;
+  esac;;
+esac
+
+    cd $ac_dir
+    # Check for guested configure; otherwise get Cygnus style configure.
+    # Preference order: configure.gnu, then configure, then (when only
+    # configure.ac or configure.in exists) $ac_configure --help.
+    if test -f $ac_srcdir/configure.gnu; then
+      echo
+      $SHELL $ac_srcdir/configure.gnu --help=recursive
+    elif test -f $ac_srcdir/configure; then
+      echo
+      $SHELL $ac_srcdir/configure --help=recursive
+    elif test -f $ac_srcdir/configure.ac ||
+           test -f $ac_srcdir/configure.in; then
+      echo
+      $ac_configure --help
+    else
+      echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2
+    fi
+    cd $ac_popdir
+  done
+fi
+
+# Any help request ends the run here, before config.log is created.
+test -n "$ac_init_help" && exit 0
+# ac_init_version is executed as a command: the option parser sets it to ":"
+# for --version, which makes this condition succeed.
+if $ac_init_version; then
+  cat <<\_ACEOF
+
+Copyright (C) 2003 Free Software Foundation, Inc.
+This configure script is free software; the Free Software Foundation
+gives unlimited permission to copy, distribute and modify it.
+_ACEOF
+  exit 0
+fi
+# From here on file descriptor 5 is config.log (truncated on every run).
+exec 5>config.log
+cat >&5 <<_ACEOF
+This file contains any messages produced by compilers while
+running configure, to aid debugging if configure makes a mistake.
+
+It was created by $as_me, which was
+generated by GNU Autoconf 2.59. Invocation command line was
+
+ $ $0 $@
+
+_ACEOF
+# Platform report: every probe runs in a subshell with stderr discarded and
+# falls back to the word "unknown", so a missing tool cannot abort the script.
+{
+cat <<_ASUNAME
+## --------- ##
+## Platform. ##
+## --------- ##
+
+hostname = `(hostname || uname -n) 2>/dev/null | sed 1q`
+uname -m = `(uname -m) 2>/dev/null || echo unknown`
+uname -r = `(uname -r) 2>/dev/null || echo unknown`
+uname -s = `(uname -s) 2>/dev/null || echo unknown`
+uname -v = `(uname -v) 2>/dev/null || echo unknown`
+
+/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown`
+/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown`
+
+/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown`
+/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown`
+/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown`
+hostinfo = `(hostinfo) 2>/dev/null || echo unknown`
+/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown`
+/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown`
+/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown`
+
+_ASUNAME
+
+# Log each PATH entry on its own line; an empty entry is logged as ".".
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+  echo "PATH: $as_dir"
+done
+
+} >&5
+
+cat >&5 <<_ACEOF
+
+
+## ----------- ##
+## Core tests. ##
+## ----------- ##
+
+_ACEOF
+
+
+# Keep a trace of the command line.
+# Strip out --no-create and --no-recursion so they do not pile up.
+# Strip out --silent because we don't want to record it for future runs.
+# Also quote any args containing shell meta-characters.
+# Make two passes to allow for proper duplicate-argument suppression.
+# Variables used by the two-pass scan below:
+#   ac_configure_args   - the final, single-quoted argument list that is kept
+#   ac_configure_args0  - every retained argument, collected during pass 1
+#   ac_configure_args1  - arguments seen so far during pass 2
+#   ac_sep              - separator; empty for the first kept argument
+#   ac_must_keep_next   - true when the previous argument was an option that
+#                         is not in the list below, so the next word is kept
+#                         without the duplicate check
+ac_configure_args=
+ac_configure_args0=
+ac_configure_args1=
+ac_sep=
+ac_must_keep_next=false
+for ac_pass in 1 2
+do
+  # A "for" without "in" iterates over the script's positional parameters.
+  for ac_arg
+  do
+    case $ac_arg in
+    -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;;
+    -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+    | -silent | --silent | --silen | --sile | --sil)
+      continue ;;
+    *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?\"\']*)
+      ac_arg=`echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;;
+    esac
+    case $ac_pass in
+    1) ac_configure_args0="$ac_configure_args0 '$ac_arg'" ;;
+    2)
+      ac_configure_args1="$ac_configure_args1 '$ac_arg'"
+      if test $ac_must_keep_next = true; then
+        ac_must_keep_next=false # Got value, back to normal.
+      else
+        # For the options listed here, skip this occurrence when the same
+        # quoted argument appears again later in the pass-1 list, so only
+        # the last occurrence is kept.
+        case $ac_arg in
+          *=* | --config-cache | -C | -disable-* | --disable-* \
+          | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \
+          | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \
+          | -with-* | --with-* | -without-* | --without-* | --x)
+            case "$ac_configure_args0 " in
+              "$ac_configure_args1"*" '$ac_arg' "* ) continue ;;
+            esac
+            ;;
+          -* ) ac_must_keep_next=true ;;
+        esac
+      fi
+      ac_configure_args="$ac_configure_args$ac_sep'$ac_arg'"
+      # Get rid of the leading space.
+      ac_sep=" "
+      ;;
+    esac
+  done
+done
+# Drop the two scratch lists; when $as_unset fails and the variable is still
+# set, it is emptied and exported instead.
+$as_unset ac_configure_args0 || test "${ac_configure_args0+set}" != set || { ac_configure_args0=; export ac_configure_args0; }
+$as_unset ac_configure_args1 || test "${ac_configure_args1+set}" != set || { ac_configure_args1=; export ac_configure_args1; }
+
+# When interrupted or exit'd, cleanup temporary files, and complete
+# config.log. We remove comments because anyway the quotes in there
+# would cause problems or look ugly.
+# WARNING: Be sure not to use single quotes in there, as some shells,
+# such as our DU 5.0 friend, will then `close' the trap.
+#
+# The handler is installed for condition 0 (script exit).  It appends the
+# cache variables, the output variables, the output files and the sorted
+# contents of confdefs.h to file descriptor 5, removes core files and the
+# conftest*/confdefs*/conf$$* temporaries plus $ac_clean_files, and exits
+# with the status saved on entry.  Everything between the quotes is a single
+# string; each '"'"' sequence is how a literal single quote is spliced in.
+trap 'exit_status=$?
+ # Save into config.log some information that might help in debugging.
+ {
+ echo
+
+ cat <<\_ASBOX
+## ---------------- ##
+## Cache variables. ##
+## ---------------- ##
+_ASBOX
+ echo
+ # The following way of writing the cache mishandles newlines in values,
+{
+ (set) 2>&1 |
+ case `(ac_space='"'"' '"'"'; set | grep ac_space) 2>&1` in
+ *ac_space=\ *)
+ sed -n \
+ "s/'"'"'/'"'"'\\\\'"'"''"'"'/g;
+ s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='"'"'\\2'"'"'/p"
+ ;;
+ *)
+ sed -n \
+ "s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1=\\2/p"
+ ;;
+ esac;
+}
+ echo
+
+ cat <<\_ASBOX
+## ----------------- ##
+## Output variables. ##
+## ----------------- ##
+_ASBOX
+ echo
+ for ac_var in $ac_subst_vars
+ do
+ eval ac_val=$`echo $ac_var`
+ echo "$ac_var='"'"'$ac_val'"'"'"
+ done | sort
+ echo
+
+ if test -n "$ac_subst_files"; then
+ cat <<\_ASBOX
+## ------------- ##
+## Output files. ##
+## ------------- ##
+_ASBOX
+ echo
+ for ac_var in $ac_subst_files
+ do
+ eval ac_val=$`echo $ac_var`
+ echo "$ac_var='"'"'$ac_val'"'"'"
+ done | sort
+ echo
+ fi
+
+ if test -s confdefs.h; then
+ cat <<\_ASBOX
+## ----------- ##
+## confdefs.h. ##
+## ----------- ##
+_ASBOX
+ echo
+ sed "/^$/d" confdefs.h | sort
+ echo
+ fi
+ test "$ac_signal" != 0 &&
+ echo "$as_me: caught signal $ac_signal"
+ echo "$as_me: exit $exit_status"
+ } >&5
+ rm -f core *.core &&
+ rm -rf conftest* confdefs* conf$$* $ac_clean_files &&
+ exit $exit_status
+ ' 0
+# Signals 1, 2, 13 and 15 record their number in ac_signal and then exit with
+# status 1, which in turn runs the exit handler installed above.
+for ac_signal in 1 2 13 15; do
+  trap 'ac_signal='$ac_signal'; { (exit 1); exit 1; }' $ac_signal
+done
+# Reset after the loop so that a normal exit reports no signal.
+ac_signal=0
+
+# confdefs.h avoids OS command line length limits that DEFS can exceed.
+rm -rf conftest* confdefs.h
+# AIX cpp loses on an empty file, so make sure it contains at least a newline.
+echo >confdefs.h
+
+# Predefined preprocessor variables.
+
+# Append the five PACKAGE_* identification macros to confdefs.h.  The
+# here-document delimiters are unquoted, so each $PACKAGE_* value is expanded
+# when the line is written.
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_NAME "$PACKAGE_NAME"
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_TARNAME "$PACKAGE_TARNAME"
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_VERSION "$PACKAGE_VERSION"
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_STRING "$PACKAGE_STRING"
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
+#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT"
+_ACEOF
+
+
+# Let the site file select an alternate cache file if it wants to.
+# Prefer explicitly selected file to automatically selected ones.
+# When CONFIG_SITE is empty it defaults to share/config.site and
+# etc/config.site under $prefix, or under $ac_default_prefix when prefix
+# is still NONE.
+if test -z "$CONFIG_SITE"; then
+  if test "x$prefix" != xNONE; then
+    CONFIG_SITE="$prefix/share/config.site $prefix/etc/config.site"
+  else
+    CONFIG_SITE="$ac_default_prefix/share/config.site $ac_default_prefix/etc/config.site"
+  fi
+fi
+# $CONFIG_SITE is deliberately left unquoted: it is a whitespace-separated
+# list of files.  Each readable one is logged, copied into config.log with a
+# "| " prefix, and then sourced into this shell.
+for ac_site_file in $CONFIG_SITE; do
+  if test -r "$ac_site_file"; then
+    { echo "$as_me:$LINENO: loading site script $ac_site_file" >&5
+echo "$as_me: loading site script $ac_site_file" >&6;}
+    sed 's/^/| /' "$ac_site_file" >&5
+    . "$ac_site_file"
+  fi
+done
+
+# Load the cache file when it is a readable regular file; create it empty
+# when it is not readable.  $cache_file is quoted wherever it is used as a
+# file name so that a --cache-file path containing whitespace is sourced and
+# created as one file instead of being split into several words.
+if test -r "$cache_file"; then
+  # Some versions of bash will fail to source /dev/null (special
+  # files actually), so we avoid doing that.
+  if test -f "$cache_file"; then
+    { echo "$as_me:$LINENO: loading cache $cache_file" >&5
+echo "$as_me: loading cache $cache_file" >&6;}
+    # A name without a directory part gets "./" prepended, so that "." does
+    # not search PATH for it.
+    case $cache_file in
+      [\\/]* | ?:[\\/]* ) . "$cache_file";;
+      *)                      . "./$cache_file";;
+    esac
+  fi
+else
+  { echo "$as_me:$LINENO: creating cache $cache_file" >&5
+echo "$as_me: creating cache $cache_file" >&6;}
+  >"$cache_file"
+fi
+
+# Check that the precious variables saved in the cache have kept the same
+# value.
+ac_cache_corrupted=false +for ac_var in `(set) 2>&1 | + sed -n 's/^ac_env_\([a-zA-Z_0-9]*\)_set=.*/\1/p'`; do + eval ac_old_set=\$ac_cv_env_${ac_var}_set + eval ac_new_set=\$ac_env_${ac_var}_set + eval ac_old_val="\$ac_cv_env_${ac_var}_value" + eval ac_new_val="\$ac_env_${ac_var}_value" + case $ac_old_set,$ac_new_set in + set,) + { echo "$as_me:$LINENO: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 +echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,set) + { echo "$as_me:$LINENO: error: \`$ac_var' was not set in the previous run" >&5 +echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,);; + *) + if test "x$ac_old_val" != "x$ac_new_val"; then + { echo "$as_me:$LINENO: error: \`$ac_var' has changed since the previous run:" >&5 +echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} + { echo "$as_me:$LINENO: former value: $ac_old_val" >&5 +echo "$as_me: former value: $ac_old_val" >&2;} + { echo "$as_me:$LINENO: current value: $ac_new_val" >&5 +echo "$as_me: current value: $ac_new_val" >&2;} + ac_cache_corrupted=: + fi;; + esac + # Pass precious variables to config.status. + if test "$ac_new_set" = set; then + case $ac_new_val in + *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?\"\']*) + ac_arg=$ac_var=`echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; + *) ac_arg=$ac_var=$ac_new_val ;; + esac + case " $ac_configure_args " in + *" '$ac_arg' "*) ;; # Avoid dups. Use of quotes ensures accuracy. 
+ *) ac_configure_args="$ac_configure_args '$ac_arg'" ;; + esac + fi +done +if $ac_cache_corrupted; then + { echo "$as_me:$LINENO: error: changes in the environment can compromise the build" >&5 +echo "$as_me: error: changes in the environment can compromise the build" >&2;} + { { echo "$as_me:$LINENO: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&5 +echo "$as_me: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&2;} + { (exit 1); exit 1; }; } +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + + + + + + + + + + + + + + + + + + ac_config_headers="$ac_config_headers config.h" + +SO_NAME=3 +VERSION=3.0.0 + + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. +set dummy ${ac_tool_prefix}gcc; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}gcc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + echo "$as_me:$LINENO: result: $CC" >&5 +echo "${ECHO_T}$CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "gcc", so it can be a program name with args. +set dummy gcc; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_ac_ct_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="gcc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 +echo "${ECHO_T}$ac_ct_CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + + CC=$ac_ct_CC +else + CC="$ac_cv_prog_CC" +fi + +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. +set dummy ${ac_tool_prefix}cc; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. 
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="${ac_tool_prefix}cc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + echo "$as_me:$LINENO: result: $CC" >&5 +echo "${ECHO_T}$CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "cc", so it can be a program name with args. +set dummy cc; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_ac_ct_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="cc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 +echo "${ECHO_T}$ac_ct_CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + + CC=$ac_ct_CC +else + CC="$ac_cv_prog_CC" +fi + +fi +if test -z "$CC"; then + # Extract the first word of "cc", so it can be a program name with args. +set dummy cc; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... 
$ECHO_C" >&6 +if test "${ac_cv_prog_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else + ac_prog_rejected=no +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then + ac_prog_rejected=yes + continue + fi + ac_cv_prog_CC="cc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +if test $ac_prog_rejected = yes; then + # We found a bogon in the path, so make sure we never use it. + set dummy $ac_cv_prog_CC + shift + if test $# != 0; then + # We chose a different compiler from the bogus one. + # However, it has the same basename, so the bogon will be chosen + # first if we set CC to just the basename; use the full file name. + shift + ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@" + fi +fi +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + echo "$as_me:$LINENO: result: $CC" >&5 +echo "${ECHO_T}$CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + +fi +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + for ac_prog in cl + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_CC="$ac_tool_prefix$ac_prog" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + echo "$as_me:$LINENO: result: $CC" >&5 +echo "${ECHO_T}$CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + + test -n "$CC" && break + done +fi +if test -z "$CC"; then + ac_ct_CC=$CC + for ac_prog in cl +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6 +if test "${ac_cv_prog_ac_ct_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CC="$ac_prog" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 +echo "${ECHO_T}$ac_ct_CC" >&6 +else + echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6 +fi + + test -n "$ac_ct_CC" && break +done + + CC=$ac_ct_CC +fi + +fi + + +test -z "$CC" && { { echo "$as_me:$LINENO: error: no acceptable C compiler found in \$PATH +See \`config.log' for more details." >&5 +echo "$as_me: error: no acceptable C compiler found in \$PATH +See \`config.log' for more details." >&2;} + { (exit 1); exit 1; }; } + +# Provide some information about the compiler. 
+echo "$as_me:$LINENO:" \ + "checking for C compiler version" >&5 +ac_compiler=`set X $ac_compile; echo $2` +{ (eval echo "$as_me:$LINENO: \"$ac_compiler --version </dev/null >&5\"") >&5 + (eval $ac_compiler --version </dev/null >&5) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } +{ (eval echo "$as_me:$LINENO: \"$ac_compiler -v </dev/null >&5\"") >&5 + (eval $ac_compiler -v </dev/null >&5) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } +{ (eval echo "$as_me:$LINENO: \"$ac_compiler -V </dev/null >&5\"") >&5 + (eval $ac_compiler -V </dev/null >&5) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } + +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files a.out a.exe b.out" +# Try to create an executable without -o first, disregard a.out. +# It will help us diagnose broken compilers, and finding out an intuition +# of exeext. +echo "$as_me:$LINENO: checking for C compiler default output file name" >&5 +echo $ECHO_N "checking for C compiler default output file name... $ECHO_C" >&6 +ac_link_default=`echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'` +if { (eval echo "$as_me:$LINENO: \"$ac_link_default\"") >&5 + (eval $ac_link_default) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; then + # Find the output, starting from the most likely. This scheme is +# not robust to junk in `.', hence go to wildcards (a.*) only as a last +# resort. + +# Be careful to initialize this variable, since it used to be cached. +# Otherwise an old cache value of `no' led to `EXEEXT = no' in a Makefile. +ac_cv_exeext= +# b.out is created by i960 compilers. 
+for ac_file in a_out.exe a.exe conftest.exe a.out conftest a.* conftest.* b.out +do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.o | *.obj ) + ;; + conftest.$ac_ext ) + # This is the source file. + ;; + [ab].out ) + # We found the default executable, but exeext='' is most + # certainly right. + break;; + *.* ) + ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + # FIXME: I believe we export ac_cv_exeext for Libtool, + # but it would be cool to find out if it's true. Does anybody + # maintain Libtool? --akim. + export ac_cv_exeext + break;; + * ) + break;; + esac +done +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { echo "$as_me:$LINENO: error: C compiler cannot create executables +See \`config.log' for more details." >&5 +echo "$as_me: error: C compiler cannot create executables +See \`config.log' for more details." >&2;} + { (exit 77); exit 77; }; } +fi + +ac_exeext=$ac_cv_exeext +echo "$as_me:$LINENO: result: $ac_file" >&5 +echo "${ECHO_T}$ac_file" >&6 + +# Check the compiler produces executables we can run. If not, either +# the compiler is broken, or we cross compile. +echo "$as_me:$LINENO: checking whether the C compiler works" >&5 +echo $ECHO_N "checking whether the C compiler works... $ECHO_C" >&6 +# FIXME: These cross compiler hacks should be removed for Autoconf 3.0 +# If not cross compiling, check that we can run a simple program. +if test "$cross_compiling" != yes; then + if { ac_try='./$ac_file' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + cross_compiling=no + else + if test "$cross_compiling" = maybe; then + cross_compiling=yes + else + { { echo "$as_me:$LINENO: error: cannot run C compiled programs. +If you meant to cross compile, use \`--host'. +See \`config.log' for more details." 
>&5 +echo "$as_me: error: cannot run C compiled programs. +If you meant to cross compile, use \`--host'. +See \`config.log' for more details." >&2;} + { (exit 1); exit 1; }; } + fi + fi +fi +echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6 + +rm -f a.out a.exe conftest$ac_cv_exeext b.out +ac_clean_files=$ac_clean_files_save +# Check the compiler produces executables we can run. If not, either +# the compiler is broken, or we cross compile. +echo "$as_me:$LINENO: checking whether we are cross compiling" >&5 +echo $ECHO_N "checking whether we are cross compiling... $ECHO_C" >&6 +echo "$as_me:$LINENO: result: $cross_compiling" >&5 +echo "${ECHO_T}$cross_compiling" >&6 + +echo "$as_me:$LINENO: checking for suffix of executables" >&5 +echo $ECHO_N "checking for suffix of executables... $ECHO_C" >&6 +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; then + # If both `conftest.exe' and `conftest' are `present' (well, observable) +# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will +# work properly (i.e., refer to `conftest.exe'), while it won't with +# `rm'. +for ac_file in conftest.exe conftest conftest.*; do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.o | *.obj ) ;; + *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + export ac_cv_exeext + break;; + * ) break;; + esac +done +else + { { echo "$as_me:$LINENO: error: cannot compute suffix of executables: cannot compile and link +See \`config.log' for more details." >&5 +echo "$as_me: error: cannot compute suffix of executables: cannot compile and link +See \`config.log' for more details." 
>&2;} + { (exit 1); exit 1; }; } +fi + +rm -f conftest$ac_cv_exeext +echo "$as_me:$LINENO: result: $ac_cv_exeext" >&5 +echo "${ECHO_T}$ac_cv_exeext" >&6 + +rm -f conftest.$ac_ext +EXEEXT=$ac_cv_exeext +ac_exeext=$EXEEXT +echo "$as_me:$LINENO: checking for suffix of object files" >&5 +echo $ECHO_N "checking for suffix of object files... $ECHO_C" >&6 +if test "${ac_cv_objext+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.o conftest.obj +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; then + for ac_file in `(ls conftest.o conftest.obj; ls conftest.*) 2>/dev/null`; do + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg ) ;; + *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` + break;; + esac +done +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { echo "$as_me:$LINENO: error: cannot compute suffix of object files: cannot compile +See \`config.log' for more details." >&5 +echo "$as_me: error: cannot compute suffix of object files: cannot compile +See \`config.log' for more details." >&2;} + { (exit 1); exit 1; }; } +fi + +rm -f conftest.$ac_cv_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: $ac_cv_objext" >&5 +echo "${ECHO_T}$ac_cv_objext" >&6 +OBJEXT=$ac_cv_objext +ac_objext=$OBJEXT +echo "$as_me:$LINENO: checking whether we are using the GNU C compiler" >&5 +echo $ECHO_N "checking whether we are using the GNU C compiler... $ECHO_C" >&6 +if test "${ac_cv_c_compiler_gnu+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. 
*/ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ +#ifndef __GNUC__ + choke me +#endif + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_compiler_gnu=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_compiler_gnu=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +ac_cv_c_compiler_gnu=$ac_compiler_gnu + +fi +echo "$as_me:$LINENO: result: $ac_cv_c_compiler_gnu" >&5 +echo "${ECHO_T}$ac_cv_c_compiler_gnu" >&6 +GCC=`test $ac_compiler_gnu = yes && echo yes` +ac_test_CFLAGS=${CFLAGS+set} +ac_save_CFLAGS=$CFLAGS +CFLAGS="-g" +echo "$as_me:$LINENO: checking whether $CC accepts -g" >&5 +echo $ECHO_N "checking whether $CC accepts -g... $ECHO_C" >&6 +if test "${ac_cv_prog_cc_g+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? 
+ grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_prog_cc_g=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_prog_cc_g=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: $ac_cv_prog_cc_g" >&5 +echo "${ECHO_T}$ac_cv_prog_cc_g" >&6 +if test "$ac_test_CFLAGS" = set; then + CFLAGS=$ac_save_CFLAGS +elif test $ac_cv_prog_cc_g = yes; then + if test "$GCC" = yes; then + CFLAGS="-g -O2" + else + CFLAGS="-g" + fi +else + if test "$GCC" = yes; then + CFLAGS="-O2" + else + CFLAGS= + fi +fi +echo "$as_me:$LINENO: checking for $CC option to accept ANSI C" >&5 +echo $ECHO_N "checking for $CC option to accept ANSI C... $ECHO_C" >&6 +if test "${ac_cv_prog_cc_stdc+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_cv_prog_cc_stdc=no +ac_save_CC=$CC +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <stdarg.h> +#include <stdio.h> +#include <sys/types.h> +#include <sys/stat.h> +/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */ +struct buf { int x; }; +FILE * (*rcsopen) (struct buf *, struct stat *, int); +static char *e (p, i) + char **p; + int i; +{ + return p[i]; +} +static char *f (char * (*g) (char **, int), char **p, ...) 
+{ + char *s; + va_list v; + va_start (v,p); + s = g (p, va_arg (v,int)); + va_end (v); + return s; +} + +/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has + function prototypes and stuff, but not '\xHH' hex character constants. + These don't provoke an error unfortunately, instead are silently treated + as 'x'. The following induces an error, until -std1 is added to get + proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an + array size at least. It's necessary to write '\x00'==0 to get something + that's true only with -std1. */ +int osf4_cc_array ['\x00' == 0 ? 1 : -1]; + +int test (int i, double x); +struct s1 {int (*f) (int a);}; +struct s2 {int (*f) (double a);}; +int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); +int argc; +char **argv; +int +main () +{ +return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; + ; + return 0; +} +_ACEOF +# Don't try gcc -ansi; that turns off useful extensions and +# breaks some systems' header files. +# AIX -qlanglvl=ansi +# Ultrix and OSF/1 -std1 +# HP-UX 10.20 and later -Ae +# HP-UX older versions -Aa -D_HPUX_SOURCE +# SVR4 -Xc -D__EXTENSIONS__ +for ac_arg in "" -qlanglvl=ansi -std1 -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" +do + CC="$ac_save_CC $ac_arg" + rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_prog_cc_stdc=$ac_arg +break +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +fi +rm -f conftest.err conftest.$ac_objext +done +rm -f conftest.$ac_ext conftest.$ac_objext +CC=$ac_save_CC + +fi + +case "x$ac_cv_prog_cc_stdc" in + x|xno) + echo "$as_me:$LINENO: result: none needed" >&5 +echo "${ECHO_T}none needed" >&6 ;; + *) + echo "$as_me:$LINENO: result: $ac_cv_prog_cc_stdc" >&5 +echo "${ECHO_T}$ac_cv_prog_cc_stdc" >&6 + CC="$CC $ac_cv_prog_cc_stdc" ;; +esac + +# Some people use a C++ compiler to compile C. Since we use `exit', +# in C++ we need to declare it. In case someone uses the same compiler +# for both compiling C and C++ we need to have the C++ compiler decide +# the declaration of exit, since it's the most demanding environment. +cat >conftest.$ac_ext <<_ACEOF +#ifndef __cplusplus + choke me +#endif +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + for ac_declaration in \ + '' \ + 'extern "C" void std::exit (int) throw (); using std::exit;' \ + 'extern "C" void std::exit (int); using std::exit;' \ + 'extern "C" void exit (int) throw ();' \ + 'extern "C" void exit (int);' \ + 'void exit (int);' +do + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. 
*/ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_declaration +#include <stdlib.h> +int +main () +{ +exit (42); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + : +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +continue +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_declaration +int +main () +{ +exit (42); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + break +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +done +rm -f conftest* +if test -n "$ac_declaration"; then + echo '#ifdef __cplusplus' >>confdefs.h + echo $ac_declaration >>confdefs.h + echo '#endif' >>confdefs.h +fi + +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +if test $GCC != "yes" +then + { { echo "$as_me:$LINENO: error: Need GNU C compiler" >&5 +echo "$as_me: error: Need GNU C compiler" >&2;} + { (exit 1); exit 1; }; } +fi + + +echo "$as_me:$LINENO: checking for malloc in -lc" >&5 +echo $ECHO_N "checking for malloc in -lc... $ECHO_C" >&6 +if test "${ac_cv_lib_c_malloc+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lc $LIBS" +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any gcc2 internal prototype to avoid an error. */ +#ifdef __cplusplus +extern "C" +#endif +/* We use char because int might match the return type of a gcc2 + builtin and then its argument prototype would still apply. */ +char malloc (); +int +main () +{ +malloc (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! 
-s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_lib_c_malloc=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_lib_c_malloc=no +fi +rm -f conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +echo "$as_me:$LINENO: result: $ac_cv_lib_c_malloc" >&5 +echo "${ECHO_T}$ac_cv_lib_c_malloc" >&6 +if test $ac_cv_lib_c_malloc = yes; then + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBC 1 +_ACEOF + + LIBS="-lc $LIBS" + +fi + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +echo "$as_me:$LINENO: checking how to run the C preprocessor" >&5 +echo $ECHO_N "checking how to run the C preprocessor... $ECHO_C" >&6 +# On Suns, sometimes $CPP names a directory. +if test -n "$CPP" && test -d "$CPP"; then + CPP= +fi +if test -z "$CPP"; then + if test "${ac_cv_prog_CPP+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + # Double quotes because CPP needs to be expanded + for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp" + do + ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since + # <limits.h> exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. 
+ cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#ifdef __STDC__ +# include <limits.h> +#else +# include <assert.h> +#endif + Syntax error +_ACEOF +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + : +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.$ac_ext + + # OK, works on sane cases. Now check whether non-existent headers + # can be detected and how. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <ac_nonexistent.h> +_ACEOF +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + # Broken: success on invalid input. +continue +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + # Passes both tests. 
+ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.err conftest.$ac_ext +if $ac_preproc_ok; then + break +fi + + done + ac_cv_prog_CPP=$CPP + +fi + CPP=$ac_cv_prog_CPP +else + ac_cv_prog_CPP=$CPP +fi +echo "$as_me:$LINENO: result: $CPP" >&5 +echo "${ECHO_T}$CPP" >&6 +ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since + # <limits.h> exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#ifdef __STDC__ +# include <limits.h> +#else +# include <assert.h> +#endif + Syntax error +_ACEOF +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + : +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + # Broken: fails on valid input. +continue +fi +rm -f conftest.err conftest.$ac_ext + + # OK, works on sane cases. Now check whether non-existent headers + # can be detected and how. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ +#include <ac_nonexistent.h> +_ACEOF +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + # Broken: success on invalid input. +continue +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + # Passes both tests. +ac_preproc_ok=: +break +fi +rm -f conftest.err conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.err conftest.$ac_ext +if $ac_preproc_ok; then + : +else + { { echo "$as_me:$LINENO: error: C preprocessor \"$CPP\" fails sanity check +See \`config.log' for more details." >&5 +echo "$as_me: error: C preprocessor \"$CPP\" fails sanity check +See \`config.log' for more details." >&2;} + { (exit 1); exit 1; }; } +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +echo "$as_me:$LINENO: checking for egrep" >&5 +echo $ECHO_N "checking for egrep... $ECHO_C" >&6 +if test "${ac_cv_prog_egrep+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if echo a | (grep -E '(a|b)') >/dev/null 2>&1 + then ac_cv_prog_egrep='grep -E' + else ac_cv_prog_egrep='egrep' + fi +fi +echo "$as_me:$LINENO: result: $ac_cv_prog_egrep" >&5 +echo "${ECHO_T}$ac_cv_prog_egrep" >&6 + EGREP=$ac_cv_prog_egrep + + +echo "$as_me:$LINENO: checking for ANSI C header files" >&5 +echo $ECHO_N "checking for ANSI C header files... 
$ECHO_C" >&6 +if test "${ac_cv_header_stdc+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <stdlib.h> +#include <stdarg.h> +#include <string.h> +#include <float.h> + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_header_stdc=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_header_stdc=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext + +if test $ac_cv_header_stdc = yes; then + # SunOS 4.x string.h does not declare mem*, contrary to ANSI. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <string.h> + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "memchr" >/dev/null 2>&1; then + : +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ +#include <stdlib.h> + +_ACEOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + $EGREP "free" >/dev/null 2>&1; then + : +else + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. + if test "$cross_compiling" = yes; then + : +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <ctype.h> +#if ((' ' & 0x0FF) == 0x020) +# define ISLOWER(c) ('a' <= (c) && (c) <= 'z') +# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c)) +#else +# define ISLOWER(c) \ + (('a' <= (c) && (c) <= 'i') \ + || ('j' <= (c) && (c) <= 'r') \ + || ('s' <= (c) && (c) <= 'z')) +# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c)) +#endif + +#define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) +int +main () +{ + int i; + for (i = 0; i < 256; i++) + if (XOR (islower (i), ISLOWER (i)) + || toupper (i) != TOUPPER (i)) + exit(2); + exit (0); +} +_ACEOF +rm -f conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { ac_try='./conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + : +else + echo "$as_me: program exited with status $ac_status" >&5 +echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +( exit $ac_status ) +ac_cv_header_stdc=no +fi +rm -f core *.core gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext +fi +fi +fi +echo "$as_me:$LINENO: result: $ac_cv_header_stdc" >&5 +echo "${ECHO_T}$ac_cv_header_stdc" >&6 +if test $ac_cv_header_stdc = yes; then + +cat >>confdefs.h <<\_ACEOF +#define STDC_HEADERS 1 +_ACEOF + +fi + +# On IRIX 5.3, sys/types and inttypes.h are conflicting. + + + + + + + + + +for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \ + inttypes.h stdint.h unistd.h +do +as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` +echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6 +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default + +#include <$ac_header> +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + eval "$as_ac_Header=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +eval "$as_ac_Header=no" +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + + + + + + +for ac_header in getopt.h stdio.h stdlib.h memory.h string.h +do +as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6 +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 +else + # Is the header compilable? +echo "$as_me:$LINENO: checking $ac_header usability" >&5 +echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +#include <$ac_header> +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_header_compiler=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6 + +# Is the header present? +echo "$as_me:$LINENO: checking $ac_header presence" >&5 +echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6 +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <$ac_header> +_ACEOF +if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5 + (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_preproc=no +fi +rm -f conftest.err conftest.$ac_ext +echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6 + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" 
>&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 +echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} + ( + cat <<\_ASBOX +## ------------------------------------------ ## +## Report this to the AC_PACKAGE_NAME lists. ## +## ------------------------------------------ ## +_ASBOX + ) | + sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... 
$ECHO_C" >&6 +if eval "test \"\${$as_ac_Header+set}\" = set"; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + eval "$as_ac_Header=\$ac_header_preproc" +fi +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6 + +fi +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + +if test -z "$HAVE_stdio.h" +then + { { echo "$as_me:$LINENO: error: Need stdio.h!" >&5 +echo "$as_me: error: Need stdio.h!" >&2;} + { (exit 1); exit 1; }; } +fi +if test -z "$HAVE_stdlib.h" +then + { { echo "$as_me:$LINENO: error: Need stdlib.h!" >&5 +echo "$as_me: error: Need stdlib.h!" >&2;} + { (exit 1); exit 1; }; } +fi +if test -z "$HAVE_stdlib.h" +then + { { echo "$as_me:$LINENO: error: Need memory.h!" >&5 +echo "$as_me: error: Need memory.h!" >&2;} + { (exit 1); exit 1; }; } +fi +if test -z "$HAVE_string.h" +then + { { echo "$as_me:$LINENO: error: Need string.h" >&5 +echo "$as_me: error: Need string.h" >&2;} + { (exit 1); exit 1; }; } +fi + +ac_aux_dir= +for ac_dir in $srcdir $srcdir/.. $srcdir/../..; do + if test -f $ac_dir/install-sh; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/install-sh -c" + break + elif test -f $ac_dir/install.sh; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/install.sh -c" + break + elif test -f $ac_dir/shtool; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/shtool install -c" + break + fi +done +if test -z "$ac_aux_dir"; then + { { echo "$as_me:$LINENO: error: cannot find install-sh or install.sh in $srcdir $srcdir/.. $srcdir/../.." >&5 +echo "$as_me: error: cannot find install-sh or install.sh in $srcdir $srcdir/.. $srcdir/../.." >&2;} + { (exit 1); exit 1; }; } +fi +ac_config_guess="$SHELL $ac_aux_dir/config.guess" +ac_config_sub="$SHELL $ac_aux_dir/config.sub" +ac_configure="$SHELL $ac_aux_dir/configure" # This should be Cygnus configure. 
+ +# Make sure we can run config.sub. +$ac_config_sub sun4 >/dev/null 2>&1 || + { { echo "$as_me:$LINENO: error: cannot run $ac_config_sub" >&5 +echo "$as_me: error: cannot run $ac_config_sub" >&2;} + { (exit 1); exit 1; }; } + +echo "$as_me:$LINENO: checking build system type" >&5 +echo $ECHO_N "checking build system type... $ECHO_C" >&6 +if test "${ac_cv_build+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_cv_build_alias=$build_alias +test -z "$ac_cv_build_alias" && + ac_cv_build_alias=`$ac_config_guess` +test -z "$ac_cv_build_alias" && + { { echo "$as_me:$LINENO: error: cannot guess build type; you must specify one" >&5 +echo "$as_me: error: cannot guess build type; you must specify one" >&2;} + { (exit 1); exit 1; }; } +ac_cv_build=`$ac_config_sub $ac_cv_build_alias` || + { { echo "$as_me:$LINENO: error: $ac_config_sub $ac_cv_build_alias failed" >&5 +echo "$as_me: error: $ac_config_sub $ac_cv_build_alias failed" >&2;} + { (exit 1); exit 1; }; } + +fi +echo "$as_me:$LINENO: result: $ac_cv_build" >&5 +echo "${ECHO_T}$ac_cv_build" >&6 +build=$ac_cv_build +build_cpu=`echo $ac_cv_build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\1/'` +build_vendor=`echo $ac_cv_build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\2/'` +build_os=`echo $ac_cv_build | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\3/'` + + +echo "$as_me:$LINENO: checking host system type" >&5 +echo $ECHO_N "checking host system type... 
$ECHO_C" >&6 +if test "${ac_cv_host+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_cv_host_alias=$host_alias +test -z "$ac_cv_host_alias" && + ac_cv_host_alias=$ac_cv_build_alias +ac_cv_host=`$ac_config_sub $ac_cv_host_alias` || + { { echo "$as_me:$LINENO: error: $ac_config_sub $ac_cv_host_alias failed" >&5 +echo "$as_me: error: $ac_config_sub $ac_cv_host_alias failed" >&2;} + { (exit 1); exit 1; }; } + +fi +echo "$as_me:$LINENO: result: $ac_cv_host" >&5 +echo "${ECHO_T}$ac_cv_host" >&6 +host=$ac_cv_host +host_cpu=`echo $ac_cv_host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\1/'` +host_vendor=`echo $ac_cv_host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\2/'` +host_os=`echo $ac_cv_host | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\3/'` + + +echo "$as_me:$LINENO: checking target system type" >&5 +echo $ECHO_N "checking target system type... $ECHO_C" >&6 +if test "${ac_cv_target+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_cv_target_alias=$target_alias +test "x$ac_cv_target_alias" = "x" && + ac_cv_target_alias=$ac_cv_host_alias +ac_cv_target=`$ac_config_sub $ac_cv_target_alias` || + { { echo "$as_me:$LINENO: error: $ac_config_sub $ac_cv_target_alias failed" >&5 +echo "$as_me: error: $ac_config_sub $ac_cv_target_alias failed" >&2;} + { (exit 1); exit 1; }; } + +fi +echo "$as_me:$LINENO: result: $ac_cv_target" >&5 +echo "${ECHO_T}$ac_cv_target" >&6 +target=$ac_cv_target +target_cpu=`echo $ac_cv_target | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\1/'` +target_vendor=`echo $ac_cv_target | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\2/'` +target_os=`echo $ac_cv_target | sed 's/^\([^-]*\)-\([^-]*\)-\(.*\)$/\3/'` + + +# The aliases save the names the user supplied, while $host etc. +# will get canonicalized. 
+test -n "$target_alias" && + test "$program_prefix$program_suffix$program_transform_name" = \ + NONENONEs,x,x, && + program_prefix=${target_alias}- +case $target_cpu in +i386|i486|i586|i686) + ARCH_OPTION="-march=$target_cpu" + MLIBS="viterbi27_mmx.o mmxbfly27.o viterbi27_sse.o ssebfly27.o viterbi27_sse2.o sse2bfly27.o \ + viterbi29_mmx.o mmxbfly29.o viterbi29_sse.o ssebfly29.o viterbi29_sse2.o sse2bfly29.o \ + viterbi39_sse2.o viterbi39_sse.o viterbi39_mmx.o \ + viterbi615_mmx.o viterbi615_sse.o viterbi615_sse2.o \ + dotprod_mmx.o dotprod_mmx_assist.o \ + dotprod_sse2.o dotprod_sse2_assist.o \ + peakval_mmx.o peakval_mmx_assist.o \ + peakval_sse.o peakval_sse_assist.o \ + peakval_sse2.o peakval_sse2_assist.o \ + sumsq.o sumsq_port.o \ + sumsq_sse2.o sumsq_sse2_assist.o \ + sumsq_mmx.o sumsq_mmx_assist.o \ + cpu_features.o cpu_mode_x86.o" + ;; +powerpc*) + ARCH_OPTION="-fno-common -faltivec" + MLIBS="viterbi27_av.o viterbi29_av.o viterbi39_av.o viterbi615_av.o \ + encode_rs_av.o \ + dotprod_av.o sumsq_av.o peakval_av.o cpu_mode_ppc.o" + ;; +*) + MLIBS= +esac +case $target_os in +darwin*) + SH_LIB=libfec.dylib + REBIND="" + ;; +*) + SH_LIB=libfec.so + REBIND=ldconfig + ;; +esac + + + + + + + + + +for ac_func in getopt_long memset memmove +do +as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` +echo "$as_me:$LINENO: checking for $ac_func" >&5 +echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6 +if eval "test \"\${$as_ac_var+set}\" = set"; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func. + For example, HP-UX 11i <limits.h> declares gettimeofday. */ +#define $ac_func innocuous_$ac_func + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $ac_func (); below. 
+ Prefer <limits.h> to <assert.h> if __STDC__ is defined, since + <limits.h> exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include <limits.h> +#else +# include <assert.h> +#endif + +#undef $ac_func + +/* Override any gcc2 internal prototype to avoid an error. */ +#ifdef __cplusplus +extern "C" +{ +#endif +/* We use char because int might match the return type of a gcc2 + builtin and then its argument prototype would still apply. */ +char $ac_func (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. */ +#if defined (__stub_$ac_func) || defined (__stub___$ac_func) +choke me +#else +char (*f) () = $ac_func; +#endif +#ifdef __cplusplus +} +#endif + +int +main () +{ +return f != $ac_func; + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + eval "$as_ac_var=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +eval "$as_ac_var=no" +fi +rm -f conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_var'}'`" >&5 +echo "${ECHO_T}`eval echo '${'$as_ac_var'}'`" >&6 +if test `eval echo '${'$as_ac_var'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + +fi +done + + + ac_config_files="$ac_config_files makefile" +cat >confcache <<\_ACEOF +# This file is a shell script that caches the results of configure +# tests run on this system so they can be shared between configure +# scripts and configure runs, see configure's option --config-cache. +# It is not useful on other systems. If it contains results you don't +# want to keep, you may remove or edit it. +# +# config.status only pays attention to the cache file if you give it +# the --recheck option to rerun configure. +# +# `ac_cv_env_foo' variables (set or unset) will be overridden when +# loading this file, other *unset* `ac_cv_foo' will be assigned the +# following values. + +_ACEOF + +# The following way of writing the cache mishandles newlines in values, +# but we know of no workaround that is simple, portable, and efficient. +# So, don't put newlines in cache variables' values. +# Ultrix sh set writes to stderr and can't be redirected directly, +# and sets the high bit in the cache file unless we assign to the vars. +{ + (set) 2>&1 | + case `(ac_space=' '; set | grep ac_space) 2>&1` in + *ac_space=\ *) + # `set' does not quote correctly, so add quotes (double-quote + # substitution turns \\\\ into \\, and sed turns \\ into \). + sed -n \ + "s/'/'\\\\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" + ;; + *) + # `set' quotes correctly as required by POSIX, so do not add quotes. 
+ sed -n \ + "s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1=\\2/p" + ;; + esac; +} | + sed ' + t clear + : clear + s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/ + t end + /^ac_cv_env/!s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ + : end' >>confcache +if diff $cache_file confcache >/dev/null 2>&1; then :; else + if test -w $cache_file; then + test "x$cache_file" != "x/dev/null" && echo "updating cache $cache_file" + cat confcache >$cache_file + else + echo "not updating unwritable cache $cache_file" + fi +fi +rm -f confcache + +test "x$prefix" = xNONE && prefix=$ac_default_prefix +# Let make expand exec_prefix. +test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' + +# VPATH may cause trouble with some makes, so we remove $(srcdir), +# ${srcdir} and @srcdir@ from VPATH if srcdir is ".", strip leading and +# trailing colons and then remove the whole line if VPATH becomes empty +# (actually we leave an empty line to preserve line numbers). +if test "x$srcdir" = x.; then + ac_vpsub='/^[ ]*VPATH[ ]*=/{ +s/:*\$(srcdir):*/:/; +s/:*\${srcdir}:*/:/; +s/:*@srcdir@:*/:/; +s/^\([^=]*=[ ]*\):*/\1/; +s/:*$//; +s/^[^=]*=[ ]*$//; +}' +fi + +DEFS=-DHAVE_CONFIG_H + +ac_libobjs= +ac_ltlibobjs= +for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue + # 1. Remove the extension, and $U if already installed. + ac_i=`echo "$ac_i" | + sed 's/\$U\././;s/\.o$//;s/\.obj$//'` + # 2. Add them. + ac_libobjs="$ac_libobjs $ac_i\$U.$ac_objext" + ac_ltlibobjs="$ac_ltlibobjs $ac_i"'$U.lo' +done +LIBOBJS=$ac_libobjs + +LTLIBOBJS=$ac_ltlibobjs + + + +: ${CONFIG_STATUS=./config.status} +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files $CONFIG_STATUS" +{ echo "$as_me:$LINENO: creating $CONFIG_STATUS" >&5 +echo "$as_me: creating $CONFIG_STATUS" >&6;} +cat >$CONFIG_STATUS <<_ACEOF +#! $SHELL +# Generated by $as_me. +# Run this file to recreate the current configuration. 
+# Compiler output produced by configure, useful for debugging +# configure, is in config.log if it exists. + +debug=false +ac_cs_recheck=false +ac_cs_silent=false +SHELL=\${CONFIG_SHELL-$SHELL} +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF +## --------------------- ## +## M4sh Initialization. ## +## --------------------- ## + +# Be Bourne compatible +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: + # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' +elif test -n "${BASH_VERSION+set}" && (set -o posix) >/dev/null 2>&1; then + set -o posix +fi +DUALCASE=1; export DUALCASE # for MKS sh + +# Support unset when possible. +if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then + as_unset=unset +else + as_unset=false +fi + + +# Work around bugs in pre-3.0 UWIN ksh. +$as_unset ENV MAIL MAILPATH +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +for as_var in \ + LANG LANGUAGE LC_ADDRESS LC_ALL LC_COLLATE LC_CTYPE LC_IDENTIFICATION \ + LC_MEASUREMENT LC_MESSAGES LC_MONETARY LC_NAME LC_NUMERIC LC_PAPER \ + LC_TELEPHONE LC_TIME +do + if (set +x; test -z "`(eval $as_var=C; export $as_var) 2>&1`"); then + eval $as_var=C; export $as_var + else + $as_unset $as_var + fi +done + +# Required to use basename. +if expr a : '\(a\)' >/dev/null 2>&1; then + as_expr=expr +else + as_expr=false +fi + +if (basename /) >/dev/null 2>&1 && test "X`basename / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + + +# Name of the executable. +as_me=`$as_basename "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)$' \| \ + . : '\(.\)' 2>/dev/null || +echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/; q; } + /^X\/\(\/\/\)$/{ s//\1/; q; } + /^X\/\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + + +# PATH needs CR, and LINENO needs CR and PATH. +# Avoid depending upon Character Ranges. 
+as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + +# The user is always right. +if test "${PATH_SEPARATOR+set}" != set; then + echo "#! /bin/sh" >conf$$.sh + echo "exit 0" >>conf$$.sh + chmod +x conf$$.sh + if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then + PATH_SEPARATOR=';' + else + PATH_SEPARATOR=: + fi + rm -f conf$$.sh +fi + + + as_lineno_1=$LINENO + as_lineno_2=$LINENO + as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null` + test "x$as_lineno_1" != "x$as_lineno_2" && + test "x$as_lineno_3" = "x$as_lineno_2" || { + # Find who we are. Look in the path if we contain no path at all + # relative or not. + case $0 in + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break +done + + ;; + esac + # We did not find ourselves, most probably we were run as `sh COMMAND' + # in which case we are not to be found in the path. + if test "x$as_myself" = x; then + as_myself=$0 + fi + if test ! -f "$as_myself"; then + { { echo "$as_me:$LINENO: error: cannot find myself; rerun with an absolute path" >&5 +echo "$as_me: error: cannot find myself; rerun with an absolute path" >&2;} + { (exit 1); exit 1; }; } + fi + case $CONFIG_SHELL in + '') + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for as_base in sh bash ksh sh5; do + case $as_dir in + /*) + if ("$as_dir/$as_base" -c ' + as_lineno_1=$LINENO + as_lineno_2=$LINENO + as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null` + test "x$as_lineno_1" != "x$as_lineno_2" && + test "x$as_lineno_3" = "x$as_lineno_2" ') 2>/dev/null; then + $as_unset BASH_ENV || test "${BASH_ENV+set}" != set || { BASH_ENV=; export BASH_ENV; } + $as_unset ENV || test "${ENV+set}" != set || { ENV=; export ENV; } + CONFIG_SHELL=$as_dir/$as_base + export CONFIG_SHELL + exec "$CONFIG_SHELL" "$0" ${1+"$@"} + fi;; + esac + done +done +;; + esac + + # Create $as_me.lineno as a copy of $as_myself, but with $LINENO + # uniformly replaced by the line number. The first 'sed' inserts a + # line-number line before each line; the second 'sed' does the real + # work. The second script uses 'N' to pair each line-number line + # with the numbered line, and appends trailing '-' during + # substitution so that $LINENO is not a special case at line end. + # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the + # second 'sed' script. Blame Lee E. McMahon for sed's syntax. :-) + sed '=' <$as_myself | + sed ' + N + s,$,-, + : loop + s,^\(['$as_cr_digits']*\)\(.*\)[$]LINENO\([^'$as_cr_alnum'_]\),\1\2\1\3, + t loop + s,-$,, + s,^['$as_cr_digits']*\n,, + ' >$as_me.lineno && + chmod +x $as_me.lineno || + { { echo "$as_me:$LINENO: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&5 +echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2;} + { (exit 1); exit 1; }; } + + # Don't try to exec as it changes $[0], causing all sort of problems + # (the dirname of $[0] is not the place where we might find the + # original and so on. Autoconf is especially sensible to this). + . ./$as_me.lineno + # Exit status is that of the last command. 
+ exit +} + + +case `echo "testing\c"; echo 1,2,3`,`echo -n testing; echo 1,2,3` in + *c*,-n*) ECHO_N= ECHO_C=' +' ECHO_T=' ' ;; + *c*,* ) ECHO_N=-n ECHO_C= ECHO_T= ;; + *) ECHO_N= ECHO_C='\c' ECHO_T= ;; +esac + +if expr a : '\(a\)' >/dev/null 2>&1; then + as_expr=expr +else + as_expr=false +fi + +rm -f conf$$ conf$$.exe conf$$.file +echo >conf$$.file +if ln -s conf$$.file conf$$ 2>/dev/null; then + # We could just check for DJGPP; but this test a) works b) is more generic + # and c) will remain valid once DJGPP supports symlinks (DJGPP 2.04). + if test -f conf$$.exe; then + # Don't use ln at all; we don't have any links + as_ln_s='cp -p' + else + as_ln_s='ln -s' + fi +elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln +else + as_ln_s='cp -p' +fi +rm -f conf$$ conf$$.exe conf$$.file + +if mkdir -p . 2>/dev/null; then + as_mkdir_p=: +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + +as_executable_p="test -f" + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + +# IFS +# We need space, tab and new line, in precisely that order. +as_nl=' +' +IFS=" $as_nl" + +# CDPATH. +$as_unset CDPATH + +exec 6>&1 + +# Open the log real soon, to keep \$[0] and so on meaningful, and to +# report actual input values of CONFIG_FILES etc. instead of their +# values after options handling. Logging --version etc. is OK. +exec 5>>config.log +{ + echo + sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX +## Running $as_me. ## +_ASBOX +} >&5 +cat >&5 <<_CSEOF + +This file was extended by $as_me, which was +generated by GNU Autoconf 2.59. 
Invocation command line was + + CONFIG_FILES = $CONFIG_FILES + CONFIG_HEADERS = $CONFIG_HEADERS + CONFIG_LINKS = $CONFIG_LINKS + CONFIG_COMMANDS = $CONFIG_COMMANDS + $ $0 $@ + +_CSEOF +echo "on `(hostname || uname -n) 2>/dev/null | sed 1q`" >&5 +echo >&5 +_ACEOF + +# Files that config.status was made for. +if test -n "$ac_config_files"; then + echo "config_files=\"$ac_config_files\"" >>$CONFIG_STATUS +fi + +if test -n "$ac_config_headers"; then + echo "config_headers=\"$ac_config_headers\"" >>$CONFIG_STATUS +fi + +if test -n "$ac_config_links"; then + echo "config_links=\"$ac_config_links\"" >>$CONFIG_STATUS +fi + +if test -n "$ac_config_commands"; then + echo "config_commands=\"$ac_config_commands\"" >>$CONFIG_STATUS +fi + +cat >>$CONFIG_STATUS <<\_ACEOF + +ac_cs_usage="\ +\`$as_me' instantiates files from templates according to the +current configuration. + +Usage: $0 [OPTIONS] [FILE]... + + -h, --help print this help, then exit + -V, --version print version number, then exit + -q, --quiet do not print progress messages + -d, --debug don't remove temporary files + --recheck update $as_me by reconfiguring in the same conditions + --file=FILE[:TEMPLATE] + instantiate the configuration file FILE + --header=FILE[:TEMPLATE] + instantiate the configuration header FILE + +Configuration files: +$config_files + +Configuration headers: +$config_headers + +Report bugs to <bug-autoconf@gnu.org>." +_ACEOF + +cat >>$CONFIG_STATUS <<_ACEOF +ac_cs_version="\\ +config.status +configured by $0, generated by GNU Autoconf 2.59, + with options \\"`echo "$ac_configure_args" | sed 's/[\\""\`\$]/\\\\&/g'`\\" + +Copyright (C) 2003 Free Software Foundation, Inc. +This config.status script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it." +srcdir=$srcdir +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF +# If no file are specified by the user, then we need to provide default +# value. 
By we need to know if files were specified by the user. +ac_need_defaults=: +while test $# != 0 +do + case $1 in + --*=*) + ac_option=`expr "x$1" : 'x\([^=]*\)='` + ac_optarg=`expr "x$1" : 'x[^=]*=\(.*\)'` + ac_shift=: + ;; + -*) + ac_option=$1 + ac_optarg=$2 + ac_shift=shift + ;; + *) # This is not an option, so the user has probably given explicit + # arguments. + ac_option=$1 + ac_need_defaults=false;; + esac + + case $ac_option in + # Handling of the options. +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF + -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) + ac_cs_recheck=: ;; + --version | --vers* | -V ) + echo "$ac_cs_version"; exit 0 ;; + --he | --h) + # Conflict between --help and --header + { { echo "$as_me:$LINENO: error: ambiguous option: $1 +Try \`$0 --help' for more information." >&5 +echo "$as_me: error: ambiguous option: $1 +Try \`$0 --help' for more information." >&2;} + { (exit 1); exit 1; }; };; + --help | --hel | -h ) + echo "$ac_cs_usage"; exit 0 ;; + --debug | --d* | -d ) + debug=: ;; + --file | --fil | --fi | --f ) + $ac_shift + CONFIG_FILES="$CONFIG_FILES $ac_optarg" + ac_need_defaults=false;; + --header | --heade | --head | --hea ) + $ac_shift + CONFIG_HEADERS="$CONFIG_HEADERS $ac_optarg" + ac_need_defaults=false;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil | --si | --s) + ac_cs_silent=: ;; + + # This is an error. + -*) { { echo "$as_me:$LINENO: error: unrecognized option: $1 +Try \`$0 --help' for more information." >&5 +echo "$as_me: error: unrecognized option: $1 +Try \`$0 --help' for more information." 
>&2;} + { (exit 1); exit 1; }; } ;; + + *) ac_config_targets="$ac_config_targets $1" ;; + + esac + shift +done + +ac_configure_extra_args= + +if $ac_cs_silent; then + exec 6>/dev/null + ac_configure_extra_args="$ac_configure_extra_args --silent" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF +if \$ac_cs_recheck; then + echo "running $SHELL $0 " $ac_configure_args \$ac_configure_extra_args " --no-create --no-recursion" >&6 + exec $SHELL $0 $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion +fi + +_ACEOF + + + + + +cat >>$CONFIG_STATUS <<\_ACEOF +for ac_config_target in $ac_config_targets +do + case "$ac_config_target" in + # Handling of arguments. + "makefile" ) CONFIG_FILES="$CONFIG_FILES makefile" ;; + "config.h" ) CONFIG_HEADERS="$CONFIG_HEADERS config.h" ;; + *) { { echo "$as_me:$LINENO: error: invalid argument: $ac_config_target" >&5 +echo "$as_me: error: invalid argument: $ac_config_target" >&2;} + { (exit 1); exit 1; }; };; + esac +done + +# If the user did not use the arguments to specify the items to instantiate, +# then the envvar interface is used. Set only those that are not. +# We use the long form for the default assignment because of an extremely +# bizarre bug on SunOS 4.1.3. +if $ac_need_defaults; then + test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files + test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers +fi + +# Have a temporary directory for convenience. Make it in the build tree +# simply because there is no reason to put it here, and in addition, +# creating and moving files from /tmp can sometimes cause problems. +# Create a temporary directory, and hook for its removal unless debugging. +$debug || +{ + trap 'exit_status=$?; rm -rf $tmp && exit $exit_status' 0 + trap '{ (exit 1); exit 1; }' 1 2 13 15 +} + +# Create a (secure) tmp directory for tmp files. 
+ +{ + tmp=`(umask 077 && mktemp -d -q "./confstatXXXXXX") 2>/dev/null` && + test -n "$tmp" && test -d "$tmp" +} || +{ + tmp=./confstat$$-$RANDOM + (umask 077 && mkdir $tmp) +} || +{ + echo "$as_me: cannot create a temporary directory in ." >&2 + { (exit 1); exit 1; } +} + +_ACEOF + +cat >>$CONFIG_STATUS <<_ACEOF + +# +# CONFIG_FILES section. +# + +# No need to generate the scripts if there are no CONFIG_FILES. +# This happens for instance when ./config.status config.h +if test -n "\$CONFIG_FILES"; then + # Protect against being on the right side of a sed subst in config.status. + sed 's/,@/@@/; s/@,/@@/; s/,;t t\$/@;t t/; /@;t t\$/s/[\\\\&,]/\\\\&/g; + s/@@/,@/; s/@@/@,/; s/@;t t\$/,;t t/' >\$tmp/subs.sed <<\\CEOF +s,@SHELL@,$SHELL,;t t +s,@PATH_SEPARATOR@,$PATH_SEPARATOR,;t t +s,@PACKAGE_NAME@,$PACKAGE_NAME,;t t +s,@PACKAGE_TARNAME@,$PACKAGE_TARNAME,;t t +s,@PACKAGE_VERSION@,$PACKAGE_VERSION,;t t +s,@PACKAGE_STRING@,$PACKAGE_STRING,;t t +s,@PACKAGE_BUGREPORT@,$PACKAGE_BUGREPORT,;t t +s,@exec_prefix@,$exec_prefix,;t t +s,@prefix@,$prefix,;t t +s,@program_transform_name@,$program_transform_name,;t t +s,@bindir@,$bindir,;t t +s,@sbindir@,$sbindir,;t t +s,@libexecdir@,$libexecdir,;t t +s,@datadir@,$datadir,;t t +s,@sysconfdir@,$sysconfdir,;t t +s,@sharedstatedir@,$sharedstatedir,;t t +s,@localstatedir@,$localstatedir,;t t +s,@libdir@,$libdir,;t t +s,@includedir@,$includedir,;t t +s,@oldincludedir@,$oldincludedir,;t t +s,@infodir@,$infodir,;t t +s,@mandir@,$mandir,;t t +s,@build_alias@,$build_alias,;t t +s,@host_alias@,$host_alias,;t t +s,@target_alias@,$target_alias,;t t +s,@DEFS@,$DEFS,;t t +s,@ECHO_C@,$ECHO_C,;t t +s,@ECHO_N@,$ECHO_N,;t t +s,@ECHO_T@,$ECHO_T,;t t +s,@LIBS@,$LIBS,;t t +s,@SO_NAME@,$SO_NAME,;t t +s,@VERSION@,$VERSION,;t t +s,@CC@,$CC,;t t +s,@CFLAGS@,$CFLAGS,;t t +s,@LDFLAGS@,$LDFLAGS,;t t +s,@CPPFLAGS@,$CPPFLAGS,;t t +s,@ac_ct_CC@,$ac_ct_CC,;t t +s,@EXEEXT@,$EXEEXT,;t t +s,@OBJEXT@,$OBJEXT,;t t +s,@CPP@,$CPP,;t t +s,@EGREP@,$EGREP,;t t 
+s,@build@,$build,;t t +s,@build_cpu@,$build_cpu,;t t +s,@build_vendor@,$build_vendor,;t t +s,@build_os@,$build_os,;t t +s,@host@,$host,;t t +s,@host_cpu@,$host_cpu,;t t +s,@host_vendor@,$host_vendor,;t t +s,@host_os@,$host_os,;t t +s,@target@,$target,;t t +s,@target_cpu@,$target_cpu,;t t +s,@target_vendor@,$target_vendor,;t t +s,@target_os@,$target_os,;t t +s,@SH_LIB@,$SH_LIB,;t t +s,@REBIND@,$REBIND,;t t +s,@MLIBS@,$MLIBS,;t t +s,@ARCH_OPTION@,$ARCH_OPTION,;t t +s,@LIBOBJS@,$LIBOBJS,;t t +s,@LTLIBOBJS@,$LTLIBOBJS,;t t +CEOF + +_ACEOF + + cat >>$CONFIG_STATUS <<\_ACEOF + # Split the substitutions into bite-sized pieces for seds with + # small command number limits, like on Digital OSF/1 and HP-UX. + ac_max_sed_lines=48 + ac_sed_frag=1 # Number of current file. + ac_beg=1 # First line for current file. + ac_end=$ac_max_sed_lines # Line after last line for current file. + ac_more_lines=: + ac_sed_cmds= + while $ac_more_lines; do + if test $ac_beg -gt 1; then + sed "1,${ac_beg}d; ${ac_end}q" $tmp/subs.sed >$tmp/subs.frag + else + sed "${ac_end}q" $tmp/subs.sed >$tmp/subs.frag + fi + if test ! -s $tmp/subs.frag; then + ac_more_lines=false + else + # The purpose of the label and of the branching condition is to + # speed up the sed processing (if there are no `@' at all, there + # is no need to browse any of the substitutions). + # These are the two extra sed commands mentioned above. 
+ (echo ':t + /@[a-zA-Z_][a-zA-Z_0-9]*@/!b' && cat $tmp/subs.frag) >$tmp/subs-$ac_sed_frag.sed + if test -z "$ac_sed_cmds"; then + ac_sed_cmds="sed -f $tmp/subs-$ac_sed_frag.sed" + else + ac_sed_cmds="$ac_sed_cmds | sed -f $tmp/subs-$ac_sed_frag.sed" + fi + ac_sed_frag=`expr $ac_sed_frag + 1` + ac_beg=$ac_end + ac_end=`expr $ac_end + $ac_max_sed_lines` + fi + done + if test -z "$ac_sed_cmds"; then + ac_sed_cmds=cat + fi +fi # test -n "$CONFIG_FILES" + +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF +for ac_file in : $CONFIG_FILES; do test "x$ac_file" = x: && continue + # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in". + case $ac_file in + - | *:- | *:-:* ) # input from stdin + cat >$tmp/stdin + ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'` + ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;; + *:* ) ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'` + ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;; + * ) ac_file_in=$ac_file.in ;; + esac + + # Compute @srcdir@, @top_srcdir@, and @INSTALL@ for subdirectories. + ac_dir=`(dirname "$ac_file") 2>/dev/null || +$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$ac_file" : 'X\(//\)[^/]' \| \ + X"$ac_file" : 'X\(//\)$' \| \ + X"$ac_file" : 'X\(/\)' \| \ + . : '\(.\)' 2>/dev/null || +echo X"$ac_file" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } + /^X\(\/\/\)[^/].*/{ s//\1/; q; } + /^X\(\/\/\)$/{ s//\1/; q; } + /^X\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + { if $as_mkdir_p; then + mkdir -p "$ac_dir" + else + as_dir="$ac_dir" + as_dirs= + while test ! -d "$as_dir"; do + as_dirs="$as_dir $as_dirs" + as_dir=`(dirname "$as_dir") 2>/dev/null || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| \ + . 
: '\(.\)' 2>/dev/null || +echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } + /^X\(\/\/\)[^/].*/{ s//\1/; q; } + /^X\(\/\/\)$/{ s//\1/; q; } + /^X\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + done + test ! -n "$as_dirs" || mkdir $as_dirs + fi || { { echo "$as_me:$LINENO: error: cannot create directory \"$ac_dir\"" >&5 +echo "$as_me: error: cannot create directory \"$ac_dir\"" >&2;} + { (exit 1); exit 1; }; }; } + + ac_builddir=. + +if test "$ac_dir" != .; then + ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'` + # A "../" for each directory in $ac_dir_suffix. + ac_top_builddir=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,../,g'` +else + ac_dir_suffix= ac_top_builddir= +fi + +case $srcdir in + .) # No --srcdir option. We are building in place. + ac_srcdir=. + if test -z "$ac_top_builddir"; then + ac_top_srcdir=. + else + ac_top_srcdir=`echo $ac_top_builddir | sed 's,/$,,'` + fi ;; + [\\/]* | ?:[\\/]* ) # Absolute path. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir ;; + *) # Relative path. + ac_srcdir=$ac_top_builddir$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_builddir$srcdir ;; +esac + +# Do not use `cd foo && pwd` to compute absolute paths, because +# the directories may not exist. +case `pwd` in +.) ac_abs_builddir="$ac_dir";; +*) + case "$ac_dir" in + .) ac_abs_builddir=`pwd`;; + [\\/]* | ?:[\\/]* ) ac_abs_builddir="$ac_dir";; + *) ac_abs_builddir=`pwd`/"$ac_dir";; + esac;; +esac +case $ac_abs_builddir in +.) ac_abs_top_builddir=${ac_top_builddir}.;; +*) + case ${ac_top_builddir}. in + .) ac_abs_top_builddir=$ac_abs_builddir;; + [\\/]* | ?:[\\/]* ) ac_abs_top_builddir=${ac_top_builddir}.;; + *) ac_abs_top_builddir=$ac_abs_builddir/${ac_top_builddir}.;; + esac;; +esac +case $ac_abs_builddir in +.) ac_abs_srcdir=$ac_srcdir;; +*) + case $ac_srcdir in + .) ac_abs_srcdir=$ac_abs_builddir;; + [\\/]* | ?:[\\/]* ) ac_abs_srcdir=$ac_srcdir;; + *) ac_abs_srcdir=$ac_abs_builddir/$ac_srcdir;; + esac;; +esac +case $ac_abs_builddir in +.) 
ac_abs_top_srcdir=$ac_top_srcdir;; +*) + case $ac_top_srcdir in + .) ac_abs_top_srcdir=$ac_abs_builddir;; + [\\/]* | ?:[\\/]* ) ac_abs_top_srcdir=$ac_top_srcdir;; + *) ac_abs_top_srcdir=$ac_abs_builddir/$ac_top_srcdir;; + esac;; +esac + + + + if test x"$ac_file" != x-; then + { echo "$as_me:$LINENO: creating $ac_file" >&5 +echo "$as_me: creating $ac_file" >&6;} + rm -f "$ac_file" + fi + # Let's still pretend it is `configure' which instantiates (i.e., don't + # use $as_me), people would be surprised to read: + # /* config.h. Generated by config.status. */ + if test x"$ac_file" = x-; then + configure_input= + else + configure_input="$ac_file. " + fi + configure_input=$configure_input"Generated from `echo $ac_file_in | + sed 's,.*/,,'` by configure." + + # First look for the input files in the build tree, otherwise in the + # src tree. + ac_file_inputs=`IFS=: + for f in $ac_file_in; do + case $f in + -) echo $tmp/stdin ;; + [\\/$]*) + # Absolute (can't be DOS-style, as IFS=:) + test -f "$f" || { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5 +echo "$as_me: error: cannot find input file: $f" >&2;} + { (exit 1); exit 1; }; } + echo "$f";; + *) # Relative + if test -f "$f"; then + # Build tree + echo "$f" + elif test -f "$srcdir/$f"; then + # Source tree + echo "$srcdir/$f" + else + # /dev/null tree + { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5 +echo "$as_me: error: cannot find input file: $f" >&2;} + { (exit 1); exit 1; }; } + fi;; + esac + done` || { (exit 1); exit 1; } +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF + sed "$ac_vpsub +$extrasub +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF +:t +/@[a-zA-Z_][a-zA-Z_0-9]*@/!b +s,@configure_input@,$configure_input,;t t +s,@srcdir@,$ac_srcdir,;t t +s,@abs_srcdir@,$ac_abs_srcdir,;t t +s,@top_srcdir@,$ac_top_srcdir,;t t +s,@abs_top_srcdir@,$ac_abs_top_srcdir,;t t +s,@builddir@,$ac_builddir,;t t +s,@abs_builddir@,$ac_abs_builddir,;t t +s,@top_builddir@,$ac_top_builddir,;t t 
+s,@abs_top_builddir@,$ac_abs_top_builddir,;t t +" $ac_file_inputs | (eval "$ac_sed_cmds") >$tmp/out + rm -f $tmp/stdin + if test x"$ac_file" != x-; then + mv $tmp/out $ac_file + else + cat $tmp/out + rm -f $tmp/out + fi + +done +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF + +# +# CONFIG_HEADER section. +# + +# These sed commands are passed to sed as "A NAME B NAME C VALUE D", where +# NAME is the cpp macro being defined and VALUE is the value it is being given. +# +# ac_d sets the value in "#define NAME VALUE" lines. +ac_dA='s,^\([ ]*\)#\([ ]*define[ ][ ]*\)' +ac_dB='[ ].*$,\1#\2' +ac_dC=' ' +ac_dD=',;t' +# ac_u turns "#undef NAME" without trailing blanks into "#define NAME VALUE". +ac_uA='s,^\([ ]*\)#\([ ]*\)undef\([ ][ ]*\)' +ac_uB='$,\1#\2define\3' +ac_uC=' ' +ac_uD=',;t' + +for ac_file in : $CONFIG_HEADERS; do test "x$ac_file" = x: && continue + # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in". + case $ac_file in + - | *:- | *:-:* ) # input from stdin + cat >$tmp/stdin + ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'` + ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;; + *:* ) ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'` + ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;; + * ) ac_file_in=$ac_file.in ;; + esac + + test x"$ac_file" != x- && { echo "$as_me:$LINENO: creating $ac_file" >&5 +echo "$as_me: creating $ac_file" >&6;} + + # First look for the input files in the build tree, otherwise in the + # src tree. + ac_file_inputs=`IFS=: + for f in $ac_file_in; do + case $f in + -) echo $tmp/stdin ;; + [\\/$]*) + # Absolute (can't be DOS-style, as IFS=:) + test -f "$f" || { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5 +echo "$as_me: error: cannot find input file: $f" >&2;} + { (exit 1); exit 1; }; } + # Do quote $f, to prevent DOS paths from being IFS'd. 
+ echo "$f";; + *) # Relative + if test -f "$f"; then + # Build tree + echo "$f" + elif test -f "$srcdir/$f"; then + # Source tree + echo "$srcdir/$f" + else + # /dev/null tree + { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5 +echo "$as_me: error: cannot find input file: $f" >&2;} + { (exit 1); exit 1; }; } + fi;; + esac + done` || { (exit 1); exit 1; } + # Remove the trailing spaces. + sed 's/[ ]*$//' $ac_file_inputs >$tmp/in + +_ACEOF + +# Transform confdefs.h into two sed scripts, `conftest.defines' and +# `conftest.undefs', that substitutes the proper values into +# config.h.in to produce config.h. The first handles `#define' +# templates, and the second `#undef' templates. +# And first: Protect against being on the right side of a sed subst in +# config.status. Protect against being in an unquoted here document +# in config.status. +rm -f conftest.defines conftest.undefs +# Using a here document instead of a string reduces the quoting nightmare. +# Putting comments in sed scripts is not portable. +# +# `end' is used to avoid that the second main sed command (meant for +# 0-ary CPP macros) applies to n-ary macro definitions. +# See the Autoconf documentation for `clear'. +cat >confdef2sed.sed <<\_ACEOF +s/[\\&,]/\\&/g +s,[\\$`],\\&,g +t clear +: clear +s,^[ ]*#[ ]*define[ ][ ]*\([^ (][^ (]*\)\(([^)]*)\)[ ]*\(.*\)$,${ac_dA}\1${ac_dB}\1\2${ac_dC}\3${ac_dD},gp +t end +s,^[ ]*#[ ]*define[ ][ ]*\([^ ][^ ]*\)[ ]*\(.*\)$,${ac_dA}\1${ac_dB}\1${ac_dC}\2${ac_dD},gp +: end +_ACEOF +# If some macros were called several times there might be several times +# the same #defines, which is useless. Nevertheless, we may not want to +# sort them, since we want the *last* AC-DEFINE to be honored. +uniq confdefs.h | sed -n -f confdef2sed.sed >conftest.defines +sed 's/ac_d/ac_u/g' conftest.defines >conftest.undefs +rm -f confdef2sed.sed + +# This sed command replaces #undef with comments. 
This is necessary, for +# example, in the case of _POSIX_SOURCE, which is predefined and required +# on some systems where configure will not decide to define it. +cat >>conftest.undefs <<\_ACEOF +s,^[ ]*#[ ]*undef[ ][ ]*[a-zA-Z_][a-zA-Z_0-9]*,/* & */, +_ACEOF + +# Break up conftest.defines because some shells have a limit on the size +# of here documents, and old seds have small limits too (100 cmds). +echo ' # Handle all the #define templates only if necessary.' >>$CONFIG_STATUS +echo ' if grep "^[ ]*#[ ]*define" $tmp/in >/dev/null; then' >>$CONFIG_STATUS +echo ' # If there are no defines, we may have an empty if/fi' >>$CONFIG_STATUS +echo ' :' >>$CONFIG_STATUS +rm -f conftest.tail +while grep . conftest.defines >/dev/null +do + # Write a limited-size here document to $tmp/defines.sed. + echo ' cat >$tmp/defines.sed <<CEOF' >>$CONFIG_STATUS + # Speed up: don't consider the non `#define' lines. + echo '/^[ ]*#[ ]*define/!b' >>$CONFIG_STATUS + # Work around the forget-to-reset-the-flag bug. + echo 't clr' >>$CONFIG_STATUS + echo ': clr' >>$CONFIG_STATUS + sed ${ac_max_here_lines}q conftest.defines >>$CONFIG_STATUS + echo 'CEOF + sed -f $tmp/defines.sed $tmp/in >$tmp/out + rm -f $tmp/in + mv $tmp/out $tmp/in +' >>$CONFIG_STATUS + sed 1,${ac_max_here_lines}d conftest.defines >conftest.tail + rm -f conftest.defines + mv conftest.tail conftest.defines +done +rm -f conftest.defines +echo ' fi # grep' >>$CONFIG_STATUS +echo >>$CONFIG_STATUS + +# Break up conftest.undefs because some shells have a limit on the size +# of here documents, and old seds have small limits too (100 cmds). +echo ' # Handle all the #undef templates' >>$CONFIG_STATUS +rm -f conftest.tail +while grep . conftest.undefs >/dev/null +do + # Write a limited-size here document to $tmp/undefs.sed. + echo ' cat >$tmp/undefs.sed <<CEOF' >>$CONFIG_STATUS + # Speed up: don't consider the non `#undef' + echo '/^[ ]*#[ ]*undef/!b' >>$CONFIG_STATUS + # Work around the forget-to-reset-the-flag bug. 
+ echo 't clr' >>$CONFIG_STATUS + echo ': clr' >>$CONFIG_STATUS + sed ${ac_max_here_lines}q conftest.undefs >>$CONFIG_STATUS + echo 'CEOF + sed -f $tmp/undefs.sed $tmp/in >$tmp/out + rm -f $tmp/in + mv $tmp/out $tmp/in +' >>$CONFIG_STATUS + sed 1,${ac_max_here_lines}d conftest.undefs >conftest.tail + rm -f conftest.undefs + mv conftest.tail conftest.undefs +done +rm -f conftest.undefs + +cat >>$CONFIG_STATUS <<\_ACEOF + # Let's still pretend it is `configure' which instantiates (i.e., don't + # use $as_me), people would be surprised to read: + # /* config.h. Generated by config.status. */ + if test x"$ac_file" = x-; then + echo "/* Generated by configure. */" >$tmp/config.h + else + echo "/* $ac_file. Generated by configure. */" >$tmp/config.h + fi + cat $tmp/in >>$tmp/config.h + rm -f $tmp/in + if test x"$ac_file" != x-; then + if diff $ac_file $tmp/config.h >/dev/null 2>&1; then + { echo "$as_me:$LINENO: $ac_file is unchanged" >&5 +echo "$as_me: $ac_file is unchanged" >&6;} + else + ac_dir=`(dirname "$ac_file") 2>/dev/null || +$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$ac_file" : 'X\(//\)[^/]' \| \ + X"$ac_file" : 'X\(//\)$' \| \ + X"$ac_file" : 'X\(/\)' \| \ + . : '\(.\)' 2>/dev/null || +echo X"$ac_file" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } + /^X\(\/\/\)[^/].*/{ s//\1/; q; } + /^X\(\/\/\)$/{ s//\1/; q; } + /^X\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + { if $as_mkdir_p; then + mkdir -p "$ac_dir" + else + as_dir="$ac_dir" + as_dirs= + while test ! -d "$as_dir"; do + as_dirs="$as_dir $as_dirs" + as_dir=`(dirname "$as_dir") 2>/dev/null || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| \ + . : '\(.\)' 2>/dev/null || +echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } + /^X\(\/\/\)[^/].*/{ s//\1/; q; } + /^X\(\/\/\)$/{ s//\1/; q; } + /^X\(\/\).*/{ s//\1/; q; } + s/.*/./; q'` + done + test ! 
dnl Process this file with autoconf to produce a configure script.
dnl
dnl Outputs: makefile (from makefile.in) and config.h.
dnl Substituted variables: SO_NAME, VERSION, SH_LIB, REBIND, MLIBS, ARCH_OPTION.
AC_INIT(viterbi27.c)
AC_CONFIG_HEADER(config.h)
SO_NAME=3
VERSION=3.0.0
AC_SUBST(SO_NAME)
AC_SUBST(VERSION)

dnl Checks for programs.
AC_PROG_CC
dnl $GCC is empty for non-GNU compilers, so it must be quoted; unquoted,
dnl "test != yes" is a syntax error and the check silently never fires.
if test "$GCC" != "yes"
then
	AC_MSG_ERROR([Need GNU C compiler])
fi
dnl Checks for libraries.
AC_CHECK_LIB(c, malloc)

dnl Checks for header files.
dnl AC_CHECK_HEADERS records each result in the cache variable
dnl ac_cv_header_<name>, with non-alphanumerics mapped to "_".
dnl (A shell variable named "HAVE_stdio.h" cannot exist: "$HAVE_stdio.h"
dnl expands to "${HAVE_stdio}.h", which is never empty.)
AC_CHECK_HEADERS(getopt.h stdio.h stdlib.h memory.h string.h)
if test "$ac_cv_header_stdio_h" != yes
then
	AC_MSG_ERROR([Need stdio.h!])
fi
if test "$ac_cv_header_stdlib_h" != yes
then
	AC_MSG_ERROR([Need stdlib.h!])
fi
if test "$ac_cv_header_memory_h" != yes
then
	AC_MSG_ERROR([Need memory.h!])
fi
if test "$ac_cv_header_string_h" != yes
then
	AC_MSG_ERROR([Need string.h])
fi

dnl Select the SIMD object files and compiler flags for the target CPU.
AC_CANONICAL_SYSTEM
case $target_cpu in
i386|i486|i586|i686)
	ARCH_OPTION="-march=$target_cpu"
	MLIBS="viterbi27_mmx.o mmxbfly27.o viterbi27_sse.o ssebfly27.o viterbi27_sse2.o sse2bfly27.o \
	viterbi29_mmx.o mmxbfly29.o viterbi29_sse.o ssebfly29.o viterbi29_sse2.o sse2bfly29.o \
	viterbi39_sse2.o viterbi39_sse.o viterbi39_mmx.o \
	viterbi615_mmx.o viterbi615_sse.o viterbi615_sse2.o \
	dotprod_mmx.o dotprod_mmx_assist.o \
	dotprod_sse2.o dotprod_sse2_assist.o \
	peakval_mmx.o peakval_mmx_assist.o \
	peakval_sse.o peakval_sse_assist.o \
	peakval_sse2.o peakval_sse2_assist.o \
	sumsq.o sumsq_port.o \
	sumsq_sse2.o sumsq_sse2_assist.o \
	sumsq_mmx.o sumsq_mmx_assist.o \
	cpu_features.o cpu_mode_x86.o"
	;;
powerpc*)
	ARCH_OPTION="-fno-common -faltivec"
	MLIBS="viterbi27_av.o viterbi29_av.o viterbi39_av.o viterbi615_av.o \
	encode_rs_av.o \
	dotprod_av.o sumsq_av.o peakval_av.o cpu_mode_ppc.o"
	;;
*)
	MLIBS=
esac
dnl Shared-library file name and post-install rebind command per target OS.
case $target_os in
darwin*)
	SH_LIB=libfec.dylib
	REBIND=""
	;;
*)
	SH_LIB=libfec.so
	REBIND=ldconfig
	;;
esac
AC_SUBST(SH_LIB)
AC_SUBST(REBIND)
AC_SUBST(MLIBS)
AC_SUBST(ARCH_OPTION)


dnl Checks for library functions.
AC_CHECK_FUNCS(getopt_long memset memmove)

AC_OUTPUT(makefile)
\ No newline at end of file diff --git a/cpu_mode_ppc.c b/cpu_mode_ppc.c new file mode 100644 index 0000000..0071558 --- /dev/null +++ b/cpu_mode_ppc.c @@ -0,0 +1,40 @@ +/* Determine CPU support for SIMD on Power PC + * Copyright 2004 Phil Karn, KA9Q + */ +#include <stdio.h> +#include "fec.h" +#ifdef __VEC__ +#include <sys/sysctl.h> +#endif + +/* Various SIMD instruction set names */ +char *Cpu_modes[] = {"Unknown","Portable C","x86 Multi Media Extensions (MMX)", + "x86 Streaming SIMD Extensions (SSE)", + "x86 Streaming SIMD Extensions 2 (SSE2)", + "PowerPC G4/G5 Altivec/Velocity Engine"}; + +enum cpu_mode Cpu_mode; + +void find_cpu_mode(void){ + + if(Cpu_mode != UNKNOWN) + return; + +#ifdef __VEC__ + { + /* Ask the OS if we have Altivec support */ + int selectors[2] = { CTL_HW, HW_VECTORUNIT }; + int hasVectorUnit = 0; + size_t length = sizeof(hasVectorUnit); + int error = sysctl(selectors, 2, &hasVectorUnit, &length, NULL, 0); + if(0 == error && hasVectorUnit) + Cpu_mode = ALTIVEC; + else + Cpu_mode = PORT; + } +#else + Cpu_mode = PORT; +#endif + + fprintf(stderr,"SIMD CPU detect: %s\n",Cpu_modes[Cpu_mode]); +} diff --git a/cpu_mode_x86.c b/cpu_mode_x86.c new file mode 100644 index 0000000..322018e --- /dev/null +++ b/cpu_mode_x86.c @@ -0,0 +1,33 @@ +/* Determine CPU support for SIMD + * Copyright 2004 Phil Karn, KA9Q + */ +#include <stdio.h> +#include "fec.h" + +/* Various SIMD instruction set names */ +char *Cpu_modes[] = {"Unknown","Portable C","x86 Multi Media Extensions (MMX)", + "x86 Streaming SIMD Extensions (SSE)", + "x86 Streaming SIMD Extensions 2 (SSE2)", + "PowerPC G4/G5 Altivec/Velocity Engine"}; + +enum cpu_mode Cpu_mode; + +void find_cpu_mode(void){ + + int f; + if(Cpu_mode != UNKNOWN) + return; + + /* Figure out what kind of CPU we have */ + f = cpu_features(); + if(f & (1<<26)){ /* SSE2 is present */ + Cpu_mode = SSE2; + } else if(f & (1<<25)){ /* SSE is present */ + Cpu_mode = SSE; + } else if(f & (1<<23)){ /* MMX is present */ + Cpu_mode = MMX; 
+ } else { /* No SIMD at all */ + Cpu_mode = PORT; + } + fprintf(stderr,"SIMD CPU detect: %s\n",Cpu_modes[Cpu_mode]); +} diff --git a/decode_rs.c b/decode_rs.c new file mode 100644 index 0000000..d7f97b3 --- /dev/null +++ b/decode_rs.c @@ -0,0 +1,262 @@ +/* Reed-Solomon decoder + * Copyright 2002 Phil Karn, KA9Q + * May be used under the terms of the GNU Lesser General Public License (LGPL) + */ + +#ifdef DEBUG +#include <stdio.h> +#endif + +#include <string.h> + +#define NULL ((void *)0) +#define min(a,b) ((a) < (b) ? (a) : (b)) + +#ifdef FIXED +#include "fixed.h" +#elif defined(BIGSYM) +#include "int.h" +#else +#include "char.h" +#endif + +int DECODE_RS( +#ifdef FIXED +data_t *data, int *eras_pos, int no_eras,int pad){ +#else +void *p,data_t *data, int *eras_pos, int no_eras){ + struct rs *rs = (struct rs *)p; +#endif + int deg_lambda, el, deg_omega; + int i, j, r,k; + data_t u,q,tmp,num1,num2,den,discr_r; + data_t lambda[NROOTS+1], s[NROOTS]; /* Err+Eras Locator poly + * and syndrome poly */ + data_t b[NROOTS+1], t[NROOTS+1], omega[NROOTS+1]; + data_t root[NROOTS], reg[NROOTS+1], loc[NROOTS]; + int syn_error, count; + +#ifdef FIXED + /* Check pad parameter for validity */ + if(pad < 0 || pad >= NN) + return -1; +#endif + + /* form the syndromes; i.e., evaluate data(x) at roots of g(x) */ + for(i=0;i<NROOTS;i++) + s[i] = data[0]; + + for(j=1;j<NN-PAD;j++){ + for(i=0;i<NROOTS;i++){ + if(s[i] == 0){ + s[i] = data[j]; + } else { + s[i] = data[j] ^ ALPHA_TO[MODNN(INDEX_OF[s[i]] + (FCR+i)*PRIM)]; + } + } + } + + /* Convert syndromes to index form, checking for nonzero condition */ + syn_error = 0; + for(i=0;i<NROOTS;i++){ + syn_error |= s[i]; + s[i] = INDEX_OF[s[i]]; + } + + if (!syn_error) { + /* if syndrome is zero, data[] is a codeword and there are no + * errors to correct. 
So return data[] unmodified + */ + count = 0; + goto finish; + } + memset(&lambda[1],0,NROOTS*sizeof(lambda[0])); + lambda[0] = 1; + + if (no_eras > 0) { + /* Init lambda to be the erasure locator polynomial */ + lambda[1] = ALPHA_TO[MODNN(PRIM*(NN-1-eras_pos[0]))]; + for (i = 1; i < no_eras; i++) { + u = MODNN(PRIM*(NN-1-eras_pos[i])); + for (j = i+1; j > 0; j--) { + tmp = INDEX_OF[lambda[j - 1]]; + if(tmp != A0) + lambda[j] ^= ALPHA_TO[MODNN(u + tmp)]; + } + } + +#if DEBUG >= 1 + /* Test code that verifies the erasure locator polynomial just constructed + Needed only for decoder debugging. */ + + /* find roots of the erasure location polynomial */ + for(i=1;i<=no_eras;i++) + reg[i] = INDEX_OF[lambda[i]]; + + count = 0; + for (i = 1,k=IPRIM-1; i <= NN; i++,k = MODNN(k+IPRIM)) { + q = 1; + for (j = 1; j <= no_eras; j++) + if (reg[j] != A0) { + reg[j] = MODNN(reg[j] + j); + q ^= ALPHA_TO[reg[j]]; + } + if (q != 0) + continue; + /* store root and error location number indices */ + root[count] = i; + loc[count] = k; + count++; + } + if (count != no_eras) { + printf("count = %d no_eras = %d\n lambda(x) is WRONG\n",count,no_eras); + count = -1; + goto finish; + } +#if DEBUG >= 2 + printf("\n Erasure positions as determined by roots of Eras Loc Poly:\n"); + for (i = 0; i < count; i++) + printf("%d ", loc[i]); + printf("\n"); +#endif +#endif + } + for(i=0;i<NROOTS+1;i++) + b[i] = INDEX_OF[lambda[i]]; + + /* + * Begin Berlekamp-Massey algorithm to determine error+erasure + * locator polynomial + */ + r = no_eras; + el = no_eras; + while (++r <= NROOTS) { /* r is the step number */ + /* Compute discrepancy at the r-th step in poly-form */ + discr_r = 0; + for (i = 0; i < r; i++){ + if ((lambda[i] != 0) && (s[r-i-1] != A0)) { + discr_r ^= ALPHA_TO[MODNN(INDEX_OF[lambda[i]] + s[r-i-1])]; + } + } + discr_r = INDEX_OF[discr_r]; /* Index form */ + if (discr_r == A0) { + /* 2 lines below: B(x) <-- x*B(x) */ + memmove(&b[1],b,NROOTS*sizeof(b[0])); + b[0] = A0; + } else { + /* 7 
lines below: T(x) <-- lambda(x) - discr_r*x*b(x) */ + t[0] = lambda[0]; + for (i = 0 ; i < NROOTS; i++) { + if(b[i] != A0) + t[i+1] = lambda[i+1] ^ ALPHA_TO[MODNN(discr_r + b[i])]; + else + t[i+1] = lambda[i+1]; + } + if (2 * el <= r + no_eras - 1) { + el = r + no_eras - el; + /* + * 2 lines below: B(x) <-- inv(discr_r) * + * lambda(x) + */ + for (i = 0; i <= NROOTS; i++) + b[i] = (lambda[i] == 0) ? A0 : MODNN(INDEX_OF[lambda[i]] - discr_r + NN); + } else { + /* 2 lines below: B(x) <-- x*B(x) */ + memmove(&b[1],b,NROOTS*sizeof(b[0])); + b[0] = A0; + } + memcpy(lambda,t,(NROOTS+1)*sizeof(t[0])); + } + } + + /* Convert lambda to index form and compute deg(lambda(x)) */ + deg_lambda = 0; + for(i=0;i<NROOTS+1;i++){ + lambda[i] = INDEX_OF[lambda[i]]; + if(lambda[i] != A0) + deg_lambda = i; + } + /* Find roots of the error+erasure locator polynomial by Chien search */ + memcpy(®[1],&lambda[1],NROOTS*sizeof(reg[0])); + count = 0; /* Number of roots of lambda(x) */ + for (i = 1,k=IPRIM-1; i <= NN; i++,k = MODNN(k+IPRIM)) { + q = 1; /* lambda[0] is always 0 */ + for (j = deg_lambda; j > 0; j--){ + if (reg[j] != A0) { + reg[j] = MODNN(reg[j] + j); + q ^= ALPHA_TO[reg[j]]; + } + } + if (q != 0) + continue; /* Not a root */ + /* store root (index-form) and error location number */ +#if DEBUG>=2 + printf("count %d root %d loc %d\n",count,i,k); +#endif + root[count] = i; + loc[count] = k; + /* If we've already found max possible roots, + * abort the search to save time + */ + if(++count == deg_lambda) + break; + } + if (deg_lambda != count) { + /* + * deg(lambda) unequal to number of roots => uncorrectable + * error detected + */ + count = -1; + goto finish; + } + /* + * Compute err+eras evaluator poly omega(x) = s(x)*lambda(x) (modulo + * x**NROOTS). in index form. Also find deg(omega). 
+ */ + deg_omega = deg_lambda-1; + for (i = 0; i <= deg_omega;i++){ + tmp = 0; + for(j=i;j >= 0; j--){ + if ((s[i - j] != A0) && (lambda[j] != A0)) + tmp ^= ALPHA_TO[MODNN(s[i - j] + lambda[j])]; + } + omega[i] = INDEX_OF[tmp]; + } + + /* + * Compute error values in poly-form. num1 = omega(inv(X(l))), num2 = + * inv(X(l))**(FCR-1) and den = lambda_pr(inv(X(l))) all in poly-form + */ + for (j = count-1; j >=0; j--) { + num1 = 0; + for (i = deg_omega; i >= 0; i--) { + if (omega[i] != A0) + num1 ^= ALPHA_TO[MODNN(omega[i] + i * root[j])]; + } + num2 = ALPHA_TO[MODNN(root[j] * (FCR - 1) + NN)]; + den = 0; + + /* lambda[i+1] for i even is the formal derivative lambda_pr of lambda[i] */ + for (i = min(deg_lambda,NROOTS-1) & ~1; i >= 0; i -=2) { + if(lambda[i+1] != A0) + den ^= ALPHA_TO[MODNN(lambda[i+1] + i * root[j])]; + } +#if DEBUG >= 1 + if (den == 0) { + printf("\n ERROR: denominator = 0\n"); + count = -1; + goto finish; + } +#endif + /* Apply error to data */ + if (num1 != 0 && loc[j] >= PAD) { + data[loc[j]-PAD] ^= ALPHA_TO[MODNN(INDEX_OF[num1] + INDEX_OF[num2] + NN - INDEX_OF[den])]; + } + } + finish: + if(eras_pos != NULL){ + for(i=0;i<count;i++) + eras_pos[i] = loc[i]; + } + return count; +} diff --git a/decode_rs.h b/decode_rs.h new file mode 100644 index 0000000..c165cf3 --- /dev/null +++ b/decode_rs.h @@ -0,0 +1,298 @@ +/* The guts of the Reed-Solomon decoder, meant to be #included + * into a function body with the following typedefs, macros and variables supplied + * according to the code parameters: + + * data_t - a typedef for the data symbol + * data_t data[] - array of NN data and parity symbols to be corrected in place + * retval - an integer lvalue into which the decoder's return code is written + * NROOTS - the number of roots in the RS code generator polynomial, + * which is the same as the number of parity symbols in a block. + Integer variable or literal. + * NN - the total number of symbols in a RS block. Integer variable or literal. 
+ * PAD - the number of pad symbols in a block. Integer variable or literal. + * ALPHA_TO - The address of an array of NN elements to convert Galois field + * elements in index (log) form to polynomial form. Read only. + * INDEX_OF - The address of an array of NN elements to convert Galois field + * elements in polynomial form to index (log) form. Read only. + * MODNN - a function to reduce its argument modulo NN. May be inline or a macro. + * FCR - An integer literal or variable specifying the first consecutive root of the + * Reed-Solomon generator polynomial. Integer variable or literal. + * PRIM - The primitive root of the generator poly. Integer variable or literal. + * DEBUG - If set to 1 or more, do various internal consistency checking. Leave this + * undefined for production code + + * The memset(), memmove(), and memcpy() functions are used. The appropriate header + * file declaring these functions (usually <string.h>) must be included by the calling + * program. + */ + + +#if !defined(NROOTS) +#error "NROOTS not defined" +#endif + +#if !defined(NN) +#error "NN not defined" +#endif + +#if !defined(PAD) +#error "PAD not defined" +#endif + +#if !defined(ALPHA_TO) +#error "ALPHA_TO not defined" +#endif + +#if !defined(INDEX_OF) +#error "INDEX_OF not defined" +#endif + +#if !defined(MODNN) +#error "MODNN not defined" +#endif + +#if !defined(FCR) +#error "FCR not defined" +#endif + +#if !defined(PRIM) +#error "PRIM not defined" +#endif + +#if !defined(NULL) +#define NULL ((void *)0) +#endif + +#undef MIN +#define MIN(a,b) ((a) < (b) ? 
(a) : (b)) +#undef A0 +#define A0 (NN) + +{ + int deg_lambda, el, deg_omega; + int i, j, r,k; + data_t u,q,tmp,num1,num2,den,discr_r; + data_t lambda[NROOTS+1], s[NROOTS]; /* Err+Eras Locator poly + * and syndrome poly */ + data_t b[NROOTS+1], t[NROOTS+1], omega[NROOTS+1]; + data_t root[NROOTS], reg[NROOTS+1], loc[NROOTS]; + int syn_error, count; + + /* form the syndromes; i.e., evaluate data(x) at roots of g(x) */ + for(i=0;i<NROOTS;i++) + s[i] = data[0]; + + for(j=1;j<NN-PAD;j++){ + for(i=0;i<NROOTS;i++){ + if(s[i] == 0){ + s[i] = data[j]; + } else { + s[i] = data[j] ^ ALPHA_TO[MODNN(INDEX_OF[s[i]] + (FCR+i)*PRIM)]; + } + } + } + + /* Convert syndromes to index form, checking for nonzero condition */ + syn_error = 0; + for(i=0;i<NROOTS;i++){ + syn_error |= s[i]; + s[i] = INDEX_OF[s[i]]; + } + + if (!syn_error) { + /* if syndrome is zero, data[] is a codeword and there are no + * errors to correct. So return data[] unmodified + */ + count = 0; + goto finish; + } + memset(&lambda[1],0,NROOTS*sizeof(lambda[0])); + lambda[0] = 1; + + if (no_eras > 0) { + /* Init lambda to be the erasure locator polynomial */ + lambda[1] = ALPHA_TO[MODNN(PRIM*(NN-1-eras_pos[0]))]; + for (i = 1; i < no_eras; i++) { + u = MODNN(PRIM*(NN-1-eras_pos[i])); + for (j = i+1; j > 0; j--) { + tmp = INDEX_OF[lambda[j - 1]]; + if(tmp != A0) + lambda[j] ^= ALPHA_TO[MODNN(u + tmp)]; + } + } + +#if DEBUG >= 1 + /* Test code that verifies the erasure locator polynomial just constructed + Needed only for decoder debugging. 
*/ + + /* find roots of the erasure location polynomial */ + for(i=1;i<=no_eras;i++) + reg[i] = INDEX_OF[lambda[i]]; + + count = 0; + for (i = 1,k=IPRIM-1; i <= NN; i++,k = MODNN(k+IPRIM)) { + q = 1; + for (j = 1; j <= no_eras; j++) + if (reg[j] != A0) { + reg[j] = MODNN(reg[j] + j); + q ^= ALPHA_TO[reg[j]]; + } + if (q != 0) + continue; + /* store root and error location number indices */ + root[count] = i; + loc[count] = k; + count++; + } + if (count != no_eras) { + printf("count = %d no_eras = %d\n lambda(x) is WRONG\n",count,no_eras); + count = -1; + goto finish; + } +#if DEBUG >= 2 + printf("\n Erasure positions as determined by roots of Eras Loc Poly:\n"); + for (i = 0; i < count; i++) + printf("%d ", loc[i]); + printf("\n"); +#endif +#endif + } + for(i=0;i<NROOTS+1;i++) + b[i] = INDEX_OF[lambda[i]]; + + /* + * Begin Berlekamp-Massey algorithm to determine error+erasure + * locator polynomial + */ + r = no_eras; + el = no_eras; + while (++r <= NROOTS) { /* r is the step number */ + /* Compute discrepancy at the r-th step in poly-form */ + discr_r = 0; + for (i = 0; i < r; i++){ + if ((lambda[i] != 0) && (s[r-i-1] != A0)) { + discr_r ^= ALPHA_TO[MODNN(INDEX_OF[lambda[i]] + s[r-i-1])]; + } + } + discr_r = INDEX_OF[discr_r]; /* Index form */ + if (discr_r == A0) { + /* 2 lines below: B(x) <-- x*B(x) */ + memmove(&b[1],b,NROOTS*sizeof(b[0])); + b[0] = A0; + } else { + /* 7 lines below: T(x) <-- lambda(x) - discr_r*x*b(x) */ + t[0] = lambda[0]; + for (i = 0 ; i < NROOTS; i++) { + if(b[i] != A0) + t[i+1] = lambda[i+1] ^ ALPHA_TO[MODNN(discr_r + b[i])]; + else + t[i+1] = lambda[i+1]; + } + if (2 * el <= r + no_eras - 1) { + el = r + no_eras - el; + /* + * 2 lines below: B(x) <-- inv(discr_r) * + * lambda(x) + */ + for (i = 0; i <= NROOTS; i++) + b[i] = (lambda[i] == 0) ? 
A0 : MODNN(INDEX_OF[lambda[i]] - discr_r + NN); + } else { + /* 2 lines below: B(x) <-- x*B(x) */ + memmove(&b[1],b,NROOTS*sizeof(b[0])); + b[0] = A0; + } + memcpy(lambda,t,(NROOTS+1)*sizeof(t[0])); + } + } + + /* Convert lambda to index form and compute deg(lambda(x)) */ + deg_lambda = 0; + for(i=0;i<NROOTS+1;i++){ + lambda[i] = INDEX_OF[lambda[i]]; + if(lambda[i] != A0) + deg_lambda = i; + } + /* Find roots of the error+erasure locator polynomial by Chien search */ + memcpy(&reg[1],&lambda[1],NROOTS*sizeof(reg[0])); + count = 0; /* Number of roots of lambda(x) */ + for (i = 1,k=IPRIM-1; i <= NN; i++,k = MODNN(k+IPRIM)) { + q = 1; /* lambda[0] is always 0 */ + for (j = deg_lambda; j > 0; j--){ + if (reg[j] != A0) { + reg[j] = MODNN(reg[j] + j); + q ^= ALPHA_TO[reg[j]]; + } + } + if (q != 0) + continue; /* Not a root */ + /* store root (index-form) and error location number */ +#if DEBUG>=2 + printf("count %d root %d loc %d\n",count,i,k); +#endif + root[count] = i; + loc[count] = k; + /* If we've already found max possible roots, + * abort the search to save time + */ + if(++count == deg_lambda) + break; + } + if (deg_lambda != count) { + /* + * deg(lambda) unequal to number of roots => uncorrectable + * error detected + */ + count = -1; + goto finish; + } + /* + * Compute err+eras evaluator poly omega(x) = s(x)*lambda(x) (modulo + * x**NROOTS). in index form. Also find deg(omega). + */ + deg_omega = deg_lambda-1; + for (i = 0; i <= deg_omega;i++){ + tmp = 0; + for(j=i;j >= 0; j--){ + if ((s[i - j] != A0) && (lambda[j] != A0)) + tmp ^= ALPHA_TO[MODNN(s[i - j] + lambda[j])]; + } + omega[i] = INDEX_OF[tmp]; + } + + /* + * Compute error values in poly-form. 
num1 = omega(inv(X(l))), num2 = + * inv(X(l))**(FCR-1) and den = lambda_pr(inv(X(l))) all in poly-form + */ + for (j = count-1; j >=0; j--) { + num1 = 0; + for (i = deg_omega; i >= 0; i--) { + if (omega[i] != A0) + num1 ^= ALPHA_TO[MODNN(omega[i] + i * root[j])]; + } + num2 = ALPHA_TO[MODNN(root[j] * (FCR - 1) + NN)]; + den = 0; + + /* lambda[i+1] for i even is the formal derivative lambda_pr of lambda[i] */ + for (i = MIN(deg_lambda,NROOTS-1) & ~1; i >= 0; i -=2) { + if(lambda[i+1] != A0) + den ^= ALPHA_TO[MODNN(lambda[i+1] + i * root[j])]; + } +#if DEBUG >= 1 + if (den == 0) { + printf("\n ERROR: denominator = 0\n"); + count = -1; + goto finish; + } +#endif + /* Apply error to data */ + if (num1 != 0 && loc[j] >= PAD) { + data[loc[j]-PAD] ^= ALPHA_TO[MODNN(INDEX_OF[num1] + INDEX_OF[num2] + NN - INDEX_OF[den])]; + } + } + finish: + if(eras_pos != NULL){ + for(i=0;i<count;i++) + eras_pos[i] = loc[i]; + } + retval = count; +} diff --git a/decode_rs_8.c b/decode_rs_8.c new file mode 100644 index 0000000..995b0d9 --- /dev/null +++ b/decode_rs_8.c @@ -0,0 +1,24 @@ +/* General purpose Reed-Solomon decoder for 8-bit symbols or less + * Copyright 2003 Phil Karn, KA9Q + * May be used under the terms of the GNU Lesser General Public License (LGPL) + */ + +#ifdef DEBUG +#include <stdio.h> +#endif + +#include <string.h> + +#include "fixed.h" + +int decode_rs_8(data_t *data, int *eras_pos, int no_eras, int pad){ + int retval; + + if(pad < 0 || pad > 222){ + return -1; + } + +#include "decode_rs.h" + + return retval; +} diff --git a/decode_rs_ccsds.c b/decode_rs_ccsds.c new file mode 100644 index 0000000..0e246b4 --- /dev/null +++ b/decode_rs_ccsds.c @@ -0,0 +1,26 @@ +/* This function wraps around the fixed 8-bit decoder, performing the + * basis transformations necessary to meet the CCSDS standard + * + * Copyright 2002, Phil Karn, KA9Q + * May be used under the terms of the GNU Lesser General Public License (LGPL) + */ +#include "ccsds.h" +#include "fec.h" + +int 
decode_rs_ccsds(data_t *data,int *eras_pos,int no_eras,int pad){ + int i,r; + data_t cdata[NN]; + + /* Reject a bad pad before it is used as a loop bound. decode_rs_8() + * applies the same test, but only after cdata[] has been filled, so a + * negative pad would already have overrun cdata[] by then. + */ + if(pad < 0 || pad > 222) + return -1; + + /* Convert data from dual basis to conventional */ + for(i=0;i<NN-pad;i++) + cdata[i] = Tal1tab[data[i]]; + + r = decode_rs_8(cdata,eras_pos,no_eras,pad); + + if(r > 0){ + /* Convert from conventional to dual basis */ + for(i=0;i<NN-pad;i++) + data[i] = Taltab[cdata[i]]; + } + return r; +} diff --git a/decode_rs_char.c b/decode_rs_char.c new file mode 100644 index 0000000..7105233 --- /dev/null +++ b/decode_rs_char.c @@ -0,0 +1,22 @@ +/* General purpose Reed-Solomon decoder for 8-bit symbols or less + * Copyright 2003 Phil Karn, KA9Q + * May be used under the terms of the GNU Lesser General Public License (LGPL) + */ + +#ifdef DEBUG +#include <stdio.h> +#endif + +#include <string.h> + +#include "char.h" +#include "rs-common.h" + +int decode_rs_char(void *p, data_t *data, int *eras_pos, int no_eras){ + int retval; + struct rs *rs = (struct rs *)p; + +#include "decode_rs.h" + + return retval; +} diff --git a/decode_rs_int.c b/decode_rs_int.c new file mode 100644 index 0000000..1ef1a1f --- /dev/null +++ b/decode_rs_int.c @@ -0,0 +1,22 @@ +/* General purpose Reed-Solomon decoder + * Copyright 2003 Phil Karn, KA9Q + * May be used under the terms of the GNU Lesser General Public License (LGPL) + */ + +#ifdef DEBUG +#include <stdio.h> +#endif + +#include <string.h> + +#include "int.h" +#include "rs-common.h" + +int decode_rs_int(void *p, data_t *data, int *eras_pos, int no_eras){ + int retval; + struct rs *rs = (struct rs *)p; + +#include "decode_rs.h" + + return retval; +} diff --git a/dotprod.c b/dotprod.c new file mode 100644 index 0000000..b3be913 --- /dev/null +++ b/dotprod.c @@ -0,0 +1,94 @@ +/* 16-bit signed integer dot product + * Switch to appropriate versions + * Copyright 2004 Phil Karn + * May be used under the terms of the GNU Lesser General Public License (LGPL) + */ +#include <stdlib.h> +#include "fec.h" + +void *initdp_port(signed short coeffs[],int len); 
+long dotprod_port(void *p,signed short *b); +void freedp_port(void *p); + +#ifdef __i386__ +void *initdp_mmx(signed short coeffs[],int len); +void *initdp_sse2(signed short coeffs[],int len); +long dotprod_mmx(void *p,signed short *b); +long dotprod_sse2(void *p,signed short *b); +void freedp_mmx(void *p); +void freedp_sse2(void *p); +#endif + +#ifdef __VEC__ +void *initdp_av(signed short coeffs[],int len); +long dotprod_av(void *p,signed short *b); +void freedp_av(void *p); +#endif + +/* Create and return a descriptor for use with the dot product function */ +void *initdp(signed short coeffs[],int len){ + find_cpu_mode(); + + switch(Cpu_mode){ + case PORT: + default: + return initdp_port(coeffs,len); +#ifdef __i386__ + case MMX: + case SSE: + return initdp_mmx(coeffs,len); + case SSE2: + return initdp_sse2(coeffs,len); +#endif + +#ifdef __VEC__ + case ALTIVEC: + return initdp_av(coeffs,len); +#endif + } +} + + +/* Free a dot product descriptor created earlier. + * The descriptor is released by the routine matching the one initdp() + * dispatches to for the current Cpu_mode; a NULL handle is ignored. + */ +void freedp(void *p){ + if(p == NULL) /* the initdp_*() routines return NULL when len == 0 */ + return; + + switch(Cpu_mode){ + case PORT: + default: + /* Must not fall through to the SIMD cases below */ + return freedp_port(p); +#ifdef __i386__ + case MMX: + case SSE: + return freedp_mmx(p); + case SSE2: + return freedp_sse2(p); +#endif +#ifdef __VEC__ + case ALTIVEC: + return freedp_av(p); +#endif + } +} + +/* Compute a dot product given a descriptor and an input array + * The length is taken from the descriptor + */ +long dotprod(void *p,signed short a[]){ + switch(Cpu_mode){ + case PORT: + default: + return dotprod_port(p,a); +#ifdef __i386__ + case MMX: + case SSE: + return dotprod_mmx(p,a); + case SSE2: + return dotprod_sse2(p,a); +#endif + +#ifdef __VEC__ + case ALTIVEC: + return dotprod_av(p,a); +#endif + } +} + + diff --git a/dotprod.h b/dotprod.h new file mode 100644 index 0000000..6b62b70 --- /dev/null +++ b/dotprod.h @@ -0,0 +1,15 @@ +/* Internal definitions for dotproduct function */ + +struct dotprod { + int len; /* Number of coefficients */ + + /* On a MMX or SSE machine, these hold 4 copies of the coefficients, + * preshifted by 0,1,2,3 words to 
meet all possible input data + * alignments (see Intel ap559 on MMX dot products). + * + * SSE2 is similar, but with 8 words at a time + * + * On a non-MMX machine, only one copy is present + */ + signed short *coeffs[8]; +}; diff --git a/dotprod_av.c b/dotprod_av.c new file mode 100644 index 0000000..1f70471 --- /dev/null +++ b/dotprod_av.c @@ -0,0 +1,93 @@ +/* 16-bit signed integer dot product + * Altivec-assisted version + * Copyright 2004 Phil Karn + * May be used under the terms of the GNU Lesser General Public License (LGPL) + */ +#include <stdlib.h> +#include "fec.h" + +struct dotprod { + int len; /* Number of coefficients */ + + /* On an Altivec machine, these hold 8 copies of the coefficients, + * preshifted by 0,1,..7 words to meet all possible input data + */ + signed short *coeffs[8]; +}; + +/* Create and return a descriptor for use with the dot product function */ +void *initdp_av(signed short coeffs[],int len){ + struct dotprod *dp; + int i,j; + + if(len == 0) + return NULL; + + dp = (struct dotprod *)calloc(1,sizeof(struct dotprod)); + dp->len = len; + + /* Make 8 copies of coefficients, one for each data alignment, + * each aligned to 16-byte boundary + */ + for(i=0;i<8;i++){ + dp->coeffs[i] = calloc(1+(len+i-1)/8,sizeof(vector signed short)); + for(j=0;j<len;j++) + dp->coeffs[i][j+i] = coeffs[j]; + } + return (void *)dp; +} + + +/* Free a dot product descriptor created earlier */ +void freedp_av(void *p){ + struct dotprod *dp = (struct dotprod *)p; + int i; + + for(i=0;i<8;i++) + if(dp->coeffs[i] != NULL) + free(dp->coeffs[i]); + free(dp); +} + +/* Compute a dot product given a descriptor and an input array + * The length is taken from the descriptor + */ +long dotprod_av(void *p,signed short a[]){ + struct dotprod *dp = (struct dotprod *)p; + int al; + vector signed short *ar,*d; + vector signed int sums0,sums1,sums2,sums3; + union { vector signed int v; signed int w[4];} s; + int nblocks; + + /* round ar down to beginning of 16-byte block 
containing 0th element of + * input buffer. Then set d to one of 8 sets of shifted coefficients + */ + ar = (vector signed short *)((int)a & ~15); + al = ((int)a & 15)/sizeof(signed short); + d = (vector signed short *)dp->coeffs[al]; + + nblocks = (dp->len+al-1)/8+1; + + /* Sum into four vectors each holding four 32-bit partial sums */ + sums3 = sums2 = sums1 = sums0 = (vector signed int)(0); + while(nblocks >= 4){ + sums0 = vec_msums(ar[nblocks-1],d[nblocks-1],sums0); + sums1 = vec_msums(ar[nblocks-2],d[nblocks-2],sums1); + sums2 = vec_msums(ar[nblocks-3],d[nblocks-3],sums2); + sums3 = vec_msums(ar[nblocks-4],d[nblocks-4],sums3); + nblocks -= 4; + } + sums0 = vec_adds(sums0,sums1); + sums2 = vec_adds(sums2,sums3); + sums0 = vec_adds(sums0,sums2); + while(nblocks-- > 0){ + sums0 = vec_msums(ar[nblocks],d[nblocks],sums0); + } + /* Sum 4 partial sums into final result */ + s.v = vec_sums(sums0,(vector signed int)(0)); + + return s.w[3]; +} + + diff --git a/dotprod_mmx.c b/dotprod_mmx.c new file mode 100644 index 0000000..c516afe --- /dev/null +++ b/dotprod_mmx.c @@ -0,0 +1,81 @@ +/* 16-bit signed integer dot product + * MMX assisted version; also for SSE + * + * Copyright 2004 Phil Karn + * May be used under the terms of the GNU Lesser General Public License (LGPL) + */ +#include <stdlib.h> +#include "fec.h" + +struct dotprod { + int len; /* Number of coefficients */ + + /* On a MMX or SSE machine, these hold 4 copies of the coefficients, + * preshifted by 0,1,2,3 words to meet all possible input data + * alignments (see Intel ap559 on MMX dot products). 
+ */ + signed short *coeffs[4]; +}; +long dotprod_mmx_assist(signed short *a,signed short *b,int cnt); + +/* Create and return a descriptor for use with the dot product function */ +void *initdp_mmx(signed short coeffs[],int len){ + struct dotprod *dp; + int i,j; + + + if(len == 0) + return NULL; + + dp = (struct dotprod *)calloc(1,sizeof(struct dotprod)); + dp->len = len; + + /* Make 4 copies of coefficients, one for each data alignment */ + for(i=0;i<4;i++){ + dp->coeffs[i] = (signed short *)calloc(1+(len+i-1)/4, + 4*sizeof(signed short)); + for(j=0;j<len;j++) + dp->coeffs[i][j+i] = coeffs[j]; + } + return (void *)dp; +} + + +/* Free a dot product descriptor created earlier */ +void freedp_mmx(void *p){ + struct dotprod *dp = (struct dotprod *)p; + int i; + + for(i=0;i<4;i++) + if(dp->coeffs[i] != NULL) + free(dp->coeffs[i]); + free(dp); +} + +/* Compute a dot product given a descriptor and an input array + * The length is taken from the descriptor + */ +long dotprod_mmx(void *p,signed short a[]){ + struct dotprod *dp = (struct dotprod *)p; + int al; + signed short *ar; + + /* Round input data address down to 8 byte boundary + * NB: depending on the alignment of a[], memory + * before a[] will be accessed. The contents don't matter since they'll + * be multiplied by zero coefficients. I can't conceive of any + * situation where this could cause a segfault since memory protection + * in the x86 machines is done on much larger boundaries + */ + ar = (signed short *)((int)a & ~7); + + /* Choose one of 4 sets of pre-shifted coefficients. 
al is both the + * index into dp->coeffs[] and the number of 0 words padded onto + * that coefficients array for alignment purposes + */ + al = a - ar; + + /* Call assembler routine to do the work, passing number of 4-word blocks */ + return dotprod_mmx_assist(ar,dp->coeffs[al],(dp->len+al-1)/4+1); +} + diff --git a/dotprod_mmx_assist.s b/dotprod_mmx_assist.s new file mode 100644 index 0000000..25deffd --- /dev/null +++ b/dotprod_mmx_assist.s @@ -0,0 +1,83 @@ +# SIMD MMX dot product +# Equivalent to the following C code: +# long dotprod(signed short *a,signed short *b,int cnt) +# { +# long sum = 0; +# cnt *= 4; +# while(cnt--) +# sum += *a++ * *b++; +# return sum; +# } +# a and b should also be 64-bit aligned, or speed will suffer greatly +# Copyright 1999, Phil Karn KA9Q +# May be used under the terms of the GNU Lesser General Public License (LGPL) + + .text + .global dotprod_mmx_assist + .type dotprod_mmx_assist,@function +dotprod_mmx_assist: + pushl %ebp + movl %esp,%ebp + pushl %esi + pushl %edi + pushl %ecx + pushl %ebx + movl 8(%ebp),%esi # a + movl 12(%ebp),%edi # b + movl 16(%ebp),%ecx # cnt + pxor %mm0,%mm0 # clear running sum (in two 32-bit halves) + +# MMX dot product loop unrolled 4 times, crunching 16 terms per loop + .align 16 +.Loop1: subl $4,%ecx + jl .Loop1Done + + movq (%esi),%mm1 # mm1 = a[3],a[2],a[1],a[0] + pmaddwd (%edi),%mm1 # mm1 = b[3]*a[3]+b[2]*a[2],b[1]*a[1]+b[0]*a[0] + paddd %mm1,%mm0 + + movq 8(%esi),%mm1 + pmaddwd 8(%edi),%mm1 + paddd %mm1,%mm0 + + movq 16(%esi),%mm1 + pmaddwd 16(%edi),%mm1 + paddd %mm1,%mm0 + + movq 24(%esi),%mm1 + addl $32,%esi + pmaddwd 24(%edi),%mm1 + addl $32,%edi + paddd %mm1,%mm0 + + jmp .Loop1 +.Loop1Done: + + addl $4,%ecx + +# MMX dot product loop, not unrolled, crunching 4 terms per loop +# This could be redone as Duff's Device on the unrolled loop above +.Loop2: subl $1,%ecx + jl .Loop2Done + + movq (%esi),%mm1 + addl $8,%esi + pmaddwd (%edi),%mm1 + addl $8,%edi + paddd %mm1,%mm0 + jmp .Loop2 +.Loop2Done: + 
+ movd %mm0,%ebx # right-hand word to ebx + punpckhdq %mm0,%mm0 # left-hand word to right side of %mm0 + movd %mm0,%eax + addl %ebx,%eax # running sum now in %eax + emms # done with MMX + + popl %ebx + popl %ecx + popl %edi + popl %esi + movl %ebp,%esp + popl %ebp + ret diff --git a/dotprod_port.c b/dotprod_port.c new file mode 100644 index 0000000..ef635ec --- /dev/null +++ b/dotprod_port.c @@ -0,0 +1,58 @@ +/* 16-bit signed integer dot product + * Portable C version + * Copyright 2004 Phil Karn + * May be used under the terms of the GNU Lesser General Public License (LGPL) + */ +#include <stdlib.h> +#include "fec.h" + +struct dotprod { + int len; /* Number of coefficients */ + + signed short *coeffs; +}; + +/* Create and return a descriptor for use with the dot product function */ +void *initdp_port(signed short coeffs[],int len){ + struct dotprod *dp; + int j; + + if(len == 0) + return NULL; + + dp = (struct dotprod *)calloc(1,sizeof(struct dotprod)); + dp->len = len; + + /* Just one copy of the coefficients for the C version */ + dp->coeffs = (signed short *)calloc(len,sizeof(signed short)); + for(j=0;j<len;j++) + dp->coeffs[j] = coeffs[j]; + return (void *)dp; +} + + +/* Free a dot product descriptor created earlier */ +void freedp_port(void *p){ + struct dotprod *dp = (struct dotprod *)p; + + if(dp->coeffs != NULL) + free(dp->coeffs); + free(dp); +} + +/* Compute a dot product given a descriptor and an input array + * The length is taken from the descriptor + */ +long dotprod_port(void *p,signed short a[]){ + struct dotprod *dp = (struct dotprod *)p; + long corr; + int i; + + corr = 0; + for(i=0;i<dp->len;i++){ + corr += (long)a[i] * dp->coeffs[i]; + } + return corr; +} + + diff --git a/dotprod_sse2.c b/dotprod_sse2.c new file mode 100644 index 0000000..1fddd18 --- /dev/null +++ b/dotprod_sse2.c @@ -0,0 +1,72 @@ +/* 16-bit signed integer dot product + * SSE2 version + * Copyright 2004 Phil Karn + * May be used under the terms of the GNU Lesser General Public 
License (LGPL) + */ +#define _XOPEN_SOURCE 600 +#include <stdlib.h> +#include <memory.h> +#include "fec.h" + +struct dotprod { + int len; /* Number of coefficients */ + + /* On a SSE2 machine, these hold 8 copies of the coefficients, + * preshifted by 0,1,..7 words to meet all possible input data + * alignments (see Intel ap559 on MMX dot products). + */ + signed short *coeffs[8]; +}; + +long dotprod_sse2_assist(signed short *a,signed short *b,int cnt); + +/* Create and return a descriptor for use with the dot product function */ +void *initdp_sse2(signed short coeffs[],int len){ + struct dotprod *dp; + int i,j,blksize; + + if(len == 0) + return NULL; + + dp = (struct dotprod *)calloc(1,sizeof(struct dotprod)); + dp->len = len; + + /* Make 8 copies of coefficients, one for each data alignment, + * each aligned to 16-byte boundary + */ + for(i=0;i<8;i++){ + blksize = (1+(len+i-1)/8) * 8*sizeof(signed short); + posix_memalign((void **)&dp->coeffs[i],16,blksize); + memset(dp->coeffs[i],0,blksize); + for(j=0;j<len;j++) + dp->coeffs[i][j+i] = coeffs[j]; + } + return (void *)dp; +} + + +/* Free a dot product descriptor created earlier */ +void freedp_sse2(void *p){ + struct dotprod *dp = (struct dotprod *)p; + int i; + + for(i=0;i<8;i++) + if(dp->coeffs[i] != NULL) + free(dp->coeffs[i]); + free(dp); +} + +/* Compute a dot product given a descriptor and an input array + * The length is taken from the descriptor + */ +long dotprod_sse2(void *p,signed short a[]){ + struct dotprod *dp = (struct dotprod *)p; + int al; + signed short *ar; + + ar = (signed short *)((int)a & ~15); + al = a - ar; + + /* Call assembler routine to do the work, passing number of 8-word blocks */ + return dotprod_sse2_assist(ar,dp->coeffs[al],(dp->len+al-1)/8+1); +} diff --git a/dotprod_sse2_assist.s b/dotprod_sse2_assist.s new file mode 100644 index 0000000..47348fa --- /dev/null +++ b/dotprod_sse2_assist.s @@ -0,0 +1,85 @@ +# SIMD SSE2 dot product +# Equivalent to the following C code: +# long 
dotprod(signed short *a,signed short *b,int cnt) +# { +# long sum = 0; +# cnt *= 8; +# while(cnt--) +# sum += *a++ * *b++; +# return sum; +# } +# a and b must be 128-bit aligned +# Copyright 2001, Phil Karn KA9Q +# May be used under the terms of the GNU Lesser General Public License (LGPL) + + .text + .global dotprod_sse2_assist + .type dotprod_sse2_assist,@function +dotprod_sse2_assist: + pushl %ebp + movl %esp,%ebp + pushl %esi + pushl %edi + pushl %ecx + pushl %ebx + movl 8(%ebp),%esi # a + movl 12(%ebp),%edi # b + movl 16(%ebp),%ecx # cnt + pxor %xmm0,%xmm0 # clear running sum (in four 32-bit lanes) + +# SSE2 dot product loop unrolled 4 times, crunching 32 terms per loop + .align 16 +.Loop1: subl $4,%ecx + jl .Loop1Done + + movdqa (%esi),%xmm1 + pmaddwd (%edi),%xmm1 + paddd %xmm1,%xmm0 + + movdqa 16(%esi),%xmm1 + pmaddwd 16(%edi),%xmm1 + paddd %xmm1,%xmm0 + + movdqa 32(%esi),%xmm1 + pmaddwd 32(%edi),%xmm1 + paddd %xmm1,%xmm0 + + movdqa 48(%esi),%xmm1 + addl $64,%esi + pmaddwd 48(%edi),%xmm1 + addl $64,%edi + paddd %xmm1,%xmm0 + + jmp .Loop1 +.Loop1Done: + + addl $4,%ecx + +# SSE2 dot product loop, not unrolled, crunching 8 terms per loop +# This could be redone as Duff's Device on the unrolled loop above +.Loop2: subl $1,%ecx + jl .Loop2Done + + movdqa (%esi),%xmm1 + addl $16,%esi + pmaddwd (%edi),%xmm1 + addl $16,%edi + paddd %xmm1,%xmm0 + jmp .Loop2 +.Loop2Done: + + movdqa %xmm0,%xmm1 + psrldq $8,%xmm0 + paddd %xmm1,%xmm0 + movd %xmm0,%eax # right-hand word to eax + psrldq $4,%xmm0 + movd %xmm0,%ebx + addl %ebx,%eax + + popl %ebx + popl %ecx + popl %edi + popl %esi + movl %ebp,%esp + popl %ebp + ret @@ -0,0 +1,63 @@ +.TH DSP 3 +.SH NAME +initdp, freedp, dotprod, sumsq, peakval -\ SIMD-assisted +digital signal processing primitives +.SH SYNOPSIS +.nf +.ft +#include "fec.h" + +void *initdp(signed short *coeffs,int len); +long dotprod(void *p,signed short *a); +void freedp(void *p); + +unsigned long long sumsq(signed short *in,int cnt); + +int peakval(signed 
short *b,int cnt); + +.SH DESCRIPTION +These functions provide several basic primitives useful in digital +signal processing (DSP), especially in modems. The \fBinitdp\fR, +\fBdotprod\fR and \fBfreedp\fR functions implement an integer dot +product useful in correlation and filtering operations on signed +16-bit integers. \fBsumsq\fR computes the sum +of the squares of an array of signed 16-bit integers, +useful for measuring the energy of a signal. \fBpeakval\fR returns the +absolute value of the largest magnitude element in the input array, +useful for scaling a signal's amplitude. + +Each function uses IA32 or PowerPC Altivec instructions when +available; otherwise, a portable C version is used. + +.SH USAGE +To create a FIR filter or correlator, call \fBinitdp\fR with the +coefficients in \fBcoeffs\fR and their number in \fBlen\fR. This +creates the appropriate data structures and returns a handle. + +To compute a dot product, pass the handle from \fBinitdp\fR and the +input array to \fBdotprod\fR. No length field is needed as the number +of samples will be taken from the \fBlen\fR parameter originally given +to \fBinitdp\fR. There must be at least as many samples in the input +array as there were coefficients passed to \fBinitdp\fR. + +When the filter or correlator is no longer needed, the data structures +may be freed by passing the handle to \fBfreedp\fR. + +The user is responsible for scaling the inputs to \fBinitdp\fR and +\fBdotprod\fR, as the 32-bit result from \fBdotprod\fR will silently +wrap around in the event of overflow. + +To compute the sum of the squares of an array of signed 16-bit +integers, use \fBsumsq\fR. This returns a 64-bit sum. + +\fBpeakval\fR computes the absolute value of each 16-bit element in +the input array and returns the largest. + +.SH RETURN VALUES + +\fBinitdp\fR returns a handle that points to a control block, or NULL in +the event of an error (such as a memory allocation failure). 
\fBsumsq\fR +and \fBpeakval\fR have no error returns. + +.SH AUTHOR and COPYRIGHT +Phil Karn, KA9Q (karn@ka9q.net) @@ -0,0 +1,99 @@ +/* Test dot-product function */ + +#include <stdio.h> +#include <stdlib.h> +#include <memory.h> +#include <math.h> +#include "config.h" +#ifdef HAVE_GETOPT_H +#include <getopt.h> +#endif +#include "fec.h" + +#if HAVE_GETOPT_LONG +struct option Options[] = { + {"force-altivec",0,NULL,'a'}, + {"force-port",0,NULL,'p'}, + {"force-mmx",0,NULL,'m'}, + {"force-sse",0,NULL,'s'}, + {"force-sse2",0,NULL,'t'}, + {"trials",1,NULL,'n'}, /* takes an argument, matching "n:" in the short option string */ + {NULL}, +}; +#endif + +int main(int argc,char *argv[]){ + short coeffs[512]; + short input[2048]; + int trials=1000,d; + int errors = 0; + +#if HAVE_GETOPT_LONG + while((d = getopt_long(argc,argv,"apmstn:",Options,NULL)) != EOF){ +#else + while((d = getopt(argc,argv,"apmstn:")) != EOF){ +#endif + switch(d){ + case 'a': + Cpu_mode = ALTIVEC; + break; + case 'p': + Cpu_mode = PORT; + break; + case 'm': + Cpu_mode = MMX; + break; + case 's': + Cpu_mode = SSE; + break; + case 't': + Cpu_mode = SSE2; + break; + case 'n': + trials = atoi(optarg); + break; + } + } + + while(trials--){ + long port_result; + long simd_result; + int ntaps; + int i; + int csum = 0; + int offset; + void *dp_simd,*dp_port; + + /* Generate set of coefficients + * limit sum of absolute values to 32767 to avoid overflow + */ + memset(coeffs,0,sizeof(coeffs)); + for(i=0;i<512;i++){ + double gv; + + gv = normal_rand(0.,100.); + if(csum + fabs(gv) > 32767) + break; + coeffs[i] = gv; + csum += fabs(gv); + } + ntaps = i; + + /* Compare results to portable C version for a bunch of random data buffers and offsets */ + dp_simd = initdp(coeffs,ntaps); + dp_port = initdp_port(coeffs,ntaps); + + for(i=0;i<2048;i++) + input[i] = random(); + + offset = random() & 511; + + simd_result = dotprod(dp_simd,input+offset); + port_result = dotprod_port(dp_port,input+offset); + if(simd_result != port_result){ + errors++; + } + } + printf("dtest: %d 
errors\n",errors); + exit(0); +} diff --git a/encode_rs.c b/encode_rs.c new file mode 100644 index 0000000..0649094 --- /dev/null +++ b/encode_rs.c @@ -0,0 +1,52 @@ +/* Reed-Solomon encoder + * Copyright 2002, Phil Karn, KA9Q + * May be used under the terms of the GNU Lesser General Public License (LGPL) + */ +#include <string.h> + +#ifdef FIXED +#include "fixed.h" +#elif defined(BIGSYM) +#include "int.h" +#else +#include "char.h" +#endif + +void ENCODE_RS( +#ifdef FIXED +data_t *data, data_t *bb,int pad){ +#else +void *p,data_t *data, data_t *bb){ + struct rs *rs = (struct rs *)p; +#endif + int i, j; + data_t feedback; + +#ifdef FIXED + /* Check pad parameter for validity */ + if(pad < 0 || pad >= NN) + return; +#endif + + memset(bb,0,NROOTS*sizeof(data_t)); + + for(i=0;i<NN-NROOTS-PAD;i++){ + feedback = INDEX_OF[data[i] ^ bb[0]]; + if(feedback != A0){ /* feedback term is non-zero */ +#ifdef UNNORMALIZED + /* This line is unnecessary when GENPOLY[NROOTS] is unity, as it must + * always be for the polynomials constructed by init_rs() + */ + feedback = MODNN(NN - GENPOLY[NROOTS] + feedback); +#endif + for(j=1;j<NROOTS;j++) + bb[j] ^= ALPHA_TO[MODNN(feedback + GENPOLY[NROOTS-j])]; + } + /* Shift */ + memmove(&bb[0],&bb[1],sizeof(data_t)*(NROOTS-1)); + if(feedback != A0) + bb[NROOTS-1] = ALPHA_TO[MODNN(feedback + GENPOLY[0])]; + else + bb[NROOTS-1] = 0; + } +} diff --git a/encode_rs.h b/encode_rs.h new file mode 100644 index 0000000..2c157f9 --- /dev/null +++ b/encode_rs.h @@ -0,0 +1,58 @@ +/* The guts of the Reed-Solomon encoder, meant to be #included + * into a function body with the following typedefs, macros and variables supplied + * according to the code parameters: + + * data_t - a typedef for the data symbol + * data_t data[] - array of NN-NROOTS-PAD and type data_t to be encoded + * data_t parity[] - an array of NROOTS and type data_t to be written with parity symbols + * NROOTS - the number of roots in the RS code generator polynomial, + * which is the same 
as the number of parity symbols in a block. + Integer variable or literal. + * + * NN - the total number of symbols in a RS block. Integer variable or literal. + * PAD - the number of pad symbols in a block. Integer variable or literal. + * ALPHA_TO - The address of an array of NN elements to convert Galois field + * elements in index (log) form to polynomial form. Read only. + * INDEX_OF - The address of an array of NN elements to convert Galois field + * elements in polynomial form to index (log) form. Read only. + * MODNN - a function to reduce its argument modulo NN. May be inline or a macro. + * GENPOLY - an array of NROOTS+1 elements containing the generator polynomial in index form + + * The memset() and memmove() functions are used. The appropriate header + * file declaring these functions (usually <string.h>) must be included by the calling + * program. + + * Copyright 2004, Phil Karn, KA9Q + * May be used under the terms of the GNU Lesser General Public License (LGPL) + */ + + +#undef A0 +#define A0 (NN) /* Special reserved value encoding zero in index form */ + +{ + int i, j; + data_t feedback; + + memset(parity,0,NROOTS*sizeof(data_t)); + + for(i=0;i<NN-NROOTS-PAD;i++){ + feedback = INDEX_OF[data[i] ^ parity[0]]; + if(feedback != A0){ /* feedback term is non-zero */ +#ifdef UNNORMALIZED + /* This line is unnecessary when GENPOLY[NROOTS] is unity, as it must + * always be for the polynomials constructed by init_rs() + */ + feedback = MODNN(NN - GENPOLY[NROOTS] + feedback); +#endif + for(j=1;j<NROOTS;j++) + parity[j] ^= ALPHA_TO[MODNN(feedback + GENPOLY[NROOTS-j])]; + } + /* Shift */ + memmove(&parity[0],&parity[1],sizeof(data_t)*(NROOTS-1)); + if(feedback != A0) + parity[NROOTS-1] = ALPHA_TO[MODNN(feedback + GENPOLY[0])]; + else + parity[NROOTS-1] = 0; + } +} diff --git a/encode_rs_8.c b/encode_rs_8.c new file mode 100644 index 0000000..5aaecca --- /dev/null +++ b/encode_rs_8.c @@ -0,0 +1,109 @@ +/* Reed-Solomon encoder + * Copyright 2004, Phil Karn, 
KA9Q + * May be used under the terms of the GNU Lesser General Public License (LGPL) + */ +#include <string.h> +#include "fixed.h" +#ifdef __VEC__ +#include <sys/sysctl.h> +#endif + + +static enum {UNKNOWN=0,MMX,SSE,SSE2,ALTIVEC,PORT} cpu_mode; + +static void encode_rs_8_c(data_t *data, data_t *parity,int pad); +#if __vec__ +static void encode_rs_8_av(data_t *data, data_t *parity,int pad); +#endif +#if __i386__ +int cpu_features(void); +#endif + +void encode_rs_8(data_t *data, data_t *parity,int pad){ + if(cpu_mode == UNKNOWN){ +#ifdef __i386__ + int f; + /* Figure out what kind of CPU we have */ + f = cpu_features(); + if(f & (1<<26)){ /* SSE2 is present */ + cpu_mode = SSE2; + } else if(f & (1<<25)){ /* SSE is present */ + cpu_mode = SSE; + } else if(f & (1<<23)){ /* MMX is present */ + cpu_mode = MMX; + } else { /* No SIMD at all */ + cpu_mode = PORT; + } +#elif __VEC__ + /* Ask the OS if we have Altivec support */ + int selectors[2] = { CTL_HW, HW_VECTORUNIT }; + int hasVectorUnit = 0; + size_t length = sizeof(hasVectorUnit); + int error = sysctl(selectors, 2, &hasVectorUnit, &length, NULL, 0); + if(0 == error && hasVectorUnit) + cpu_mode = ALTIVEC; + else + cpu_mode = PORT; +#else + cpu_mode = PORT; +#endif + } + switch(cpu_mode){ +#if __vec__ + case ALTIVEC: + encode_rs_8_av(data,parity,pad); + return; +#endif +#if __i386__ + case MMX: + case SSE: + case SSE2: +#endif + default: + encode_rs_8_c(data,parity,pad); + return; + } +} + +#if __vec__ /* PowerPC G4/G5 Altivec instructions are available */ + +static vector unsigned char reverse = (vector unsigned char)(0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1); +static vector unsigned char shift_right = (vector unsigned char)(15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30); + +/* Lookup table for feedback multiplications + * These are the low half of the coefficients. 
Since the generator polynomial is + * palindromic, we form the other half by reversing this one + */ +extern static union { vector unsigned char v; unsigned char c[16]; } table[256]; + +static void encode_rs_8_av(data_t *data, data_t *parity,int pad){ + union { vector unsigned char v[2]; unsigned char c[32]; } shift_register; + int i; + + shift_register.v[0] = (vector unsigned char)(0); + shift_register.v[1] = (vector unsigned char)(0); + + for(i=0;i<NN-NROOTS-pad;i++){ + vector unsigned char feedback0,feedback1; + unsigned char f; + + f = data[i] ^ shift_register.c[31]; + feedback1 = table[f].v; + feedback0 = vec_perm(feedback1,feedback1,reverse); + + /* Shift right one byte */ + shift_register.v[1] = vec_perm(shift_register.v[0],shift_register.v[1],shift_right) ^ feedback1; + shift_register.v[0] = vec_sro(shift_register.v[0],(vector unsigned char)(8)) ^ feedback0; + shift_register.c[0] = f; + } + for(i=0;i<NROOTS;i++) + parity[NROOTS-i-1] = shift_register.c[i]; +} +#endif + +/* Portable C version */ +static void encode_rs_8_c(data_t *data, data_t *parity,int pad){ + +#include "encode_rs.h" + +} diff --git a/encode_rs_av.c b/encode_rs_av.c new file mode 100644 index 0000000..32e528f --- /dev/null +++ b/encode_rs_av.c @@ -0,0 +1,61 @@ +/* Fast Reed-Solomon encoder for (255,223) CCSDS code on PowerPC G4/G5 using Altivec instructions + * Copyright 2004, Phil Karn KA9Q + * May be used under the terms of the GNU Lesser General Public License (LGPL) + */ +#include <stdio.h> +#include <string.h> +#include "fixed.h" + +/* Lookup table for feedback multiplications + * These are the low half of the coefficients. 
Since the generator polynomial is + * palindromic, we form it by reversing these on the fly + */ +static union { vector unsigned char v; unsigned char c[16]; } table[256]; + +static vector unsigned char reverse = (vector unsigned char)(0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1); +static vector unsigned char shift_right = (vector unsigned char)(15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30); + +extern data_t CCSDS_alpha_to[]; +extern data_t CCSDS_index_of[]; +extern data_t CCSDS_poly[]; + +void rs_init_av(){ + int i,j; + + /* The PowerPC is big-endian, so the low-order byte of each vector contains the highest order term in the polynomial */ + for(j=0;j<16;j++){ + table[0].c[j] = 0; + for(i=1;i<256;i++){ + table[i].c[16-j-1] = CCSDS_alpha_to[MODNN(CCSDS_poly[j+1] + CCSDS_index_of[i])]; + } + } +#if 0 + for(i=0;i<256;i++){ + printf("table[%3d] = %3vu\n",i,table[i].v); + } +#endif +} + +void encode_rs_av(unsigned char *data,unsigned char *parity,int pad){ + union { vector unsigned char v[2]; unsigned char c[32]; } shift_register; + int i; + + shift_register.v[0] = (vector unsigned char)(0); + shift_register.v[1] = (vector unsigned char)(0); + + for(i=0;i<NN-NROOTS-pad;i++){ + vector unsigned char feedback0,feedback1; + unsigned char f; + + f = data[i] ^ shift_register.c[31]; + feedback1 = table[f].v; + feedback0 = vec_perm(feedback1,feedback1,reverse); + + /* Shift right one byte */ + shift_register.v[1] = vec_perm(shift_register.v[0],shift_register.v[1],shift_right) ^ feedback1; + shift_register.v[0] = vec_sro(shift_register.v[0],(vector unsigned char)(8)) ^ feedback0; + shift_register.c[0] = f; + } + for(i=0;i<NROOTS;i++) + parity[NROOTS-i-1] = shift_register.c[i]; +} diff --git a/encode_rs_ccsds.c b/encode_rs_ccsds.c new file mode 100644 index 0000000..5a2ec70 --- /dev/null +++ b/encode_rs_ccsds.c @@ -0,0 +1,24 @@ +/* This function wraps around the fixed 8-bit encoder, performing the + * basis transformations necessary to meet the CCSDS standard + * + * Copyright 
2002, Phil Karn, KA9Q + * fixed bug Aug 2007 + * May be used under the terms of the GNU Lesser General Public License (LGPL) + */ +#include "ccsds.h" +#include "fec.h" + +void encode_rs_ccsds(data_t *data,data_t *parity,int pad){ + int i; + data_t cdata[NN-NROOTS]; + + /* Convert data from dual basis to conventional */ + for(i=0;i<NN-NROOTS-pad;i++) + cdata[i] = Tal1tab[data[i]]; + + encode_rs_8(cdata,parity,pad); + + /* Convert parity from conventional to dual basis */ + for(i=0;i<NROOTS;i++) + parity[i] = Taltab[parity[i]]; +} diff --git a/encode_rs_char.c b/encode_rs_char.c new file mode 100644 index 0000000..a9bf2b8 --- /dev/null +++ b/encode_rs_char.c @@ -0,0 +1,15 @@ +/* Reed-Solomon encoder + * Copyright 2002, Phil Karn, KA9Q + * May be used under the terms of the GNU Lesser General Public License (LGPL) + */ +#include <string.h> + +#include "char.h" +#include "rs-common.h" + +void encode_rs_char(void *p,data_t *data, data_t *parity){ + struct rs *rs = (struct rs *)p; + +#include "encode_rs.h" + +} diff --git a/encode_rs_int.c b/encode_rs_int.c new file mode 100644 index 0000000..3c9ce78 --- /dev/null +++ b/encode_rs_int.c @@ -0,0 +1,15 @@ +/* Reed-Solomon encoder + * Copyright 2003, Phil Karn, KA9Q + * May be used under the terms of the GNU Lesser General Public License (LGPL) + */ +#include <string.h> + +#include "int.h" +#include "rs-common.h" + +void encode_rs_int(void *p,data_t *data, data_t *parity){ + struct rs *rs = (struct rs *)p; + +#include "encode_rs.h" + +} diff --git a/exercise.c b/exercise.c new file mode 100644 index 0000000..8ae008c --- /dev/null +++ b/exercise.c @@ -0,0 +1,122 @@ +/* Exercise an RS codec a specified number of times using random + * data and error patterns + * + * Copyright 2002 Phil Karn, KA9Q + * May be used under the terms of the GNU Lesser General Public License (LGPL) + */ +#define FLAG_ERASURE 1 /* Randomly flag 50% of errors as erasures */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#ifdef FIXED 
+#include "fixed.h" +#define EXERCISE exercise_8 +#elif defined(CCSDS) +#include "fixed.h" +#include "ccsds.h" +#define EXERCISE exercise_ccsds +#elif defined(BIGSYM) +#include "int.h" +#define EXERCISE exercise_int +#else +#include "char.h" +#define EXERCISE exercise_char +#endif + +#ifdef FIXED +#define PRINTPARM printf("(255,223):"); +#elif defined(CCSDS) +#define PRINTPARM printf("CCSDS (255,223):"); +#else +#define PRINTPARM printf("(%d,%d):",rs->nn,rs->nn-rs->nroots); +#endif + +/* Exercise the RS codec passed as an argument */ +int EXERCISE( +#if !defined(CCSDS) && !defined(FIXED) +void *p, +#endif +int trials){ +#if !defined(CCSDS) && !defined(FIXED) + struct rs *rs = (struct rs *)p; +#endif + data_t block[NN],tblock[NN]; + int i; + int errors; + int errlocs[NN]; + int derrlocs[NROOTS]; + int derrors; + int errval,errloc; + int erasures; + int decoder_errors = 0; + + while(trials-- != 0){ + /* Test up to the error correction capacity of the code */ + for(errors=0;errors <= NROOTS/2;errors++){ + + /* Load block with random data and encode */ + for(i=0;i<NN-NROOTS;i++) + block[i] = random() & NN; + +#if defined(CCSDS) || defined(FIXED) + ENCODE_RS(&block[0],&block[NN-NROOTS],0); +#else + ENCODE_RS(rs,&block[0],&block[NN-NROOTS]); +#endif + + /* Make temp copy, seed with errors */ + memcpy(tblock,block,sizeof(tblock)); + memset(errlocs,0,sizeof(errlocs)); + memset(derrlocs,0,sizeof(derrlocs)); + erasures=0; + for(i=0;i<errors;i++){ + do { + errval = random() & NN; + } while(errval == 0); /* Error value must be nonzero */ + + do { + errloc = random() % NN; + } while(errlocs[errloc] != 0); /* Must not choose the same location twice */ + + errlocs[errloc] = 1; + +#if FLAG_ERASURE + if(random() & 1) /* 50-50 chance */ + derrlocs[erasures++] = errloc; +#endif + tblock[errloc] ^= errval; + } + + /* Decode the errored block */ +#if defined(CCSDS) || defined(FIXED) + derrors = DECODE_RS(tblock,derrlocs,erasures,0); +#else + derrors = 
unsigned char Partab[256];
int P_init;

/* Build the 256-entry parity lookup table.
 * Afterwards Partab[v] is 1 when v has an odd number of 1 bits and 0 otherwise,
 * and P_init is set to 1 so callers can tell the table is ready.
 * Needed only on non-ia32 machines.
 */
void partab_init(void){
  int v;

  for(v=0;v<256;v++){
    int bits;
    int odd = 0;

    /* Flip the flag once for every set bit */
    for(bits=v;bits != 0;bits >>= 1)
      odd ^= bits & 1;

    Partab[v] = odd;
  }
  P_init = 1;
}
7, 7, 8, +}; + @@ -0,0 +1,347 @@ +/* User include file for libfec + * Copyright 2004, Phil Karn, KA9Q + * May be used under the terms of the GNU Lesser General Public License (LGPL) + */ + +#ifndef _FEC_H_ +#define _FEC_H_ + +/* r=1/2 k=7 convolutional encoder polynomials + * The NASA-DSN convention is to use V27POLYA inverted, then V27POLYB + * The CCSDS/NASA-GSFC convention is to use V27POLYB, then V27POLYA inverted + */ +#define V27POLYA 0x6d +#define V27POLYB 0x4f + +void *create_viterbi27(int len); +void set_viterbi27_polynomial(int polys[2]); +int init_viterbi27(void *vp,int starting_state); +int update_viterbi27_blk(void *vp,unsigned char sym[],int npairs); +int chainback_viterbi27(void *vp, unsigned char *data,unsigned int nbits,unsigned int endstate); +void delete_viterbi27(void *vp); + +#ifdef __VEC__ +void *create_viterbi27_av(int len); +void set_viterbi27_polynomial_av(int polys[2]); +int init_viterbi27_av(void *p,int starting_state); +int chainback_viterbi27_av(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate); +void delete_viterbi27_av(void *p); +int update_viterbi27_blk_av(void *p,unsigned char *syms,int nbits); +#endif + +#ifdef __i386__ +void *create_viterbi27_mmx(int len); +void set_viterbi27_polynomial_mmx(int polys[2]); +int init_viterbi27_mmx(void *p,int starting_state); +int chainback_viterbi27_mmx(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate); +void delete_viterbi27_mmx(void *p); +int update_viterbi27_blk_mmx(void *p,unsigned char *syms,int nbits); + +void *create_viterbi27_sse(int len); +void set_viterbi27_polynomial_sse(int polys[2]); +int init_viterbi27_sse(void *p,int starting_state); +int chainback_viterbi27_sse(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate); +void delete_viterbi27_sse(void *p); +int update_viterbi27_blk_sse(void *p,unsigned char *syms,int nbits); + +void *create_viterbi27_sse2(int len); +void set_viterbi27_polynomial_sse2(int polys[2]); +int 
init_viterbi27_sse2(void *p,int starting_state); +int chainback_viterbi27_sse2(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate); +void delete_viterbi27_sse2(void *p); +int update_viterbi27_blk_sse2(void *p,unsigned char *syms,int nbits); +#endif + +void *create_viterbi27_port(int len); +void set_viterbi27_polynomial_port(int polys[2]); +int init_viterbi27_port(void *p,int starting_state); +int chainback_viterbi27_port(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate); +void delete_viterbi27_port(void *p); +int update_viterbi27_blk_port(void *p,unsigned char *syms,int nbits); + +/* r=1/2 k=9 convolutional encoder polynomials */ +#define V29POLYA 0x1af +#define V29POLYB 0x11d + +void *create_viterbi29(int len); +void set_viterbi29_polynomial(int polys[2]); +int init_viterbi29(void *vp,int starting_state); +int update_viterbi29_blk(void *vp,unsigned char syms[],int nbits); +int chainback_viterbi29(void *vp, unsigned char *data,unsigned int nbits,unsigned int endstate); +void delete_viterbi29(void *vp); + +#ifdef __VEC__ +void *create_viterbi29_av(int len); +void set_viterbi29_polynomial_av(int polys[2]); +int init_viterbi29_av(void *p,int starting_state); +int chainback_viterbi29_av(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate); +void delete_viterbi29_av(void *p); +int update_viterbi29_blk_av(void *p,unsigned char *syms,int nbits); +#endif + +#ifdef __i386__ +void *create_viterbi29_mmx(int len); +void set_viterbi29_polynomial_mmx(int polys[2]); +int init_viterbi29_mmx(void *p,int starting_state); +int chainback_viterbi29_mmx(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate); +void delete_viterbi29_mmx(void *p); +int update_viterbi29_blk_mmx(void *p,unsigned char *syms,int nbits); + +void *create_viterbi29_sse(int len); +void set_viterbi29_polynomial_sse(int polys[2]); +int init_viterbi29_sse(void *p,int starting_state); +int chainback_viterbi29_sse(void *p,unsigned char *data,unsigned 
int nbits,unsigned int endstate); +void delete_viterbi29_sse(void *p); +int update_viterbi29_blk_sse(void *p,unsigned char *syms,int nbits); + +void *create_viterbi29_sse2(int len); +void set_viterbi29_polynomial_sse2(int polys[2]); +int init_viterbi29_sse2(void *p,int starting_state); +int chainback_viterbi29_sse2(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate); +void delete_viterbi29_sse2(void *p); +int update_viterbi29_blk_sse2(void *p,unsigned char *syms,int nbits); +#endif + +void *create_viterbi29_port(int len); +void set_viterbi29_polynomial_port(int polys[2]); +int init_viterbi29_port(void *p,int starting_state); +int chainback_viterbi29_port(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate); +void delete_viterbi29_port(void *p); +int update_viterbi29_blk_port(void *p,unsigned char *syms,int nbits); + +/* r=1/3 k=9 convolutional encoder polynomials */ +#define V39POLYA 0x1ed +#define V39POLYB 0x19b +#define V39POLYC 0x127 + +void *create_viterbi39(int len); +void set_viterbi39_polynomial(int polys[3]); +int init_viterbi39(void *vp,int starting_state); +int update_viterbi39_blk(void *vp,unsigned char syms[],int nbits); +int chainback_viterbi39(void *vp, unsigned char *data,unsigned int nbits,unsigned int endstate); +void delete_viterbi39(void *vp); + +#ifdef __VEC__ +void *create_viterbi39_av(int len); +void set_viterbi39_polynomial_av(int polys[3]); +int init_viterbi39_av(void *p,int starting_state); +int chainback_viterbi39_av(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate); +void delete_viterbi39_av(void *p); +int update_viterbi39_blk_av(void *p,unsigned char *syms,int nbits); +#endif + +#ifdef __i386__ +void *create_viterbi39_mmx(int len); +void set_viterbi39_polynomial_mmx(int polys[3]); +int init_viterbi39_mmx(void *p,int starting_state); +int chainback_viterbi39_mmx(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate); +void delete_viterbi39_mmx(void *p); +int 
update_viterbi39_blk_mmx(void *p,unsigned char *syms,int nbits); + +void *create_viterbi39_sse(int len); +void set_viterbi39_polynomial_sse(int polys[3]); +int init_viterbi39_sse(void *p,int starting_state); +int chainback_viterbi39_sse(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate); +void delete_viterbi39_sse(void *p); +int update_viterbi39_blk_sse(void *p,unsigned char *syms,int nbits); + +void *create_viterbi39_sse2(int len); +void set_viterbi39_polynomial_sse2(int polys[3]); +int init_viterbi39_sse2(void *p,int starting_state); +int chainback_viterbi39_sse2(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate); +void delete_viterbi39_sse2(void *p); +int update_viterbi39_blk_sse2(void *p,unsigned char *syms,int nbits); +#endif + +void *create_viterbi39_port(int len); +void set_viterbi39_polynomial_port(int polys[3]); +int init_viterbi39_port(void *p,int starting_state); +int chainback_viterbi39_port(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate); +void delete_viterbi39_port(void *p); +int update_viterbi39_blk_port(void *p,unsigned char *syms,int nbits); + + +/* r=1/6 k=15 Cassini convolutional encoder polynomials without symbol inversion + * dfree = 56 + * These bits may be left-right flipped from some textbook representations; + * here I have the bits entering the shift register from the right (low) end + * + * Some other spacecraft use the same code, but with the polynomials in a different order. + * E.g., Mars Pathfinder and STEREO swap POLYC and POLYD. All use alternate symbol inversion, + * so use set_viterbi615_polynomial() as appropriate. 
+ */ +#define V615POLYA 042631 +#define V615POLYB 047245 +#define V615POLYC 056507 +#define V615POLYD 073363 +#define V615POLYE 077267 +#define V615POLYF 064537 + +void *create_viterbi615(int len); +void set_viterbi615_polynomial(int polys[6]); +int init_viterbi615(void *vp,int starting_state); +int update_viterbi615_blk(void *vp,unsigned char *syms,int nbits); +int chainback_viterbi615(void *vp, unsigned char *data,unsigned int nbits,unsigned int endstate); +void delete_viterbi615(void *vp); + +#ifdef __VEC__ +void *create_viterbi615_av(int len); +void set_viterbi615_polynomial_av(int polys[6]); +int init_viterbi615_av(void *p,int starting_state); +int chainback_viterbi615_av(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate); +void delete_viterbi615_av(void *p); +int update_viterbi615_blk_av(void *p,unsigned char *syms,int nbits); +#endif + +#ifdef __i386__ +void *create_viterbi615_mmx(int len); +void set_viterbi615_polynomial_mmx(int polys[6]); +int init_viterbi615_mmx(void *p,int starting_state); +int chainback_viterbi615_mmx(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate); +void delete_viterbi615_mmx(void *p); +int update_viterbi615_blk_mmx(void *p,unsigned char *syms,int nbits); + +void *create_viterbi615_sse(int len); +void set_viterbi615_polynomial_sse(int polys[6]); +int init_viterbi615_sse(void *p,int starting_state); +int chainback_viterbi615_sse(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate); +void delete_viterbi615_sse(void *p); +int update_viterbi615_blk_sse(void *p,unsigned char *syms,int nbits); + +void *create_viterbi615_sse2(int len); +void set_viterbi615_polynomial_sse2(int polys[6]); +int init_viterbi615_sse2(void *p,int starting_state); +int chainback_viterbi615_sse2(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate); +void delete_viterbi615_sse2(void *p); +int update_viterbi615_blk_sse2(void *p,unsigned char *syms,int nbits); + +#endif + +void 
*create_viterbi615_port(int len); +void set_viterbi615_polynomial_port(int polys[6]); +int init_viterbi615_port(void *p,int starting_state); +int chainback_viterbi615_port(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate); +void delete_viterbi615_port(void *p); +int update_viterbi615_blk_port(void *p,unsigned char *syms,int nbits); + + +/* General purpose RS codec, 8-bit symbols */ +void encode_rs_char(void *rs,unsigned char *data,unsigned char *parity); +int decode_rs_char(void *rs,unsigned char *data,int *eras_pos, + int no_eras); +void *init_rs_char(int symsize,int gfpoly, + int fcr,int prim,int nroots, + int pad); +void free_rs_char(void *rs); + +/* General purpose RS codec, integer symbols */ +void encode_rs_int(void *rs,int *data,int *parity); +int decode_rs_int(void *rs,int *data,int *eras_pos,int no_eras); +void *init_rs_int(int symsize,int gfpoly,int fcr, + int prim,int nroots,int pad); +void free_rs_int(void *rs); + +/* CCSDS standard (255,223) RS codec with conventional (*not* dual-basis) + * symbol representation + */ +void encode_rs_8(unsigned char *data,unsigned char *parity,int pad); +int decode_rs_8(unsigned char *data,int *eras_pos,int no_eras,int pad); + +/* CCSDS standard (255,223) RS codec with dual-basis symbol representation */ +void encode_rs_ccsds(unsigned char *data,unsigned char *parity,int pad); +int decode_rs_ccsds(unsigned char *data,int *eras_pos,int no_eras,int pad); + +/* Tables to map from conventional->dual (Taltab) and + * dual->conventional (Tal1tab) bases + */ +extern unsigned char Taltab[],Tal1tab[]; + + +/* CPU SIMD instruction set available */ +extern enum cpu_mode {UNKNOWN=0,PORT,MMX,SSE,SSE2,ALTIVEC} Cpu_mode; +void find_cpu_mode(void); /* Call this once at startup to set Cpu_mode */ + +/* Determine parity of argument: 1 = odd, 0 = even */ +#ifdef __i386__ +static inline int parityb(unsigned char x){ + __asm__ __volatile__ ("test %1,%1;setpo %0" : "=g"(x) : "r" (x)); + return x; +} +#else +void 
partab_init(); + +static inline int parityb(unsigned char x){ + extern unsigned char Partab[256]; + extern int P_init; + if(!P_init){ + partab_init(); + } + return Partab[x]; +} +#endif + + +static inline int parity(int x){ + /* Fold down to one byte */ + x ^= (x >> 16); + x ^= (x >> 8); + return parityb(x); +} + +/* Useful utilities for simulation */ +double normal_rand(double mean, double std_dev); +unsigned char addnoise(int sym,double amp,double gain,double offset,int clip); + +extern int Bitcnt[]; + +/* Dot product functions */ +void *initdp(signed short coeffs[],int len); +void freedp(void *dp); +long dotprod(void *dp,signed short a[]); + +void *initdp_port(signed short coeffs[],int len); +void freedp_port(void *dp); +long dotprod_port(void *dp,signed short a[]); + +#ifdef __i386__ +void *initdp_mmx(signed short coeffs[],int len); +void freedp_mmx(void *dp); +long dotprod_mmx(void *dp,signed short a[]); + +void *initdp_sse(signed short coeffs[],int len); +void freedp_sse(void *dp); +long dotprod_sse(void *dp,signed short a[]); + +void *initdp_sse2(signed short coeffs[],int len); +void freedp_sse2(void *dp); +long dotprod_sse2(void *dp,signed short a[]); +#endif + +#ifdef __VEC__ +void *initdp_av(signed short coeffs[],int len); +void freedp_av(void *dp); +long dotprod_av(void *dp,signed short a[]); +#endif + +/* Sum of squares - accepts signed shorts, produces unsigned long long */ +unsigned long long sumsq(signed short *in,int cnt); +unsigned long long sumsq_port(signed short *in,int cnt); + +#ifdef __i386__ +unsigned long long sumsq_mmx(signed short *in,int cnt); +unsigned long long sumsq_sse(signed short *in,int cnt); +unsigned long long sumsq_sse2(signed short *in,int cnt); +#endif +#ifdef __VEC__ +unsigned long long sumsq_av(signed short *in,int cnt); +#endif + + +/* Low-level data structures and routines */ + +int cpu_features(void); + +#endif /* _FEC_H_ */ + + + @@ -0,0 +1,33 @@ +/* Stuff specific to the CCSDS (255,223) RS codec + * (255,223) code over 
/* Definitions for the fixed (255,223) code over GF(256).
 * The conventional basis is used here; the dual-basis mappings are performed
 * in [en|de]code_rs_ccsds.c
 */
typedef unsigned char data_t;

/* Reduce x modulo 255.
 * Values below 255 (including negative ones) are returned unchanged. Larger
 * values are folded by adding the bits above the low byte back into the low
 * byte, which preserves the residue because 256 == 1 (mod 255).
 */
static inline int mod255(int x){
  int r = x;

  for(;;){
    if(r < 255)
      return r;
    r -= 255;
    r = (r >> 8) + (r & 0xff);
  }
}
#define MODNN(x) mod255(x)

extern data_t CCSDS_alpha_to[];
extern data_t CCSDS_index_of[];
extern data_t CCSDS_poly[];

#define MM 8
#define NN 255
#define ALPHA_TO CCSDS_alpha_to
#define INDEX_OF CCSDS_index_of
#define GENPOLY CCSDS_poly
#define NROOTS 32
#define FCR 112
#define PRIM 11
#define IPRIM 116
#define PAD pad
#define DTYPE unsigned char
DTYPE Taltab[256],Tal1tab[256];

static DTYPE tal[] = { 0x8d, 0xef, 0xec, 0x86, 0xfa, 0x99, 0xaf, 0x7b };

/* Print the 256 entries of one table as hex bytes, sixteen per output line */
static void print_entries(const DTYPE *tab){
  int n;

  for(n=0;n<256;n++){
    if((n % 16) == 0)
      printf("\n");
    printf("0x%02x,",tab[n]);
  }
}

/* Generate conversion lookup tables between conventional alpha representation
 * (@**7, @**6, ...@**0)
 * and Berlekamp's dual basis representation
 * (l0, l1, ...l7)
 * and print both as C array definitions on standard output:
 *   Taltab[]  -- conventional to dual basis
 *   Tal1tab[] -- dual basis to conventional
 */
int main(){
  int value,bit;

  for(value=0;value<256;value++){
    DTYPE dual = 0;

    /* Each set input bit k contributes the whole of tal[7-k] */
    for(bit=0;bit<8;bit++){
      if(value & (1<<bit))
        dual ^= tal[7-bit];
    }
    Taltab[value] = dual;
    Tal1tab[dual] = value;   /* inverse mapping */
  }

  printf("unsigned char Taltab[] = {\n");
  print_entries(Taltab);
  printf("\n};\n\nunsigned char Tal1tab[] = {");
  print_entries(Tal1tab);
  printf("\n};\n");
  exit(0);
}
symsize = symbol size, bits + * gfpoly = Field generator polynomial coefficients + * fcr = first root of RS code generator polynomial, index form + * prim = primitive element to generate polynomial roots + * nroots = RS code generator polynomial degree (number of roots) + * pad = padding bytes at front of shortened block + */ +void *init_rs_common(int symsize,int gfpoly,int fcr,int prim, + int nroots,int pad){ + struct rs *rs; + +#include "init_rs.h" + + return rs; +} diff --git a/init_rs.h b/init_rs.h new file mode 100644 index 0000000..2b2ae98 --- /dev/null +++ b/init_rs.h @@ -0,0 +1,106 @@ +/* Common code for intializing a Reed-Solomon control block (char or int symbols) + * Copyright 2004 Phil Karn, KA9Q + * May be used under the terms of the GNU Lesser General Public License (LGPL) + */ +#undef NULL +#define NULL ((void *)0) + +{ + int i, j, sr,root,iprim; + + rs = NULL; + /* Check parameter ranges */ + if(symsize < 0 || symsize > 8*sizeof(data_t)){ + goto done; + } + + if(fcr < 0 || fcr >= (1<<symsize)) + goto done; + if(prim <= 0 || prim >= (1<<symsize)) + goto done; + if(nroots < 0 || nroots >= (1<<symsize)) + goto done; /* Can't have more roots than symbol values! 
*/ + if(pad < 0 || pad >= ((1<<symsize) -1 - nroots)) + goto done; /* Too much padding */ + + rs = (struct rs *)calloc(1,sizeof(struct rs)); + if(rs == NULL) + goto done; + + rs->mm = symsize; + rs->nn = (1<<symsize)-1; + rs->pad = pad; + + rs->alpha_to = (data_t *)malloc(sizeof(data_t)*(rs->nn+1)); + if(rs->alpha_to == NULL){ + free(rs); + rs = NULL; + goto done; + } + rs->index_of = (data_t *)malloc(sizeof(data_t)*(rs->nn+1)); + if(rs->index_of == NULL){ + free(rs->alpha_to); + free(rs); + rs = NULL; + goto done; + } + + /* Generate Galois field lookup tables */ + rs->index_of[0] = A0; /* log(zero) = -inf */ + rs->alpha_to[A0] = 0; /* alpha**-inf = 0 */ + sr = 1; + for(i=0;i<rs->nn;i++){ + rs->index_of[sr] = i; + rs->alpha_to[i] = sr; + sr <<= 1; + if(sr & (1<<symsize)) + sr ^= gfpoly; + sr &= rs->nn; + } + if(sr != 1){ + /* field generator polynomial is not primitive! */ + free(rs->alpha_to); + free(rs->index_of); + free(rs); + rs = NULL; + goto done; + } + + /* Form RS code generator polynomial from its roots */ + rs->genpoly = (data_t *)malloc(sizeof(data_t)*(nroots+1)); + if(rs->genpoly == NULL){ + free(rs->alpha_to); + free(rs->index_of); + free(rs); + rs = NULL; + goto done; + } + rs->fcr = fcr; + rs->prim = prim; + rs->nroots = nroots; + + /* Find prim-th root of 1, used in decoding */ + for(iprim=1;(iprim % prim) != 0;iprim += rs->nn) + ; + rs->iprim = iprim / prim; + + rs->genpoly[0] = 1; + for (i = 0,root=fcr*prim; i < nroots; i++,root += prim) { + rs->genpoly[i+1] = 1; + + /* Multiply rs->genpoly[] by @**(root + x) */ + for (j = i; j > 0; j--){ + if (rs->genpoly[j] != 0) + rs->genpoly[j] = rs->genpoly[j-1] ^ rs->alpha_to[modnn(rs,rs->index_of[rs->genpoly[j]] + root)]; + else + rs->genpoly[j] = rs->genpoly[j-1]; + } + /* rs->genpoly[0] can never be zero */ + rs->genpoly[0] = rs->alpha_to[modnn(rs,rs->index_of[rs->genpoly[0]] + root)]; + } + /* convert rs->genpoly[] to index form for quicker encoding */ + for (i = 0; i <= nroots; i++) + rs->genpoly[i] = 
rs->index_of[rs->genpoly[i]]; + done:; + +} diff --git a/init_rs_char.c b/init_rs_char.c new file mode 100644 index 0000000..a51099a --- /dev/null +++ b/init_rs_char.c @@ -0,0 +1,35 @@ +/* Initialize a RS codec + * + * Copyright 2002 Phil Karn, KA9Q + * May be used under the terms of the GNU Lesser General Public License (LGPL) + */ +#include <stdlib.h> + +#include "char.h" +#include "rs-common.h" + +void free_rs_char(void *p){ + struct rs *rs = (struct rs *)p; + + free(rs->alpha_to); + free(rs->index_of); + free(rs->genpoly); + free(rs); +} + +/* Initialize a Reed-Solomon codec + * symsize = symbol size, bits + * gfpoly = Field generator polynomial coefficients + * fcr = first root of RS code generator polynomial, index form + * prim = primitive element to generate polynomial roots + * nroots = RS code generator polynomial degree (number of roots) + * pad = padding bytes at front of shortened block + */ +void *init_rs_char(int symsize,int gfpoly,int fcr,int prim, + int nroots,int pad){ + struct rs *rs; + +#include "init_rs.h" + + return rs; +} diff --git a/init_rs_int.c b/init_rs_int.c new file mode 100644 index 0000000..a6036c2 --- /dev/null +++ b/init_rs_int.c @@ -0,0 +1,35 @@ +/* Initialize a RS codec + * + * Copyright 2002 Phil Karn, KA9Q + * May be used under the terms of the GNU Lesser General Public License (LGPL) + */ +#include <stdlib.h> + +#include "int.h" +#include "rs-common.h" + +void free_rs_int(void *p){ + struct rs *rs = (struct rs *)p; + + free(rs->alpha_to); + free(rs->index_of); + free(rs->genpoly); + free(rs); +} + +/* Initialize a Reed-Solomon codec + * symsize = symbol size, bits + * gfpoly = Field generator polynomial coefficients + * fcr = first root of RS code generator polynomial, index form + * prim = primitive element to generate polynomial roots + * nroots = RS code generator polynomial degree (number of roots) + * pad = padding bytes at front of shortened block + */ +void *init_rs_int(int symsize,int gfpoly,int fcr,int prim, + int 
nroots,int pad){ + struct rs *rs; + +#include "init_rs.h" + + return rs; +} diff --git a/install-sh b/install-sh new file mode 100755 index 0000000..e9de238 --- /dev/null +++ b/install-sh @@ -0,0 +1,251 @@ +#!/bin/sh +# +# install - install a program, script, or datafile +# This comes from X11R5 (mit/util/scripts/install.sh). +# +# Copyright 1991 by the Massachusetts Institute of Technology +# +# Permission to use, copy, modify, distribute, and sell this software and its +# documentation for any purpose is hereby granted without fee, provided that +# the above copyright notice appear in all copies and that both that +# copyright notice and this permission notice appear in supporting +# documentation, and that the name of M.I.T. not be used in advertising or +# publicity pertaining to distribution of the software without specific, +# written prior permission. M.I.T. makes no representations about the +# suitability of this software for any purpose. It is provided "as is" +# without express or implied warranty. +# +# Calling this script install-sh is preferred over install.sh, to prevent +# `make' implicit rules from creating a file called install from it +# when there is no Makefile. +# +# This script is compatible with the BSD install script, but was written +# from scratch. It can only install one file at a time, a restriction +# shared with many OS's install programs. + + +# set DOITPROG to echo to test this script + +# Don't use :- since 4.3BSD and earlier shells don't like it. +doit="${DOITPROG-}" + + +# put in absolute paths if you don't have them in your path; or use env. vars. 
+ +mvprog="${MVPROG-mv}" +cpprog="${CPPROG-cp}" +chmodprog="${CHMODPROG-chmod}" +chownprog="${CHOWNPROG-chown}" +chgrpprog="${CHGRPPROG-chgrp}" +stripprog="${STRIPPROG-strip}" +rmprog="${RMPROG-rm}" +mkdirprog="${MKDIRPROG-mkdir}" + +transformbasename="" +transform_arg="" +instcmd="$mvprog" +chmodcmd="$chmodprog 0755" +chowncmd="" +chgrpcmd="" +stripcmd="" +rmcmd="$rmprog -f" +mvcmd="$mvprog" +src="" +dst="" +dir_arg="" + +while [ x"$1" != x ]; do + case $1 in + -c) instcmd="$cpprog" + shift + continue;; + + -d) dir_arg=true + shift + continue;; + + -m) chmodcmd="$chmodprog $2" + shift + shift + continue;; + + -o) chowncmd="$chownprog $2" + shift + shift + continue;; + + -g) chgrpcmd="$chgrpprog $2" + shift + shift + continue;; + + -s) stripcmd="$stripprog" + shift + continue;; + + -t=*) transformarg=`echo $1 | sed 's/-t=//'` + shift + continue;; + + -b=*) transformbasename=`echo $1 | sed 's/-b=//'` + shift + continue;; + + *) if [ x"$src" = x ] + then + src=$1 + else + # this colon is to work around a 386BSD /bin/sh bug + : + dst=$1 + fi + shift + continue;; + esac +done + +if [ x"$src" = x ] +then + echo "install: no input file specified" + exit 1 +else + true +fi + +if [ x"$dir_arg" != x ]; then + dst=$src + src="" + + if [ -d $dst ]; then + instcmd=: + chmodcmd="" + else + instcmd=mkdir + fi +else + +# Waiting for this to be detected by the "$instcmd $src $dsttmp" command +# might cause directories to be created, which would be especially bad +# if $src (and thus $dsttmp) contains '*'. 
+ + if [ -f $src -o -d $src ] + then + true + else + echo "install: $src does not exist" + exit 1 + fi + + if [ x"$dst" = x ] + then + echo "install: no destination specified" + exit 1 + else + true + fi + +# If destination is a directory, append the input filename; if your system +# does not like double slashes in filenames, you may need to add some logic + + if [ -d $dst ] + then + dst="$dst"/`basename $src` + else + true + fi +fi + +## this sed command emulates the dirname command +dstdir=`echo $dst | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'` + +# Make sure that the destination directory exists. +# this part is taken from Noah Friedman's mkinstalldirs script + +# Skip lots of stat calls in the usual case. +if [ ! -d "$dstdir" ]; then +defaultIFS=' +' +IFS="${IFS-${defaultIFS}}" + +oIFS="${IFS}" +# Some sh's can't handle IFS=/ for some reason. +IFS='%' +set - `echo ${dstdir} | sed -e 's@/@%@g' -e 's@^%@/@'` +IFS="${oIFS}" + +pathcomp='' + +while [ $# -ne 0 ] ; do + pathcomp="${pathcomp}${1}" + shift + + if [ ! -d "${pathcomp}" ] ; + then + $mkdirprog "${pathcomp}" + else + true + fi + + pathcomp="${pathcomp}/" +done +fi + +if [ x"$dir_arg" != x ] +then + $doit $instcmd $dst && + + if [ x"$chowncmd" != x ]; then $doit $chowncmd $dst; else true ; fi && + if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dst; else true ; fi && + if [ x"$stripcmd" != x ]; then $doit $stripcmd $dst; else true ; fi && + if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dst; else true ; fi +else + +# If we're going to rename the final executable, determine the name now. + + if [ x"$transformarg" = x ] + then + dstfile=`basename $dst` + else + dstfile=`basename $dst $transformbasename | + sed $transformarg`$transformbasename + fi + +# don't allow the sed command to completely eliminate the filename + + if [ x"$dstfile" = x ] + then + dstfile=`basename $dst` + else + true + fi + +# Make a temp file name in the proper directory. 
+ + dsttmp=$dstdir/#inst.$$# + +# Move or copy the file name to the temp name + + $doit $instcmd $src $dsttmp && + + trap "rm -f ${dsttmp}" 0 && + +# and set any options; do chmod last to preserve setuid bits + +# If any of these fail, we abort the whole thing. If we want to +# ignore errors from any of these, just make sure not to ignore +# errors from the above "$doit $instcmd $src $dsttmp" command. + + if [ x"$chowncmd" != x ]; then $doit $chowncmd $dsttmp; else true;fi && + if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dsttmp; else true;fi && + if [ x"$stripcmd" != x ]; then $doit $stripcmd $dsttmp; else true;fi && + if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dsttmp; else true;fi && + +# Now rename the file to the real destination. + + $doit $rmcmd -f $dstdir/$dstfile && + $doit $mvcmd $dsttmp $dstdir/$dstfile + +fi && + + +exit 0 diff --git a/int.h b/int.h --- /dev/null +++ b/int.h @@ -0,0 +1,22 @@ +/* Stuff specific to the general (integer) version of the Reed-Solomon codecs + * + * Copyright 2003, Phil Karn, KA9Q + * May be used under the terms of the GNU Lesser General Public License (LGPL) + */ +typedef unsigned int data_t; + +#define MODNN(x) modnn(rs,x) + +#define MM (rs->mm) +#define NN (rs->nn) +#define ALPHA_TO (rs->alpha_to) +#define INDEX_OF (rs->index_of) +#define GENPOLY (rs->genpoly) +#define NROOTS (rs->nroots) +#define FCR (rs->fcr) +#define PRIM (rs->prim) +#define IPRIM (rs->iprim) +#define PAD (rs->pad) +#define A0 (NN) + + diff --git a/lesser.txt b/lesser.txt new file mode 100644 index 0000000..b1e3f5a --- /dev/null +++ b/lesser.txt @@ -0,0 +1,504 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. 
It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations below. + + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. 
And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. 
It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it becomes +a de-facto standard. To achieve this, non-free programs must be +allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. 
This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control compilation +and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. 
You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) 
+ +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. + + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. 
You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. 
+ + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. 
(It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at + least three years, to give the same user the materials + specified in Subsection 6a, above, for a charge no more + than the cost of performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. 
You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. 
Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply, +and the section as a whole is intended to apply in other circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. 
Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License may add +an explicit geographical distribution limitation excluding those countries, +so that distribution is permitted only in or among countries not thus +excluded. In such case, this License incorporates the limitation as if +written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. 
Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms of the +ordinary General Public License). + + To apply these terms, attach the following notices to the library. 
It is +safest to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + + <one line to give the library's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the library, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James Random Hacker. + + <signature of Ty Coon>, 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! 
+ + diff --git a/makefile.in b/makefile.in new file mode 100644 index 0000000..53fdfcb --- /dev/null +++ b/makefile.in @@ -0,0 +1,242 @@ +# Makefile prototype for configure +# Copyright 2004 Phil Karn, KA9Q +# May be used under the terms of the GNU Lesser General Public License (LGPL) + +# @configure_input@ +srcdir = @srcdir@ +prefix = @prefix@ +exec_prefix=@exec_prefix@ +VPATH = @srcdir@ +CC=@CC@ +LIBS=@MLIBS@ fec.o sim.o viterbi27.o viterbi27_port.o viterbi29.o viterbi29_port.o \ + viterbi39.o viterbi39_port.o \ + viterbi615.o viterbi615_port.o encode_rs_char.o encode_rs_int.o encode_rs_8.o \ + decode_rs_char.o decode_rs_int.o decode_rs_8.o \ + init_rs_char.o init_rs_int.o ccsds_tab.o \ + encode_rs_ccsds.o decode_rs_ccsds.o ccsds_tal.o \ + dotprod.o dotprod_port.o \ + peakval.o peakval_port.o \ + sumsq.o sumsq_port.o + +CFLAGS=@CFLAGS@ -I. -Wall @ARCH_OPTION@ + +SHARED_LIB=@SH_LIB@ + +all: libfec.a $(SHARED_LIB) + +test: vtest27 vtest29 vtest39 vtest615 rstest dtest sumsq_test peaktest + @echo "Correctness tests:" + ./vtest27 -e 3.0 -n 1000 -v + ./vtest29 -e 2.5 -n 1000 -v + ./vtest39 -e 2.5 -n 1000 -v + ./vtest615 -e 1.0 -n 100 -v + ./rstest + ./dtest + ./sumsq_test + ./peaktest + @echo "Speed tests:" + ./vtest27 + ./vtest29 + ./vtest39 + ./vtest615 + +install: all + mkdir -p @libdir@ + install -m 644 -p $(SHARED_LIB) libfec.a @libdir@ +# (cd @libdir@;ln -f -s $(SHARED_LIB) libfec.so) + @REBIND@ + mkdir -p @includedir@ + install -m 644 -p fec.h @includedir@ + mkdir -m 0755 -p @mandir@/man3 + install -m 644 -p simd-viterbi.3 rs.3 dsp.3 @mandir@/man3 + +peaktest: peaktest.o libfec.a + gcc -g -o $@ $^ + +sumsq_test: sumsq_test.o libfec.a + gcc -g -o $@ $^ + +dtest: dtest.o libfec.a + gcc -g -o $@ $^ -lm + +vtest27: vtest27.o libfec.a + gcc -g -o $@ $^ -lm + +vtest29: vtest29.o libfec.a + gcc -g -o $@ $^ -lm + +vtest39: vtest39.o libfec.a + gcc -g -o $@ $^ -lm + +vtest615: vtest615.o libfec.a + gcc -g -o $@ $^ -lm + +rstest: rstest.o libfec.a + gcc -g -o $@ $^ + 
+rs_speedtest: rs_speedtest.o libfec.a + gcc -g -o $@ $^ + +# for some reason, the test programs without args segfault on the PPC with -O2 optimization. Dunno why - compiler bug? +vtest27.o: vtest27.c fec.h + gcc -g -c $< + +vtest29.o: vtest29.c fec.h + gcc -g -c $< + +vtest39.o: vtest39.c fec.h + gcc -g -c $< + +vtest615.o: vtest615.c fec.h + gcc -g -c $< + +libfec.a: $(LIBS) + ar rv $@ $^ + ranlib libfec.a + +# for Darwin +libfec.dylib: $(LIBS) + $(CC) -dynamiclib -install_name $@ -o $@ $^ + +# for Linux et al +libfec.so: $(LIBS) + gcc -shared -Xlinker -soname=$@ -o $@ -Wl,-whole-archive $^ -Wl,-no-whole-archive -lc + +dotprod.o: dotprod.c fec.h + +dotprod_port.o: dotprod_port.c fec.h + +viterbi27.o: viterbi27.c fec.h + +viterbi27_port.o: viterbi27_port.c fec.h + +viterbi29.o: viterbi29.c fec.h + +viterbi39.o: viterbi39.c fec.h + +viterbi39_port.o: viterbi39_port.c fec.h + +viterbi39_sse2.o: viterbi39_sse2.c fec.h + +viterbi39_sse.o: viterbi39_sse.c fec.h + +viterbi39_mmx.o: viterbi39_mmx.c fec.h + +encode_rs_char.o: encode_rs_char.c char.h rs-common.h + +encode_rs_int.o: encode_rs_int.c int.h rs-common.h + +encode_rs_8.o: encode_rs_8.c fixed.h + +encode_rs_av.o: encode_rs_av.c fixed.h + +decode_rs_char.o: decode_rs_char.c char.h rs-common.h + +decode_rs_int.o: decode_rs_int.c int.h rs-common.h + +decode_rs_8.o: decode_rs_8.c fixed.h + +init_rs_char.o: init_rs_char.c char.h rs-common.h + +init_rs_int.o: init_rs_int.c int.h rs-common.h + +ccsds_tab.o: ccsds_tab.c + +ccsds_tab.c: gen_ccsds + ./gen_ccsds > ccsds_tab.c + +gen_ccsds: gen_ccsds.o init_rs_char.o + gcc -o $@ $^ + +gen_ccsds.o: gen_ccsds.c + gcc $(CFLAGS) -c -o $@ $< + +ccsds_tal.o: ccsds_tal.c + +ccsds_tal.c: gen_ccsds_tal + ./gen_ccsds_tal > ccsds_tal.c + +exercise_char.o: exercise.c + gcc $(CFLAGS) -c -o $@ $< + +exercise_int.o: exercise.c + gcc -DBIGSYM=1 $(CFLAGS) -c -o $@ $< + +exercise_8.o: exercise.c + gcc -DFIXED=1 $(CFLAGS) -c -o $@ $< + +exercise_ccsds.o: exercise.c + gcc -DCCSDS=1 $(CFLAGS) -c 
-o $@ $< + +viterbi27.o: viterbi27.c fec.h + +viterbi27_port.o: viterbi27_port.c fec.h + +viterbi27_av.o: viterbi27_av.c fec.h + +viterbi27_mmx.o: viterbi27_mmx.c fec.h + gcc $(CFLAGS) -mmmx -c -o $@ $< + +viterbi27_sse.o: viterbi27_sse.c fec.h + gcc $(CFLAGS) -msse -c -o $@ $< + +viterbi27_sse2.o: viterbi27_sse2.c fec.h + gcc $(CFLAGS) -msse2 -c -o $@ $< + +viterbi29.o: viterbi29.c fec.h + +viterbi29_port.o: viterbi29_port.c fec.h + +viterbi29_av.o: viterbi29_av.c fec.h + +viterbi29_mmx.o: viterbi29_mmx.c fec.h + gcc $(CFLAGS) -mmmx -c -o $@ $< + +viterbi29_sse.o: viterbi29_sse.c fec.h + gcc $(CFLAGS) -msse -c -o $@ $< + +viterbi29_sse2.o: viterbi29_sse2.c fec.h + gcc $(CFLAGS) -msse2 -c -o $@ $< + +viterbi39.o: viterbi39.c fec.h + +viterbi39_port.o: viterbi39_port.c fec.h + +viterbi39_av.o: viterbi39_av.c fec.h + +viterbi39_mmx.o: viterbi39_mmx.c fec.h + gcc $(CFLAGS) -mmmx -c -o $@ $< + +viterbi39_sse.o: viterbi39_sse.c fec.h + gcc $(CFLAGS) -msse -c -o $@ $< + +viterbi39_sse2.o: viterbi39_sse2.c fec.h + gcc $(CFLAGS) -msse2 -c -o $@ $< + +viterbi615.o: viterbi615.c fec.h + +viterbi615_port.o: viterbi615_port.c fec.h + +viterbi615_av.o: viterbi615_av.c fec.h + +viterbi615_mmx.o: viterbi615_mmx.c fec.h + gcc $(CFLAGS) -mmmx -c -o $@ $< + +viterbi615_sse.o: viterbi615_sse.c fec.h + gcc $(CFLAGS) -msse -c -o $@ $< + +viterbi615_sse2.o: viterbi615_sse2.c fec.h + gcc $(CFLAGS) -msse2 -c -o $@ $< + +cpu_mode_x86.o: cpu_mode_x86.c fec.h + +cpu_mode_ppc.o: cpu_mode_ppc.c fec.h + + +clean: + rm -f *.o $(SHARED_LIB) *.a rs_speedtest peaktest sumsq_test dtest vtest27 vtest29 vtest39 vtest615 rstest ccsds_tab.c ccsds_tal.c gen_ccsds gen_ccsds_tal core + rm -rf autom4te.cache + +distclean: clean + rm -f config.log config.cache config.status config.h makefile + diff --git a/mmxbfly27.s b/mmxbfly27.s new file mode 100644 index 0000000..4abbf48 --- /dev/null +++ b/mmxbfly27.s @@ -0,0 +1,148 @@ +/* Intel SIMD MMX implementation of Viterbi ACS butterflies + for 64-state (k=7) 
convolutional code + Copyright 2004 Phil Karn, KA9Q + This code may be used under the terms of the GNU Lesser General Public License (LGPL) + + int update_viterbi27_blk_mmx(struct v27 *vp,unsigned char *syms,int nbits) ; +*/ + # MMX (64-bit SIMD) version + # requires Pentium-MMX, Pentium-II or better + + # These are offsets into struct v27, defined in viterbi27_mmx.c + .set DP,128 + .set OLDMETRICS,132 + .set NEWMETRICS,136 + .text + .global update_viterbi27_blk_mmx,Mettab27_1,Mettab27_2 + .type update_viterbi27_blk_mmx,@function + .align 16 + +update_viterbi27_blk_mmx: + pushl %ebp + movl %esp,%ebp + pushl %esi + pushl %edi + pushl %edx + pushl %ebx + + movl 8(%ebp),%edx # edx = vp + testl %edx,%edx + jnz 0f + movl -1,%eax + jmp err +0: movl OLDMETRICS(%edx),%esi # esi -> old metrics + movl NEWMETRICS(%edx),%edi # edi -> new metrics + movl DP(%edx),%edx # edx -> decisions + +1: movl 16(%ebp),%eax # eax = nbits + decl %eax + jl 2f # passed zero, we're done + movl %eax,16(%ebp) + + movl 12(%ebp),%ebx # ebx = syms + movw (%ebx),%ax # ax = second symbol : first symbol + addl $2,%ebx + movl %ebx,12(%ebp) + + movb %ah,%bl + andl $255,%eax + andl $255,%ebx + + # shift into first array index dimension slot + shll $5,%eax + shll $5,%ebx + + # each invocation of this macro will do 8 butterflies in parallel + .MACRO butterfly GROUP + # Compute branch metrics + movq (Mettab27_1+8*\GROUP)(%eax),%mm3 + movq fifteens,%mm0 + + paddb (Mettab27_2+8*\GROUP)(%ebx),%mm3 + paddb ones,%mm3 # emulate pavgb - this may not be necessary + psrlq $1,%mm3 + pand %mm0,%mm3 + + movq (8*\GROUP)(%esi),%mm6 # Incoming path metric, high bit = 0 + movq ((8*\GROUP)+32)(%esi),%mm2 # Incoming path metric, high bit = 1 + movq %mm6,%mm1 + movq %mm2,%mm7 + + paddb %mm3,%mm6 + paddb %mm3,%mm2 + pxor %mm0,%mm3 # invert branch metric + paddb %mm3,%mm7 # path metric for inverted symbols + paddb %mm3,%mm1 + + # live registers 1 2 6 7 + # Compare mm6 and mm7; mm1 and mm2 + pxor %mm3,%mm3 + movq %mm6,%mm4 + movq 
%mm1,%mm5 + psubb %mm7,%mm4 # mm4 = mm6 - mm7 + psubb %mm2,%mm5 # mm5 = mm1 - mm2 + pcmpgtb %mm3,%mm4 # mm4 = first set of decisions (ff = 1 better) + pcmpgtb %mm3,%mm5 # mm5 = second set of decisions + + # live registers 1 2 4 5 6 7 + # select survivors + movq %mm4,%mm0 + pand %mm4,%mm7 + movq %mm5,%mm3 + pand %mm5,%mm2 + pandn %mm6,%mm0 + pandn %mm1,%mm3 + por %mm0,%mm7 # mm7 = first set of survivors + por %mm3,%mm2 # mm2 = second set of survivors + + # live registers 2 4 5 7 + # interleave & store decisions in mm4, mm5 + # interleave & store new branch metrics in mm2, mm7 + movq %mm4,%mm3 + movq %mm7,%mm0 + punpckhbw %mm5,%mm4 + punpcklbw %mm5,%mm3 + punpcklbw %mm2,%mm7 # interleave second 8 new metrics + punpckhbw %mm2,%mm0 # interleave first 8 new metrics + movq %mm4,(16*\GROUP+8)(%edx) + movq %mm3,(16*\GROUP)(%edx) + movq %mm7,(16*\GROUP)(%edi) + movq %mm0,(16*\GROUP+8)(%edi) + + .endm + +# invoke macro 4 times for a total of 32 butterflies + butterfly GROUP=0 + butterfly GROUP=1 + butterfly GROUP=2 + butterfly GROUP=3 + + addl $64,%edx # bump decision pointer + + # swap metrics + movl %esi,%eax + movl %edi,%esi + movl %eax,%edi + jmp 1b + +2: emms + movl 8(%ebp),%ebx # ebx = vp + # stash metric pointers + movl %esi,OLDMETRICS(%ebx) + movl %edi,NEWMETRICS(%ebx) + movl %edx,DP(%ebx) # stash incremented value of vp->dp + xorl %eax,%eax +err: popl %ebx + popl %edx + popl %edi + popl %esi + popl %ebp + ret + + .data + .align 8 +fifteens: + .byte 15,15,15,15,15,15,15,15 + + .align 8 +ones: .byte 1,1,1,1,1,1,1,1 diff --git a/mmxbfly29.s b/mmxbfly29.s new file mode 100644 index 0000000..e37cab8 --- /dev/null +++ b/mmxbfly29.s @@ -0,0 +1,161 @@ +/* Intel SIMD MMX implementation of Viterbi ACS butterflies + for 256-state (k=9) convolutional code + Copyright 2004 Phil Karn, KA9Q + This code may be used under the terms of the GNU Lesser General Public License (LGPL) + + void update_viterbi29_blk_mmx(struct v29 *vp,unsigned char *syms,int nbits); +*/ + + # These are 
offsets into struct v29, defined in viterbi29.h + .set DP,512 + .set OLDMETRICS,516 + .set NEWMETRICS,520 + .text + .global update_viterbi29_blk_mmx,Mettab29_1,Mettab29_2 + .type update_viterbi29_blk_mmx,@function + .align 16 + + # MMX (64-bit SIMD) version + # requires Pentium-MMX, Pentium-II or better + +update_viterbi29_blk_mmx: + pushl %ebp + movl %esp,%ebp + pushl %esi + pushl %edi + pushl %edx + pushl %ebx + + movl 8(%ebp),%edx # edx = vp + movl 8(%ebp),%edx # edx = vp + testl %edx,%edx + jnz 0f + movl -1,%eax + jmp err +0: movl OLDMETRICS(%edx),%esi # esi -> old metrics + movl NEWMETRICS(%edx),%edi # edi -> new metrics + movl DP(%edx),%edx # edx -> decisions + +1: movl 16(%ebp),%eax # eax = nbits + decl %eax + jl 2f # passed zero, we're done + movl %eax,16(%ebp) + + movl 12(%ebp),%ebx # ebx = syms + movw (%ebx),%ax # ax = second symbol : first symbol + addl $2,%ebx + movl %ebx,12(%ebp) + + movb %ah,%bl + andl $255,%eax + andl $255,%ebx + + # shift into first array index dimension slot + shll $7,%eax + shll $7,%ebx + + # each invocation of this macro will do 8 butterflies in parallel + .MACRO butterfly GROUP + # Compute branch metrics + movq (Mettab29_1+8*\GROUP)(%eax),%mm3 + movq fifteens,%mm0 + paddb (Mettab29_2+8*\GROUP)(%ebx),%mm3 + paddb ones,%mm3 # emulate pavgb - this may not be necessary + psrlq $1,%mm3 + pand %mm0,%mm3 + + movq (8*\GROUP)(%esi),%mm6 # Incoming path metric, high bit = 0 + movq ((8*\GROUP)+128)(%esi),%mm2 # Incoming path metric, high bit = 1 + movq %mm6,%mm1 + movq %mm2,%mm7 + + paddb %mm3,%mm6 + paddb %mm3,%mm2 + pxor %mm0,%mm3 # invert branch metric + paddb %mm3,%mm7 # path metric for inverted symbols + paddb %mm3,%mm1 + + # live registers 1 2 6 7 + # Compare mm6 and mm7; mm1 and mm2 + pxor %mm3,%mm3 + movq %mm6,%mm4 + movq %mm1,%mm5 + psubb %mm7,%mm4 # mm4 = mm6 - mm7 + psubb %mm2,%mm5 # mm5 = mm1 - mm2 + pcmpgtb %mm3,%mm4 # mm4 = first set of decisions (ff = 1 better) + pcmpgtb %mm3,%mm5 # mm5 = second set of decisions + + # live 
registers 1 2 4 5 6 7 + # select survivors + movq %mm4,%mm0 + pand %mm4,%mm7 + movq %mm5,%mm3 + pand %mm5,%mm2 + pandn %mm6,%mm0 + pandn %mm1,%mm3 + por %mm0,%mm7 # mm7 = first set of survivors + por %mm3,%mm2 # mm2 = second set of survivors + + # live registers 2 4 5 7 + # interleave & store decisions in mm4, mm5 + # interleave & store new branch metrics in mm2, mm7 + movq %mm4,%mm3 + movq %mm7,%mm0 + punpckhbw %mm5,%mm4 + punpcklbw %mm5,%mm3 + punpcklbw %mm2,%mm7 # interleave second 8 new metrics + punpckhbw %mm2,%mm0 # interleave first 8 new metrics + movq %mm4,(16*\GROUP+8)(%edx) + movq %mm3,(16*\GROUP)(%edx) + movq %mm7,(16*\GROUP)(%edi) + movq %mm0,(16*\GROUP+8)(%edi) + + .endm + +# invoke macro 16 times for a total of 128 butterflies + butterfly GROUP=0 + butterfly GROUP=1 + butterfly GROUP=2 + butterfly GROUP=3 + butterfly GROUP=4 + butterfly GROUP=5 + butterfly GROUP=6 + butterfly GROUP=7 + butterfly GROUP=8 + butterfly GROUP=9 + butterfly GROUP=10 + butterfly GROUP=11 + butterfly GROUP=12 + butterfly GROUP=13 + butterfly GROUP=14 + butterfly GROUP=15 + + addl $256,%edx # bump decision pointer + + # swap metrics + movl %esi,%eax + movl %edi,%esi + movl %eax,%edi + jmp 1b + +2: emms + movl 8(%ebp),%ebx # ebx = vp + # stash metric pointers + movl %esi,OLDMETRICS(%ebx) + movl %edi,NEWMETRICS(%ebx) + movl %edx,DP(%ebx) # stash incremented value of vp->dp + xorl %eax,%eax +err: popl %ebx + popl %edx + popl %edi + popl %esi + popl %ebp + ret + + .data + .align 8 +fifteens: + .byte 15,15,15,15,15,15,15,15 + + .align 8 +ones: .byte 1,1,1,1,1,1,1,1 diff --git a/peak_mmx_assist.s b/peak_mmx_assist.s new file mode 100644 index 0000000..dae831f --- /dev/null +++ b/peak_mmx_assist.s @@ -0,0 +1,70 @@ +# MMX assist routines for peakval +# Copyright 2001 Phil Karn, KA9Q +# May be used under the terms of the GNU Lesser General Public License (LGPL) + + .text + +# Find peak value in signed 16-bit input samples +# int peakval_mmx(signed short *in,int cnt); + .global 
peakval_mmx + .type peakval_mmx,@function + .align 16 +peakval_mmx: + pushl %ebp + movl %esp,%ebp + pushl %esi + pushl %ecx + pushl %ebx + + movl 8(%ebp),%esi + movl 12(%ebp),%ecx + + pxor %mm7,%mm7 # clear peak + +1: subl $4,%ecx + jl 2f + movq (%esi),%mm0 + movq %mm0,%mm1 + psraw $15,%mm1 # mm1 = 1's if negative, 0's if positive + pxor %mm1,%mm0 # complement negatives + psubw %mm1,%mm0 # add 1 to negatives + movq %mm7,%mm6 # copy previous peak + pcmpgtw %mm0,%mm6 # ff == old peak greater + pand %mm6,%mm7 # select old peaks that are greater + pandn %mm0,%mm6 # select new values that are greater + por %mm6,%mm7 + + addl $8,%esi + jmp 1b + +2: movd %mm7,%eax + psrlq $16,%mm7 + andl $0xffff,%eax + + movd %mm7,%edx + psrlq $16,%mm7 + andl $0xffff,%edx + cmpl %edx,%eax + jnl 3f + movl %edx,%eax +3: + movd %mm7,%edx + psrlq $16,%mm7 + andl $0xffff,%edx + cmpl %edx,%eax + jnl 4f + movl %edx,%eax +4: + movd %mm7,%edx + andl $0xffff,%edx + cmpl %edx,%eax + jnl 5f + movl %edx,%eax +5: + emms + popl %ebx + popl %ecx + popl %esi + popl %ebp + ret + diff --git a/peak_sse2_assist.s b/peak_sse2_assist.s new file mode 100644 index 0000000..1dee3a8 --- /dev/null +++ b/peak_sse2_assist.s @@ -0,0 +1,51 @@ +# SSE2 assist routines for peakval +# Copyright 2001 Phil Karn, KA9Q +# May be used under the terms of the GNU Public License (GPL) + + .text + +# Find peak absolute value in signed 16-bit input samples +# int peakval_sse2(signed short *in,int cnt); + .global peakval_sse2 + .type peakval_sse2,@function + .align 16 +peakval_sse2: + pushl %ebp + movl %esp,%ebp + pushl %esi + pushl %ecx + + movl 8(%ebp),%esi + movl 12(%ebp),%ecx + + pxor %xmm7,%xmm7 # clear peak + +1: subl $8,%ecx + jl 2f + movaps (%esi),%xmm0 + movaps %xmm0,%xmm1 + psraw $15,%xmm1 # xmm1 = 1's if negative, 0's if positive + pxor %xmm1,%xmm0 # complement negatives + psubw %xmm1,%xmm0 # add 1 to negatives + pmaxsw %xmm0,%xmm7 # store peak + + addl $16,%esi + jmp 1b + +2: movaps %xmm7,%xmm0 + psrldq $8,%xmm0 + pmaxsw 
%xmm0,%xmm7 + movaps %xmm7,%xmm0 + psrlq $32,%xmm0 + pmaxsw %xmm0,%xmm7 + movaps %xmm7,%xmm0 + psrlq $16,%xmm0 + pmaxsw %xmm0,%xmm7 # min value in low word of %xmm7 + + movd %xmm7,%eax + andl $0xffff,%eax + + popl %ecx + popl %esi + popl %ebp + ret diff --git a/peak_sse_assist.s b/peak_sse_assist.s new file mode 100644 index 0000000..ea6fce8 --- /dev/null +++ b/peak_sse_assist.s @@ -0,0 +1,49 @@ +# SSE assist routines for peakval +# Copyright 2001 Phil Karn, KA9Q +# May be used under the terms of the GNU Lesser General Public License (LGPL) + + .text + +# Find peak absolute value in signed 16-bit input samples +# int peakval_sse(signed short *in,int cnt); + .global peakval_sse + .type peakval_sse,@function + .align 16 +peakval_sse: + pushl %ebp + movl %esp,%ebp + pushl %esi + pushl %ecx + + movl 8(%ebp),%esi + movl 12(%ebp),%ecx + + pxor %mm7,%mm7 # clear peak + +1: subl $4,%ecx + jl 2f + movq (%esi),%mm0 + movq %mm0,%mm1 + psraw $15,%mm1 # mm1 = 1's if negative, 0's if positive + pxor %mm1,%mm0 # complement negatives + psubw %mm1,%mm0 # add 1 to negatives + pmaxsw %mm0,%mm7 # store peak + + addl $8,%esi + jmp 1b + +2: movq %mm7,%mm0 + psrlq $32,%mm0 + pmaxsw %mm0,%mm7 + movq %mm7,%mm0 + psrlq $16,%mm0 + pmaxsw %mm0,%mm7 # min value in low word of %mm7 + + movd %mm7,%eax + andl $0xffff,%eax + + emms + popl %ecx + popl %esi + popl %ebp + ret diff --git a/peaktest.c b/peaktest.c new file mode 100644 index 0000000..fa4b280 --- /dev/null +++ b/peaktest.c @@ -0,0 +1,38 @@ +/* Verify correctness of the peak routine + * Copyright 2004 Phil Karn, KA9Q + */ +#include <stdio.h> +#include <stdlib.h> +#include <time.h> + +/* These values should trigger leading/trailing array fragment handling */ +#define NSAMP 200002 +#define OFFSET 1 + +int peakval(signed short *,int); +int peakval_port(signed short *,int); + +int main(){ + int i,s; + int result,rresult; + signed short samples[NSAMP]; + + srandom(time(NULL)); + + for(i=0;i<NSAMP;i++){ + do { + s = random() & 0x0fff; + } 
while(s == 0x8000); + samples[i] = s; + } + samples[5] = 25000; + + rresult = peakval_port(&samples[OFFSET],NSAMP-OFFSET); + result = peakval(&samples[OFFSET],NSAMP-OFFSET); + if(result == rresult){ + printf("OK\n"); + } else { + printf("peak mismatch: %d != %d\n",result,rresult); + } + exit(0); +} diff --git a/peakval.c b/peakval.c new file mode 100644 index 0000000..811a3a9 --- /dev/null +++ b/peakval.c @@ -0,0 +1,39 @@ +/* Switch to appropriate version of peakval routine + * Copyright 2004, Phil Karn, KA9Q + */ + +#include <stdlib.h> +#include "fec.h" + +int peakval_port(signed short *b,int cnt); +#ifdef __i386__ +int peakval_mmx(signed short *b,int cnt); +int peakval_sse(signed short *b,int cnt); +int peakval_sse2(signed short *b,int cnt); +#endif + +#ifdef __VEC__ +int peakval_av(signed short *b,int cnt); +#endif + +int peakval(signed short *b,int cnt){ + find_cpu_mode(); + + switch(Cpu_mode){ + case PORT: + default: + return peakval_port(b,cnt); +#ifdef __i386__ + case MMX: + return peakval_mmx(b,cnt); + case SSE: + return peakval_sse(b,cnt); + case SSE2: + return peakval_sse2(b,cnt); +#endif +#ifdef __VEC__ + case ALTIVEC: + return peakval_av(b,cnt); +#endif + } +} diff --git a/peakval_av.c b/peakval_av.c new file mode 100644 index 0000000..ae54c10 --- /dev/null +++ b/peakval_av.c @@ -0,0 +1,61 @@ +/* Return the largest absolute value of a vector of signed shorts + + * This is the Altivec SIMD version. 
+ + * Copyright 2004 Phil Karn, KA9Q + * May be used under the terms of the GNU Lesser General Public License (LGPL) + */ + +#include "fec.h" + +signed short peakval_av(signed short *in,int cnt){ + vector signed short x; + int pad; + union { vector signed char cv; vector signed short hv; signed short s[8]; signed char c[16];} s; + vector signed short smallest,largest; + + smallest = (vector signed short)(0); + largest = (vector signed short)(0); + if((pad = (int)in & 15)!=0){ + /* Load unaligned leading word */ + x = vec_perm(vec_ld(0,in),(vector signed short)(0),vec_lvsl(0,in)); + if(cnt < 8){ /* Shift right to chop stuff beyond end of short block */ + s.c[15] = (8-cnt)<<4; + x = vec_sro(x,s.cv); + } + smallest = vec_min(smallest,x); + largest = vec_max(largest,x); + in += 8-pad/2; + cnt -= 8-pad/2; + } + /* Everything is now aligned, rip through most of the block */ + while(cnt >= 8){ + x = vec_ld(0,in); + smallest = vec_min(smallest,x); + largest = vec_max(largest,x); + in += 8; + cnt -= 8; + } + /* Handle trailing fragment, if any */ + if(cnt > 0){ + x = vec_ld(0,in); + s.c[15] = (8-cnt)<<4; + x = vec_sro(x,s.cv); + smallest = vec_min(smallest,x); + largest = vec_max(largest,x); + } + /* Combine and extract result */ + largest = vec_max(largest,vec_abs(smallest)); + + s.c[15] = 64; /* Shift right four 16-bit words */ + largest = vec_max(largest,vec_sro(largest,s.cv)); + + s.c[15] = 32; /* Shift right two 16-bit words */ + largest = vec_max(largest,vec_sro(largest,s.cv)); + + s.c[15] = 16; /* Shift right one 16-bit word */ + largest = vec_max(largest,vec_sro(largest,s.cv)); + + s.hv = largest; + return s.s[7]; +} diff --git a/peakval_mmx.c b/peakval_mmx.c new file mode 100644 index 0000000..436fe88 --- /dev/null +++ b/peakval_mmx.c @@ -0,0 +1,34 @@ +/* Wrapper for the MMX version of peakval + * Copyright 2004 Phil Karn, KA9Q + */ + +#include <stdlib.h> + +int peakval_mmx_assist(signed short *,int); + +int peakval_mmx(signed short *b,int cnt){ + int peak = 0; + 
int a; + + while(((int)b & 7) != 0 && cnt != 0){ + a = abs(*b); + if(a > peak) + peak = a; + b++; + cnt--; + } + a = peakval_mmx_assist(b,cnt); + if(a > peak) + peak = a; + b += cnt & ~3; + cnt &= 3; + + while(cnt != 0){ + a = abs(*b); + if(a > peak) + peak = a; + b++; + cnt--; + } + return peak; +} diff --git a/peakval_mmx_assist.s b/peakval_mmx_assist.s new file mode 100644 index 0000000..553cb79 --- /dev/null +++ b/peakval_mmx_assist.s @@ -0,0 +1,70 @@ +# MMX assist routines for peakval +# Copyright 2001 Phil Karn, KA9Q +# May be used under the terms of the GNU Lesser General Public License (LGPL) + + .text + +# Find peak value in signed 16-bit input samples +# int peakval_mmx_assist(signed short *in,int cnt); + .global peakval_mmx_assist + .type peakval_mmx_assist,@function + .align 16 +peakval_mmx_assist: + pushl %ebp + movl %esp,%ebp + pushl %esi + pushl %ecx + pushl %ebx + + movl 8(%ebp),%esi + movl 12(%ebp),%ecx + + pxor %mm7,%mm7 # clear peak + +1: subl $4,%ecx + jl 2f + movq (%esi),%mm0 + movq %mm0,%mm1 + psraw $15,%mm1 # mm1 = 1's if negative, 0's if positive + pxor %mm1,%mm0 # complement negatives + psubw %mm1,%mm0 # add 1 to negatives + movq %mm7,%mm6 # copy previous peak + pcmpgtw %mm0,%mm6 # ff == old peak greater + pand %mm6,%mm7 # select old peaks that are greater + pandn %mm0,%mm6 # select new values that are greater + por %mm6,%mm7 + + addl $8,%esi + jmp 1b + +2: movd %mm7,%eax + psrlq $16,%mm7 + andl $0xffff,%eax + + movd %mm7,%edx + psrlq $16,%mm7 + andl $0xffff,%edx + cmpl %edx,%eax + jnl 3f + movl %edx,%eax +3: + movd %mm7,%edx + psrlq $16,%mm7 + andl $0xffff,%edx + cmpl %edx,%eax + jnl 4f + movl %edx,%eax +4: + movd %mm7,%edx + andl $0xffff,%edx + cmpl %edx,%eax + jnl 5f + movl %edx,%eax +5: + emms + popl %ebx + popl %ecx + popl %esi + popl %ebp + ret + diff --git a/peakval_port.c b/peakval_port.c new file mode 100644 index 0000000..07ab316 --- /dev/null +++ b/peakval_port.c @@ -0,0 +1,16 @@ +/* Portable C version of peakval + * Copyright 
2004 Phil Karn, KA9Q + */ +#include <stdlib.h> +#include "fec.h" +int peakval_port(signed short *b,int len){ + int peak = 0; + int a,i; + + for(i=0;i<len;i++){ + a = abs(b[i]); + if(a > peak) + peak = a; + } + return peak; +} diff --git a/peakval_sse.c b/peakval_sse.c new file mode 100644 index 0000000..9868b7f --- /dev/null +++ b/peakval_sse.c @@ -0,0 +1,35 @@ +/* IA-32 SSE version of peakval + * Copyright 2004 Phil Karn, KA9Q + */ + +#include <stdlib.h> +#include "fec.h" + +int peakval_sse_assist(signed short *,int); + +int peakval_sse(signed short *b,int cnt){ + int peak = 0; + int a; + + while(((int)b & 7) != 0 && cnt != 0){ + a = abs(*b); + if(a > peak) + peak = a; + b++; + cnt--; + } + a = peakval_sse_assist(b,cnt); + if(a > peak) + peak = a; + b += cnt & ~3; + cnt &= 3; + + while(cnt != 0){ + a = abs(*b); + if(a > peak) + peak = a; + b++; + cnt--; + } + return peak; +} diff --git a/peakval_sse2.c b/peakval_sse2.c new file mode 100644 index 0000000..79d9059 --- /dev/null +++ b/peakval_sse2.c @@ -0,0 +1,34 @@ +/* Portable C version of peakval + * Copyright 2004 Phil Karn, KA9Q + */ +#include <stdlib.h> +#include "fec.h" + +int peakval_sse2_assist(signed short *,int); + +int peakval_sse2(signed short *b,int cnt){ + int peak = 0; + int a; + + while(((int)b & 15) != 0 && cnt != 0){ + a = abs(*b); + if(a > peak) + peak = a; + b++; + cnt--; + } + a = peakval_sse2_assist(b,cnt); + if(a > peak) + peak = a; + b += cnt & ~7; + cnt &= 7; + + while(cnt != 0){ + a = abs(*b); + if(a > peak) + peak = a; + b++; + cnt--; + } + return peak; +} diff --git a/peakval_sse2_assist.s b/peakval_sse2_assist.s new file mode 100644 index 0000000..c7a58e7 --- /dev/null +++ b/peakval_sse2_assist.s @@ -0,0 +1,51 @@ +# SSE2 assist routines for peakval +# Copyright 2001 Phil Karn, KA9Q +# May be used under the terms of the GNU Lesser General Public License (LGPL) + + .text + +# Find peak absolute value in signed 16-bit input samples +# int peakval_sse2_assist(signed short *in,int cnt); + 
.global peakval_sse2_assist + .type peakval_sse2_assist,@function + .align 16 +peakval_sse2_assist: + pushl %ebp + movl %esp,%ebp + pushl %esi + pushl %ecx + + movl 8(%ebp),%esi + movl 12(%ebp),%ecx + + pxor %xmm7,%xmm7 # clear peak + +1: subl $8,%ecx + jl 2f + movaps (%esi),%xmm0 + movaps %xmm0,%xmm1 + psraw $15,%xmm1 # xmm1 = 1's if negative, 0's if positive + pxor %xmm1,%xmm0 # complement negatives + psubw %xmm1,%xmm0 # add 1 to negatives + pmaxsw %xmm0,%xmm7 # store peak + + addl $16,%esi + jmp 1b + +2: movaps %xmm7,%xmm0 + psrldq $8,%xmm0 + pmaxsw %xmm0,%xmm7 + movaps %xmm7,%xmm0 + psrlq $32,%xmm0 + pmaxsw %xmm0,%xmm7 + movaps %xmm7,%xmm0 + psrlq $16,%xmm0 + pmaxsw %xmm0,%xmm7 # min value in low word of %xmm7 + + movd %xmm7,%eax + andl $0xffff,%eax + + popl %ecx + popl %esi + popl %ebp + ret diff --git a/peakval_sse_assist.s b/peakval_sse_assist.s new file mode 100644 index 0000000..827c800 --- /dev/null +++ b/peakval_sse_assist.s @@ -0,0 +1,49 @@ +# SSE assist routines for peakval +# Copyright 2001 Phil Karn, KA9Q +# May be used under the terms of the GNU Lesser General Public License (LGPL) + + .text + +# Find peak absolute value in signed 16-bit input samples +# int peakval_sse_assist(signed short *in,int cnt); + .global peakval_sse_assist + .type peakval_sse_assist,@function + .align 16 +peakval_sse_assist: + pushl %ebp + movl %esp,%ebp + pushl %esi + pushl %ecx + + movl 8(%ebp),%esi + movl 12(%ebp),%ecx + + pxor %mm7,%mm7 # clear peak + +1: subl $4,%ecx + jl 2f + movq (%esi),%mm0 + movq %mm0,%mm1 + psraw $15,%mm1 # mm1 = 1's if negative, 0's if positive + pxor %mm1,%mm0 # complement negatives + psubw %mm1,%mm0 # add 1 to negatives + pmaxsw %mm0,%mm7 # store peak + + addl $8,%esi + jmp 1b + +2: movq %mm7,%mm0 + psrlq $32,%mm0 + pmaxsw %mm0,%mm7 + movq %mm7,%mm0 + psrlq $16,%mm0 + pmaxsw %mm0,%mm7 # min value in low word of %mm7 + + movd %mm7,%eax + andl $0xffff,%eax + + emms + popl %ecx + popl %esi + popl %ebp + ret diff --git a/rs-common.h b/rs-common.h 
new file mode 100644 index 0000000..e64eb39 --- /dev/null +++ b/rs-common.h @@ -0,0 +1,26 @@ +/* Stuff common to all the general-purpose Reed-Solomon codecs + * Copyright 2004 Phil Karn, KA9Q + * May be used under the terms of the GNU Lesser General Public License (LGPL) + */ + +/* Reed-Solomon codec control block */ +struct rs { + int mm; /* Bits per symbol */ + int nn; /* Symbols per block (= (1<<mm)-1) */ + data_t *alpha_to; /* log lookup table */ + data_t *index_of; /* Antilog lookup table */ + data_t *genpoly; /* Generator polynomial */ + int nroots; /* Number of generator roots = number of parity symbols */ + int fcr; /* First consecutive root, index form */ + int prim; /* Primitive element, index form */ + int iprim; /* prim-th root of 1, index form */ + int pad; /* Padding bytes in shortened block */ +}; + +static inline int modnn(struct rs *rs,int x){ + while (x >= rs->nn) { + x -= rs->nn; + x = (x >> rs->mm) + (x & rs->nn); + } + return x; +} @@ -0,0 +1,198 @@ +.TH REED-SOLOMON 3 +.SH NAME +init_rs_int, encode_rs_int, decode_rs_int, free_rs_int, +init_rs_char, encode_rs_char, decode_rs_char, free_rs_char, +encode_rs_8, decode_rs_8, encode_rs_ccsds, decode_rs_ccsds +\- Reed-Solomon encoding/decoding +.SH SYNOPSIS +.nf +.ft B +#include "fec.h" + +void *init_rs_int(int symsize,int gfpoly,int fcr,int prim, + int nroots,int pad); + +void encode_rs_int(void *rs,int *data,int *parity); + +int decode_rs_int(void *rs,int *data,int *eras_pos,int no_eras); + +void free_rs_int(void *rs); + + +void *init_rs_char(int symsize,int gfpoly,int fcr,int prim, + int nroots,int pad); + +void encode_rs_char(void *rs,unsigned char *data, + unsigned char *parity); + +int decode_rs_char(void *rs,unsigned char *data,int *eras_pos, + int no_eras); + +void free_rs_char(void *rs); + + +void encode_rs_8(unsigned char *data,unsigned char *parity, + int pad); + +int decode_rs_8(unsigned char *data,int *eras_pos,int no_eras, + int pad); + + +void encode_rs_ccsds(unsigned char 
*data,unsigned char *parity, + int pad); + +int decode_rs_ccsds(unsigned char *data,int *eras_pos,int no_eras, + int pad); + +unsigned char Taltab[256]; +unsigned char Tal1tab[256]; + +.fi + +.SH DESCRIPTION +These functions implement Reed-Solomon error control encoding and +decoding. For optimal performance in a variety of applications, three +sets of functions are supplied. To access these functions, add "-lfec" +to your linker command line. + +The functions with names ending in \fB_int\fR handle data in integer arrays, +permitting arbitrarily large codewords limited only by machine +resources. + +The functions with names ending in \fB_char\fR take unsigned char arrays and can +handle codes with symbols of 8 bits or less (i.e., with codewords of +255 symbols or less). + +\fBencode_rs_8\fR and \fBdecode_rs_8\fR implement a specific +(255,223) code with 8-bit symbols specified by the CCSDS: +a field generator of 1 + X + X^2 + X^7 + X^8 and a code +generator with first consecutive root = 112 and a primitive element of +11. These functions use the conventional +polynomial form, \fInot\fR the dual-basis specified in +the CCSDS standard, to represent symbols. This code may be +shortened by giving a non-zero \fBpad\fR value to produce a +(255-\fBpad\fR,223-\fBpad\fR) code. The padding will consist of the +specified number of zeroes at the front of the full codeword. + +For full CCSDS compatibility, \fBencode_rs_ccsds\fR and +\fBdecode_rs_ccsds\fR are provided. These functions use two lookup +tables, \fBTaltab\fR to convert from conventional to dual-basis, and +\fBTal1tab\fR to perform the inverse mapping from dual-basis to +conventional form, before and after calls to \fBencode_rs_8\fR +and \fBdecode_rs_8\fR. + +The \fB_8\fR and \fB_ccsds\fR functions do not require initialization. + +To use the general purpose RS encoder or decoder (i.e., +the \fB_char\fR or \fB_int\fR versions), the user must first +call \fBinit_rs_int\fR or \fBinit_rs_char\fR as appropriate. 
The +arguments are as follows: + +\fBsymsize\fR gives the symbol size in bits, up to 8 for \fBinit_rs_char\fR +or 32 for \fBinit_rs_int\fR on a machine with 32-bit ints (though such a +huge code would exhaust memory limits on a 32-bit machine). The resulting +Reed-Solomon code word will have 2^\fBsymsize\fR - 1 symbols, +each containing \fBsymsize\fR bits. The codeword may be shortened with the +\fBpad\fR parameter described below. + +\fBgfpoly\fR gives the extended Galois field generator polynomial coefficients, +with the 0th coefficient in the low order bit. The polynomial +\fImust\fR be primitive; if not, the call will fail and NULL will be +returned. + +\fBfcr\fR gives, in index form, the first consecutive root of the +Reed Solomon code generator polynomial. + +\fBprim\fR gives, in index form, the primitive element in the Galois field +used to generate the Reed Solomon code generator polynomial. + +\fBnroots\fR gives the number of roots in the Reed Solomon code +generator polynomial. This equals the number of parity symbols +per code block. + +\fBpad\fR gives the number of leading symbols in the codeword +that are implicitly padded to zero in a shortened code block. + +The resulting Reed-Solomon code has parameters (N,K), where +N = 2^\fBsymsize\fR - \fBpad\fR - 1 and K = N-\fBnroots\fR. + +The \fBencode_rs_char\fR and \fBencode_rs_int\fR functions accept +the pointer returned by \fBinit_rs_char\fR or +\fBinit_rs_int\fR, respectively, to +encode a block of data using the specified code. +The input data array is expected to +contain K symbols (of \fBsymsize\fR bits each, right justified +in each char or int) and \fBnroots\fR parity symbols will be placed +into the \fBparity\fR array, right justified. + +The \fBdecode_\fR functions correct +the errors in a Reed-Solomon codeword of N symbols up to the capability of the code. 
+An optional list of "erased" symbol indices may be given in the \fBeras_pos\fR +array to assist the decoder; this parameter may be NULL if no erasures +are given. The number of erased symbols must be given in the \fBno_eras\fR +parameter. + +To maximize performance, the encode and decode functions perform no +"sanity checking" of their inputs. Decoder failure may result if +\fBeras_pos\fR contains duplicate entries, and both encoder and +decoder will fail if an input symbol exceeds its allowable range. +(Symbol range overflow cannot occur with the \fB_8\fR or +\fB_ccsds\fR functions, +or with the \fB_char\fR functions when 8-bit symbols are specified.) + +The decoder corrects the symbols "in place", returning the number +of symbols in error. If the codeword is uncorrectable, -1 is returned +and the data block is unchanged. If \fBeras_pos\fR is non-null, it is +used to return a list of corrected symbol positions, in no particular +order. This means that the +array passed through this parameter \fImust\fR have at least \fBnroots\fR +elements to prevent a possible buffer overflow. + +The \fBfree_rs_int\fR and \fBfree_rs_char\fR functions free the internal +space allocated by the \fBinit_rs_int\fR and \fBinit_rs_char\fR functions, +respectively. + +The functions \fBencode_rs_8\fR and \fBdecode_rs_8\fR do not have +corresponding \fBinit\fR and \fBfree\fR, nor do they take the +\fBrs\fR argument accepted by the other functions as their parameters +are statically compiled. These functions implement a code +equivalent to calling + +\fBinit_rs_char\fR(8,0x187,112,11,32,pad); + +and using the resulting pointer with \fBencode_rs_char\fR and +\fBdecode_rs_char\fR. + +.SH RETURN VALUES +\fBinit_rs_int\fR and \fBinit_rs_char\fR return a pointer to an internal +control structure that must be passed to the corresponding encode, decode +and free functions. These functions return NULL on error. 
+ +The \fBdecode_\fR functions return a count of corrected +symbols, or -1 if the block was uncorrectable. + +.SH AUTHOR +Phil Karn, KA9Q (karn@ka9q.net), based heavily on earlier work by Robert +Morelos-Zaragoza (robert@spectra.eng.hawaii.edu) and Hari Thirumoorthy +(harit@spectra.eng.hawaii.edu). Extra improvements suggested by Detmar +Welz (dwelz@web.de). + +.SH COPYRIGHT +Copyright 2004, Phil Karn, KA9Q. May be used under the terms of the +GNU Lesser General Public License (LGPL). + +.SH SEE ALSO +CCSDS 101.0-B-6: Telemetry Channel Coding. +http://www.ccsds.org/documents/101x0b6.pdf + +.SH NOTE +CCSDS chose the "dual basis" symbol representation because it +simplified the implementation of a Reed-Solomon encoder in dedicated +hardware. However, this approach holds no advantages for a software +implementation on a general purpose computer, so use of the dual basis +is recommended only if compatibility with the CCSDS standard is needed, +e.g., to decode data from an existing spacecraft using the CCSDS +standard. If you just want a fast (255,223) RS codec without needing +to interoperate with a CCSDS standard code, use \fBencode_rs_8\fR +and \fBdecode_rs_8\fR. 
+ diff --git a/rs_speedtest.c b/rs_speedtest.c new file mode 100644 index 0000000..225f160 --- /dev/null +++ b/rs_speedtest.c @@ -0,0 +1,54 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <sys/time.h> +#include <sys/resource.h> +#include "fec.h" + +int main(){ + unsigned char block[255]; + int i; + void *rs; + struct rusage start,finish; + double extime; + int trials = 10000; + + for(i=0;i<223;i++) + block[i] = 0x01; + + rs = init_rs_char(8,0x187,112,11,32,0); + encode_rs_char(rs,block,&block[223]); + + getrusage(RUSAGE_SELF,&start); + for(i=0;i<trials;i++){ +#if 0 + block[0] ^= 0xff; /* Introduce an error */ + block[2] ^= 0xff; /* Introduce an error */ +#endif + decode_rs_char(rs,block,NULL,0); + } + getrusage(RUSAGE_SELF,&finish); + extime = finish.ru_utime.tv_sec - start.ru_utime.tv_sec + 1e-6*(finish.ru_utime.tv_usec - start.ru_utime.tv_usec); + + printf("Execution time for %d Reed-Solomon blocks using general decoder: %.2f sec\n",trials,extime); + printf("decoder speed: %g bits/s\n",trials*223*8/extime); + + + encode_rs_8(block,&block[223],0); + getrusage(RUSAGE_SELF,&start); + for(i=0;i<trials;i++){ +#if 0 + block[0] ^= 0xff; /* Introduce an error */ + block[2] ^= 0xff; /* Introduce an error */ +#endif + decode_rs_8(block,NULL,0,0); + } + getrusage(RUSAGE_SELF,&finish); + extime = finish.ru_utime.tv_sec - start.ru_utime.tv_sec + 1e-6*(finish.ru_utime.tv_usec - start.ru_utime.tv_usec); + printf("Execution time for %d Reed-Solomon blocks using CCSDS decoder: %.2f sec\n",trials,extime); + printf("decoder speed: %g bits/s\n",trials*223*8/extime); + + exit(0); +} + diff --git a/rstest.c b/rstest.c new file mode 100644 index 0000000..539b40a --- /dev/null +++ b/rstest.c @@ -0,0 +1,296 @@ +/* Test the Reed-Solomon codecs + * for various block sizes and with random data and random error patterns + * + * Copyright 2002 Phil Karn, KA9Q + * May be used under the terms of the GNU Lesser General Public License (LGPL) + */ + 
+#include <stdio.h> +#include <stdlib.h> +#include <memory.h> +#include <time.h> +#include "fec.h" + + +struct etab { + int symsize; + int genpoly; + int fcs; + int prim; + int nroots; + int ntrials; +} Tab[] = { + {2, 0x7, 1, 1, 1, 10 }, + {3, 0xb, 1, 1, 2, 10 }, + {4, 0x13, 1, 1, 4, 10 }, + {5, 0x25, 1, 1, 6, 10 }, + {6, 0x43, 1, 1, 8, 10 }, + {7, 0x89, 1, 1, 10, 10 }, + {8, 0x11d, 1, 1, 32, 10 }, + {8, 0x187, 112,11, 32, 10 }, /* Duplicates CCSDS codec */ + {9, 0x211, 1, 1, 32, 10 }, + {10,0x409, 1, 1, 32, 10 }, + {11,0x805, 1, 1, 32, 10 }, + {12,0x1053, 1, 1, 32, 5 }, + {13,0x201b, 1, 1, 32, 2 }, + {14,0x4443, 1, 1, 32, 1 }, + {15,0x8003, 1, 1, 32, 1 }, + {16,0x1100b, 1, 1, 32, 1 }, + {0, 0, 0, 0, 0}, +}; + +int exercise_char(struct etab *e); +int exercise_int(struct etab *e); +int exercise_8(void); + +int main(){ + int i; + + srandom(time(NULL)); + + printf("Testing fixed CCSDS encoder...\n"); + exercise_8(); + for(i=0;Tab[i].symsize != 0;i++){ + int nn,kk; + + nn = (1<<Tab[i].symsize) - 1; + kk = nn - Tab[i].nroots; + printf("Testing (%d,%d) code...\n",nn,kk); + if(Tab[i].symsize <= 8) + exercise_char(&Tab[i]); + else + exercise_int(&Tab[i]); + } + exit(0); +} + +int exercise_8(void){ + int nn = 255; + unsigned char block[nn],tblock[nn]; + int errlocs[nn],derrlocs[nn]; + int i; + int errors; + int derrors,kk; + int errval,errloc; + int erasures; + int decoder_errors = 0; + + /* Compute code parameters */ + kk = 223; + + + /* Test up to the error correction capacity of the code */ + for(errors=0;errors<=(nn-kk)/2;errors++){ + + /* Load block with random data and encode */ + for(i=0;i<kk;i++) + block[i] = random() & nn; + memcpy(tblock,block,sizeof(block)); + encode_rs_8(block,&block[kk],0); + + /* Make temp copy, seed with errors */ + memcpy(tblock,block,sizeof(block)); + memset(errlocs,0,sizeof(errlocs)); + memset(derrlocs,0,sizeof(derrlocs)); + erasures=0; + for(i=0;i<errors;i++){ + do { + errval = random() & nn; + } while(errval == 0); /* Error value must 
be nonzero */ + + do { + errloc = random() % nn; + } while(errlocs[errloc] != 0); /* Must not choose the same location twice */ + + errlocs[errloc] = 1; + +#if FLAG_ERASURE + if(random() & 1) /* 50-50 chance */ + derrlocs[erasures++] = errloc; +#endif + tblock[errloc] ^= errval; + } + + /* Decode the errored block */ + derrors = decode_rs_8(tblock,derrlocs,erasures,0); + + if(derrors != errors){ + printf("(%d,%d) decoder says %d errors, true number is %d\n",nn,kk,derrors,errors); + decoder_errors++; + } + for(i=0;i<derrors;i++){ + if(errlocs[derrlocs[i]] == 0){ + printf("(%d,%d) decoder indicates error in location %d without error\n",nn,kk,derrlocs[i]); + decoder_errors++; + } + } + if(memcmp(tblock,block,sizeof(tblock)) != 0){ + printf("(%d,%d) decoder uncorrected errors! output ^ input:",nn,kk); + decoder_errors++; + for(i=0;i<nn;i++) + printf(" %02x",tblock[i] ^ block[i]); + printf("\n"); + } + } + return decoder_errors; +} + + +int exercise_char(struct etab *e){ + int nn = (1<<e->symsize) - 1; + unsigned char block[nn],tblock[nn]; + int errlocs[nn],derrlocs[nn]; + int i; + int errors; + int derrors,kk; + int errval,errloc; + int erasures; + int decoder_errors = 0; + void *rs; + + if(e->symsize > 8) + return -1; + + /* Compute code parameters */ + kk = nn - e->nroots; + + rs = init_rs_char(e->symsize,e->genpoly,e->fcs,e->prim,e->nroots,0); + if(rs == NULL){ + printf("init_rs_char failed!\n"); + return -1; + } + /* Test up to the error correction capacity of the code */ + for(errors=0;errors <= e->nroots/2;errors++){ + + /* Load block with random data and encode */ + for(i=0;i<kk;i++) + block[i] = random() & nn; + memcpy(tblock,block,sizeof(block)); + encode_rs_char(rs,block,&block[kk]); + + /* Make temp copy, seed with errors */ + memcpy(tblock,block,sizeof(block)); + memset(errlocs,0,sizeof(errlocs)); + memset(derrlocs,0,sizeof(derrlocs)); + erasures=0; + for(i=0;i<errors;i++){ + do { + errval = random() & nn; + } while(errval == 0); /* Error value must be 
nonzero */ + + do { + errloc = random() % nn; + } while(errlocs[errloc] != 0); /* Must not choose the same location twice */ + + errlocs[errloc] = 1; + +#if FLAG_ERASURE + if(random() & 1) /* 50-50 chance */ + derrlocs[erasures++] = errloc; +#endif + tblock[errloc] ^= errval; + } + + /* Decode the errored block */ + derrors = decode_rs_char(rs,tblock,derrlocs,erasures); + + if(derrors != errors){ + printf("(%d,%d) decoder says %d errors, true number is %d\n",nn,kk,derrors,errors); + decoder_errors++; + } + for(i=0;i<derrors;i++){ + if(errlocs[derrlocs[i]] == 0){ + printf("(%d,%d) decoder indicates error in location %d without error\n",nn,kk,derrlocs[i]); + decoder_errors++; + } + } + if(memcmp(tblock,block,sizeof(tblock)) != 0){ + printf("(%d,%d) decoder uncorrected errors! output ^ input:",nn,kk); + decoder_errors++; + for(i=0;i<nn;i++) + printf(" %02x",tblock[i] ^ block[i]); + printf("\n"); + } + } + + free_rs_char(rs); + return 0; +} + +int exercise_int(struct etab *e){ + int nn = (1<<e->symsize) - 1; + int block[nn],tblock[nn]; + int errlocs[nn],derrlocs[nn]; + int i; + int errors; + int derrors,kk; + int errval,errloc; + int erasures; + int decoder_errors = 0; + void *rs; + + /* Compute code parameters */ + kk = nn - e->nroots; + + rs = init_rs_int(e->symsize,e->genpoly,e->fcs,e->prim,e->nroots,0); + if(rs == NULL){ + printf("init_rs_int failed!\n"); + return -1; + } + /* Test up to the error correction capacity of the code */ + for(errors=0;errors <= e->nroots/2;errors++){ + + /* Load block with random data and encode */ + for(i=0;i<kk;i++) + block[i] = random() & nn; + memcpy(tblock,block,sizeof(block)); + encode_rs_int(rs,block,&block[kk]); + + /* Make temp copy, seed with errors */ + memcpy(tblock,block,sizeof(block)); + memset(errlocs,0,sizeof(errlocs)); + memset(derrlocs,0,sizeof(derrlocs)); + erasures=0; + for(i=0;i<errors;i++){ + do { + errval = random() & nn; + } while(errval == 0); /* Error value must be nonzero */ + + do { + errloc = random() % nn; 
+ } while(errlocs[errloc] != 0); /* Must not choose the same location twice */ + + errlocs[errloc] = 1; + +#if FLAG_ERASURE + if(random() & 1) /* 50-50 chance */ + derrlocs[erasures++] = errloc; +#endif + tblock[errloc] ^= errval; + } + + /* Decode the errored block */ + derrors = decode_rs_int(rs,tblock,derrlocs,erasures); + + if(derrors != errors){ + printf("(%d,%d) decoder says %d errors, true number is %d\n",nn,kk,derrors,errors); + decoder_errors++; + } + for(i=0;i<derrors;i++){ + if(errlocs[derrlocs[i]] == 0){ + printf("(%d,%d) decoder indicates error in location %d without error\n",nn,kk,derrlocs[i]); + decoder_errors++; + } + } + if(memcmp(tblock,block,sizeof(tblock)) != 0){ + printf("(%d,%d) decoder uncorrected errors! output ^ input:",nn,kk); + decoder_errors++; + for(i=0;i<nn;i++) + printf(" %02x",tblock[i] ^ block[i]); + printf("\n"); + } + } + + free_rs_int(rs); + return 0; +} @@ -0,0 +1,43 @@ +#include <math.h> +#include <stdlib.h> +#include "fec.h" + +#define MAX_RANDOM 0x7fffffff + +/* Generate gaussian random double with specified mean and std_dev */ +double normal_rand(double mean, double std_dev) +{ + double fac,rsq,v1,v2; + static double gset; + static int iset; + + if(iset){ + /* Already got one */ + iset = 0; + return mean + std_dev*gset; + } + /* Generate two evenly distributed numbers between -1 and +1 + * that are inside the unit circle + */ + do { + v1 = 2.0 * (double)random() / MAX_RANDOM - 1; + v2 = 2.0 * (double)random() / MAX_RANDOM - 1; + rsq = v1*v1 + v2*v2; + } while(rsq >= 1.0 || rsq == 0.0); + fac = sqrt(-2.0*log(rsq)/rsq); + gset = v1*fac; + iset++; + return mean + std_dev*v2*fac; +} + +unsigned char addnoise(int sym,double amp,double gain,double offset,int clip){ + int sample; + + sample = offset + gain*normal_rand(sym?amp:-amp,1.0); + /* Clip to 8-bit offset range */ + if(sample < 0) + sample = 0; + else if(sample > clip) + sample = clip; + return sample; +} diff --git a/simd-viterbi.3 b/simd-viterbi.3 new file mode 100644 
index 0000000..4c67593 --- /dev/null +++ b/simd-viterbi.3 @@ -0,0 +1,247 @@ +.TH SIMD-VITERBI 3 +.SH NAME +create_viterbi27, set_viterbi27_polynomial, init_viterbi27, update_viterbi27_blk, +chainback_viterbi27, delete_viterbi27, +create_viterbi29, set_viterbi29_polynomial, init_viterbi29, update_viterbi29_blk, +chainback_viterbi29, delete_viterbi29, +create_viterbi39, set_viterbi39_polynomial, init_viterbi39, update_viterbi39_blk, +chainback_viterbi39, delete_viterbi39, +create_viterbi615, set_viterbi615_polynomial, init_viterbi615, update_viterbi615_blk, +chainback_viterbi615, delete_viterbi615 \- IA32 SIMD-assisted Viterbi decoders +.SH SYNOPSIS +.nf +.ft B +#include "fec.h" +void *create_viterbi27(int blocklen); +void set_viterbi27_polynomial(int polys[2]); +int init_viterbi27(void *vp,int starting_state); +int update_viterbi27_blk(void *vp,unsigned char syms[],int nbits); +int chainback_viterbi27(void *vp, unsigned char *data,unsigned int nbits,unsigned int endstate); +void delete_viterbi27(void *vp); +.fi +.sp +.nf +.ft B +void *create_viterbi29(int blocklen); +void set_viterbi29_polynomial(int polys[2]); +int init_viterbi29(void *vp,int starting_state); +int update_viterbi29_blk(void *vp,unsigned char syms[],int nbits); +int chainback_viterbi29(void *vp, unsigned char *data,unsigned int nbits,unsigned int endstate); +void delete_viterbi29(void *vp); +.fi +.sp +.nf +.ft B +void *create_viterbi39(int blocklen); +void set_viterbi39_polynomial(int polys[3]); +int init_viterbi39(void *vp,int starting_state); +int update_viterbi39_blk(void *vp,unsigned char syms[],int nbits); +int chainback_viterbi39(void *vp, unsigned char *data,unsigned int nbits,unsigned int endstate); +void delete_viterbi39(void *vp); +.fi +.sp +.nf +.ft B +void *create_viterbi615(int blocklen); +void set_viterbi615_polynomial(int polys[6]); +int init_viterbi615(void *vp,int starting_state); +int update_viterbi615_blk(void *vp,unsigned char syms[],int nbits); +int chainback_viterbi615(void 
*vp, unsigned char *data,unsigned int nbits,unsigned int endstate); +void delete_viterbi615(void *vp); +.fi +.SH DESCRIPTION +These functions implement high performance Viterbi decoders for four +convolutional codes: a rate 1/2 constraint length 7 (k=7) code +("viterbi27"), a rate 1/2 k=9 code ("viterbi29"), +a rate 1/3 k=9 code ("viterbi39") and a rate 1/6 k=15 code ("viterbi615"). +The decoders use the Intel IA32 or PowerPC SIMD instruction sets, if available, to improve +decoding speed. + +On the IA32 there are three different SIMD instruction sets. The first +and most common is MMX, introduced on later Intel Pentiums and then on +the Intel Pentium II and most Intel clones (AMD K6, Transmeta Crusoe, +etc). SSE was introduced on the Pentium III and later implemented in +the AMD Athlon 4 (AMD calls it "3D Now! Professional"). Most +recently, SSE2 was introduced in the Intel Pentium 4, and has been +adopted by more recent AMD CPUs. The presence of SSE2 implies the +existence of SSE, which in turn implies MMX. + +Altivec is the PowerPC SIMD instruction set. It is roughly comparable +to SSE2. Altivec was introduced to the general public in the Apple +Macintosh G4; it is also present in the G5. Altivec is actually a +Motorola trademark; Apple calls it "Velocity Engine" and IBM calls it +"VMX". All refer to the same thing. + +When built for the IA32 or PPC architectures, the functions +automatically use the most powerful SIMD instruction set available. If +no SIMD instructions are available, or if the library is built for a +non-IA32, non-PPC machine, a portable C version is executed +instead. + +.SH USAGE +Four versions of each function are provided, one for each code. +In the following discussion, change "viterbi" to "viterbi27", "viterbi29", "viterbi39" +or "viterbi615" as desired. + +Before Viterbi decoding can begin, an instance must first be created with +\fBcreate_viterbi()\fR. 
This function creates and returns a pointer to +an internal control structure +containing the path metrics and the branch +decisions. \fBcreate_viterbi()\fR takes one argument that gives the +length of the data block in bits. You \fImust not\fR attempt to +decode a block longer than the length given to \fBcreate_viterbi()\fR. + +Before decoding a new frame, +\fBinit_viterbi()\fR must be called to reset the decoder state. +It accepts the instance pointer returned by +\fBcreate_viterbi()\fR and the initial starting state of the +convolutional encoder (usually 0). If the initial starting state is unknown or +incorrect, the decoder will still function but the decoded data may be +incorrect at the start of the block. + +Blocks of received symbols are processed with calls to +\fBupdate_viterbi_blk()\fR. The \fBnbits\fR parameter specifies the +number of \fIdata bits\fR (not channel symbols) represented by the +\fBsyms\fR buffer. (For rate 1/2 codes, the number of symbols in +\fBsyms\fR is twice \fInbits\fR, and so on.) +Each symbol is expected to range +from 0 through 255, with 0 corresponding to a "strong 0" and 255 +corresponding to a "strong 1". The caller is responsible for +determining the proper pairing of input symbols (commonly known as +decoder symbol phasing). + +At the end of the block, the data is recovered with a call to +\fBchainback_viterbi()\fR. The arguments are the pointer to the +decoder instance, a pointer to a user-supplied buffer into which the +decoded data is to be written, the number of data bits (not bytes) +that are to be decoded, and the terminal state of the convolutional +encoder at the end of the frame (usually 0). If the terminal state is +incorrect or unknown, the decoded data bits at the end of the frame +may be unreliable. The decoded data is written in big-endian order, +i.e., the first bit in the frame is written into the high order bit of +the first byte in the buffer. 
If the frame is not an integral number +of bytes long, the low order bits of the last byte in the frame will +be unused. + +Note that the decoders assume the use of a tail, i.e., the encoding +and transmission of a sufficient number of padding bits beyond the end +of the user data to force the convolutional encoder into the known +terminal state given to \fBchainback_viterbi()\fR. The tail is +always one bit less than the constraint length of the code, so the k=7 +code uses 6 tail bits (12 tail symbols), the k=9 code uses 8 tail bits +(16 tail symbols) and the k=15 code uses 14 tail bits (84 tail +symbols). + +The tail bits are not included in the length arguments to +\fBcreate_viterbi()\fR and \fBchainback_viterbi()\fR. For example, if +the block contains 1000 user bits, then this would be the length +parameter given to \fBcreate_viterbi27()\fR and +\fBchainback_viterbi27()\fR, and \fBupdate_viterbi27_blk()\fR would be called +with a total of 2012 symbols - the last 12 encoded symbols +representing the tail bits. + +After the call to \fBchainback_viterbi()\fR, the decoder may be reset +with a call to \fBinit_viterbi()\fR and another block can be decoded. +Alternatively, \fBdelete_viterbi()\fR can be called to free all resources +used by the Viterbi decoder. + +The \fBset_viterbi_polynomial()\fR function allows use of other than the default +code generator polynomials. Although only one set of polynomials is generally +used with each code, there are different conventions as to their order and +symbol polarity, and this function simplifies their use. + +The default polynomials for the viterbi27 routines +are those of the NASA-JPL convention \fIwithout\fR symbol inversion. +The NASA-JPL convention normally inverts the first symbol. +The CCSDS/NASA-GSFC convention swaps the two symbols and inverts the second. 
+.sp +To set the NASA-JPL convention with symbol inversion: +.sp +.nf +.ft B +int polys[2] = { -V27POLYA,V27POLYB }; +set_viterbi27_polynomial(polys); +.ft R +.fi +.sp +and to set the CCSDS convention with symbol inversion: +.sp +.nf +.ft B +int polys[2] = { V27POLYB,-V27POLYA }; +set_viterbi27_polynomial(polys); +.ft R +.fi +.sp +The default polynomials for the viterbi615 routines +are those used by the Cassini spacecraft \fIwithout\fR +symbol inversion. Mars Pathfinder (MPF) and STEREO +swap the third and fourth polynomials. +Both conventions invert the +first, third and fifth symbols. Refer to fec.h for the polynomial constant definitions. +.sp +To set the Cassini convention with symbol inversion, do the following: + +.nf +.ft B +int polys[6] = { -V615POLYA,V615POLYB,-V615POLYC,V615POLYD,-V615POLYE,V615POLYF }; +set_viterbi615_polynomial(polys); +.ft R +.fi +.sp +and to set the MPF/STEREO convention with symbol inversion: +.sp +.nf +.ft B +int polys[6] = { -V615POLYA,V615POLYB,-V615POLYD,V615POLYC,-V615POLYE,V615POLYF }; +set_viterbi615_polynomial(polys); +.ft R +.fi + +For performance reasons, calling this function changes the code +generator polynomials for \fIall\fR instances of corresponding Viterbi decoder, +including those already created. + +.SH ERROR PERFORMANCE +These decoders have all been extensively tested and found to provide +performance consistent with that expected for soft-decision Viterbi +decoding with 8-bit symbols. + +Due to internal differences, the implementations +vary slightly in error performance. In +general, the portable C versions exhibit the best error performance +because they use full-sized branch metrics, and the MMX versions +exhibit the worst because they use 8-bit branch metrics with modulo +comparisons. The SSE, SSE2 and Altivec implementations of the r=1/2 k=7 and +r=1/2 k=9 codes use unsigned +8-bit branch metrics, and are almost as good as the C versions. 
The +r=1/3 k=9 and r=1/6 k=15 codes are implemented with 16-bit path metrics in all SIMD +versions. + +.SH DIRECT ACCESS TO SPECIFIC FUNCTION VERSIONS +Calling the functions listed above automatically calls the appropriate +version of the function depending on the CPU type and available SIMD +instructions. A particular version can also be called directly by +appending the appropriate suffix to the function name. The available +suffixes are "_mmx", "_sse", "_sse2", "_av" and "_port", for the MMX, +SSE, SSE2, Altivec and portable versions, respectively. For example, +the SSE2 version of the update_viterbi27_blk() function can be invoked +as update_viterbi27_blk_sse2(). + +Naturally, the _av functions are only available on the PowerPC and the +_mmx, _sse and _sse2 versions are only available on IA-32. Calling +a SIMD-enabled function on a CPU that doesn't support the appropriate +set of instructions will result in an illegal instruction exception. + +.SH RETURN VALUES +\fBcreate_viterbi\fR returns a pointer to the structure containing +the decoder state. +The other functions return -1 on error, 0 otherwise. + +.SH AUTHOR & COPYRIGHT +Phil Karn, KA9Q (karn@ka9q.net) + +.SH LICENSE +This software may be used under the terms of the GNU Lesser General Public License (LGPL). 
+ + diff --git a/sqtest.c b/sqtest.c new file mode 100644 index 0000000..b2abb09 --- /dev/null +++ b/sqtest.c @@ -0,0 +1,42 @@ +/* Verify correctness of the sum-of-square routines */ +#include <stdio.h> +#include <stdlib.h> +#include <time.h> + +/* These values should trigger leading/trailing array fragment handling */ +#define NSAMP 200002 +#define OFFSET 1 + +long long sumsq_wq(signed short *in,int cnt); +long long sumsq_wq_ref(signed short *in,int cnt); + +int main(){ + int i; + long long result,rresult; + signed short samples[NSAMP]; + + srandom(time(NULL)); + + for(i=0;i<NSAMP;i++) + samples[i] = random() & 0xffff; + + rresult = sumsq_wq_ref(&samples[OFFSET],NSAMP-OFFSET); /* expected value from the portable reference below */ + result = sumsq_wq(&samples[OFFSET],NSAMP-OFFSET); /* value from the routine under test */ + if(result == rresult){ + printf("OK\n"); + } else { + printf("sum mismatch: %lld != %lld\n",result,rresult); + } + exit(0); +} + +long long sumsq_wq_ref(signed short *in,int cnt){ + long long sum = 0; + int i; + + for(i=0;i<cnt;i++){ + sum += (long)in[i] * in[i]; + } + return sum; +} + diff --git a/sse2bfly27.s b/sse2bfly27.s new file mode 100644 index 0000000..27422a2 --- /dev/null +++ b/sse2bfly27.s @@ -0,0 +1,202 @@ +/* Intel SIMD (SSE2) implementations of Viterbi ACS butterflies + for 64-state (k=7) convolutional code + Copyright 2003 Phil Karn, KA9Q + This code may be used under the terms of the GNU Lesser General Public License (LGPL) + + void update_viterbi27_blk_sse2(struct v27 *vp,unsigned char syms[],int nbits) ; +*/ + # SSE2 (128-bit integer SIMD) version + # Requires Pentium 4 or better + + # These are offsets into struct v27, defined in viterbi27.h + .set DP,128 + .set OLDMETRICS,132 + .set NEWMETRICS,136 + .text + .global update_viterbi27_blk_sse2,Branchtab27_sse2 + .type update_viterbi27_blk_sse2,@function + .align 16 + +update_viterbi27_blk_sse2: + pushl %ebp + movl %esp,%ebp + pushl %esi + pushl %edi + pushl %edx + pushl %ebx + + movl 8(%ebp),%edx # edx = vp + testl %edx,%edx + jnz 0f + movl $-1,%eax # NULL vp: return -1 (immediate; without $ this reads memory at address -1) + jmp err +0: movl 
OLDMETRICS(%edx),%esi # esi -> old metrics + movl NEWMETRICS(%edx),%edi # edi -> new metrics + movl DP(%edx),%edx # edx -> decisions + +1: movl 16(%ebp),%eax # eax = nbits + decl %eax + jl 2f # passed zero, we're done + movl %eax,16(%ebp) + + xorl %eax,%eax + movl 12(%ebp),%ebx # ebx = syms + movb (%ebx),%al + movd %eax,%xmm6 # xmm6[0] = first symbol + movb 1(%ebx),%al + movd %eax,%xmm5 # xmm5[0] = second symbol + addl $2,%ebx + movl %ebx,12(%ebp) + + punpcklbw %xmm6,%xmm6 # xmm6[1] = xmm6[0] + punpcklbw %xmm5,%xmm5 + pshuflw $0,%xmm6,%xmm6 # copy low word to low 3 + pshuflw $0,%xmm5,%xmm5 + punpcklqdq %xmm6,%xmm6 # propagate to all 16 + punpcklqdq %xmm5,%xmm5 + # xmm6 now contains first symbol in each byte, xmm5 the second + + movdqa thirtyones,%xmm7 + + # each invocation of this macro does 16 butterflies in parallel + .MACRO butterfly GROUP + # compute branch metrics + movdqa Branchtab27_sse2+(16*\GROUP),%xmm4 + movdqa Branchtab27_sse2+32+(16*\GROUP),%xmm3 + pxor %xmm6,%xmm4 + pxor %xmm5,%xmm3 + + # compute 5-bit branch metric in xmm4 by adding the individual symbol metrics + # This is okay for this + # code because the worst-case metric spread (at high Eb/No) is only 120, + # well within the range of our unsigned 8-bit path metrics, and even within + # the range of signed 8-bit path metrics + pavgb %xmm3,%xmm4 + psrlw $3,%xmm4 + + pand %xmm7,%xmm4 + + movdqa (16*\GROUP)(%esi),%xmm0 # Incoming path metric, high bit = 0 + movdqa ((16*\GROUP)+32)(%esi),%xmm3 # Incoming path metric, high bit = 1 + movdqa %xmm0,%xmm2 + movdqa %xmm3,%xmm1 + paddusb %xmm4,%xmm0 # note use of saturating arithmetic + paddusb %xmm4,%xmm3 # this shouldn't be necessary, but why not? 
+ + # negate branch metrics + pxor %xmm7,%xmm4 + paddusb %xmm4,%xmm1 + paddusb %xmm4,%xmm2 + + # Find survivors, leave in mm0,2 + pminub %xmm1,%xmm0 + pminub %xmm3,%xmm2 + # get decisions, leave in mm1,3 + pcmpeqb %xmm0,%xmm1 + pcmpeqb %xmm2,%xmm3 + + # interleave and store new branch metrics in mm0,2 + movdqa %xmm0,%xmm4 + punpckhbw %xmm2,%xmm0 # interleave second 16 new metrics + punpcklbw %xmm2,%xmm4 # interleave first 16 new metrics + movdqa %xmm0,(32*\GROUP+16)(%edi) + movdqa %xmm4,(32*\GROUP)(%edi) + + # interleave decisions & store + movdqa %xmm1,%xmm4 + punpckhbw %xmm3,%xmm1 + punpcklbw %xmm3,%xmm4 + # work around bug in gas due to Intel doc error + .byte 0x66,0x0f,0xd7,0xd9 # pmovmskb %xmm1,%ebx + shll $16,%ebx + .byte 0x66,0x0f,0xd7,0xc4 # pmovmskb %xmm4,%eax + orl %eax,%ebx + movl %ebx,(4*\GROUP)(%edx) + .endm + + # invoke macro 2 times for a total of 32 butterflies + butterfly GROUP=0 + butterfly GROUP=1 + + addl $8,%edx # bump decision pointer + + # See if we have to normalize. This requires an explanation. We don't want + # our path metrics to exceed 255 on the *next* iteration. Since the + # largest branch metric is 30, that means we don't want any to exceed 225 + # on *this* iteration. Rather than look them all, we just pick an arbitrary one + # (the first) and see if it exceeds 225-120=105, where 120 is the experimentally- + # determined worst-case metric spread for this code and branch metrics in the range 0-30. + + # This is extremely conservative, and empirical testing at a variety of Eb/Nos might + # show that a higher threshold could be used without affecting BER performance + movl (%edi),%eax # extract first output metric + andl $255,%eax + cmp $105,%eax + jle done # No, no need to normalize + + # Normalize by finding smallest metric and subtracting it + # from all metrics. We can't just pick an arbitrary small constant because + # the minimum metric might be zero! 
+ movdqa (%edi),%xmm0 + movdqa %xmm0,%xmm4 + movdqa 16(%edi),%xmm1 + pminub %xmm1,%xmm4 + movdqa 32(%edi),%xmm2 + pminub %xmm2,%xmm4 + movdqa 48(%edi),%xmm3 + pminub %xmm3,%xmm4 + + # crunch down to single lowest metric + movdqa %xmm4,%xmm5 + psrldq $8,%xmm5 # the count to psrldq is bytes, not bits! + pminub %xmm5,%xmm4 + movdqa %xmm4,%xmm5 + psrlq $32,%xmm5 + pminub %xmm5,%xmm4 + movdqa %xmm4,%xmm5 + psrlq $16,%xmm5 + pminub %xmm5,%xmm4 + movdqa %xmm4,%xmm5 + psrlq $8,%xmm5 + pminub %xmm5,%xmm4 # now in lowest byte of %xmm4 + + punpcklbw %xmm4,%xmm4 # lowest 2 bytes + pshuflw $0,%xmm4,%xmm4 # lowest 8 bytes + punpcklqdq %xmm4,%xmm4 # all 16 bytes + + # xmm4 now contains lowest metric in all 16 bytes + # subtract it from every output metric + psubusb %xmm4,%xmm0 + psubusb %xmm4,%xmm1 + psubusb %xmm4,%xmm2 + psubusb %xmm4,%xmm3 + movdqa %xmm0,(%edi) + movdqa %xmm1,16(%edi) + movdqa %xmm2,32(%edi) + movdqa %xmm3,48(%edi) + +done: + # swap metrics + movl %esi,%eax + movl %edi,%esi + movl %eax,%edi + jmp 1b + +2: movl 8(%ebp),%ebx # ebx = vp + # stash metric pointers + movl %esi,OLDMETRICS(%ebx) + movl %edi,NEWMETRICS(%ebx) + movl %edx,DP(%ebx) # stash incremented value of vp->dp + xorl %eax,%eax +err: popl %ebx + popl %edx + popl %edi + popl %esi + popl %ebp + ret + + .data + .align 16 + +thirtyones: + .byte 31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31 diff --git a/sse2bfly29.s b/sse2bfly29.s new file mode 100644 index 0000000..0fa1742 --- /dev/null +++ b/sse2bfly29.s @@ -0,0 +1,245 @@ +/* Intel SIMD SSE2 implementation of Viterbi ACS butterflies + for 256-state (k=9) convolutional code + Copyright 2004 Phil Karn, KA9Q + This code may be used under the terms of the GNU Lesser General Public License (LGPL) + + void update_viterbi29_blk_sse2(struct v29 *vp,unsigned char *syms,int nbits) ; +*/ + + # SSE2 (128-bit integer SIMD) version + # Requires Pentium 4 or better + # These are offsets into struct v29, defined in viterbi29.h + .set DP,512 + .set OLDMETRICS,516 + 
.set NEWMETRICS,520 + + .text + .global update_viterbi29_blk_sse2,Branchtab29_sse2 + .type update_viterbi29_blk_sse2,@function + .align 16 + +update_viterbi29_blk_sse2: + pushl %ebp + movl %esp,%ebp + pushl %esi + pushl %edi + pushl %edx + pushl %ebx + + movl 8(%ebp),%edx # edx = vp + testl %edx,%edx + jnz 0f + movl $-1,%eax # NULL vp: return -1 (immediate; without $ this reads memory at address -1) + jmp err +0: movl OLDMETRICS(%edx),%esi # esi -> old metrics + movl NEWMETRICS(%edx),%edi # edi -> new metrics + movl DP(%edx),%edx # edx -> decisions + +1: movl 16(%ebp),%eax # eax = nbits + decl %eax + jl 2f # passed zero, we're done + movl %eax,16(%ebp) + + xorl %eax,%eax + movl 12(%ebp),%ebx # ebx = syms + movb (%ebx),%al + movd %eax,%xmm6 # xmm6[0] = first symbol + movb 1(%ebx),%al + movd %eax,%xmm5 # xmm5[0] = second symbol + addl $2,%ebx + movl %ebx,12(%ebp) + + punpcklbw %xmm6,%xmm6 # xmm6[1] = xmm6[0] + punpcklbw %xmm5,%xmm5 + movdqa thirtyones,%xmm7 + pshuflw $0,%xmm6,%xmm6 # copy low word to low 3 + pshuflw $0,%xmm5,%xmm5 + punpcklqdq %xmm6,%xmm6 # propagate to all 16 + punpcklqdq %xmm5,%xmm5 + # xmm6 now contains first symbol in each byte, xmm5 the second + + movdqa thirtyones,%xmm7 + + # each invocation of this macro does 16 butterflies in parallel + .MACRO butterfly GROUP + # compute branch metrics + movdqa Branchtab29_sse2+(16*\GROUP),%xmm4 + movdqa Branchtab29_sse2+128+(16*\GROUP),%xmm3 + pxor %xmm6,%xmm4 + pxor %xmm5,%xmm3 + pavgb %xmm3,%xmm4 + psrlw $3,%xmm4 + + pand %xmm7,%xmm4 # xmm4 contains branch metrics + + movdqa (16*\GROUP)(%esi),%xmm0 # Incoming path metric, high bit = 0 + movdqa ((16*\GROUP)+128)(%esi),%xmm3 # Incoming path metric, high bit = 1 + movdqa %xmm0,%xmm2 + movdqa %xmm3,%xmm1 + paddusb %xmm4,%xmm0 + paddusb %xmm4,%xmm3 + + # invert branch metrics + pxor %xmm7,%xmm4 + + paddusb %xmm4,%xmm1 + paddusb %xmm4,%xmm2 + + # Find survivors, leave in mm0,2 + pminub %xmm1,%xmm0 + pminub %xmm3,%xmm2 + # get decisions, leave in mm1,3 + pcmpeqb %xmm0,%xmm1 + pcmpeqb %xmm2,%xmm3 + + # interleave and store new branch 
metrics in mm0,2 + movdqa %xmm0,%xmm4 + punpckhbw %xmm2,%xmm0 # interleave second 16 new metrics + punpcklbw %xmm2,%xmm4 # interleave first 16 new metrics + movdqa %xmm0,(32*\GROUP+16)(%edi) + movdqa %xmm4,(32*\GROUP)(%edi) + + # interleave decisions & store + movdqa %xmm1,%xmm4 + punpckhbw %xmm3,%xmm1 + punpcklbw %xmm3,%xmm4 + # work around bug in gas due to Intel doc error + .byte 0x66,0x0f,0xd7,0xd9 # pmovmskb %xmm1,%ebx + shll $16,%ebx + .byte 0x66,0x0f,0xd7,0xc4 # pmovmskb %xmm4,%eax + orl %eax,%ebx + movl %ebx,(4*\GROUP)(%edx) + .endm + + # invoke macro 8 times for a total of 128 butterflies + butterfly GROUP=0 + butterfly GROUP=1 + butterfly GROUP=2 + butterfly GROUP=3 + butterfly GROUP=4 + butterfly GROUP=5 + butterfly GROUP=6 + butterfly GROUP=7 + + addl $32,%edx # bump decision pointer + + # see if we have to normalize + movl (%edi),%eax # extract first output metric + andl $255,%eax + cmp $50,%eax # is it greater than 50? + movl $0,%eax + jle done # No, no need to normalize + + # Normalize by finding smallest metric and subtracting it + # from all metrics + movdqa (%edi),%xmm0 + pminub 16(%edi),%xmm0 + pminub 32(%edi),%xmm0 + pminub 48(%edi),%xmm0 + pminub 64(%edi),%xmm0 + pminub 80(%edi),%xmm0 + pminub 96(%edi),%xmm0 + pminub 112(%edi),%xmm0 + pminub 128(%edi),%xmm0 + pminub 144(%edi),%xmm0 + pminub 160(%edi),%xmm0 + pminub 176(%edi),%xmm0 + pminub 192(%edi),%xmm0 + pminub 208(%edi),%xmm0 + pminub 224(%edi),%xmm0 + pminub 240(%edi),%xmm0 + + # crunch down to single lowest metric + movdqa %xmm0,%xmm1 + psrldq $8,%xmm0 # the count to psrldq is bytes, not bits! 
+ pminub %xmm1,%xmm0 + movdqa %xmm0,%xmm1 + psrlq $32,%xmm0 + pminub %xmm1,%xmm0 + movdqa %xmm0,%xmm1 + psrlq $16,%xmm0 + pminub %xmm1,%xmm0 + movdqa %xmm0,%xmm1 + psrlq $8,%xmm0 + pminub %xmm1,%xmm0 + + punpcklbw %xmm0,%xmm0 # lowest 2 bytes + pshuflw $0,%xmm0,%xmm0 # lowest 8 bytes + punpcklqdq %xmm0,%xmm0 # all 16 bytes + + # xmm0 now contains lowest metric in all 16 bytes + # subtract it from every output metric + movdqa (%edi),%xmm1 + psubusb %xmm0,%xmm1 + movdqa %xmm1,(%edi) + movdqa 16(%edi),%xmm1 + psubusb %xmm0,%xmm1 + movdqa %xmm1,16(%edi) + movdqa 32(%edi),%xmm1 + psubusb %xmm0,%xmm1 + movdqa %xmm1,32(%edi) + movdqa 48(%edi),%xmm1 + psubusb %xmm0,%xmm1 + movdqa %xmm1,48(%edi) + movdqa 64(%edi),%xmm1 + psubusb %xmm0,%xmm1 + movdqa %xmm1,64(%edi) + movdqa 80(%edi),%xmm1 + psubusb %xmm0,%xmm1 + movdqa %xmm1,80(%edi) + movdqa 96(%edi),%xmm1 + psubusb %xmm0,%xmm1 + movdqa %xmm1,96(%edi) + movdqa 112(%edi),%xmm1 + psubusb %xmm0,%xmm1 + movdqa %xmm1,112(%edi) + movdqa 128(%edi),%xmm1 + psubusb %xmm0,%xmm1 + movdqa %xmm1,128(%edi) + movdqa 144(%edi),%xmm1 + psubusb %xmm0,%xmm1 + movdqa %xmm1,144(%edi) + movdqa 160(%edi),%xmm1 + psubusb %xmm0,%xmm1 + movdqa %xmm1,160(%edi) + movdqa 176(%edi),%xmm1 + psubusb %xmm0,%xmm1 + movdqa %xmm1,176(%edi) + movdqa 192(%edi),%xmm1 + psubusb %xmm0,%xmm1 + movdqa %xmm1,192(%edi) + movdqa 208(%edi),%xmm1 + psubusb %xmm0,%xmm1 + movdqa %xmm1,208(%edi) + movdqa 224(%edi),%xmm1 + psubusb %xmm0,%xmm1 + movdqa %xmm1,224(%edi) + movdqa 240(%edi),%xmm1 + psubusb %xmm0,%xmm1 + movdqa %xmm1,240(%edi) + +done: + # swap metrics + movl %esi,%eax + movl %edi,%esi + movl %eax,%edi + jmp 1b + +2: movl 8(%ebp),%ebx # ebx = vp + # stash metric pointers + movl %esi,OLDMETRICS(%ebx) + movl %edi,NEWMETRICS(%ebx) + movl %edx,DP(%ebx) # stash incremented value of vp->dp + xorl %eax,%eax +err: popl %ebx + popl %edx + popl %edi + popl %esi + popl %ebp + ret + + .data + .align 16 +thirtyones: + .byte 31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31 + 
diff --git a/ssebfly27.s b/ssebfly27.s
new file mode 100644
index 0000000..7f445da
--- /dev/null
+++ b/ssebfly27.s
@@ -0,0 +1,205 @@
/* Intel SIMD (SSE) implementation of Viterbi ACS butterflies
   for 64-state (k=7) convolutional code
   Copyright 2001 Phil Karn, KA9Q
   This code may be used under the terms of the GNU Lesser General Public License (LGPL)

   int update_viterbi27_blk_sse(struct v27 *vp,unsigned char syms[],int nbits) ;
*/

	# SSE (64-bit integer SIMD) version
	# Requires Pentium III or better

	# These are offsets into struct v27, defined in viterbi27.h
	.set DP,128
	.set OLDMETRICS,132
	.set NEWMETRICS,136
.text
.global update_viterbi27_blk_sse,Branchtab27_sse
	.type update_viterbi27_blk_sse,@function
	.align 16

	# Stack arguments (read relative to %ebp after the prologue):
	#    8(%ebp)  vp     decoder state
	#   12(%ebp)  syms   input symbols, two bytes consumed per bit
	#   16(%ebp)  nbits  number of iterations to run
	#
	# Each iteration runs 32 add-compare-select butterflies, stores
	# 8 bytes of decisions at vp->dp, renormalizes the new metrics
	# when the first one exceeds 150, and swaps the metric buffers.
	# Returns 0 in %eax, or -1 if vp is NULL.
update_viterbi27_blk_sse:
	pushl %ebp
	movl %esp,%ebp
	pushl %esi
	pushl %edi
	pushl %edx
	pushl %ebx

	movl 8(%ebp),%edx	# edx = vp
	testl %edx,%edx
	jnz  0f
	# Immediate operand: without the '$' this would be a load from
	# absolute address -1 instead of the constant -1.
	movl $-1,%eax
	jmp  err
0:	movl OLDMETRICS(%edx),%esi	# esi -> old metrics
	movl NEWMETRICS(%edx),%edi	# edi -> new metrics
	movl DP(%edx),%edx	# edx -> decisions

1:	movl 16(%ebp),%eax	# eax = nbits
	decl %eax
	jl   2f			# passed zero, we're done
	movl %eax,16(%ebp)

	xorl %eax,%eax
	movl 12(%ebp),%ebx	# %ebx = syms
	movb (%ebx),%al
	movd %eax,%mm6		# mm6[0] = first symbol
	movb 1(%ebx),%al
	movd %eax,%mm5		# mm5[0] = second symbol
	addl $2,%ebx
	movl %ebx,12(%ebp)

	punpcklbw %mm6,%mm6	# mm6[1] = mm6[0]
	punpcklbw %mm5,%mm5
	movq thirtyones,%mm7	# 5-bit mask, used by every butterfly

	pshufw $0,%mm6,%mm6	# copy low word to upper 3
	pshufw $0,%mm5,%mm5
	# mm6 now contains first symbol in each byte, mm5 the second

	# each invocation of this macro does 8 butterflies in parallel
	.MACRO butterfly GROUP
	# compute branch metrics
	movq Branchtab27_sse+(8*\GROUP),%mm4
	movq Branchtab27_sse+32+(8*\GROUP),%mm3
	pxor %mm6,%mm4
	pxor %mm5,%mm3
	pavgb %mm3,%mm4			# mm4 contains branch metrics
	psrlw $3,%mm4
	pand %mm7,%mm4

	movq (8*\GROUP)(%esi),%mm0	# Incoming path metric, high bit = 0
	movq ((8*\GROUP)+32)(%esi),%mm3	# Incoming path metric, high bit = 1
	movq %mm0,%mm2
	movq %mm3,%mm1
	paddusb %mm4,%mm0
	paddusb %mm4,%mm3

	# invert branch metrics. This works only because they're 5 bits
	pxor %mm7,%mm4

	paddusb %mm4,%mm1
	paddusb %mm4,%mm2

	# Find survivors, leave in mm0,2
	pminub %mm1,%mm0
	pminub %mm3,%mm2
	# get decisions, leave in mm1,3
	pcmpeqb %mm0,%mm1
	pcmpeqb %mm2,%mm3

	# interleave and store new path metrics from mm0,2
	movq %mm0,%mm4
	punpckhbw %mm2,%mm0	# interleave second 8 new metrics
	punpcklbw %mm2,%mm4	# interleave first 8 new metrics
	movq %mm0,(16*\GROUP+8)(%edi)
	movq %mm4,(16*\GROUP)(%edi)

	# interleave decisions, accumulate into %ebx
	movq %mm1,%mm4
	punpckhbw %mm3,%mm1
	punpcklbw %mm3,%mm4
	# Due to an error in the Intel instruction set ref (the register
	# fields are swapped), gas assembles pmovmskb incorrectly
	# See http://mail.gnu.org/pipermail/bug-gnu-utils/2000-August/002341.html
	.byte 0x0f,0xd7,0xc1	# pmovmskb %mm1,%eax
	shll $((16*\GROUP+8)&31),%eax
	orl %eax,%ebx
	.byte 0x0f,0xd7,0xc4	# pmovmskb %mm4,%eax
	shll $((16*\GROUP)&31),%eax
	orl %eax,%ebx
	.endm

	# invoke macro 4 times for a total of 32 butterflies
	xorl %ebx,%ebx		# clear decisions
	butterfly GROUP=0
	butterfly GROUP=1
	movl %ebx,(%edx)	# stash first 32 decisions
	xorl %ebx,%ebx
	butterfly GROUP=2
	butterfly GROUP=3
	movl %ebx,4(%edx)	# stash second 32 decisions

	addl $8,%edx		# bump decision pointer

	# see if we have to normalize
	movl (%edi),%eax	# extract first output metric
	andl $255,%eax
	cmpl $150,%eax		# is it greater than 150?
	movl $0,%eax		# mov leaves the flags from cmpl intact
	jle done		# No, no need to normalize

	# Normalize by finding smallest metric and subtracting it
	# from all metrics
	movq (%edi),%mm0
	pminub 8(%edi),%mm0
	pminub 16(%edi),%mm0
	pminub 24(%edi),%mm0
	pminub 32(%edi),%mm0
	pminub 40(%edi),%mm0
	pminub 48(%edi),%mm0
	pminub 56(%edi),%mm0
	# mm0 contains 8 smallest metrics
	# crunch down to single lowest metric
	movq %mm0,%mm1
	psrlq $32,%mm0
	pminub %mm1,%mm0
	movq %mm0,%mm1
	psrlq $16,%mm0
	pminub %mm1,%mm0
	movq %mm0,%mm1
	psrlq $8,%mm0
	pminub %mm1,%mm0
	punpcklbw %mm0,%mm0	# expand to all 8 bytes
	pshufw $0,%mm0,%mm0

	# mm0 now contains lowest metric in all 8 bytes
	# subtract it from every output metric
	# Trashes %mm7 (reloaded at the top of the next iteration)
	.macro PSUBUSBM REG,MEM
	movq \MEM,%mm7
	psubusb \REG,%mm7
	movq %mm7,\MEM
	.endm

	PSUBUSBM %mm0,(%edi)
	PSUBUSBM %mm0,8(%edi)
	PSUBUSBM %mm0,16(%edi)
	PSUBUSBM %mm0,24(%edi)
	PSUBUSBM %mm0,32(%edi)
	PSUBUSBM %mm0,40(%edi)
	PSUBUSBM %mm0,48(%edi)
	PSUBUSBM %mm0,56(%edi)

	# eax = the amount subtracted; it is overwritten by the swap at
	# "done" below before anything reads it
	movd %mm0,%eax
	and $0xff,%eax

done:	# swap metrics
	movl %esi,%eax
	movl %edi,%esi
	movl %eax,%edi
	jmp 1b

2:	emms
	movl 8(%ebp),%ebx	# ebx = vp
	# stash metric pointers
	movl %esi,OLDMETRICS(%ebx)
	movl %edi,NEWMETRICS(%ebx)
	movl %edx,DP(%ebx)	# stash incremented value of vp->dp
	xorl %eax,%eax		# return 0
err:	popl %ebx
	popl %edx
	popl %edi
	popl %esi
	popl %ebp

	ret

	.data

	.align 16
thirtyones:
	.byte 31,31,31,31,31,31,31,31



diff --git a/ssebfly29.s b/ssebfly29.s
new file mode 100644
index 0000000..d7d2149
--- /dev/null
+++ b/ssebfly29.s
@@ -0,0 +1,271 @@
/* Intel SIMD SSE implementation of Viterbi ACS butterflies
   for 256-state (k=9) convolutional code
   Copyright 2004 Phil Karn, KA9Q
   This code may be used under the terms of the GNU Lesser General Public License (LGPL)

   void update_viterbi29_blk_sse(struct v29 *vp,unsigned char syms[],int nbits);
*/
	# SSE (64-bit integer SIMD) version
	# Requires Pentium III or better
	#
# These are offsets into struct v29, defined in viterbi29.h
	.set DP,512
	.set OLDMETRICS,516
	.set NEWMETRICS,520
	.text
	.global update_viterbi29_blk_sse,Branchtab29_sse
	.type update_viterbi29_blk_sse,@function
	.align 16

	# Stack arguments (read relative to %ebp after the prologue):
	#    8(%ebp)  vp     decoder state
	#   12(%ebp)  syms   input symbols, two bytes consumed per bit
	#   16(%ebp)  nbits  number of iterations to run
	#
	# Each iteration runs 128 add-compare-select butterflies, stores
	# 32 bytes of decisions at vp->dp, renormalizes the new metrics
	# when the first one exceeds 50, and swaps the metric buffers.
	# Leaves 0 in %eax, or -1 if vp is NULL.
update_viterbi29_blk_sse:
	pushl %ebp
	movl %esp,%ebp
	pushl %esi
	pushl %edi
	pushl %edx
	pushl %ebx

	movl 8(%ebp),%edx	# edx = vp
	testl %edx,%edx
	jnz  0f
	# Immediate operand: without the '$' this would be a load from
	# absolute address -1 instead of the constant -1.
	movl $-1,%eax
	jmp  err
0:	movl OLDMETRICS(%edx),%esi	# esi -> old metrics
	movl NEWMETRICS(%edx),%edi	# edi -> new metrics
	movl DP(%edx),%edx	# edx -> decisions

1:	movl 16(%ebp),%eax	# eax = nbits
	decl %eax
	jl   2f			# passed zero, we're done
	movl %eax,16(%ebp)

	xorl %eax,%eax
	movl 12(%ebp),%ebx	# ebx = syms
	movb (%ebx),%al
	movd %eax,%mm6		# mm6[0] = first symbol
	movb 1(%ebx),%al
	movd %eax,%mm5		# mm5[0] = second symbol
	addl $2,%ebx
	movl %ebx,12(%ebp)

	punpcklbw %mm6,%mm6	# mm6[1] = mm6[0]
	punpcklbw %mm5,%mm5

	movq thirtyones,%mm7	# 5-bit mask, used by every butterfly
	pshufw $0,%mm6,%mm6	# copy low word to upper 3
	pshufw $0,%mm5,%mm5
	# mm6 now contains first symbol in each byte, mm5 the second

	# each invocation of this macro does 8 butterflies in parallel
	.MACRO butterfly GROUP
	# compute branch metrics
	movq Branchtab29_sse+(8*\GROUP),%mm4
	movq Branchtab29_sse+128+(8*\GROUP),%mm3
	pxor %mm6,%mm4
	pxor %mm5,%mm3
	pavgb %mm3,%mm4			# mm4 contains branch metrics
	psrlw $3,%mm4
	pand %mm7,%mm4

	movq (8*\GROUP)(%esi),%mm0	# Incoming path metric, high bit = 0
	movq ((8*\GROUP)+128)(%esi),%mm3	# Incoming path metric, high bit = 1
	movq %mm0,%mm2
	movq %mm3,%mm1
	paddusb %mm4,%mm0
	paddusb %mm4,%mm3

	# invert branch metrics. This works only because they're 5 bits
	pxor %mm7,%mm4

	paddusb %mm4,%mm1
	paddusb %mm4,%mm2

	# Find survivors, leave in mm0,2
	pminub %mm1,%mm0
	pminub %mm3,%mm2
	# get decisions, leave in mm1,3
	pcmpeqb %mm0,%mm1
	pcmpeqb %mm2,%mm3

	# interleave and store new path metrics from mm0,2
	movq %mm0,%mm4
	punpckhbw %mm2,%mm0	# interleave second 8 new metrics
	punpcklbw %mm2,%mm4	# interleave first 8 new metrics
	movq %mm0,(16*\GROUP+8)(%edi)
	movq %mm4,(16*\GROUP)(%edi)

	# interleave decisions, accumulate into %ebx
	movq %mm1,%mm4
	punpckhbw %mm3,%mm1
	punpcklbw %mm3,%mm4
	# Due to an error in the Intel instruction set ref (the register
	# fields are swapped), gas assembles pmovmskb incorrectly
	# See http://mail.gnu.org/pipermail/bug-gnu-utils/2000-August/002341.html
	.byte 0x0f,0xd7,0xc1	# pmovmskb %mm1,%eax
	shll $((16*\GROUP+8)&31),%eax
	orl %eax,%ebx
	.byte 0x0f,0xd7,0xc4	# pmovmskb %mm4,%eax
	shll $((16*\GROUP)&31),%eax
	orl %eax,%ebx
	.endm

	# invoke macro 16 times for a total of 128 butterflies;
	# every pair of invocations fills one 32-bit decision word
	xorl %ebx,%ebx		# clear decisions
	butterfly GROUP=0
	butterfly GROUP=1
	movl %ebx,(%edx)	# decision word 0
	xorl %ebx,%ebx
	butterfly GROUP=2
	butterfly GROUP=3
	movl %ebx,4(%edx)	# decision word 1
	xorl %ebx,%ebx
	butterfly GROUP=4
	butterfly GROUP=5
	movl %ebx,8(%edx)	# decision word 2
	xorl %ebx,%ebx
	butterfly GROUP=6
	butterfly GROUP=7
	movl %ebx,12(%edx)	# decision word 3
	xorl %ebx,%ebx
	butterfly GROUP=8
	butterfly GROUP=9
	movl %ebx,16(%edx)	# decision word 4
	xorl %ebx,%ebx
	butterfly GROUP=10
	butterfly GROUP=11
	movl %ebx,20(%edx)	# decision word 5
	xorl %ebx,%ebx
	butterfly GROUP=12
	butterfly GROUP=13
	movl %ebx,24(%edx)	# decision word 6
	xorl %ebx,%ebx
	butterfly GROUP=14
	butterfly GROUP=15
	movl %ebx,28(%edx)	# decision word 7

	addl $32,%edx		# bump decision pointer

	# see if we have to normalize
	movl (%edi),%eax	# extract first output metric
	andl $255,%eax
	cmp $50,%eax		# is it greater than 50?
	movl $0,%eax		# mov leaves the flags from cmp intact
	jle done		# No, no need to normalize

	# Normalize by finding smallest metric and subtracting it
	# from all metrics
	movq (%edi),%mm0
	pminub 8(%edi),%mm0
	pminub 16(%edi),%mm0
	pminub 24(%edi),%mm0
	pminub 32(%edi),%mm0
	pminub 40(%edi),%mm0
	pminub 48(%edi),%mm0
	pminub 56(%edi),%mm0
	pminub 64(%edi),%mm0
	pminub 72(%edi),%mm0
	pminub 80(%edi),%mm0
	pminub 88(%edi),%mm0
	pminub 96(%edi),%mm0
	pminub 104(%edi),%mm0
	pminub 112(%edi),%mm0
	pminub 120(%edi),%mm0
	pminub 128(%edi),%mm0
	pminub 136(%edi),%mm0
	pminub 144(%edi),%mm0
	pminub 152(%edi),%mm0
	pminub 160(%edi),%mm0
	pminub 168(%edi),%mm0
	pminub 176(%edi),%mm0
	pminub 184(%edi),%mm0
	pminub 192(%edi),%mm0
	pminub 200(%edi),%mm0
	pminub 208(%edi),%mm0
	pminub 216(%edi),%mm0
	pminub 224(%edi),%mm0
	pminub 232(%edi),%mm0
	pminub 240(%edi),%mm0
	pminub 248(%edi),%mm0
	# mm0 contains 8 smallest metrics
	# crunch down to single lowest metric
	movq %mm0,%mm1
	psrlq $32,%mm0
	pminub %mm1,%mm0
	movq %mm0,%mm1
	psrlq $16,%mm0
	pminub %mm1,%mm0
	movq %mm0,%mm1
	psrlq $8,%mm0
	pminub %mm1,%mm0
	# NOTE(review): nothing below reads %mm1 before the next butterfly
	# overwrites it, so this reload looks like a leftover
	movq 8(%edi),%mm1	# reload
	punpcklbw %mm0,%mm0	# expand to all 8 bytes
	pshufw $0,%mm0,%mm0

	# mm0 now contains lowest metric in all 8 bytes
	# subtract it from every output metric
	# Trashes %mm7 (reloaded at the top of the next iteration)
	.macro PSUBUSBM REG,MEM
	movq \MEM,%mm7
	psubusb \REG,%mm7
	movq %mm7,\MEM
	.endm

	PSUBUSBM %mm0,(%edi)
	PSUBUSBM %mm0,8(%edi)
	PSUBUSBM %mm0,16(%edi)
	PSUBUSBM %mm0,24(%edi)
	PSUBUSBM %mm0,32(%edi)
	PSUBUSBM %mm0,40(%edi)
	PSUBUSBM %mm0,48(%edi)
	PSUBUSBM %mm0,56(%edi)
	PSUBUSBM %mm0,64(%edi)
	PSUBUSBM %mm0,72(%edi)
	PSUBUSBM %mm0,80(%edi)
	PSUBUSBM %mm0,88(%edi)
	PSUBUSBM %mm0,96(%edi)
	PSUBUSBM %mm0,104(%edi)
	PSUBUSBM %mm0,112(%edi)
	PSUBUSBM %mm0,120(%edi)
	PSUBUSBM %mm0,128(%edi)
	PSUBUSBM %mm0,136(%edi)
	PSUBUSBM %mm0,144(%edi)
	PSUBUSBM %mm0,152(%edi)
	PSUBUSBM %mm0,160(%edi)
	PSUBUSBM %mm0,168(%edi)
	PSUBUSBM %mm0,176(%edi)
	PSUBUSBM %mm0,184(%edi)
	PSUBUSBM %mm0,192(%edi)
	PSUBUSBM %mm0,200(%edi)
	PSUBUSBM %mm0,208(%edi)
	PSUBUSBM %mm0,216(%edi)
	PSUBUSBM %mm0,224(%edi)
	PSUBUSBM %mm0,232(%edi)
	PSUBUSBM %mm0,240(%edi)
	PSUBUSBM %mm0,248(%edi)

done:
	# swap metrics
	movl %esi,%eax
	movl %edi,%esi
	movl %eax,%edi
	jmp 1b

2:	emms
	movl 8(%ebp),%ebx	# ebx = vp
	# stash metric pointers
	movl %esi,OLDMETRICS(%ebx)
	movl %edi,NEWMETRICS(%ebx)
	movl %edx,DP(%ebx)	# stash incremented value of vp->dp
	xorl %eax,%eax		# return 0
err:	popl %ebx
	popl %edx
	popl %edi
	popl %esi
	popl %ebp
	ret

	.data
	.align 8
thirtyones:
	.byte 31,31,31,31,31,31,31,31


@@ -0,0 +1,40 @@
/* Compute the sum of the squares of a vector of signed shorts

 * Copyright 2004 Phil Karn, KA9Q
 * May be used under the terms of the GNU Lesser General Public License (LGPL)
 */

#include <stdlib.h>
#include "fec.h"

unsigned long long sumsq_port(signed short *,int);

#ifdef __i386__
unsigned long long sumsq_mmx(signed short *,int);
unsigned long long sumsq_sse(signed short *,int);
unsigned long long sumsq_sse2(signed short *,int);
#endif

#ifdef __VEC__
unsigned long long sumsq_av(signed short *,int);
#endif

/* Sum of squares of the cnt samples at in, computed by the routine that
 * matches Cpu_mode. PORT and any mode not compiled in for this target
 * use the portable C version. SSE shares the MMX routine.
 */
unsigned long long sumsq(signed short *in,int cnt){
  switch(Cpu_mode){
  case PORT:
  default:
    return sumsq_port(in,cnt);
#ifdef __i386__
  case SSE:
  case MMX:
    return sumsq_mmx(in,cnt);
  case SSE2:
    return sumsq_sse2(in,cnt);
#endif

#ifdef __VEC__
  case ALTIVEC:
    return sumsq_av(in,cnt);
#endif
  }
}
diff --git a/sumsq_av.c b/sumsq_av.c
new file mode 100644
index 0000000..53c6acf
--- /dev/null
+++ b/sumsq_av.c
@@ -0,0 +1,78 @@
/* Compute the sum of the squares of a vector of signed shorts

 * This is the Altivec SIMD version.
* It's a little hairy because Altivec
 * does not do 64-bit operations directly, so we have to accumulate separate
 * 32-bit sums and carries

 * Copyright 2004 Phil Karn, KA9Q
 * May be used under the terms of the GNU Lesser General Public License (LGPL)
 */

#include "fec.h"

/* Sum of squares of the cnt samples at in.
 * Four 32-bit partial sums are kept in "sums"; the carry out of each
 * 32-bit addition is counted in the matching lane of "carries". The two
 * are folded into a single 64-bit result at the end.
 */
unsigned long long sumsq_av(signed short *in,int cnt){
  long long sum;
  vector signed short x;
  vector unsigned int sums,carries,s1,s2;
  int pad;
  /* Scratch union: c[15] supplies the shift count for vec_sro, and w[3]
   * is used to read the last 32-bit lane of a vector back out
   */
  union { vector unsigned char cv; vector unsigned int iv; unsigned int w[4]; unsigned char c[16];} s;

  carries = sums = (vector unsigned int)(0);
  if((pad = (int)in & 15)!=0){
    /* Load unaligned leading word */
    x = vec_perm(vec_ld(0,in),(vector signed short)(0),vec_lvsl(0,in));
    if(cnt < 8){ /* Shift right to chop stuff beyond end of short block */
      s.c[15] = (8-cnt)<<4;
      x = vec_sro(x,s.cv);
    }
    sums = (vector unsigned int)vec_msum(x,x,(vector signed int)(0));
    /* pad is in bytes, so this block consumed 8-pad/2 samples */
    in += 8-pad/2;
    cnt -= 8-pad/2;
  }
  /* Everything is now aligned, rip through most of the block */
  while(cnt >= 8){
    x = vec_ld(0,in);
    /* A single vec_msum cannot overflow, but we have to sum it with
     * the earlier terms separately to handle the carries
     * The cast to unsigned is OK because squares are always positive
     */
    s1 = (vector unsigned int)vec_msum(x,x,(vector signed int)(0));
    carries = vec_add(carries,vec_addc(sums,s1));
    sums = vec_add(sums,s1);
    in += 8;
    cnt -= 8;
  }
  /* Handle trailing fragment, if any */
  if(cnt > 0){
    x = vec_ld(0,in);
    s.c[15] = (8-cnt)<<4;
    x = vec_sro(x,s.cv);
    s1 = (vector unsigned int)vec_msum(x,x,(vector signed int)(0));
    carries = vec_add(carries,vec_addc(sums,s1));
    sums = vec_add(sums,s1);
  }
  /* Combine 4 sub-sums and carries */
  s.c[15] = 64; /* Shift right two 32-bit words */
  s1 = vec_sro(sums,s.cv);
  s2 = vec_sro(carries,s.cv);
  carries = vec_add(carries,vec_addc(sums,s1));
  sums = vec_add(sums,s1);
  carries = vec_add(carries,s2);

  s.c[15] = 32; /* Shift right one 32-bit word */
  s1 = vec_sro(sums,s.cv);
  s2 = vec_sro(carries,s.cv);
  carries = vec_add(carries,vec_addc(sums,s1));
  sums = vec_add(sums,s1);
  carries = vec_add(carries,s2);

  /* Extract sum and carries from right-hand words and combine into result */
  s.iv = sums;
  sum = s.w[3];

  s.iv = carries;
  sum += (long long)s.w[3] << 32;

  return sum;
}

diff --git a/sumsq_mmx.c b/sumsq_mmx.c
new file mode 100644
index 0000000..e766831
--- /dev/null
+++ b/sumsq_mmx.c
@@ -0,0 +1,35 @@
/* Compute the sum of the squares of a vector of signed shorts

 * MMX-assisted version (also used on SSE)

 * The SSE2 and MMX assist routines both operate on multiples of
 * 8 words; they differ only in their alignment requirements (8 bytes
 * for MMX, 16 bytes for SSE2)

 * Copyright 2004 Phil Karn, KA9Q
 * May be used under the terms of the GNU Lesser Public License (LGPL)
 */

long long sumsq_mmx_assist(signed short *,int);

/* Sum of squares of the cnt samples at in: scalar code up to the next
 * 8-byte boundary, the assembler routine for whole groups of 8 samples,
 * then scalar code for the remainder.
 */
long long sumsq_mmx(signed short *in,int cnt){
  long long sum = 0;

  /* Handle stuff before the next 8-byte boundary */
  while(((int)in & 7) != 0 && cnt != 0){
    sum += (long)in[0] * in[0];
    in++;
    cnt--;
  }
  sum += sumsq_mmx_assist(in,cnt);
  /* Skip the whole groups of 8 the assist routine consumed */
  in += cnt & ~7;
  cnt &= 7;

  /* Handle up to 7 words at end */
  while(cnt != 0){
    sum += (long)in[0] * in[0];
    in++;
    cnt--;
  }
  return sum;
}
diff --git a/sumsq_mmx_assist.s b/sumsq_mmx_assist.s
new file mode 100644
index 0000000..b3bac66
--- /dev/null
+++ b/sumsq_mmx_assist.s
@@ -0,0 +1,83 @@
# MMX assist routines for sumsq
# Copyright 2001 Phil Karn, KA9Q
# May be used under the terms of the GNU Public License (GPL)

	.text

# Evaluate sum of squares of signed 16-bit input samples
#  long long sumsq_mmx_assist(signed short *in,int cnt);
# Only whole groups of 8 samples are summed; the 64-bit result is
# accumulated in edx:eax.
	.global sumsq_mmx_assist
	.type sumsq_mmx_assist,@function
	.align 16
sumsq_mmx_assist:
	pushl %ebp
	movl %esp,%ebp
	pushl %esi
	pushl %ecx
	pushl %ebx

	movl 8(%ebp),%esi	# esi = in
	movl 12(%ebp),%ecx	# ecx = cnt
	xor %eax,%eax		# edx:eax = 64-bit running sum
	xor %edx,%edx

	# Since 4 *
# 32767**2 < 2**32, two pmaddwd results could usually be summed in 32 bits,
	# but 4 * (-32768)**2 == 2**32, which wraps to zero. Each 32-bit
	# pmaddwd result (at most 2**31) is therefore widened into
	# edx:eax on its own instead of being combined with paddd first.
1:	subl $8,%ecx
	jl 2f
	movq (%esi),%mm0	# S0 S1 S2 S3
	pmaddwd %mm0,%mm0	# (S0^2+S1^2) (S2^2+S3^2)
	movq 8(%esi),%mm6	# S4 S5 S6 S7
	pmaddwd %mm6,%mm6	# (S4^2+S5^2) (S6^2+S7^2)
	movd %mm0,%ebx		# S0^2+S1^2
	addl %ebx,%eax
	adcl $0,%edx
	psrlq $32,%mm0
	movd %mm0,%ebx		# S2^2+S3^2
	addl %ebx,%eax
	adcl $0,%edx
	movd %mm6,%ebx		# S4^2+S5^2
	addl %ebx,%eax
	adcl $0,%edx
	psrlq $32,%mm6
	movd %mm6,%ebx		# S6^2+S7^2
	addl %ebx,%eax
	adcl $0,%edx
	addl $16,%esi
	jmp 1b

2:	emms
	popl %ebx
	popl %ecx
	popl %esi
	popl %ebp
	ret

# Evaluate sum of squares of signed 16-bit input samples
#  long sumsq_wd_mmx_assist(signed short *in,int cnt);
# Quick version, only safe for small numbers of small input values...
# (the partial sums stay 32 bits wide the whole way through)
	.global sumsq_wd_mmx_assist
	.type sumsq_wd_mmx_assist,@function
	.align 16
sumsq_wd_mmx_assist:
	pushl %ebp
	movl %esp,%ebp
	pushl %esi

	movl 8(%ebp),%esi	# esi = in
	movl 12(%ebp),%ecx	# ecx = cnt
	pxor %mm2,%mm2		# zero sum

1:	subl $8,%ecx
	jl 2f
	movq (%esi),%mm0	# S0 S1 S2 S3
	pmaddwd %mm0,%mm0	# (S0*S0+S1*S1) (S2*S2+S3*S3)
	movq 8(%esi),%mm1
	pmaddwd %mm1,%mm1
	paddd %mm1,%mm2
	paddd %mm0,%mm2		# accumulate

	addl $16,%esi
	jmp 1b

2:	movd %mm2,%eax		# even sum
	psrlq $32,%mm2
	movd %mm2,%edx		# odd sum
	addl %edx,%eax
	emms
	popl %esi
	popl %ebp
	ret
diff --git a/sumsq_port.c b/sumsq_port.c
new file mode 100644
index 0000000..6d0b4c1
--- /dev/null
+++ b/sumsq_port.c
@@ -0,0 +1,16 @@
/* Compute the sum of the squares of a vector of signed shorts

 * Portable C version
 * Copyright 2004 Phil Karn, KA9Q
 * May be used under the terms of the GNU Lesser General Public License (LGPL)
 */

/* Sum of squares of the cnt samples at in; returns 0 when cnt <= 0 */
unsigned long long sumsq_port(signed short *in,int cnt){
  long long sum = 0;
  int i;

  for(i=0;i<cnt;i++){
    /* each square fits in an int; the running total is 64 bits wide */
    sum += (int)in[i] * (int)in[i];
  }
  return sum;
}
diff --git a/sumsq_sse2.c b/sumsq_sse2.c
new file mode 100644
index 0000000..b05d2e9
--- /dev/null
+++ b/sumsq_sse2.c
@@ -0,0 +1,33 @@
/* Compute the sum of the squares of a vector of signed shorts

 * The SSE2 and MMX
assist routines both operate on multiples of + * 8 words; they differ only in their alignment requirements (8 bytes + * for MMX, 16 bytes for SSE2) + + * Copyright 2004 Phil Karn, KA9Q + * May be used under the terms of the GNU Lesser Public License (LGPL) + */ + +long long sumsq_sse2_assist(signed short *,int); + +long long sumsq_sse2(signed short *in,int cnt){ + long long sum = 0; + + /* Handle stuff before the next 8-byte boundary */ + while(((int)in & 15) != 0 && cnt != 0){ + sum += (long)in[0] * in[0]; + in++; + cnt--; + } + sum += sumsq_sse2_assist(in,cnt); + in += cnt & ~7; + cnt &= 7; + + /* Handle up to 7 trailing words */ + while(cnt != 0){ + sum += (long)in[0] * in[0]; + in++; + cnt--; + } + return sum; +} diff --git a/sumsq_sse2_assist.s b/sumsq_sse2_assist.s new file mode 100644 index 0000000..d1c4ee7 --- /dev/null +++ b/sumsq_sse2_assist.s @@ -0,0 +1,49 @@ +# SSE2 assist routines for sumsq +# Copyright 2001 Phil Karn, KA9Q +# May be used under the terms of the GNU Public License (GPL) + + .text +# Evaluate sum of squares of signed 16-bit input samples +# long long sumsq_sse2_assist(signed short *in,int cnt); + .global sumsq_sse2_assist + .type sumsq_sse2_assist,@function + .align 16 +sumsq_sse2_assist: + pushl %ebp + movl %esp,%ebp + pushl %esi + pushl %ecx + + movl 8(%ebp),%esi + movl 12(%ebp),%ecx + pxor %xmm2,%xmm2 # zero sum + movaps low,%xmm3 # load mask + +1: subl $8,%ecx + jl 2f + movaps (%esi),%xmm0 # S0 S1 S2 S3 S4 S5 S6 S7 + pmaddwd %xmm0,%xmm0 # (S0*S0+S1*S1) (S2*S2+S3*S3) (S4*S4+S5*S5) (S6*S6+S7*S7) + movaps %xmm0,%xmm1 + pand %xmm3,%xmm1 # (S0*S0+S1*S1) 0 (S4*S4+S5*S5) 0 + paddq %xmm1,%xmm2 # sum even-numbered dwords + psrlq $32,%xmm0 # (S2*S2+S3*S3) 0 (S6*S6+S7*S7) 0 + paddq %xmm0,%xmm2 # sum odd-numbered dwords + addl $16,%esi + jmp 1b + +2: movaps %xmm2,%xmm0 + psrldq $8,%xmm0 + paddq %xmm2,%xmm0 # combine 64-bit sums + + movd %xmm0,%eax # low 32 bits of sum + psrldq $4,%xmm0 + movd %xmm0,%edx # high 32 bits of sum + + popl %ecx + popl 
%esi + popl %ebp + ret + + .data + .align 16 +low: .byte 255,255,255,255,0,0,0,0,255,255,255,255,0,0,0,0 diff --git a/sumsq_test.c b/sumsq_test.c new file mode 100644 index 0000000..4debd47 --- /dev/null +++ b/sumsq_test.c @@ -0,0 +1,101 @@ +#include <stdio.h> +#include <stdlib.h> +#include <memory.h> +#include <time.h> +#include "config.h" +#ifdef HAVE_GETOPT_H +#include <getopt.h> +#endif +#include "fec.h" + +#if HAVE_GETOPT_LONG +struct option Options[] = { + {"frame-length",1,NULL,'l'}, + {"frame-count",1,NULL,'n'}, + {"verbose",0,NULL,'v'}, + {"force-altivec",0,NULL,'a'}, + {"force-port",0,NULL,'p'}, + {"force-mmx",0,NULL,'m'}, + {"force-sse",0,NULL,'s'}, + {"force-sse2",0,NULL,'t'}, + {NULL}, +}; +#endif + +int Verbose = 0; + +int main(int argc,char *argv[]){ + signed short *buf; + int i,d,trial,trials=10000; + int bufsize = 2048; + long long port_sum,simd_sum; + time_t t; + int timetrials=0; + + find_cpu_mode(); + time(&t); + srandom(t); + +#if HAVE_GETOPT_LONG + while((d = getopt_long(argc,argv,"vapmstl:n:T",Options,NULL)) != EOF){ +#else + while((d = getopt(argc,argv,"vapmstl:n:T")) != EOF){ +#endif + switch(d){ + case 'a': + Cpu_mode = ALTIVEC; + break; + case 'p': + Cpu_mode = PORT; + break; + case 'm': + Cpu_mode = MMX; + break; + case 's': + Cpu_mode = SSE; + break; + case 't': + Cpu_mode = SSE2; + break; + case 'l': + bufsize = atoi(optarg); + break; + case 'n': + trials = atoi(optarg); + break; + case 'v': + Verbose++; + break; + case 'T': + timetrials++; + break; + } + } + + buf = (signed short *)calloc(bufsize,sizeof(signed short)); + if(timetrials){ + for(trial=0;trial<trials;trial++){ + (void)sumsq(buf,bufsize); + } + } else { + for(trial=0;trial<trials;trial++){ + int length,offset; + + offset = random() & 7; + length = (random() % bufsize) - offset; + if(length <= 0) + continue; + for(i=0;i<bufsize;i++) + buf[i] = random(); + + port_sum = sumsq_port(buf+offset,length); + simd_sum = sumsq(buf+offset,length); + if(port_sum != simd_sum){ + 
printf("offset %d len %d port_sum = %lld simd_sum = %lld ",offset,length,port_sum,simd_sum);

	printf("ERROR! diff = %lld\n",simd_sum-port_sum);
      }
    }
  }
  exit(0);
}
diff --git a/viterbi27.c b/viterbi27.c
new file mode 100644
index 0000000..554da92
--- /dev/null
+++ b/viterbi27.c
@@ -0,0 +1,161 @@
/* K=7 r=1/2 Viterbi decoder with optional Intel or PowerPC SIMD
 * Copyright Feb 2004, Phil Karn, KA9Q
 */
#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include "fec.h"

/* Every function in this file dispatches on Cpu_mode to the matching
 * _port/_av/_mmx/_sse/_sse2 implementation. PORT, and any mode not
 * compiled in for this target, uses the portable version.
 */

/* Create a new instance of a Viterbi decoder
 * Calls find_cpu_mode() first; it is the only function here that does.
 */
void *create_viterbi27(int len){
  find_cpu_mode();

  switch(Cpu_mode){
  case PORT:
  default:
    return create_viterbi27_port(len);
#ifdef __VEC__
  case ALTIVEC:
    return create_viterbi27_av(len);
#endif
#ifdef __i386__
  case MMX:
    return create_viterbi27_mmx(len);
  case SSE:
    return create_viterbi27_sse(len);
  case SSE2:
    return create_viterbi27_sse2(len);
#endif
  }
}

/* Pass polys to the polynomial setter of the implementation selected by
 * Cpu_mode
 */
void set_viterbi27_polynomial(int polys[2]){
  switch(Cpu_mode){
  case PORT:
  default:
    set_viterbi27_polynomial_port(polys);
    break;
#ifdef __VEC__
  case ALTIVEC:
    set_viterbi27_polynomial_av(polys);
    break;
#endif
#ifdef __i386__
  case MMX:
    set_viterbi27_polynomial_mmx(polys);
    break;
  case SSE:
    set_viterbi27_polynomial_sse(polys);
    break;
  case SSE2:
    set_viterbi27_polynomial_sse2(polys);
    break;
#endif
  }
}

/* Initialize Viterbi decoder for start of new frame
 * Returns whatever the selected implementation returns.
 */
int init_viterbi27(void *p,int starting_state){
    switch(Cpu_mode){
    case PORT:
    default:
      return init_viterbi27_port(p,starting_state);
#ifdef __VEC__
    case ALTIVEC:
      return init_viterbi27_av(p,starting_state);
#endif
#ifdef __i386__
    case MMX:
      return init_viterbi27_mmx(p,starting_state);
    case SSE:
      return init_viterbi27_sse(p,starting_state);
    case SSE2:
      return init_viterbi27_sse2(p,starting_state);
#endif
    }
}

/* Viterbi chainback
 * Returns whatever the selected implementation returns.
 */
int chainback_viterbi27(
      void *p,
      unsigned char *data, /* Decoded output data */
      unsigned int nbits, /* Number of data bits */
      unsigned int endstate){ /* Terminal encoder state */

    switch(Cpu_mode){
    case PORT:
    default:
      return chainback_viterbi27_port(p,data,nbits,endstate);
#ifdef __VEC__
    case ALTIVEC:
      return chainback_viterbi27_av(p,data,nbits,endstate);
#endif
#ifdef __i386__
    case MMX:
      return chainback_viterbi27_mmx(p,data,nbits,endstate);
    case SSE:
      return chainback_viterbi27_sse(p,data,nbits,endstate);
    case SSE2:
      return chainback_viterbi27_sse2(p,data,nbits,endstate);
#endif
    }
}

/* Delete instance of a Viterbi decoder */
void delete_viterbi27(void *p){
    switch(Cpu_mode){
    case PORT:
    default:
      delete_viterbi27_port(p);
      break;
#ifdef __VEC__
    case ALTIVEC:
      delete_viterbi27_av(p);
      break;
#endif
#ifdef __i386__
    case MMX:
      delete_viterbi27_mmx(p);
      break;
    case SSE:
      delete_viterbi27_sse(p);
      break;
    case SSE2:
      delete_viterbi27_sse2(p);
      break;
#endif
    }
}

/* Update decoder with a block of demodulated symbols
 * Note that nbits is the number of decoded data bits, not the number
 * of symbols!
 * Returns -1 if p is NULL, otherwise 0; nothing returned by the selected
 * implementation is passed on to the caller.
 */
int update_viterbi27_blk(void *p,unsigned char syms[],int nbits){
  if(p == NULL)
    return -1;

  switch(Cpu_mode){
  case PORT:
  default:
    update_viterbi27_blk_port(p,syms,nbits);
    break;
#ifdef __VEC__
  case ALTIVEC:
    update_viterbi27_blk_av(p,syms,nbits);
    break;
#endif
#ifdef __i386__
  case MMX:
    update_viterbi27_blk_mmx(p,syms,nbits);
    break;
  case SSE:
    update_viterbi27_blk_sse(p,syms,nbits);
    break;
  case SSE2:
    update_viterbi27_blk_sse2(p,syms,nbits);
    break;
#endif
  }
  return 0;
}
diff --git a/viterbi27_av.c b/viterbi27_av.c
new file mode 100644
index 0000000..98d7344
--- /dev/null
+++ b/viterbi27_av.c
@@ -0,0 +1,210 @@
/* K=7 r=1/2 Viterbi decoder for PowerPC G4/G5 Altivec instructions
 * Feb 2004, Phil Karn, KA9Q
 */
#include <stdio.h>
#include <memory.h>
#include <stdlib.h>
#include "fec.h"

typedef union { long long p; unsigned char c[64]; vector bool char v[4]; } decision_t;
typedef union { long long p; unsigned char c[64]; vector unsigned char v[4]; } metric_t;

static union branchtab27 { unsigned char c[32]; vector unsigned char v[2];} Branchtab27[2];
static int Init = 0;

/* State info for instance of Viterbi decoder
 * Don't change this without also changing references in [mmx|sse|sse2]bfly29.s!
+ */ +struct v27 { + metric_t metrics1; /* path metric buffer 1 */ + metric_t metrics2; /* path metric buffer 2 */ + decision_t *dp; /* Pointer to current decision */ + metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */ + decision_t *decisions; /* Beginning of decisions for block */ +}; + +/* Initialize Viterbi decoder for start of new frame */ +int init_viterbi27_av(void *p,int starting_state){ + struct v27 *vp = p; + int i; + + if(p == NULL) + return -1; + for(i=0;i<4;i++) + vp->metrics1.v[i] = (vector unsigned char)(63); + vp->old_metrics = &vp->metrics1; + vp->new_metrics = &vp->metrics2; + vp->dp = vp->decisions; + vp->old_metrics->c[starting_state & 63] = 0; /* Bias known start state */ + return 0; +} + +void set_viterbi27_polynomial_av(int polys[2]){ + int state; + + for(state=0;state < 32;state++){ + Branchtab27[0].c[state] = (polys[0] < 0) ^ parity((2*state) & abs(polys[0])) ? 255 : 0; + Branchtab27[1].c[state] = (polys[1] < 0) ^ parity((2*state) & abs(polys[1])) ? 
255 : 0; + } + Init++; +} + +/* Create a new instance of a Viterbi decoder */ +void *create_viterbi27_av(int len){ + struct v27 *vp; + + if(!Init){ + int polys[2] = { V27POLYA,V27POLYB }; + set_viterbi27_polynomial_av(polys); + } + if((vp = (struct v27 *)malloc(sizeof(struct v27))) == NULL) + return NULL; + if((vp->decisions = (decision_t *)malloc((len+6)*sizeof(decision_t))) == NULL){ + free(vp); + return NULL; + } + init_viterbi27_av(vp,0); + return vp; +} + +/* Viterbi chainback */ +int chainback_viterbi27_av( + void *p, + unsigned char *data, /* Decoded output data */ + unsigned int nbits, /* Number of data bits */ + unsigned int endstate){ /* Terminal encoder state */ + struct v27 *vp = p; + decision_t *d = (decision_t *)vp->decisions; + + if(p == NULL) + return -1; + + /* Make room beyond the end of the encoder register so we can + * accumulate a full byte of decoded data + */ + endstate %= 64; + endstate <<= 2; + + /* The store into data[] only needs to be done every 8 bits. + * But this avoids a conditional branch, and the writes will + * combine in the cache anyway + */ + d += 6; /* Look past tail */ + while(nbits-- != 0){ + int k; + + k = d[nbits].c[endstate>>2] & 1; + data[nbits>>3] = endstate = (endstate >> 1) | (k << 7); + } + return 0; +} + +/* Delete instance of a Viterbi decoder */ +void delete_viterbi27_av(void *p){ + struct v27 *vp = p; + + if(vp != NULL){ + free(vp->decisions); + free(vp); + } +} + +/* Process received symbols */ +int update_viterbi27_blk_av(void *p,unsigned char *syms,int nbits){ + struct v27 *vp = p; + decision_t *d; + + if(p == NULL) + return -1; + d = (decision_t *)vp->dp; + while(nbits--){ + vector unsigned char survivor0,survivor1,sym0v,sym1v; + vector bool char decision0,decision1; + vector unsigned char metric,m_metric,m0,m1,m2,m3; + void *tmp; + + /* sym0v.0 = syms[0]; sym0v.1 = syms[1] */ + sym0v = vec_perm(vec_ld(0,syms),vec_ld(1,syms),vec_lvsl(0,syms)); + + sym1v = vec_splat(sym0v,1); /* Splat syms[1] across sym1v */ 
+ sym0v = vec_splat(sym0v,0); /* Splat syms[0] across sym0v */ + syms += 2; + + /* Do the 32 butterflies as two interleaved groups of 16 each to keep the pipes full */ + + /* Form first set of 16 branch metrics */ + metric = vec_avg(vec_xor(Branchtab27[0].v[0],sym0v),vec_xor(Branchtab27[1].v[0],sym1v)); + metric = vec_sr(metric,(vector unsigned char)(3)); + m_metric = vec_sub((vector unsigned char)(31),metric); + + /* Form first set of path metrics */ + m0 = vec_adds(vp->old_metrics->v[0],metric); + m3 = vec_adds(vp->old_metrics->v[2],metric); + m1 = vec_adds(vp->old_metrics->v[2],m_metric); + m2 = vec_adds(vp->old_metrics->v[0],m_metric); + + /* Form second set of 16 branch metrics */ + metric = vec_avg(vec_xor(Branchtab27[0].v[1],sym0v),vec_xor(Branchtab27[1].v[1],sym1v)); + metric = vec_sr(metric,(vector unsigned char)(3)); + m_metric = vec_sub((vector unsigned char)(31),metric); + + /* Compare and select first set */ + decision0 = vec_cmpgt(m0,m1); + decision1 = vec_cmpgt(m2,m3); + survivor0 = vec_min(m0,m1); + survivor1 = vec_min(m2,m3); + + /* Compute second set of path metrics */ + m0 = vec_adds(vp->old_metrics->v[1],metric); + m3 = vec_adds(vp->old_metrics->v[3],metric); + m1 = vec_adds(vp->old_metrics->v[3],m_metric); + m2 = vec_adds(vp->old_metrics->v[1],m_metric); + + /* Interleave and store first decisions and survivors */ + d->v[0] = vec_mergeh(decision0,decision1); + d->v[1] = vec_mergel(decision0,decision1); + vp->new_metrics->v[0] = vec_mergeh(survivor0,survivor1); + vp->new_metrics->v[1] = vec_mergel(survivor0,survivor1); + + /* Compare and select second set */ + decision0 = vec_cmpgt(m0,m1); + decision1 = vec_cmpgt(m2,m3); + survivor0 = vec_min(m0,m1); + survivor1 = vec_min(m2,m3); + + /* Interleave and store second set of decisions and survivors */ + d->v[2] = vec_mergeh(decision0,decision1); + d->v[3] = vec_mergel(decision0,decision1); + vp->new_metrics->v[2] = vec_mergeh(survivor0,survivor1); + vp->new_metrics->v[3] = 
vec_mergel(survivor0,survivor1); + + /* renormalize if necessary */ + if(vp->new_metrics->c[0] >= 105){ + vector unsigned char scale0,scale1; + + /* Find smallest metric and splat */ + scale0 = vec_min(vp->new_metrics->v[0],vp->new_metrics->v[1]); + scale1 = vec_min(vp->new_metrics->v[2],vp->new_metrics->v[3]); + scale0 = vec_min(scale0,scale1); + scale0 = vec_min(scale0,vec_sld(scale0,scale0,8)); + scale0 = vec_min(scale0,vec_sld(scale0,scale0,4)); + scale0 = vec_min(scale0,vec_sld(scale0,scale0,2)); + scale0 = vec_min(scale0,vec_sld(scale0,scale0,1)); + + /* Now subtract from all metrics */ + vp->new_metrics->v[0] = vec_subs(vp->new_metrics->v[0],scale0); + vp->new_metrics->v[1] = vec_subs(vp->new_metrics->v[1],scale0); + vp->new_metrics->v[2] = vec_subs(vp->new_metrics->v[2],scale0); + vp->new_metrics->v[3] = vec_subs(vp->new_metrics->v[3],scale0); + } + d++; + /* Swap pointers to old and new metrics */ + tmp = vp->old_metrics; + vp->old_metrics = vp->new_metrics; + vp->new_metrics = tmp; + } + vp->dp = d; + + return 0; +} + diff --git a/viterbi27_mmx.c b/viterbi27_mmx.c new file mode 100644 index 0000000..a6d5125 --- /dev/null +++ b/viterbi27_mmx.c @@ -0,0 +1,115 @@ +/* K=7 r=1/2 Viterbi decoder for MMX + * Copyright Feb 2004, Phil Karn, KA9Q + */ +#include <stdio.h> +#include <stdlib.h> +#include <memory.h> +#include <mmintrin.h> +#include "fec.h" + +typedef union { char c[64]; __m64 v[8];} decision_t; +typedef union { unsigned char c[64]; __m64 v[8];} metric_t; + +unsigned char Mettab27_1[256][32] __attribute__ ((aligned(16))); +unsigned char Mettab27_2[256][32] __attribute__ ((aligned(16))); +static int Init = 0; + +/* State info for instance of Viterbi decoder + * Don't change this without also changing references in mmxbfly27.s! 
+ */ +struct v27 { + metric_t metrics1; /* path metric buffer 1 */ + metric_t metrics2; /* path metric buffer 2 */ + decision_t *dp; /* Pointer to current decision */ + metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */ + decision_t *decisions; /* Beginning of decisions for block */ +}; + +/* Initialize Viterbi decoder for start of new frame */ +int init_viterbi27_mmx(void *p,int starting_state){ + struct v27 *vp = (struct v27 *)p; + int i; + + if(p == NULL) + return -1; + for(i=0;i<64;i++) + vp->metrics1.c[i] = 63; + + vp->old_metrics = &vp->metrics1; + vp->new_metrics = &vp->metrics2; + vp->dp = vp->decisions; + vp->old_metrics->c[starting_state & 63] = 0; /* Bias known start state */ + return 0; +} + +void set_viterbi27_polynomial_mmx(int polys[2]){ + int state; + + for(state=0;state < 32;state++){ + int symbol; + for(symbol = 0;symbol < 256;symbol++){ + int sym; + + sym = parity((2*state) & abs(polys[0])) ^ (polys[0] < 0); + Mettab27_1[symbol][state] = (sym ? (255-symbol):symbol) / 16; + + sym = parity((2*state) & abs(polys[1])) ^ (polys[1] < 0); + Mettab27_2[symbol][state] = (sym ? 
(255-symbol):symbol) / 16; + } + } + Init++; +} + + +/* Create a new instance of a Viterbi decoder */ +void *create_viterbi27_mmx(int len){ + struct v27 *vp; + int polys[2] = { V27POLYA, V27POLYB }; + + if(Init == 0){ + set_viterbi27_polynomial_mmx(polys); + } + if((vp = (struct v27 *)malloc(sizeof(struct v27))) == NULL) + return NULL; + + if((vp->decisions = (decision_t *)malloc((len+6)*sizeof(decision_t))) == NULL){ + free(vp); + return NULL; + } + init_viterbi27_mmx(vp,0); + return vp; +} + +/* Viterbi chainback */ +int chainback_viterbi27_mmx( + void *p, + unsigned char *data, /* Decoded output data */ + unsigned int nbits, /* Number of data bits */ + unsigned int endstate){ /* Terminal encoder state */ + + struct v27 *vp = (struct v27 *)p; + decision_t *d; + + if(p == NULL) + return -1; + d = (decision_t *)vp->decisions; + endstate &= 63; + d += 6; /* Look past tail */ + while(nbits-- != 0){ + int k; + + k = d[nbits].c[endstate>>2] & 1; + data[nbits>>3] = endstate = (endstate >> 1) | (k << 7); + } + return 0; +} + +/* Delete instance of a Viterbi decoder */ +void delete_viterbi27_mmx(void *p){ + struct v27 *vp = p; + + if(vp != NULL){ + free(vp->decisions); + free(vp); + } +} diff --git a/viterbi27_port.c b/viterbi27_port.c new file mode 100644 index 0000000..7cac2b3 --- /dev/null +++ b/viterbi27_port.c @@ -0,0 +1,191 @@ +/* K=7 r=1/2 Viterbi decoder in portable C + * Copyright Feb 2004, Phil Karn, KA9Q + * May be used under the terms of the GNU Lesser General Public License (LGPL) + */ +#include <stdio.h> +#include <stdlib.h> +#include <memory.h> +#include <limits.h> +#include "fec.h" + + +typedef union { unsigned int w[64]; } metric_t; +typedef union { unsigned long w[2];} decision_t; +static union branchtab27 { unsigned char c[32]; } Branchtab27[2] __attribute__ ((aligned(16))); +static int Init = 0; + +/* State info for instance of Viterbi decoder + * Don't change this without also changing references in [mmx|sse|sse2]bfly29.s! 
+ */ +struct v27 { + metric_t metrics1; /* path metric buffer 1 */ + metric_t metrics2; /* path metric buffer 2 */ + decision_t *dp; /* Pointer to current decision */ + metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */ + decision_t *decisions; /* Beginning of decisions for block */ +}; + +/* Initialize Viterbi decoder for start of new frame */ +int init_viterbi27_port(void *p,int starting_state){ + struct v27 *vp = p; + int i; + + if(p == NULL) + return -1; + for(i=0;i<64;i++) + vp->metrics1.w[i] = 63; + + vp->old_metrics = &vp->metrics1; + vp->new_metrics = &vp->metrics2; + vp->dp = vp->decisions; + vp->old_metrics->w[starting_state & 63] = 0; /* Bias known start state */ + return 0; +} + +void set_viterbi27_polynomial_port(int polys[2]){ + int state; + + for(state=0;state < 32;state++){ + Branchtab27[0].c[state] = (polys[0] < 0) ^ parity((2*state) & abs(polys[0])) ? 255 : 0; + Branchtab27[1].c[state] = (polys[1] < 0) ^ parity((2*state) & abs(polys[1])) ? 255 : 0; + } + Init++; +} + +/* Create a new instance of a Viterbi decoder */ +void *create_viterbi27_port(int len){ + struct v27 *vp; + + if(!Init){ + int polys[2] = { V27POLYA, V27POLYB }; + set_viterbi27_polynomial_port(polys); + } + if((vp = malloc(sizeof(struct v27))) == NULL) + return NULL; + if((vp->decisions = malloc((len+6)*sizeof(decision_t))) == NULL){ + free(vp); + return NULL; + } + init_viterbi27_port(vp,0); + + return vp; +} + +/* Viterbi chainback */ +int chainback_viterbi27_port( + void *p, + unsigned char *data, /* Decoded output data */ + unsigned int nbits, /* Number of data bits */ + unsigned int endstate){ /* Terminal encoder state */ + struct v27 *vp = p; + decision_t *d; + + if(p == NULL) + return -1; + d = vp->decisions; + /* Make room beyond the end of the encoder register so we can + * accumulate a full byte of decoded data + */ + endstate %= 64; + endstate <<= 2; + + /* The store into data[] only needs to be done every 8 bits. 
+ * But this avoids a conditional branch, and the writes will + * combine in the cache anyway + */ + d += 6; /* Look past tail */ + while(nbits-- != 0){ + int k; + + k = (d[nbits].w[(endstate>>2)/32] >> ((endstate>>2)%32)) & 1; + data[nbits>>3] = endstate = (endstate >> 1) | (k << 7); + } + return 0; +} + +/* Delete instance of a Viterbi decoder */ +void delete_viterbi27_port(void *p){ + struct v27 *vp = p; + + if(vp != NULL){ + free(vp->decisions); + free(vp); + } +} + +/* C-language butterfly */ +#define BFLY(i) {\ +unsigned int metric,m0,m1,decision;\ + metric = (Branchtab27[0].c[i] ^ sym0) + (Branchtab27[1].c[i] ^ sym1);\ + m0 = vp->old_metrics->w[i] + metric;\ + m1 = vp->old_metrics->w[i+32] + (510 - metric);\ + decision = (signed int)(m0-m1) > 0;\ + vp->new_metrics->w[2*i] = decision ? m1 : m0;\ + d->w[i/16] |= decision << ((2*i)&31);\ + m0 -= (metric+metric-510);\ + m1 += (metric+metric-510);\ + decision = (signed int)(m0-m1) > 0;\ + vp->new_metrics->w[2*i+1] = decision ? m1 : m0;\ + d->w[i/16] |= decision << ((2*i+1)&31);\ +} + +/* Update decoder with a block of demodulated symbols + * Note that nbits is the number of decoded data bits, not the number + * of symbols! 
+ */ +int update_viterbi27_blk_port(void *p,unsigned char *syms,int nbits){ + struct v27 *vp = p; + void *tmp; + decision_t *d; + + if(p == NULL) + return -1; + d = (decision_t *)vp->dp; + while(nbits--){ + unsigned char sym0,sym1; + + d->w[0] = d->w[1] = 0; + sym0 = *syms++; + sym1 = *syms++; + + BFLY(0); + BFLY(1); + BFLY(2); + BFLY(3); + BFLY(4); + BFLY(5); + BFLY(6); + BFLY(7); + BFLY(8); + BFLY(9); + BFLY(10); + BFLY(11); + BFLY(12); + BFLY(13); + BFLY(14); + BFLY(15); + BFLY(16); + BFLY(17); + BFLY(18); + BFLY(19); + BFLY(20); + BFLY(21); + BFLY(22); + BFLY(23); + BFLY(24); + BFLY(25); + BFLY(26); + BFLY(27); + BFLY(28); + BFLY(29); + BFLY(30); + BFLY(31); + d++; + /* Swap pointers to old and new metrics */ + tmp = vp->old_metrics; + vp->old_metrics = vp->new_metrics; + vp->new_metrics = tmp; + } + vp->dp = d; + return 0; +} diff --git a/viterbi27_sse.c b/viterbi27_sse.c new file mode 100644 index 0000000..cd1f287 --- /dev/null +++ b/viterbi27_sse.c @@ -0,0 +1,113 @@ +/* K=7 r=1/2 Viterbi decoder for SSE + * Feb 2004, Phil Karn, KA9Q + */ +#include <stdio.h> +#include <stdlib.h> +#include <memory.h> +#include <xmmintrin.h> +#include "fec.h" + +typedef union { unsigned char c[64]; } metric_t; +typedef union { unsigned long w[2]; unsigned char c[8]; __m64 v[1];} decision_t; +union branchtab27 { unsigned char c[32]; __m64 v[4];} Branchtab27_sse[2]; +static int Init = 0; + +/* State info for instance of Viterbi decoder + * Don't change this without also changing references in ssebfly27.s! 
+ */ +struct v27 { + metric_t metrics1; /* path metric buffer 1 */ + metric_t metrics2; /* path metric buffer 2 */ + decision_t *dp; /* Pointer to current decision */ + metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */ + decision_t *decisions; /* Beginning of decisions for block */ +}; + +/* Create a new instance of a Viterbi decoder */ +void *create_viterbi27_sse(int len){ + struct v27 *vp; + + if(!Init){ + int polys[2] = { V27POLYA, V27POLYB }; + + set_viterbi27_polynomial_sse(polys); + } + if((vp = malloc(sizeof(struct v27))) == NULL) + return NULL; + if((vp->decisions = malloc((len+6)*sizeof(decision_t))) == NULL){ + free(vp); + return NULL; + } + init_viterbi27(vp,0); + return vp; +} + +void set_viterbi27_polynomial_sse(int polys[2]){ + int state; + + for(state=0;state < 32;state++){ + Branchtab27_sse[0].c[state] = (polys[0] < 0) ^ parity((2*state) & abs(polys[0])) ? 255 : 0; + Branchtab27_sse[1].c[state] = (polys[1] < 0) ^ parity((2*state) & abs(polys[1])) ? 255 : 0; + } + Init++; +} + +/* Initialize Viterbi decoder for start of new frame */ +int init_viterbi27_sse(void *p,int starting_state){ + struct v27 *vp = p; + int i; + + if(p == NULL) + return -1; + for(i=0;i<64;i++) + vp->metrics1.c[i] = 63; + + vp->old_metrics = &vp->metrics1; + vp->new_metrics = &vp->metrics2; + vp->dp = vp->decisions; + vp->old_metrics->c[starting_state & 63] = 0; /* Bias known start state */ + return 0; +} + +/* Viterbi chainback */ +int chainback_viterbi27_sse( + void *p, + unsigned char *data, /* Decoded output data */ + unsigned int nbits, /* Number of data bits */ + unsigned int endstate){ /* Terminal encoder state */ + struct v27 *vp = p; + decision_t *d; + + if(p == NULL) + return -1; + + d = vp->decisions; + /* Make room beyond the end of the encoder register so we can + * accumulate a full byte of decoded data + */ + endstate %= 64; + endstate <<= 2; + + /* The store into data[] only needs to be done every 8 bits. 
+ * But this avoids a conditional branch, and the writes will + * combine in the cache anyway + */ + d += 6; /* Look past tail */ + while(nbits-- != 0){ + int k; + + k = (d[nbits].c[(endstate>>2)/8] >> ((endstate>>2)%8)) & 1; + data[nbits>>3] = endstate = (endstate >> 1) | (k << 7); + } + return 0; +} + +/* Delete instance of a Viterbi decoder */ +void delete_viterbi27_sse(void *p){ + struct v27 *vp = p; + + if(vp != NULL){ + free(vp->decisions); + free(vp); + } +} diff --git a/viterbi27_sse2.c b/viterbi27_sse2.c new file mode 100644 index 0000000..bc01710 --- /dev/null +++ b/viterbi27_sse2.c @@ -0,0 +1,180 @@ +/* K=7 r=1/2 Viterbi decoder for SSE2 + * Feb 2004, Phil Karn, KA9Q + */ +#include <stdio.h> +#include <stdlib.h> +#include <memory.h> +#include <xmmintrin.h> +#include "fec.h" + +typedef union { unsigned char c[64]; __m128i v[4]; } metric_t; +typedef union { unsigned long w[2]; unsigned char c[8]; unsigned short s[4]; __m64 v[1];} decision_t; +union branchtab27 { unsigned char c[32]; __m128i v[2];} Branchtab27_sse2[2]; +static int Init = 0; + +/* State info for instance of Viterbi decoder + * Don't change this without also changing references in sse2bfly27.s! 
+ */ +struct v27 { + metric_t metrics1; /* path metric buffer 1 */ + metric_t metrics2; /* path metric buffer 2 */ + decision_t *dp; /* Pointer to current decision */ + metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */ + decision_t *decisions; /* Beginning of decisions for block */ +}; + +/* Initialize Viterbi decoder for start of new frame */ +int init_viterbi27_sse2(void *p,int starting_state){ + struct v27 *vp = p; + int i; + + if(p == NULL) + return -1; + for(i=0;i<64;i++) + vp->metrics1.c[i] = 63; + + vp->old_metrics = &vp->metrics1; + vp->new_metrics = &vp->metrics2; + vp->dp = vp->decisions; + vp->old_metrics->c[starting_state & 63] = 0; /* Bias known start state */ + return 0; +} + +void set_viterbi27_polynomial_sse2(int polys[2]){ + int state; + + for(state=0;state < 32;state++){ + Branchtab27_sse2[0].c[state] = (polys[0] < 0) ^ parity((2*state) & abs(polys[0])) ? 255 : 0; + Branchtab27_sse2[1].c[state] = (polys[1] < 0) ^ parity((2*state) & abs(polys[1])) ? 
255 : 0; + } + Init++; +} + + +/* Create a new instance of a Viterbi decoder */ +void *create_viterbi27_sse2(int len){ + void *p; + struct v27 *vp; + + if(!Init){ + int polys[2] = { V27POLYA, V27POLYB }; + set_viterbi27_polynomial_sse2(polys); + } + /* Ordinary malloc() only returns 8-byte alignment, we need 16 */ + if(posix_memalign(&p, sizeof(__m128i),sizeof(struct v27))) + return NULL; + vp = (struct v27 *)p; + + if((p = malloc((len+6)*sizeof(decision_t))) == NULL){ + free(vp); + return NULL; + } + vp->decisions = (decision_t *)p; + init_viterbi27_sse2(vp,0); + + return vp; +} + +/* Viterbi chainback */ +int chainback_viterbi27_sse2( + void *p, + unsigned char *data, /* Decoded output data */ + unsigned int nbits, /* Number of data bits */ + unsigned int endstate){ /* Terminal encoder state */ + struct v27 *vp = p; + decision_t *d; + + if(p == NULL) + return -1; + d = vp->decisions; + /* Make room beyond the end of the encoder register so we can + * accumulate a full byte of decoded data + */ + endstate %= 64; + endstate <<= 2; + + /* The store into data[] only needs to be done every 8 bits. + * But this avoids a conditional branch, and the writes will + * combine in the cache anyway + */ + d += 6; /* Look past tail */ + while(nbits-- != 0){ + int k; + + k = (d[nbits].c[(endstate>>2)/8] >> ((endstate>>2)%8)) & 1; + data[nbits>>3] = endstate = (endstate >> 1) | (k << 7); + } + return 0; +} + +/* Delete instance of a Viterbi decoder */ +void delete_viterbi27_sse2(void *p){ + struct v27 *vp = p; + + if(vp != NULL){ + free(vp->decisions); + free(vp); + } +} + + +#if 0 +/* This code is turned off because it's slower than my hand-crafted assembler in sse2bfly27.s. But it does work. 
*/ +void update_viterbi27_blk_sse2(void *p,unsigned char *syms,int nbits){ + struct v27 *vp = p; + decision_t *d; + + if(p == NULL) + return; + d = (decision_t *)vp->dp; + while(nbits--){ + __m128i sym0v,sym1v; + void *tmp; + int i; + + /* Splat the 0th symbol across sym0v, the 1st symbol across sym1v, etc */ + sym0v = _mm_set1_epi8(syms[0]); + sym1v = _mm_set1_epi8(syms[1]); + syms += 2; + + for(i=0;i<2;i++){ + __m128i decision0,decision1,metric,m_metric,m0,m1,m2,m3,survivor0,survivor1; + + /* Form branch metrics */ + metric = _mm_avg_epu8(_mm_xor_si128(Branchtab27_sse2[0].v[i],sym0v),_mm_xor_si128(Branchtab27_sse2[1].v[i],sym1v)); + /* There's no packed bytes right shift in SSE2, so we use the word version and mask + * (I'm *really* starting to like Altivec...) + */ + metric = _mm_srli_epi16(metric,3); + metric = _mm_and_si128(metric,_mm_set1_epi8(31)); + m_metric = _mm_sub_epi8(_mm_set1_epi8(31),metric); + + /* Add branch metrics to path metrics */ + m0 = _mm_add_epi8(vp->old_metrics->v[i],metric); + m3 = _mm_add_epi8(vp->old_metrics->v[2+i],metric); + m1 = _mm_add_epi8(vp->old_metrics->v[2+i],m_metric); + m2 = _mm_add_epi8(vp->old_metrics->v[i],m_metric); + + /* Compare and select, using modulo arithmetic */ + decision0 = _mm_cmpgt_epi8(_mm_sub_epi8(m0,m1),_mm_setzero_si128()); + decision1 = _mm_cmpgt_epi8(_mm_sub_epi8(m2,m3),_mm_setzero_si128()); + survivor0 = _mm_or_si128(_mm_and_si128(decision0,m1),_mm_andnot_si128(decision0,m0)); + survivor1 = _mm_or_si128(_mm_and_si128(decision1,m3),_mm_andnot_si128(decision1,m2)); + + /* Pack each set of decisions into 16 bits */ + d->s[2*i] = _mm_movemask_epi8(_mm_unpacklo_epi8(decision0,decision1)); + d->s[2*i+1] = _mm_movemask_epi8(_mm_unpackhi_epi8(decision0,decision1)); + + /* Store surviving metrics */ + vp->new_metrics->v[2*i] = _mm_unpacklo_epi8(survivor0,survivor1); + vp->new_metrics->v[2*i+1] = _mm_unpackhi_epi8(survivor0,survivor1); + } + d++; + /* Swap pointers to old and new metrics */ + tmp = 
vp->old_metrics; + vp->old_metrics = vp->new_metrics; + vp->new_metrics = tmp; + } + vp->dp = d; +} +#endif diff --git a/viterbi29.c b/viterbi29.c new file mode 100644 index 0000000..80cbb33 --- /dev/null +++ b/viterbi29.c @@ -0,0 +1,152 @@ +/* Switch to K=9 r=1/2 Viterbi decoder with optional Intel or PowerPC SIMD + * Copyright Feb 2004, Phil Karn, KA9Q + */ +#include <stdio.h> +#include <stdlib.h> +#include <memory.h> +#include "fec.h" + +/* Create a new instance of a Viterbi decoder */ +void *create_viterbi29(int len){ + find_cpu_mode(); + + switch(Cpu_mode){ + case PORT: + default: + return create_viterbi29_port(len); +#ifdef __VEC__ + case ALTIVEC: + return create_viterbi29_av(len); +#endif +#ifdef __i386__ + case MMX: + return create_viterbi29_mmx(len); + case SSE: + return create_viterbi29_sse(len); + case SSE2: + return create_viterbi29_sse2(len); +#endif + } +} + +void set_viterbi29_polynomial(int polys[2]){ + switch(Cpu_mode){ + case PORT: + default: + set_viterbi29_polynomial_port(polys); + break; +#ifdef __VEC__ + case ALTIVEC: + set_viterbi29_polynomial_av(polys); + break; +#endif +#ifdef __i386__ + case MMX: + set_viterbi29_polynomial_mmx(polys); + break; + case SSE: + set_viterbi29_polynomial_sse(polys); + break; + case SSE2: + set_viterbi29_polynomial_sse2(polys); + break; +#endif + } +} + +/* Initialize Viterbi decoder for start of new frame */ +int init_viterbi29(void *p,int starting_state){ + switch(Cpu_mode){ + case PORT: + default: + return init_viterbi29_port(p,starting_state); +#ifdef __VEC__ + case ALTIVEC: + return init_viterbi29_av(p,starting_state); +#endif +#ifdef __i386__ + case MMX: + return init_viterbi29_mmx(p,starting_state); + case SSE: + return init_viterbi29_sse(p,starting_state); + case SSE2: + return init_viterbi29_sse2(p,starting_state); +#endif + } +} + +/* Viterbi chainback */ +int chainback_viterbi29( + void *p, + unsigned char *data, /* Decoded output data */ + unsigned int nbits, /* Number of data bits */ + unsigned int 
endstate){ /* Terminal encoder state */ + + switch(Cpu_mode){ + case PORT: + default: + return chainback_viterbi29_port(p,data,nbits,endstate); +#ifdef __VEC__ + case ALTIVEC: + return chainback_viterbi29_av(p,data,nbits,endstate); +#endif +#ifdef __i386__ + case MMX: + return chainback_viterbi29_mmx(p,data,nbits,endstate); + case SSE: + return chainback_viterbi29_sse(p,data,nbits,endstate); + case SSE2: + return chainback_viterbi29_sse2(p,data,nbits,endstate); +#endif + } +} + +/* Delete instance of a Viterbi decoder */ +void delete_viterbi29(void *p){ + switch(Cpu_mode){ + case PORT: + default: + delete_viterbi29_port(p); + break; +#ifdef __VEC__ + case ALTIVEC: + delete_viterbi29_av(p); + break; +#endif +#ifdef __i386__ + case MMX: + delete_viterbi29_mmx(p); + break; + case SSE: + delete_viterbi29_sse(p); + break; + case SSE2: + delete_viterbi29_sse2(p); + break; +#endif + } +} + +/* Update decoder with a block of demodulated symbols + * Note that nbits is the number of decoded data bits, not the number + * of symbols! 
+ */ +int update_viterbi29_blk(void *p,unsigned char syms[],int nbits){ + switch(Cpu_mode){ + case PORT: + default: + return update_viterbi29_blk_port(p,syms,nbits); +#ifdef __VEC__ + case ALTIVEC: + return update_viterbi29_blk_av(p,syms,nbits); +#endif +#ifdef __i386__ + case MMX: + return update_viterbi29_blk_mmx(p,syms,nbits); + case SSE: + return update_viterbi29_blk_sse(p,syms,nbits); + case SSE2: + return update_viterbi29_blk_sse2(p,syms,nbits); +#endif + } +} diff --git a/viterbi29_av.c b/viterbi29_av.c new file mode 100644 index 0000000..31c8d27 --- /dev/null +++ b/viterbi29_av.c @@ -0,0 +1,190 @@ +/* K=9 r=1/2 Viterbi decoder for PowerPC G4/G5 Altivec + * Copyright Feb 2004, Phil Karn, KA9Q + * May be used under the terms of the GNU Lesser General Public License (LGPL) + */ +#include <stdio.h> +#include <stdlib.h> +#include <memory.h> +#include <sys/sysctl.h> +#include "fec.h" + +typedef union { unsigned char c[256]; vector bool char v[16]; } decision_t; +typedef union { unsigned char c[256]; vector unsigned char v[16]; } metric_t; + +static union branchtab29 { unsigned char c[128]; vector unsigned char v[8]; } Branchtab29[2]; +static int Init = 0; + +/* State info for instance of Viterbi decoder */ +struct v29 { + metric_t metrics1; /* path metric buffer 1 */ + metric_t metrics2; /* path metric buffer 2 */ + decision_t *dp; /* Pointer to current decision */ + metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */ + decision_t *decisions; /* Beginning of decisions for block */ +}; + +/* Initialize Viterbi decoder for start of new frame */ +int init_viterbi29_av(void *p,int starting_state){ + struct v29 *vp = p; + int i; + + if(p == NULL) + return -1; + for(i=0;i<16;i++) + vp->metrics1.v[i] = (vector unsigned char)(63); + + vp->old_metrics = &vp->metrics1; + vp->new_metrics = &vp->metrics2; + vp->dp = vp->decisions; + vp->old_metrics->c[starting_state & 255] = 0; /* Bias known start state */ + return 0; +} + +void 
set_viterbi29_polynomial_av(int polys[2]){ + int state; + + for(state=0;state < 128;state++){ + Branchtab29[0].c[state] = (polys[0] < 0) ^ parity((2*state) & abs(polys[0])) ? 255 : 0; + Branchtab29[1].c[state] = (polys[1] < 0) ^ parity((2*state) & abs(polys[1])) ? 255 : 0; + } + Init++; +} + +/* Create a new instance of a Viterbi decoder */ +void *create_viterbi29_av(int len){ + struct v29 *vp; + + if(!Init){ + int polys[2] = { V29POLYA,V29POLYB }; + set_viterbi29_polynomial_av(polys); + } + if((vp = (struct v29 *)malloc(sizeof(struct v29))) == NULL) + return NULL; + if((vp->decisions = (decision_t *)malloc((len+8)*sizeof(decision_t))) == NULL){ + free(vp); + return NULL; + } + init_viterbi29_av(vp,0); + return vp; +} + +/* Viterbi chainback */ +int chainback_viterbi29_av( + void *p, + unsigned char *data, /* Decoded output data */ + unsigned int nbits, /* Number of data bits */ + unsigned int endstate){ /* Terminal encoder state */ + struct v29 *vp = p; + decision_t *d; + + if(p == NULL) + return -1; + d = (decision_t *)vp->decisions; + /* Make room beyond the end of the encoder register so we can + * accumulate a full byte of decoded data + */ + endstate %= 256; + + /* The store into data[] only needs to be done every 8 bits. 
+ * But this avoids a conditional branch, and the writes will + * combine in the cache anyway + */ + d += 8; /* Look past tail */ + while(nbits-- != 0){ + int k; + + k = d[nbits].c[endstate] & 1; + data[nbits>>3] = endstate = (endstate >> 1) | (k << 7); + } + return 0; +} + + +/* Delete instance of a Viterbi decoder */ +void delete_viterbi29_av(void *p){ + struct v29 *vp = p; + + if(vp != NULL){ + free(vp->decisions); + free(vp); + } +} + + +int update_viterbi29_blk_av(void *p,unsigned char *syms,int nbits){ + struct v29 *vp = p; + decision_t *d; + int i; + + if(p == NULL) + return -1; + d = (decision_t *)vp->dp; + + while(nbits--){ + vector unsigned char sym1v,sym2v; + void *tmp; + + /* All this seems necessary just to load a byte into all elements of a vector! */ + sym1v = vec_perm(vec_ld(0,syms),vec_ld(1,syms),vec_lvsl(0,syms)); /* sym1v.0 = syms[0]; sym1v.1 = syms[1] */ + sym2v = vec_splat(sym1v,1); /* Splat syms[1] across sym2v */ + sym1v = vec_splat(sym1v,0); /* Splat syms[0] across sym1v */ + syms += 2; + + for(i=0;i<8;i++){ + vector bool char decision0,decision1; + vector unsigned char metric,m_metric,m0,m1,m2,m3,survivor0,survivor1; + + /* Form branch metrics */ + metric = vec_avg(vec_xor(Branchtab29[0].v[i],sym1v),vec_xor(Branchtab29[1].v[i],sym2v)); + metric = vec_sr(metric,(vector unsigned char)(3)); + m_metric = (vector unsigned char)(31) - metric; + + /* Add branch metrics to path metrics */ + m0 = vec_adds(vp->old_metrics->v[i],metric); + m3 = vec_adds(vp->old_metrics->v[8+i],metric); + m1 = vec_adds(vp->old_metrics->v[8+i],m_metric); + m2 = vec_adds(vp->old_metrics->v[i],m_metric); + + /* Compare and select first set */ + decision0 = vec_cmpgt(m0,m1); + decision1 = vec_cmpgt(m2,m3); + survivor0 = vec_min(m0,m1); + survivor1 = vec_min(m2,m3); + + /* Interleave and store decisions and survivors */ + d->v[2*i] = vec_mergeh(decision0,decision1); + d->v[2*i+1] = vec_mergel(decision0,decision1); + vp->new_metrics->v[2*i] = vec_mergeh(survivor0,survivor1); 
+ vp->new_metrics->v[2*i+1] = vec_mergel(survivor0,survivor1); + } + d++; + /* renormalize if necessary */ + if(vp->new_metrics->c[0] >= 50){ + int i; + vector unsigned char scale0,scale1; + + /* Find smallest metric and splat */ + scale0 = vp->new_metrics->v[0]; + scale1 = vp->new_metrics->v[1]; + for(i=2;i<16;i+=2){ + scale0 = vec_min(scale0,vp->new_metrics->v[i]); + scale1 = vec_min(scale1,vp->new_metrics->v[i+1]); + } + scale0 = vec_min(scale0,scale1); + scale0 = vec_min(scale0,vec_sld(scale0,scale0,8)); + scale0 = vec_min(scale0,vec_sld(scale0,scale0,4)); + scale0 = vec_min(scale0,vec_sld(scale0,scale0,2)); + scale0 = vec_min(scale0,vec_sld(scale0,scale0,1)); + + /* Now subtract from all metrics */ + for(i=0;i<16;i++) + vp->new_metrics->v[i] = vec_subs(vp->new_metrics->v[i],scale0); + } + /* Swap pointers to old and new metrics */ + tmp = vp->old_metrics; + vp->old_metrics = vp->new_metrics; + vp->new_metrics = tmp; + } + vp->dp = d; + return 0; +} diff --git a/viterbi29_mmx.c b/viterbi29_mmx.c new file mode 100644 index 0000000..563f40a --- /dev/null +++ b/viterbi29_mmx.c @@ -0,0 +1,118 @@ +/* K=9 r=1/2 Viterbi decoder for MMX + * Copyright Feb 2004, Phil Karn, KA9Q + * May be used under the terms of the GNU Lesser General Public License (LGPL) + */ +#include <stdio.h> +#include <stdlib.h> +#include <memory.h> +#include <mmintrin.h> +#include "fec.h" + +typedef union { char c[256]; __m64 v[32];} decision_t; +typedef union { unsigned char c[256]; __m64 v[32];} metric_t; + +unsigned char Mettab29_1[256][128] __attribute__ ((aligned(8))); +unsigned char Mettab29_2[256][128] __attribute__ ((aligned(8))); +static int Init = 0; + +/* State info for instance of Viterbi decoder + * Don't change this without also changing references in mmxbfly29.s! 
+ */ +struct v29 { + metric_t metrics1; /* path metric buffer 1 */ + metric_t metrics2; /* path metric buffer 2 */ + decision_t *dp; /* Pointer to current decision */ + metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */ + decision_t *decisions; /* Beginning of decisions for block */ +}; + +/* Create a new instance of a Viterbi decoder */ +void *create_viterbi29_mmx(int len){ + struct v29 *vp; + + if(Init == 0){ + int polys[2] = {V29POLYA,V29POLYB}; + + set_viterbi29_polynomial_mmx(polys); + } + if((vp = (struct v29 *)malloc(sizeof(struct v29))) == NULL) + return NULL; + + if((vp->decisions = (decision_t *)malloc((len+8)*sizeof(decision_t))) == NULL){ + free(vp); + return NULL; + } + init_viterbi29(vp,0); + return vp; +} + +void set_viterbi29_polynomial_mmx(int polys[2]){ + int state; + + for(state=0;state < 128;state++){ + int symbol; + + for(symbol = 0;symbol < 256;symbol++){ + int sym; + + sym = parity((2*state) & abs(polys[0])) ^ (polys[0] < 0); + Mettab29_1[symbol][state] = (sym ? (255-symbol):symbol) / 16; + + sym = parity((2*state) & abs(polys[1])) ^ (polys[1] < 0); + Mettab29_2[symbol][state] = (sym ? 
(255-symbol):symbol) / 16; + } + } + Init++; +} + +/* Initialize Viterbi decoder for start of new frame */ +int init_viterbi29_mmx(void *p,int starting_state){ + struct v29 *vp = p; + int i; + + if(p == NULL) + return -1; + for(i=0;i<256;i++) + vp->metrics1.c[i] = 63; + + vp->old_metrics = &vp->metrics1; + vp->new_metrics = &vp->metrics2; + vp->dp = vp->decisions; + vp->old_metrics->c[starting_state & 255] = 0; /* Bias known start state */ + return 0; +} + +/* Viterbi chainback */ +int chainback_viterbi29_mmx( + void *p, + unsigned char *data, /* Decoded output data */ + unsigned int nbits, /* Number of data bits */ + unsigned int endstate){ /* Terminal encoder state */ + + struct v29 *vp = (struct v29 *)p; + decision_t *d; + + if(p == NULL) + return -1; + + d = (decision_t *)vp->decisions; + endstate &= 255; + d += 8; /* Look past tail */ + while(nbits-- != 0){ + int k; + + k = d[nbits].c[endstate] & 1; + data[nbits>>3] = endstate = (endstate >> 1) | (k << 7); + } + return 0; +} + +/* Delete instance of a Viterbi decoder */ +void delete_viterbi29_mmx(void *p){ + struct v29 *vp = p; + + if(vp != NULL){ + free(vp->decisions); + free(vp); + } +} diff --git a/viterbi29_port.c b/viterbi29_port.c new file mode 100644 index 0000000..292dce8 --- /dev/null +++ b/viterbi29_port.c @@ -0,0 +1,166 @@ +/* K=9 r=1/2 Viterbi decoder in portable C + * Copyright Feb 2004, Phil Karn, KA9Q + * May be used under the terms of the GNU Lesser General Public License (LGPL) + */ +#include <stdio.h> +#include <stdlib.h> +#include <memory.h> +#include "fec.h" + +typedef union { unsigned int w[256]; } metric_t; +typedef union { unsigned long w[8];} decision_t; + +static union { unsigned char c[128]; } Branchtab29[2]; +static int Init = 0; + +/* State info for instance of Viterbi decoder */ +struct v29 { + metric_t metrics1; /* path metric buffer 1 */ + metric_t metrics2; /* path metric buffer 2 */ + decision_t *dp; /* Pointer to current decision */ + metric_t *old_metrics,*new_metrics; /* 
Pointers to path metrics, swapped on every bit */ + decision_t *decisions; /* Beginning of decisions for block */ +}; + +/* Initialize Viterbi decoder for start of new frame */ +int init_viterbi29_port(void *p,int starting_state){ + struct v29 *vp = p; + int i; + + if(p == NULL) + return -1; + for(i=0;i<256;i++) + vp->metrics1.w[i] = 63; + + vp->old_metrics = &vp->metrics1; + vp->new_metrics = &vp->metrics2; + vp->dp = vp->decisions; + vp->old_metrics->w[starting_state & 255] = 0; /* Bias known start state */ + return 0; +} + +void set_viterbi29_polynomial_port(int polys[2]){ + int state; + + for(state=0;state < 128;state++){ + Branchtab29[0].c[state] = (polys[0] < 0) ^ parity((2*state) & abs(polys[0])) ? 255 : 0; + Branchtab29[1].c[state] = (polys[1] < 0) ^ parity((2*state) & abs(polys[1])) ? 255 : 0; + } + Init++; +} + + +/* Create a new instance of a Viterbi decoder */ +void *create_viterbi29_port(int len){ + struct v29 *vp; + + if(!Init){ + int polys[2] = {V29POLYA,V29POLYB}; + set_viterbi29_polynomial_port(polys); + } + if((vp = (struct v29 *)malloc(sizeof(struct v29))) == NULL) + return NULL; + + if((vp->decisions = (decision_t *)malloc((len+8)*sizeof(decision_t))) == NULL){ + free(vp); + return NULL; + } + init_viterbi29_port(vp,0); + + return vp; +} + + +/* Viterbi chainback */ +int chainback_viterbi29_port( + void *p, + unsigned char *data, /* Decoded output data */ + unsigned int nbits, /* Number of data bits */ + unsigned int endstate){ /* Terminal encoder state */ + struct v29 *vp = p; + decision_t *d; + + if(p == NULL) + return -1; + + d = vp->decisions; + /* Make room beyond the end of the encoder register so we can + * accumulate a full byte of decoded data + */ + endstate %= 256; + + /* The store into data[] only needs to be done every 8 bits. 
+ * But this avoids a conditional branch, and the writes will + * combine in the cache anyway + */ + d += 8; /* Look past tail */ + while(nbits-- != 0){ + int k; + + k = (d[nbits].w[(endstate)/32] >> (endstate%32)) & 1; + data[nbits>>3] = endstate = (endstate >> 1) | (k << 7); + } + return 0; +} + + +/* Delete instance of a Viterbi decoder */ +void delete_viterbi29_port(void *p){ + struct v29 *vp = p; + + if(vp != NULL){ + free(vp->decisions); + free(vp); + } +} + +/* C-language butterfly */ +#define BFLY(i) {\ +unsigned int metric,m0,m1,decision;\ + metric = (Branchtab29[0].c[i] ^ sym0) + (Branchtab29[1].c[i] ^ sym1);\ + m0 = vp->old_metrics->w[i] + metric;\ + m1 = vp->old_metrics->w[i+128] + (510 - metric);\ + decision = (signed int)(m0-m1) > 0;\ + vp->new_metrics->w[2*i] = decision ? m1 : m0;\ + d->w[i/16] |= decision << ((2*i)&31);\ + m0 -= (metric+metric-510);\ + m1 += (metric+metric-510);\ + decision = (signed int)(m0-m1) > 0;\ + vp->new_metrics->w[2*i+1] = decision ? m1 : m0;\ + d->w[i/16] |= decision << ((2*i+1)&31);\ +} + +/* Update decoder with a block of demodulated symbols + * Note that nbits is the number of decoded data bits, not the number + * of symbols! 
+ */ + +int update_viterbi29_blk_port(void *p,unsigned char *syms,int nbits){ + struct v29 *vp = p; + decision_t *d; + + if(p == NULL) + return -1; + + d = (decision_t *)vp->dp; + while(nbits--){ + void *tmp; + unsigned char sym0,sym1; + int i; + + for(i=0;i<8;i++) + d->w[i] = 0; + sym0 = *syms++; + sym1 = *syms++; + + for(i=0;i<128;i++) + BFLY(i); + + d++; + tmp = vp->old_metrics; + vp->old_metrics = vp->new_metrics; + vp->new_metrics = tmp; + } + vp->dp = d; + return 0; +} diff --git a/viterbi29_sse.c b/viterbi29_sse.c new file mode 100644 index 0000000..4a92e5f --- /dev/null +++ b/viterbi29_sse.c @@ -0,0 +1,114 @@ +/* K=9 r=1/2 Viterbi decoder for SSE + * Copyright Feb 2004, Phil Karn, KA9Q + * May be used under the terms of the GNU Lesser General Public License (LGPL) + */ +#include <stdio.h> +#include <stdlib.h> +#include <memory.h> +#include <xmmintrin.h> +#include "fec.h" + +typedef union { unsigned char w[256]; __m64 v[32];} metric_t; +typedef union { unsigned long w[8]; unsigned char c[32]; __m64 v[4];} decision_t; + +union branchtab29 { unsigned char c[128]; } Branchtab29_sse[2]; +static int Init = 0; + +/* State info for instance of Viterbi decoder + * Don't change this without also changing references in [mmx|sse|sse2]bfly29.s! 
+ */ +struct v29 { + metric_t metrics1; /* path metric buffer 1 */ + metric_t metrics2; /* path metric buffer 2 */ + decision_t *dp; /* Pointer to current decision */ + metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */ + decision_t *decisions; /* Beginning of decisions for block */ +}; + +/* Create a new instance of a Viterbi decoder */ +void *create_viterbi29_sse(int len){ + struct v29 *vp; + + if(!Init){ + int polys[2] = { V29POLYA,V29POLYB }; + + set_viterbi29_polynomial_sse(polys); + } + if((vp = (struct v29 *)malloc(sizeof(struct v29))) == NULL) + return NULL; + if((vp->decisions = (decision_t *)malloc((len+8)*sizeof(decision_t))) == NULL){ + free(vp); + return NULL; + } + init_viterbi29(vp,0); + return vp; +} + +void set_viterbi29_polynomial_sse(int polys[2]){ + int state; + + for(state=0;state < 128;state++){ + Branchtab29_sse[0].c[state] = (polys[0] < 0) ^ parity((2*state) & abs(polys[0])) ? 255 : 0; + Branchtab29_sse[1].c[state] = (polys[1] < 0) ^ parity((2*state) & abs(polys[1])) ? 255 : 0; + } + Init++; +} + +/* Initialize Viterbi decoder for start of new frame */ +int init_viterbi29_sse(void *p,int starting_state){ + struct v29 *vp = p; + int i; + + if(p == NULL) + return -1; + for(i=0;i<256;i++) + vp->metrics1.w[i] = 200; + + vp->old_metrics = &vp->metrics1; + vp->new_metrics = &vp->metrics2; + vp->dp = vp->decisions; + vp->old_metrics->w[starting_state & 255] = 0; /* Bias known start state */ + return 0; +} + +/* Viterbi chainback */ +int chainback_viterbi29_sse( + void *p, + unsigned char *data, /* Decoded output data */ + unsigned int nbits, /* Number of data bits */ + unsigned int endstate){ /* Terminal encoder state */ + struct v29 *vp = p; + decision_t *d; + + if(p == NULL) + return -1; + d = vp->decisions; + /* Make room beyond the end of the encoder register so we can + * accumulate a full byte of decoded data + */ + endstate %= 256; + + /* The store into data[] only needs to be done every 8 bits. 
+ * But this avoids a conditional branch, and the writes will + * combine in the cache anyway + */ + d += 8; /* Look past tail */ + while(nbits-- != 0){ + int k; + + k = (d[nbits].c[endstate/8] >> (endstate%8)) & 1; + data[nbits>>3] = endstate = (endstate >> 1) | (k << 7); + } + return 0; +} + + +/* Delete instance of a Viterbi decoder */ +void delete_viterbi29_sse(void *p){ + struct v29 *vp = p; + + if(vp != NULL){ + free(vp->decisions); + free(vp); + } +} diff --git a/viterbi29_sse2.c b/viterbi29_sse2.c new file mode 100644 index 0000000..4c7336c --- /dev/null +++ b/viterbi29_sse2.c @@ -0,0 +1,119 @@ +/* K=9 r=1/2 Viterbi decoder for SSE2 + * Copyright Feb 2004, Phil Karn, KA9Q + * May be used under the terms of the GNU Lesser General Public License (LGPL) + */ +#include <stdio.h> +#include <stdlib.h> +#include <memory.h> +#include <emmintrin.h> +#include "fec.h" + +typedef union { unsigned char c[256]; __m128i v[16];} metric_t; +typedef union { unsigned long w[8]; unsigned char c[32];} decision_t; + +union branchtab29 { unsigned char c[128]; } Branchtab29_sse2[2]; +static int Init = 0; + +/* State info for instance of Viterbi decoder + * Don't change this without also changing references in sse2bfly29.s! 
+ */ +struct v29 { + metric_t metrics1; /* path metric buffer 1 */ + metric_t metrics2; /* path metric buffer 2 */ + decision_t *dp; /* Pointer to current decision */ + metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */ + decision_t *decisions; /* Beginning of decisions for block */ +}; + +/* Initialize Viterbi decoder for start of new frame */ +int init_viterbi29_sse2(void *p,int starting_state){ + struct v29 *vp = p; + int i; + + for(i=0;i<256;i++) + vp->metrics1.c[i] = 63; + + vp->old_metrics = &vp->metrics1; + vp->new_metrics = &vp->metrics2; + vp->dp = vp->decisions; + vp->old_metrics->c[starting_state & 255] = 0; /* Bias known start state */ + return 0; +} + +void set_viterbi29_polynomial_sse2(int polys[2]){ + int state; + + for(state=0;state < 128;state++){ + Branchtab29_sse2[0].c[state] = (polys[0] < 0) ^ parity((2*state) & abs(polys[0])) ? 255 : 0; + Branchtab29_sse2[1].c[state] = (polys[1] < 0) ^ parity((2*state) & abs(polys[1])) ? 255 : 0; + } + Init++; +} + + +/* Create a new instance of a Viterbi decoder */ +void *create_viterbi29_sse2(int len){ + void *p; + struct v29 *vp; + + if(!Init){ + int polys[2] = {V29POLYA,V29POLYB}; + + set_viterbi29_polynomial(polys); + } + /* Ordinary malloc() only returns 8-byte alignment, we need 16 */ + if(posix_memalign(&p, sizeof(__m128i),sizeof(struct v29))) + return NULL; + vp = (struct v29 *)p; + if((p = malloc((len+8)*sizeof(decision_t))) == NULL){ + free(vp); + return NULL; + } + vp->decisions = (decision_t *)p; + init_viterbi29_sse2(vp,0); + return vp; +} + + +/* Viterbi chainback */ +int chainback_viterbi29_sse2( + void *p, + unsigned char *data, /* Decoded output data */ + unsigned int nbits, /* Number of data bits */ + unsigned int endstate){ /* Terminal encoder state */ + struct v29 *vp = p; + decision_t *d; + + if(p == NULL) + return -1; + d = vp->decisions; + + /* Make room beyond the end of the encoder register so we can + * accumulate a full byte of decoded data + */ 
+ endstate %= 256; + + /* The store into data[] only needs to be done every 8 bits. + * But this avoids a conditional branch, and the writes will + * combine in the cache anyway + */ + d += 8; /* Look past tail */ + while(nbits-- != 0){ + int k; + + k = (d[nbits].c[endstate/8] >> (endstate%8)) & 1; + data[nbits>>3] = endstate = (endstate >> 1) | (k << 7); + } + return 0; +} + + +/* Delete instance of a Viterbi decoder */ +void delete_viterbi29_sse2(void *p){ + struct v29 *vp = p; + + if(vp != NULL){ + free(vp->decisions); + free(vp); + } +} diff --git a/viterbi39.c b/viterbi39.c new file mode 100644 index 0000000..ac28c2c --- /dev/null +++ b/viterbi39.c @@ -0,0 +1,153 @@ +/* Switch to K=9 r=1/3 Viterbi decoder with optional Intel or PowerPC SIMD + * Copyright Aug 2006, Phil Karn, KA9Q + */ +#include <stdio.h> +#include <stdlib.h> +#include <memory.h> +#include "fec.h" + +/* Create a new instance of a Viterbi decoder */ +void *create_viterbi39(int len){ + find_cpu_mode(); + + switch(Cpu_mode){ + case PORT: + default: + return create_viterbi39_port(len); +#ifdef __VEC__ + case ALTIVEC: + return create_viterbi39_av(len); +#endif +#ifdef __i386__ + case MMX: + return create_viterbi39_mmx(len); + case SSE: + return create_viterbi39_sse(len); + case SSE2: + return create_viterbi39_sse2(len); +#endif + } +} + +void set_viterbi39_polynomial(int polys[3]){ + switch(Cpu_mode){ + case PORT: + default: + set_viterbi39_polynomial_port(polys); + break; +#ifdef __VEC__ + case ALTIVEC: + set_viterbi39_polynomial_av(polys); + break; +#endif +#ifdef __i386__ + case MMX: + set_viterbi39_polynomial_mmx(polys); + break; + case SSE: + set_viterbi39_polynomial_sse(polys); + break; + case SSE2: + set_viterbi39_polynomial_sse2(polys); + break; +#endif + } +} + + +/* Initialize Viterbi decoder for start of new frame */ +int init_viterbi39(void *p,int starting_state){ + switch(Cpu_mode){ + case PORT: + default: + return init_viterbi39_port(p,starting_state); +#ifdef __VEC__ + case ALTIVEC: + 
return init_viterbi39_av(p,starting_state); +#endif +#ifdef __i386__ + case MMX: + return init_viterbi39_mmx(p,starting_state); + case SSE: + return init_viterbi39_sse(p,starting_state); + case SSE2: + return init_viterbi39_sse2(p,starting_state); +#endif + } +} + +/* Viterbi chainback */ +int chainback_viterbi39( + void *p, + unsigned char *data, /* Decoded output data */ + unsigned int nbits, /* Number of data bits */ + unsigned int endstate){ /* Terminal encoder state */ + + switch(Cpu_mode){ + case PORT: + default: + return chainback_viterbi39_port(p,data,nbits,endstate); +#ifdef __VEC__ + case ALTIVEC: + return chainback_viterbi39_av(p,data,nbits,endstate); +#endif +#ifdef __i386__ + case MMX: + return chainback_viterbi39_mmx(p,data,nbits,endstate); + case SSE: + return chainback_viterbi39_sse(p,data,nbits,endstate); + case SSE2: + return chainback_viterbi39_sse2(p,data,nbits,endstate); +#endif + } +} + +/* Delete instance of a Viterbi decoder */ +void delete_viterbi39(void *p){ + switch(Cpu_mode){ + case PORT: + default: + delete_viterbi39_port(p); + break; +#ifdef __VEC__ + case ALTIVEC: + delete_viterbi39_av(p); + break; +#endif +#ifdef __i386__ + case MMX: + delete_viterbi39_mmx(p); + break; + case SSE: + delete_viterbi39_sse(p); + break; + case SSE2: + delete_viterbi39_sse2(p); + break; +#endif + } +} + +/* Update decoder with a block of demodulated symbols + * Note that nbits is the number of decoded data bits, not the number + * of symbols! 
+ */ +int update_viterbi39_blk(void *p,unsigned char syms[],int nbits){ + switch(Cpu_mode){ + case PORT: + default: + return update_viterbi39_blk_port(p,syms,nbits); +#ifdef __VEC__ + case ALTIVEC: + return update_viterbi39_blk_av(p,syms,nbits); +#endif +#ifdef __i386__ + case MMX: + return update_viterbi39_blk_mmx(p,syms,nbits); + case SSE: + return update_viterbi39_blk_sse(p,syms,nbits); + case SSE2: + return update_viterbi39_blk_sse2(p,syms,nbits); +#endif + } +} diff --git a/viterbi39_av.c b/viterbi39_av.c new file mode 100644 index 0000000..2deed51 --- /dev/null +++ b/viterbi39_av.c @@ -0,0 +1,251 @@ +/* K=9 r=1/3 Viterbi decoder for PowerPC G4/G5 Altivec vector instructions + * 8-bit offset-binary soft decision samples + * Copyright Aug 2006, Phil Karn, KA9Q + * May be used under the terms of the GNU Lesser General Public License (LGPL) + */ +#include <stdio.h> +#include <stdlib.h> +#include <memory.h> +#include <limits.h> +#include "fec.h" + +typedef union { unsigned char c[2][16]; vector unsigned char v[2]; } decision_t; +typedef union { unsigned short s[256]; vector unsigned short v[32]; } metric_t; + +static union branchtab39 { unsigned short s[128]; vector unsigned short v[16];} Branchtab39[3]; +static int Init = 0; + +/* State info for instance of Viterbi decoder */ +struct v39 { + metric_t metrics1; /* path metric buffer 1 */ + metric_t metrics2; /* path metric buffer 2 */ + void *dp; /* Pointer to current decision */ + metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */ + void *decisions; /* Beginning of decisions for block */ +}; + +/* Initialize Viterbi decoder for start of new frame */ +int init_viterbi39_av(void *p,int starting_state){ + struct v39 *vp = p; + int i; + + for(i=0;i<32;i++) + vp->metrics1.v[i] = (vector unsigned short)(1000); + + vp->old_metrics = &vp->metrics1; + vp->new_metrics = &vp->metrics2; + vp->dp = vp->decisions; + vp->old_metrics->s[starting_state & 255] = 0; /* Bias known start state */ 
+ return 0; +} + +void set_viterbi39_polynomial_av(int polys[3]){ + int state; + + for(state=0;state < 128;state++){ + Branchtab39[0].s[state] = (polys[0] < 0) ^ parity((2*state) & abs(polys[0])) ? 255 : 0; + Branchtab39[1].s[state] = (polys[1] < 0) ^ parity((2*state) & abs(polys[1])) ? 255 : 0; + Branchtab39[2].s[state] = (polys[2] < 0) ^ parity((2*state) & abs(polys[2])) ? 255 : 0; + } + Init++; +} + +/* Create a new instance of a Viterbi decoder */ +void *create_viterbi39_av(int len){ + struct v39 *vp; + + if(!Init){ + int polys[3] = { V39POLYA, V39POLYB, V39POLYC }; + + set_viterbi39_polynomial_av(polys); + } + vp = (struct v39 *)malloc(sizeof(struct v39)); + vp->decisions = malloc(sizeof(decision_t)*(len+8)); + init_viterbi39_av(vp,0); + return vp; +} + +/* Viterbi chainback */ +int chainback_viterbi39_av( + void *p, + unsigned char *data, /* Decoded output data */ + unsigned int nbits, /* Number of data bits */ + unsigned int endstate){ /* Terminal encoder state */ + struct v39 *vp = p; + decision_t *d = (decision_t *)vp->decisions; + int path_metric; + + /* Make room beyond the end of the encoder register so we can + * accumulate a full byte of decoded data + */ + endstate %= 256; + + path_metric = vp->old_metrics->s[endstate]; + + /* The store into data[] only needs to be done every 8 bits. + * But this avoids a conditional branch, and the writes will + * combine in the cache anyway + */ + d += 8; /* Look past tail */ + while(nbits-- != 0){ + int k; + + k = (d[nbits].c[endstate >> 7][endstate & 15] & (0x80 >> ((endstate>>4)&7)) ) ? 
1 : 0; + endstate = (k << 7) | (endstate >> 1); + data[nbits>>3] = endstate; + } + return path_metric; +} + +/* Delete instance of a Viterbi decoder */ +void delete_viterbi39_av(void *p){ + struct v39 *vp = p; + + if(vp != NULL){ + free(vp->decisions); + free(vp); + } +} + +int update_viterbi39_blk_av(void *p,unsigned char *syms,int nbits){ + struct v39 *vp = p; + decision_t *d = (decision_t *)vp->dp; + int path_metric = 0; + vector unsigned char decisions = (vector unsigned char)(0); + + while(nbits--){ + vector unsigned short symv,sym0v,sym1v,sym2v; + vector unsigned char s; + void *tmp; + int i; + + /* Splat the 0th symbol across sym0v, the 1st symbol across sym1v, etc */ + s = (vector unsigned char)vec_perm(vec_ld(0,syms),vec_ld(5,syms),vec_lvsl(0,syms)); + + symv = (vector unsigned short)vec_mergeh((vector unsigned char)(0),s); /* Unsigned byte->word unpack */ + sym0v = vec_splat(symv,0); + sym1v = vec_splat(symv,1); + sym2v = vec_splat(symv,2); + syms += 3; + + for(i=0;i<16;i++){ + vector bool short decision0,decision1; + vector unsigned short metric,m_metric,m0,m1,m2,m3,survivor0,survivor1; + + /* Form branch metrics + * Because Branchtab takes on values 0 and 255, and the values of sym?v are offset binary in the range 0-255, + * the XOR operations constitute conditional negation. 
+ * the metrics are in the range 0-765 + */ + m0 = vec_add(vec_xor(Branchtab39[0].v[i],sym0v),vec_xor(Branchtab39[1].v[i],sym1v)); + m1 = vec_xor(Branchtab39[2].v[i],sym2v); + metric = vec_add(m0,m1); + m_metric = vec_sub((vector unsigned short)(765),metric); + + /* Add branch metrics to path metrics */ + m0 = vec_adds(vp->old_metrics->v[i],metric); + m3 = vec_adds(vp->old_metrics->v[16+i],metric); + m1 = vec_adds(vp->old_metrics->v[16+i],m_metric); + m2 = vec_adds(vp->old_metrics->v[i],m_metric); + + /* Compare and select */ + decision0 = vec_cmpgt(m0,m1); + decision1 = vec_cmpgt(m2,m3); + survivor0 = vec_min(m0,m1); + survivor1 = vec_min(m2,m3); + + /* Store decisions and survivors. + * To save space without SSE2's handy PMOVMSKB instruction, we pack and store them in + * a funny interleaved fashion that we undo in the chainback function. + */ + decisions = vec_add(decisions,decisions); /* Shift each byte 1 bit to the left */ + + /* Booleans are either 0xff or 0x00. Subtracting 0x00 leaves the lsb zero; subtracting + * 0xff is equivalent to adding 1, which sets the lsb. 
+ */ + decisions = vec_sub(decisions,(vector unsigned char)vec_pack(vec_mergeh(decision0,decision1),vec_mergel(decision0,decision1))); + + vp->new_metrics->v[2*i] = vec_mergeh(survivor0,survivor1); + vp->new_metrics->v[2*i+1] = vec_mergel(survivor0,survivor1); + + if((i % 8) == 7){ + /* We've accumulated a total of 128 decisions, stash and start again */ + d->v[i>>3] = decisions; /* No need to clear, the new bits will replace the old */ + } + } +#if 0 + /* Experimentally determine metric spread + * The results are fixed for a given code and input symbol size + */ + { + int i; + vector unsigned short min_metric; + vector unsigned short max_metric; + union { vector unsigned short v; unsigned short s[8];} t; + int minimum,maximum; + static int max_spread = 0; + + min_metric = max_metric = vp->new_metrics->v[0]; + for(i=1;i<32;i++){ + min_metric = vec_min(min_metric,vp->new_metrics->v[i]); + max_metric = vec_max(max_metric,vp->new_metrics->v[i]); + } + min_metric = vec_min(min_metric,vec_sld(min_metric,min_metric,8)); + max_metric = vec_max(max_metric,vec_sld(max_metric,max_metric,8)); + min_metric = vec_min(min_metric,vec_sld(min_metric,min_metric,4)); + max_metric = vec_max(max_metric,vec_sld(max_metric,max_metric,4)); + min_metric = vec_min(min_metric,vec_sld(min_metric,min_metric,2)); + max_metric = vec_max(max_metric,vec_sld(max_metric,max_metric,2)); + + t.v = min_metric; + minimum = t.s[0]; + t.v = max_metric; + maximum = t.s[0]; + if(maximum-minimum > max_spread){ + max_spread = maximum-minimum; + printf("metric spread = %d\n",max_spread); + } + } +#endif + + /* Renormalize if necessary. This deserves some explanation. + * The maximum possible spread, found by experiment, for 8 bit symbols is about 3825 + * So by looking at one arbitrary metric we can tell if any of them have possibly saturated. + * However, this is very conservative. 
Large spreads occur only at very high Eb/No, where + * saturating a bad path metric doesn't do much to increase its chances of being erroneously chosen as a survivor. + + * At more interesting (low) Eb/No ratios, the spreads are much smaller so our chances of saturating a metric + * by not not normalizing when we should are extremely low. So either way, the risk to performance is small. + + * All this is borne out by experiment. + */ + if(vp->new_metrics->s[0] >= USHRT_MAX-5000){ + vector unsigned short scale; + union { vector unsigned short v; unsigned short s[8];} t; + + /* Find smallest metric and splat */ + scale = vp->new_metrics->v[0]; + for(i=1;i<32;i++) + scale = vec_min(scale,vp->new_metrics->v[i]); + + scale = vec_min(scale,vec_sld(scale,scale,8)); + scale = vec_min(scale,vec_sld(scale,scale,4)); + scale = vec_min(scale,vec_sld(scale,scale,2)); + + /* Subtract it from all metrics + * Work backwards to try to improve the cache hit ratio, assuming LRU + */ + for(i=31;i>=0;i--) + vp->new_metrics->v[i] = vec_subs(vp->new_metrics->v[i],scale); + t.v = scale; + path_metric += t.s[0]; + } + d++; + /* Swap pointers to old and new metrics */ + tmp = vp->old_metrics; + vp->old_metrics = vp->new_metrics; + vp->new_metrics = tmp; + } + vp->dp = d; + return path_metric; +} diff --git a/viterbi39_mmx.c b/viterbi39_mmx.c new file mode 100644 index 0000000..875391a --- /dev/null +++ b/viterbi39_mmx.c @@ -0,0 +1,185 @@ +/* K=9 r=1/3 Viterbi decoder for x86 MMX + * Aug 2006, Phil Karn, KA9Q + * May be used under the terms of the GNU Lesser General Public License (LGPL) + */ +#include <mmintrin.h> +#include <stdio.h> +#include <stdlib.h> +#include <memory.h> +#include "fec.h" + +typedef union { unsigned char c[256]; __m64 v[32];} decision_t; +typedef union { unsigned short s[256]; __m64 v[64];} metric_t; + +static union branchtab39 { unsigned short s[128]; __m64 v[32];} Branchtab39[3]; +static int Init = 0; + +/* State info for instance of Viterbi decoder */ +struct v39 { + 
metric_t metrics1; /* path metric buffer 1 */ + metric_t metrics2; /* path metric buffer 2 */ + void *dp; /* Pointer to current decision */ + metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */ + void *decisions; /* Beginning of decisions for block */ +}; + +/* Initialize Viterbi decoder for start of new frame */ +int init_viterbi39_mmx(void *p,int starting_state){ + struct v39 *vp = p; + int i; + + if(p == NULL) + return -1; + for(i=0;i<256;i++) + vp->metrics1.s[i] = 1000; + + vp->old_metrics = &vp->metrics1; + vp->new_metrics = &vp->metrics2; + vp->dp = vp->decisions; + vp->old_metrics->s[starting_state & 255] = 0; /* Bias known start state */ + return 0; +} + +void set_viterbi39_polynomial_mmx(int polys[3]){ + int state; + + for(state=0;state < 128;state++){ + Branchtab39[0].s[state] = (polys[0] < 0) ^ parity((2*state) & polys[0]) ? 255:0; + Branchtab39[1].s[state] = (polys[1] < 0) ^ parity((2*state) & polys[1]) ? 255:0; + Branchtab39[2].s[state] = (polys[2] < 0) ^ parity((2*state) & polys[2]) ? 255:0; + } + Init++; +} + +/* Create a new instance of a Viterbi decoder */ +void *create_viterbi39_mmx(int len){ + struct v39 *vp; + + if(!Init){ + int polys[3] = { V39POLYA,V39POLYB,V39POLYC }; + set_viterbi39_polynomial_mmx(polys); + } + if((vp = (struct v39 *)malloc(sizeof(struct v39))) == NULL) + return NULL; + if((vp->decisions = malloc((len+8)*sizeof(decision_t))) == NULL){ + free(vp); + return NULL; + } + init_viterbi39_mmx(vp,0); + return vp; +} + + + +/* Viterbi chainback */ +int chainback_viterbi39_mmx( + void *p, + unsigned char *data, /* Decoded output data */ + unsigned int nbits, /* Number of data bits */ + unsigned int endstate){ /* Terminal encoder state */ + struct v39 *vp = p; + decision_t *d; + int path_metric; + + if(p == NULL) + return -1; + + d = (decision_t *)vp->decisions; + + endstate %= 256; + + path_metric = vp->old_metrics->s[endstate]; + + /* The store into data[] only needs to be done every 8 bits. 
+ * But this avoids a conditional branch, and the writes will + * combine in the cache anyway + */ + d += 8; /* Look past tail */ + while(nbits-- != 0){ + int k; + + k = d[nbits].c[endstate] & 1; + endstate = (k << 7) | (endstate >> 1); + data[nbits>>3] = endstate; + } + return path_metric; +} + +/* Delete instance of a Viterbi decoder */ +void delete_viterbi39_mmx(void *p){ + struct v39 *vp = p; + + if(vp != NULL){ + free(vp->decisions); + free(vp); + } +} + + +int update_viterbi39_blk_mmx(void *p,unsigned char *syms,int nbits){ + struct v39 *vp = p; + decision_t *d; + int path_metric = 0; + + if(p == NULL) + return -1; + + d = (decision_t *)vp->dp; + + while(nbits--){ + __m64 sym0v,sym1v,sym2v; + void *tmp; + int i; + + /* Splat the 0th symbol across sym0v, the 1st symbol across sym1v, etc */ + sym0v = _mm_set1_pi16(syms[0]); + sym1v = _mm_set1_pi16(syms[1]); + sym2v = _mm_set1_pi16(syms[2]); + syms += 3; + + for(i=0;i<32;i++){ + __m64 decision0,decision1,metric,m_metric,m0,m1,m2,m3,survivor0,survivor1; + + /* Form branch metrics + * Because Branchtab takes on values 0 and 255, and the values of sym?v are offset binary in the range 0-255, + * the XOR operations constitute conditional negation. 
+ * metric and m_metric (-metric) are in the range 0-1530 + */ + m0 = _mm_add_pi16(_mm_xor_si64(Branchtab39[0].v[i],sym0v),_mm_xor_si64(Branchtab39[1].v[i],sym1v)); + metric = _mm_add_pi16(_mm_xor_si64(Branchtab39[2].v[i],sym2v),m0); + m_metric = _mm_sub_pi16(_mm_set1_pi16(765),metric); + + /* Add branch metrics to path metrics */ + m0 = _mm_add_pi16(vp->old_metrics->v[i],metric); + m3 = _mm_add_pi16(vp->old_metrics->v[32+i],metric); + m1 = _mm_add_pi16(vp->old_metrics->v[32+i],m_metric); + m2 = _mm_add_pi16(vp->old_metrics->v[i],m_metric); + + /* Compare and select + * There's no packed min instruction in MMX, so we use modulo arithmetic + * to form the decisions and then do the select the hard way + */ + decision0 = _mm_cmpgt_pi16(_mm_sub_pi16(m0,m1),_mm_setzero_si64()); + decision1 = _mm_cmpgt_pi16(_mm_sub_pi16(m2,m3),_mm_setzero_si64()); + survivor0 = _mm_or_si64(_mm_and_si64(decision0,m1),_mm_andnot_si64(decision0,m0)); + survivor1 = _mm_or_si64(_mm_and_si64(decision1,m3),_mm_andnot_si64(decision1,m2)); + + /* Merge decisions and store as bytes */ + d->v[i] = _mm_unpacklo_pi8(_mm_packs_pi16(decision0,_mm_setzero_si64()),_mm_packs_pi16(decision1,_mm_setzero_si64())); + + /* Store surviving metrics */ + vp->new_metrics->v[2*i] = _mm_unpacklo_pi16(survivor0,survivor1); + vp->new_metrics->v[2*i+1] = _mm_unpackhi_pi16(survivor0,survivor1); + } + if(vp->new_metrics->s[0] < vp->old_metrics->s[0]) + path_metric += 65536; /* Hack: wraparound probably occured */ + d++; + /* Swap pointers to old and new metrics */ + tmp = vp->old_metrics; + vp->old_metrics = vp->new_metrics; + vp->new_metrics = tmp; + } + vp->dp = d; + _mm_empty(); + return path_metric; +} diff --git a/viterbi39_port.c b/viterbi39_port.c new file mode 100644 index 0000000..5685c90 --- /dev/null +++ b/viterbi39_port.c @@ -0,0 +1,168 @@ +/* K=9 r=1/3 Viterbi decoder in portable C + * Copyright Aug 2006, Phil Karn, KA9Q + * May be used under the terms of the GNU Lesser General Public License (LGPL) + */ 
+#include <stdio.h> +#include <stdlib.h> +#include <memory.h> +#include "fec.h" + +typedef union { unsigned int w[256]; } metric_t; +typedef union { unsigned long w[8];} decision_t; + +static union { unsigned char c[128]; } Branchtab39[3]; +static int Init = 0; + +/* State info for instance of Viterbi decoder */ +struct v39 { + metric_t metrics1; /* path metric buffer 1 */ + metric_t metrics2; /* path metric buffer 2 */ + decision_t *dp; /* Pointer to current decision */ + metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */ + decision_t *decisions; /* Beginning of decisions for block */ +}; + +/* Initialize Viterbi decoder for start of new frame; returns 0, or -1 if p is NULL */ +int init_viterbi39_port(void *p,int starting_state){ + struct v39 *vp = p; + int i; + + if(p == NULL) + return -1; + for(i=0;i<256;i++) + vp->metrics1.w[i] = 63; + + vp->old_metrics = &vp->metrics1; + vp->new_metrics = &vp->metrics2; + vp->dp = vp->decisions; + vp->old_metrics->w[starting_state & 255] = 0; /* Bias known start state */ + return 0; +} + +void set_viterbi39_polynomial_port(int polys[3]){ + int state; + + for(state=0;state < 128;state++){ + Branchtab39[0].c[state] = (polys[0] < 0) ^ parity((2*state) & abs(polys[0])) ? 255 : 0; + Branchtab39[1].c[state] = (polys[1] < 0) ^ parity((2*state) & abs(polys[1])) ? 255 : 0; + Branchtab39[2].c[state] = (polys[2] < 0) ^ parity((2*state) & abs(polys[2])) ? 
255 : 0; + } + Init++; +} + +/* Create a new instance of a Viterbi decoder with room for len+8 decision records; returns NULL if either allocation fails */ +void *create_viterbi39_port(int len){ + struct v39 *vp; + + if(!Init){ + int polys[3] = {V39POLYA,V39POLYB,V39POLYC}; + set_viterbi39_polynomial_port(polys); + } + if((vp = (struct v39 *)malloc(sizeof(struct v39))) == NULL) + return NULL; + + if((vp->decisions = (decision_t *)malloc((len+8)*sizeof(decision_t))) == NULL){ + free(vp); + return NULL; + } + init_viterbi39_port(vp,0); + + return vp; +} + + +/* Viterbi chainback */ +int chainback_viterbi39_port( + void *p, + unsigned char *data, /* Decoded output data */ + unsigned int nbits, /* Number of data bits */ + unsigned int endstate){ /* Terminal encoder state */ + struct v39 *vp = p; + decision_t *d; + + if(p == NULL) + return -1; + + d = vp->decisions; + /* Make room beyond the end of the encoder register so we can + * accumulate a full byte of decoded data + */ + endstate %= 256; + + /* The store into data[] only needs to be done every 8 bits. + * But this avoids a conditional branch, and the writes will + * combine in the cache anyway + */ + d += 8; /* Look past tail */ + while(nbits-- != 0){ + int k; + + k = (d[nbits].w[(endstate)/32] >> (endstate%32)) & 1; + data[nbits>>3] = endstate = (endstate >> 1) | (k << 7); + } + return 0; +} + + +/* Delete instance of a Viterbi decoder */ +void delete_viterbi39_port(void *p){ + struct v39 *vp = p; + + if(vp != NULL){ + free(vp->decisions); + free(vp); + } +} + +/* C-language butterfly */ +#define BFLY(i) {\ +unsigned int metric,m0,m1,decision;\ + metric = (Branchtab39[0].c[i] ^ sym0) + (Branchtab39[1].c[i] ^ sym1) + \ + (Branchtab39[2].c[i] ^ sym2);\ + m0 = vp->old_metrics->w[i] + metric;\ + m1 = vp->old_metrics->w[i+128] + (765 - metric);\ + decision = (signed int)(m0-m1) > 0;\ + vp->new_metrics->w[2*i] = decision ? 
m1 : m0;\ + d->w[i/16] |= decision << ((2*i)&31);\ + m0 -= (metric+metric-765);\ + m1 += (metric+metric-765);\ + decision = (signed int)(m0-m1) > 0;\ + vp->new_metrics->w[2*i+1] = decision ? m1 : m0;\ + d->w[i/16] |= decision << ((2*i+1)&31);\ +} + +/* Update decoder with a block of demodulated symbols + * Note that nbits is the number of decoded data bits, not the number + * of symbols! Three symbols are consumed per decoded bit. + */ + +int update_viterbi39_blk_port(void *p,unsigned char *syms,int nbits){ + struct v39 *vp = p; + decision_t *d; + + if(p == NULL) + return -1; + + d = (decision_t *)vp->dp; + while(nbits--){ + void *tmp; + unsigned char sym0,sym1,sym2; + int i; + + for(i=0;i<8;i++) + d->w[i] = 0; + sym0 = *syms++; + sym1 = *syms++; + sym2 = *syms++; + + for(i=0;i<128;i++) + BFLY(i); + + d++; + tmp = vp->old_metrics; + vp->old_metrics = vp->new_metrics; + vp->new_metrics = tmp; + } + vp->dp = d; + return 0; +} diff --git a/viterbi39_sse.c b/viterbi39_sse.c new file mode 100644 index 0000000..c2f2865 --- /dev/null +++ b/viterbi39_sse.c @@ -0,0 +1,201 @@ +/* K=9 r=1/3 Viterbi decoder for x86 SSE + * Copyright Aug 2006, Phil Karn, KA9Q + * May be used under the terms of the GNU Lesser General Public License (LGPL) + */ +#include <xmmintrin.h> +#include <stdio.h> +#include <stdlib.h> +#include <memory.h> +#include <limits.h> +#include "fec.h" + +typedef union { unsigned long w[8]; unsigned char c[32];} decision_t; +typedef union { signed short s[256]; __m64 v[64];} metric_t; + +static union branchtab39 { unsigned short s[128]; __m64 v[32];} Branchtab39[3]; +static int Init = 0; + +/* State info for instance of Viterbi decoder */ +struct v39 { + metric_t metrics1; /* path metric buffer 1 */ + metric_t metrics2; /* path metric buffer 2 */ + void *dp; /* Pointer to current decision */ + metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */ + void *decisions; /* Beginning of decisions for block */ +}; + +/* Initialize Viterbi decoder for start of new frame */ +int 
init_viterbi39_sse(void *p,int starting_state){ + struct v39 *vp = p; + int i; + + if(p == NULL) + return -1; + for(i=0;i<256;i++) + vp->metrics1.s[i] = (SHRT_MIN+1000); + + vp->old_metrics = &vp->metrics1; + vp->new_metrics = &vp->metrics2; + vp->dp = vp->decisions; + vp->old_metrics->s[starting_state & 255] = SHRT_MIN; /* Bias known start state */ + return 0; +} + +/* Create a new instance of a Viterbi decoder */ +void *create_viterbi39_sse(int len){ + struct v39 *vp; + + if(!Init){ + int polys[3] = { V39POLYA, V39POLYB, V39POLYC }; + + set_viterbi39_polynomial_sse(polys); + } + if((vp = (struct v39 *)malloc(sizeof(struct v39))) == NULL){ + return NULL; + } + if((vp->decisions = malloc((len+8)*sizeof(decision_t))) == NULL){ + free(vp); + return NULL; + } + init_viterbi39_sse(vp,0); + return vp; +} + +/* Build the branch tables from the three generator polynomials. A negative polynomial requests an inverted output symbol, so only its magnitude is used as the tap mask */ +void set_viterbi39_polynomial_sse(int polys[3]){ + int state; + + for(state=0;state < 128;state++){ + Branchtab39[0].s[state] = (polys[0] < 0) ^ parity((2*state) & abs(polys[0])) ? 255:0; + Branchtab39[1].s[state] = (polys[1] < 0) ^ parity((2*state) & abs(polys[1])) ? 255:0; + Branchtab39[2].s[state] = (polys[2] < 0) ^ parity((2*state) & abs(polys[2])) ? 255:0; + } + Init++; +} + +/* Viterbi chainback */ +int chainback_viterbi39_sse( + void *p, + unsigned char *data, /* Decoded output data */ + unsigned int nbits, /* Number of data bits */ + unsigned int endstate){ /* Terminal encoder state */ + struct v39 *vp = p; + decision_t *d; + int path_metric; + + if(p == NULL) + return -1; + d = (decision_t *)vp->decisions; + endstate %= 256; + + path_metric = vp->old_metrics->s[endstate]; + + /* The store into data[] only needs to be done every 8 bits. 
+ * But this avoids a conditional branch, and the writes will + * combine in the cache anyway + */ + d += 8; /* Look past tail */ + while(nbits-- != 0){ + int k; + + /* Decisions are packed one bit per state into bytes, so fetch the bit by byte index rather than by 32-bit word */ + k = (d[nbits].c[endstate/8] >> (endstate%8)) & 1; + endstate = (k << 7) | (endstate >> 1); + data[nbits>>3] = endstate; + } + return path_metric - SHRT_MIN; +} + +/* Delete instance of a Viterbi decoder */ +void delete_viterbi39_sse(void *p){ + struct v39 *vp = p; + + if(vp != NULL){ + free(vp->decisions); + free(vp); + } +} + + +int update_viterbi39_blk_sse(void *p,unsigned char *syms,int nbits){ + struct v39 *vp = p; + decision_t *d; + int path_metric = 0; + + if(p == NULL) + return -1; + d = (decision_t *)vp->dp; + while(nbits--){ + __m64 sym0v,sym1v,sym2v; + void *tmp; + int i; + + /* Splat the 0th symbol across sym0v, the 1st symbol across sym1v, etc */ + sym0v = _mm_set1_pi16(syms[0]); + sym1v = _mm_set1_pi16(syms[1]); + sym2v = _mm_set1_pi16(syms[2]); + syms += 3; + + for(i=0;i<32;i++){ + __m64 decision0,decision1,metric,m_metric,m0,m1,m2,m3,survivor0,survivor1; + + /* Form branch metrics + * Because Branchtab takes on values 0 and 255, and the values of sym?v are offset binary in the range 0-255, + * the XOR operations constitute conditional negation. 
+ * metric and m_metric (-metric) are in the range 0-765 + */ + m0 = _mm_add_pi16(_mm_xor_si64(Branchtab39[0].v[i],sym0v),_mm_xor_si64(Branchtab39[1].v[i],sym1v)); + metric = _mm_add_pi16(_mm_xor_si64(Branchtab39[2].v[i],sym2v),m0); + m_metric = _mm_sub_pi16(_mm_set1_pi16(765),metric); + + /* Add branch metrics to path metrics */ + m0 = _mm_adds_pi16(vp->old_metrics->v[i],metric); + m3 = _mm_adds_pi16(vp->old_metrics->v[32+i],metric); + m1 = _mm_adds_pi16(vp->old_metrics->v[32+i],m_metric); + m2 = _mm_adds_pi16(vp->old_metrics->v[i],m_metric); + + /* Compare and select */ + survivor0 = _mm_min_pi16(m0,m1); + survivor1 = _mm_min_pi16(m2,m3); + decision0 = _mm_cmpeq_pi16(survivor0,m1); + decision1 = _mm_cmpeq_pi16(survivor1,m3); + + /* Pack the 8 decisions from this iteration into one byte, one bit per new state, and store */ + d->c[i] = _mm_movemask_pi8(_mm_unpacklo_pi8(_mm_packs_pi16(decision0,_mm_setzero_si64()),_mm_packs_pi16(decision1,_mm_setzero_si64()))); + + /* Store surviving metrics */ + vp->new_metrics->v[2*i] = _mm_unpacklo_pi16(survivor0,survivor1); + vp->new_metrics->v[2*i+1] = _mm_unpackhi_pi16(survivor0,survivor1); + } + /* See if we need to renormalize + * Max metric spread for this code with 0-255 branch metrics is 12750 + */ + if(vp->new_metrics->s[0] >= SHRT_MAX-5000){ + int i,adjust; + __m64 adjustv; + union { __m64 v; signed short w[4]; } t; + + /* Find smallest metric and set adjustv to bring it down to SHRT_MIN */ + adjustv = vp->new_metrics->v[0]; + for(i=1;i<64;i++) + adjustv = _mm_min_pi16(adjustv,vp->new_metrics->v[i]); + + adjustv = _mm_min_pi16(adjustv,_mm_srli_si64(adjustv,32)); + adjustv = _mm_min_pi16(adjustv,_mm_srli_si64(adjustv,16)); + t.v = adjustv; + adjust = t.w[0] - SHRT_MIN; + path_metric += adjust; + adjustv = _mm_set1_pi16(adjust); + + for(i=0;i<64;i++) + vp->new_metrics->v[i] = _mm_sub_pi16(vp->new_metrics->v[i],adjustv); + } + d++; + /* Swap pointers to old and new metrics */ + tmp = vp->old_metrics; + vp->old_metrics = vp->new_metrics; + vp->new_metrics = tmp; + } + vp->dp 
= d; + _mm_empty(); + return path_metric; +} diff --git a/viterbi39_sse2.c b/viterbi39_sse2.c new file mode 100644 index 0000000..f13794e --- /dev/null +++ b/viterbi39_sse2.c @@ -0,0 +1,200 @@ +/* K=9 r=1/3 Viterbi decoder for x86 SSE2 + * Copyright Mar 2004, Phil Karn, KA9Q + * May be used under the terms of the GNU Lesser General Public License (LGPL) + */ +#include <emmintrin.h> +#include <stdio.h> +#include <stdlib.h> +#include <memory.h> +#include <limits.h> +#include "fec.h" + +typedef union { unsigned long w[8]; unsigned short s[16];} decision_t; +typedef union { signed short s[256]; __m128i v[32];} metric_t; + +static union branchtab39 { unsigned short s[128]; __m128i v[16];} Branchtab39[3]; +static int Init = 0; + +/* State info for instance of Viterbi decoder */ +struct v39 { + metric_t metrics1; /* path metric buffer 1 */ + metric_t metrics2; /* path metric buffer 2 */ + void *dp; /* Pointer to current decision */ + metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */ + void *decisions; /* Beginning of decisions for block */ +}; + +/* Initialize Viterbi decoder for start of new frame; returns 0, or -1 if p is NULL */ +int init_viterbi39_sse2(void *p,int starting_state){ + struct v39 *vp = p; + int i; + + if(p == NULL) + return -1; + for(i=0;i<256;i++) + vp->metrics1.s[i] = (SHRT_MIN+1000); + + vp->old_metrics = &vp->metrics1; + vp->new_metrics = &vp->metrics2; + vp->dp = vp->decisions; + vp->old_metrics->s[starting_state & 255] = SHRT_MIN; /* Bias known start state */ + return 0; +} + +/* Create a new instance of a Viterbi decoder */ +void *create_viterbi39_sse2(int len){ + void *p; + struct v39 *vp; + + if(!Init){ + int polys[3] = { V39POLYA, V39POLYB, V39POLYC }; + + set_viterbi39_polynomial_sse2(polys); + } + /* Ordinary malloc() only returns 8-byte alignment, we need 16 */ + if(posix_memalign(&p, sizeof(__m128i),sizeof(struct v39))) + return NULL; + + vp = (struct v39 *)p; + if((p = malloc((len+8)*sizeof(decision_t))) == NULL){ + free(vp); + return NULL; + } + 
vp->decisions = (decision_t *)p; + init_viterbi39_sse2(vp,0); + return vp; +} + +/* Build the branch tables from the three generator polynomials. A negative polynomial requests an inverted output symbol, so only its magnitude is used as the tap mask */ +void set_viterbi39_polynomial_sse2(int polys[3]){ + int state; + + for(state=0;state < 128;state++){ + Branchtab39[0].s[state] = (polys[0] < 0) ^ parity((2*state) & abs(polys[0])) ? 255:0; + Branchtab39[1].s[state] = (polys[1] < 0) ^ parity((2*state) & abs(polys[1])) ? 255:0; + Branchtab39[2].s[state] = (polys[2] < 0) ^ parity((2*state) & abs(polys[2])) ? 255:0; + } + Init++; +} + +/* Viterbi chainback; returns the stored path metric of endstate, or -1 if p is NULL */ +int chainback_viterbi39_sse2( + void *p, + unsigned char *data, /* Decoded output data */ + unsigned int nbits, /* Number of data bits */ + unsigned int endstate){ /* Terminal encoder state */ + struct v39 *vp = p; + decision_t *d; + int path_metric; + + if(p == NULL) + return -1; + d = (decision_t *)vp->decisions; + endstate %= 256; + + path_metric = vp->old_metrics->s[endstate]; + + /* The store into data[] only needs to be done every 8 bits. + * But this avoids a conditional branch, and the writes will + * combine in the cache anyway + */ + d += 8; /* Look past tail */ + while(nbits-- != 0){ + int k; + + /* The update routine stores decisions as 16 unsigned shorts, one bit per state. + * Read them back the same way; w[] only lines up with s[] when long is 32 bits. 
+ */ + k = (d[nbits].s[endstate/16] >> (endstate%16)) & 1; + endstate = (k << 7) | (endstate >> 1); + data[nbits>>3] = endstate; + } + return path_metric; +} + +/* Delete instance of a Viterbi decoder */ +void delete_viterbi39_sse2(void *p){ + struct v39 *vp = p; + + if(vp != NULL){ + free(vp->decisions); + free(vp); + } +} + + +int update_viterbi39_blk_sse2(void *p,unsigned char *syms,int nbits){ + struct v39 *vp = p; + decision_t *d; + int path_metric = 0; + + if(p == NULL) + return -1; + d = (decision_t *)vp->dp; + while(nbits--){ + __m128i sym0v,sym1v,sym2v; + void *tmp; + int i; + + /* Splat the 0th symbol across sym0v, the 1st symbol across sym1v, etc */ + sym0v = _mm_set1_epi16(syms[0]); + sym1v = _mm_set1_epi16(syms[1]); + sym2v = _mm_set1_epi16(syms[2]); + syms += 3; + + /* SSE2 doesn't support saturated adds on unsigned shorts, so we have to use signed shorts */ + for(i=0;i<16;i++){ + __m128i 
decision0,decision1,metric,m_metric,m0,m1,m2,m3,survivor0,survivor1; + + /* Form branch metrics + * Because Branchtab takes on values 0 and 255, and the values of sym?v are offset binary in the range 0-255, + * the XOR operations constitute conditional negation. + * metric and m_metric (-metric) are in the range 0-765 + */ + m0 = _mm_add_epi16(_mm_xor_si128(Branchtab39[0].v[i],sym0v),_mm_xor_si128(Branchtab39[1].v[i],sym1v)); + metric = _mm_add_epi16(_mm_xor_si128(Branchtab39[2].v[i],sym2v),m0); + m_metric = _mm_sub_epi16(_mm_set1_epi16(765),metric); + + /* Add branch metrics to path metrics */ + m0 = _mm_adds_epi16(vp->old_metrics->v[i],metric); + m3 = _mm_adds_epi16(vp->old_metrics->v[16+i],metric); + m1 = _mm_adds_epi16(vp->old_metrics->v[16+i],m_metric); + m2 = _mm_adds_epi16(vp->old_metrics->v[i],m_metric); + + /* Compare and select */ + survivor0 = _mm_min_epi16(m0,m1); + survivor1 = _mm_min_epi16(m2,m3); + decision0 = _mm_cmpeq_epi16(survivor0,m1); + decision1 = _mm_cmpeq_epi16(survivor1,m3); + + /* Pack each set of decisions into 8 8-bit bytes, then interleave them and compress into 16 bits */ + d->s[i] = _mm_movemask_epi8(_mm_unpacklo_epi8(_mm_packs_epi16(decision0,_mm_setzero_si128()),_mm_packs_epi16(decision1,_mm_setzero_si128()))); + + /* Store surviving metrics */ + vp->new_metrics->v[2*i] = _mm_unpacklo_epi16(survivor0,survivor1); + vp->new_metrics->v[2*i+1] = _mm_unpackhi_epi16(survivor0,survivor1); + } + /* See if we need to renormalize: only metric 0 is sampled, against a threshold 5000 below SHRT_MAX */ + if(vp->new_metrics->s[0] >= SHRT_MAX-5000){ + int i,adjust; + __m128i adjustv; + union { __m128i v; signed short w[8]; } t; + + /* Find smallest metric and set adjustv to bring it down to SHRT_MIN */ + adjustv = vp->new_metrics->v[0]; + for(i=1;i<32;i++) + adjustv = _mm_min_epi16(adjustv,vp->new_metrics->v[i]); + + adjustv = _mm_min_epi16(adjustv,_mm_srli_si128(adjustv,8)); + adjustv = _mm_min_epi16(adjustv,_mm_srli_si128(adjustv,4)); + adjustv = _mm_min_epi16(adjustv,_mm_srli_si128(adjustv,2)); + t.v = adjustv; 
+ adjust = t.w[0] - SHRT_MIN; + path_metric += adjust; + adjustv = _mm_set1_epi16(adjust); + + /* We cannot use a saturated subtract, because we often have to adjust by more than SHRT_MAX + * This is okay since it can't overflow anyway + */ + for(i=0;i<32;i++) + vp->new_metrics->v[i] = _mm_sub_epi16(vp->new_metrics->v[i],adjustv); + } + d++; + /* Swap pointers to old and new metrics */ + tmp = vp->old_metrics; + vp->old_metrics = vp->new_metrics; + vp->new_metrics = tmp; + } + vp->dp = d; + return path_metric; +} + + diff --git a/viterbi615.c b/viterbi615.c new file mode 100644 index 0000000..6dda51f --- /dev/null +++ b/viterbi615.c @@ -0,0 +1,155 @@ +/* K=15 r=1/6 Viterbi decoder with optional Intel or PowerPC SIMD + * Copyright Feb 2004, Phil Karn, KA9Q + */ +#include <stdio.h> +#include <stdlib.h> +#include <memory.h> +#include "fec.h" + +/* Create a new instance of a Viterbi decoder: calls find_cpu_mode(), then dispatches on Cpu_mode (portable C is the default) */ +void *create_viterbi615(int len){ + + find_cpu_mode(); + + switch(Cpu_mode){ + case PORT: + default: + return create_viterbi615_port(len); +#ifdef __VEC__ + case ALTIVEC: + return create_viterbi615_av(len); +#endif +#ifdef __i386__ + case MMX: + return create_viterbi615_mmx(len); + case SSE: + return create_viterbi615_sse(len); + case SSE2: + return create_viterbi615_sse2(len); +#endif + } +} + +void set_viterbi615_polynomial(int polys[6]){ + + switch(Cpu_mode){ + case PORT: + default: + set_viterbi615_polynomial_port(polys); + break; +#ifdef __VEC__ + case ALTIVEC: + set_viterbi615_polynomial_av(polys); + break; +#endif +#ifdef __i386__ + case MMX: + set_viterbi615_polynomial_mmx(polys); + break; + case SSE: + set_viterbi615_polynomial_sse(polys); + break; + case SSE2: + set_viterbi615_polynomial_sse2(polys); + break; +#endif + } +} + +/* Initialize Viterbi decoder for start of new frame */ +int init_viterbi615(void *p,int starting_state){ + switch(Cpu_mode){ + case PORT: + default: + return init_viterbi615_port(p,starting_state); +#ifdef __VEC__ + case ALTIVEC: + return 
init_viterbi615_av(p,starting_state); +#endif +#ifdef __i386__ + case MMX: + return init_viterbi615_mmx(p,starting_state); + case SSE: + return init_viterbi615_sse(p,starting_state); + case SSE2: + return init_viterbi615_sse2(p,starting_state); +#endif + } +} + +/* Viterbi chainback: forwards to the implementation selected by Cpu_mode (portable C is the default) */ +int chainback_viterbi615( + void *p, + unsigned char *data, /* Decoded output data */ + unsigned int nbits, /* Number of data bits */ + unsigned int endstate){ /* Terminal encoder state */ + + switch(Cpu_mode){ + case PORT: + default: + return chainback_viterbi615_port(p,data,nbits,endstate); +#ifdef __VEC__ + case ALTIVEC: + return chainback_viterbi615_av(p,data,nbits,endstate); +#endif +#ifdef __i386__ + case MMX: + return chainback_viterbi615_mmx(p,data,nbits,endstate); + case SSE: + return chainback_viterbi615_sse(p,data,nbits,endstate); + case SSE2: + return chainback_viterbi615_sse2(p,data,nbits,endstate); +#endif + } +} + +/* Delete instance of a Viterbi decoder: forwards to the implementation selected by Cpu_mode */ +void delete_viterbi615(void *p){ + switch(Cpu_mode){ + case PORT: + default: + delete_viterbi615_port(p); + break; +#ifdef __VEC__ + case ALTIVEC: + delete_viterbi615_av(p); + break; +#endif +#ifdef __i386__ + case MMX: + delete_viterbi615_mmx(p); + break; + case SSE: + delete_viterbi615_sse(p); + break; + case SSE2: + delete_viterbi615_sse2(p); + break; +#endif + } +} + +/* Update decoder with a block of demodulated symbols + * Note that nbits is the number of decoded data bits, not the number + * of symbols! 
+ */ +int update_viterbi615_blk(void *p,unsigned char syms[],int nbits){ + switch(Cpu_mode){ + case PORT: + default: + return update_viterbi615_blk_port(p,syms,nbits); +#ifdef __VEC__ + case ALTIVEC: + return update_viterbi615_blk_av(p,syms,nbits); +#endif +#ifdef __i386__ + case MMX: + return update_viterbi615_blk_mmx(p,syms,nbits); + case SSE: + return update_viterbi615_blk_sse(p,syms,nbits); + case SSE2: + return update_viterbi615_blk_sse2(p,syms,nbits); +#endif + } +} + diff --git a/viterbi615_av.c b/viterbi615_av.c new file mode 100644 index 0000000..4a6ce9c --- /dev/null +++ b/viterbi615_av.c @@ -0,0 +1,257 @@ +/* K=15 r=1/6 Viterbi decoder for PowerPC G4/G5 Altivec vector instructions + * 8-bit offset-binary soft decision samples + * Copyright Mar 2004, Phil Karn, KA9Q + * May be used under the terms of the GNU Lesser General Public License (LGPL) + */ +#include <stdio.h> +#include <stdlib.h> +#include <memory.h> +#include <limits.h> +#include "fec.h" + +typedef union { unsigned char c[128][16]; vector unsigned char v[128]; } decision_t; +typedef union { unsigned short s[16384]; vector unsigned short v[2048]; } metric_t; + +static union branchtab615 { unsigned short s[8192]; vector unsigned short v[1024];} Branchtab615[6]; +static int Init = 0; + +/* State info for instance of Viterbi decoder */ +struct v615 { + metric_t metrics1; /* path metric buffer 1 */ + metric_t metrics2; /* path metric buffer 2 */ + void *dp; /* Pointer to current decision */ + metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */ + void *decisions; /* Beginning of decisions for block */ +}; + +/* Initialize Viterbi decoder for start of new frame; returns 0, or -1 if p is NULL */ +int init_viterbi615_av(void *p,int starting_state){ + struct v615 *vp = p; + int i; + + if(p == NULL) + return -1; + + for(i=0;i<2048;i++) + vp->metrics1.v[i] = (vector unsigned short)(5000); + + vp->old_metrics = &vp->metrics1; + vp->new_metrics = &vp->metrics2; + vp->dp = vp->decisions; + 
vp->old_metrics->s[starting_state & 16383] = 0; /* Bias known start state */ + return 0; +} + +/* Create a new instance of a Viterbi decoder with room for len+14 decision records; returns NULL if either allocation fails */ +void *create_viterbi615_av(int len){ + struct v615 *vp; + + if(!Init){ + int polys[6] = { V615POLYA,V615POLYB,V615POLYC,V615POLYD,V615POLYE,V615POLYF }; + set_viterbi615_polynomial_av(polys); + } + if((vp = (struct v615 *)malloc(sizeof(struct v615))) == NULL) + return NULL; + if((vp->decisions = malloc(sizeof(decision_t)*(len+14))) == NULL){ + free(vp); + return NULL; + } + init_viterbi615_av(vp,0); + return vp; +} + +void set_viterbi615_polynomial_av(int polys[6]){ + int state; + int i; + + for(state=0;state < 8192;state++){ + for(i=0;i<6;i++) + Branchtab615[i].s[state] = (polys[i] < 0) ^ parity((2*state) & abs(polys[i])) ? 255 : 0; + } + Init++; +} + + +/* Viterbi chainback */ +int chainback_viterbi615_av( + void *p, + unsigned char *data, /* Decoded output data */ + unsigned int nbits, /* Number of data bits */ + unsigned int endstate){ /* Terminal encoder state */ + struct v615 *vp = p; + decision_t *d = (decision_t *)vp->decisions; + int path_metric; + + endstate %= 16384; + + path_metric = vp->old_metrics->s[endstate]; + + /* The store into data[] only needs to be done every 8 bits. + * But this avoids a conditional branch, and the writes will + * combine in the cache anyway + */ + d += 14; /* Look past tail */ + while(nbits-- != 0){ + int k; + + k = (d[nbits].c[endstate >> 7][endstate & 15] & (0x80 >> ((endstate>>4)&7)) ) ? 
1 : 0; + endstate = (k << 13) | (endstate >> 1); + data[nbits>>3] = endstate >> 6; + } + return path_metric; +} + +/* Delete instance of a Viterbi decoder */ +void delete_viterbi615_av(void *p){ + struct v615 *vp = p; + + if(vp != NULL){ + free(vp->decisions); + free(vp); + } +} + +int update_viterbi615_blk_av(void *p,unsigned char *syms,int nbits){ + struct v615 *vp = p; + decision_t *d = (decision_t *)vp->dp; + int path_metric = 0; + vector unsigned char decisions = (vector unsigned char)(0); + + while(nbits--){ + vector unsigned short symv,sym0v,sym1v,sym2v,sym3v,sym4v,sym5v; + vector unsigned char s; + void *tmp; + int i; + + /* Splat the 0th symbol across sym0v, the 1st symbol across sym1v, etc */ + s = (vector unsigned char)vec_perm(vec_ld(0,syms),vec_ld(5,syms),vec_lvsl(0,syms)); + + symv = (vector unsigned short)vec_mergeh((vector unsigned char)(0),s); /* Unsigned byte->word unpack */ + sym0v = vec_splat(symv,0); + sym1v = vec_splat(symv,1); + sym2v = vec_splat(symv,2); + sym3v = vec_splat(symv,3); + sym4v = vec_splat(symv,4); + sym5v = vec_splat(symv,5); + syms += 6; + + for(i=0;i<1024;i++){ + vector bool short decision0,decision1; + vector unsigned short metric,m_metric,m0,m1,m2,m3,survivor0,survivor1; + + /* Form branch metrics + * Because Branchtab takes on values 0 and 255, and the values of sym?v are offset binary in the range 0-255, + * the XOR operations constitute conditional negation. 
+ * metric and m_metric (-metric) are in the range 0-1530 + */ + m0 = vec_add(vec_xor(Branchtab615[0].v[i],sym0v),vec_xor(Branchtab615[1].v[i],sym1v)); + m1 = vec_add(vec_xor(Branchtab615[2].v[i],sym2v),vec_xor(Branchtab615[3].v[i],sym3v)); + m2 = vec_add(vec_xor(Branchtab615[4].v[i],sym4v),vec_xor(Branchtab615[5].v[i],sym5v)); + metric = vec_add(m0,m1); + metric = vec_add(metric,m2); + m_metric = vec_sub((vector unsigned short)(1530),metric); + + /* Add branch metrics to path metrics */ + m0 = vec_adds(vp->old_metrics->v[i],metric); + m3 = vec_adds(vp->old_metrics->v[1024+i],metric); + m1 = vec_adds(vp->old_metrics->v[1024+i],m_metric); + m2 = vec_adds(vp->old_metrics->v[i],m_metric); + + /* Compare and select */ + decision0 = vec_cmpgt(m0,m1); + decision1 = vec_cmpgt(m2,m3); + survivor0 = vec_min(m0,m1); + survivor1 = vec_min(m2,m3); + + /* Store decisions and survivors. + * To save space without SSE2's handy PMOVMSKB instruction, we pack and store them in + * a funny interleaved fashion that we undo in the chainback function. + */ + decisions = vec_add(decisions,decisions); /* Shift each byte 1 bit to the left */ + + /* Booleans are either 0xff or 0x00. Subtracting 0x00 leaves the lsb zero; subtracting + * 0xff is equivalent to adding 1, which sets the lsb. 
+ */ + decisions = vec_sub(decisions,(vector unsigned char)vec_pack(vec_mergeh(decision0,decision1),vec_mergel(decision0,decision1))); + + vp->new_metrics->v[2*i] = vec_mergeh(survivor0,survivor1); + vp->new_metrics->v[2*i+1] = vec_mergel(survivor0,survivor1); + + if((i % 8) == 7){ + /* We've accumulated a total of 128 decisions, stash and start again */ + d->v[i>>3] = decisions; /* No need to clear, the new bits will replace the old */ + } + } +#if 0 + /* Experimentally determine metric spread + * The results are fixed for a given code and input symbol size + */ + { + int i; + vector unsigned short min_metric; + vector unsigned short max_metric; + union { vector unsigned short v; unsigned short s[8];} t; + int minimum,maximum; + static int max_spread = 0; + + min_metric = max_metric = vp->new_metrics->v[0]; + for(i=1;i<2048;i++){ + min_metric = vec_min(min_metric,vp->new_metrics->v[i]); + max_metric = vec_max(max_metric,vp->new_metrics->v[i]); + } + min_metric = vec_min(min_metric,vec_sld(min_metric,min_metric,8)); + max_metric = vec_max(max_metric,vec_sld(max_metric,max_metric,8)); + min_metric = vec_min(min_metric,vec_sld(min_metric,min_metric,4)); + max_metric = vec_max(max_metric,vec_sld(max_metric,max_metric,4)); + min_metric = vec_min(min_metric,vec_sld(min_metric,min_metric,2)); + max_metric = vec_max(max_metric,vec_sld(max_metric,max_metric,2)); + + t.v = min_metric; + minimum = t.s[0]; + t.v = max_metric; + maximum = t.s[0]; + if(maximum-minimum > max_spread){ + max_spread = maximum-minimum; + printf("metric spread = %d\n",max_spread); + } + } +#endif + + /* Renormalize if necessary. This deserves some explanation. + + * The maximum possible spread, found by experiment, for 4-bit symbols is 405; for 8 bit symbols, it's 12750. + * So by looking at one arbitrary metric we can tell if any of them have possibly saturated. + * However, this is very conservative. 
Large spreads occur only at very high Eb/No, where + * saturating a bad path metric doesn't do much to increase its chances of being erroneously chosen as a survivor. + + * At more interesting (low) Eb/No ratios, the spreads are much smaller so our chances of saturating a metric + * by not normalizing when we should are extremely low. So either way, the risk to performance is small. + + * All this is borne out by experiment. + */ + if(vp->new_metrics->s[0] >= USHRT_MAX-12750){ + vector unsigned short scale; + union { vector unsigned short v; unsigned short s[8];} t; + + /* Find smallest metric and splat */ + scale = vp->new_metrics->v[0]; + for(i=1;i<2048;i++) + scale = vec_min(scale,vp->new_metrics->v[i]); + + scale = vec_min(scale,vec_sld(scale,scale,8)); + scale = vec_min(scale,vec_sld(scale,scale,4)); + scale = vec_min(scale,vec_sld(scale,scale,2)); + + /* Subtract it from all metrics + * Work backwards to try to improve the cache hit ratio, assuming LRU + */ + for(i=2047;i>=0;i--) + vp->new_metrics->v[i] = vec_subs(vp->new_metrics->v[i],scale); + t.v = scale; + path_metric += t.s[0]; + } + d++; + /* Swap pointers to old and new metrics */ + tmp = vp->old_metrics; + vp->old_metrics = vp->new_metrics; + vp->new_metrics = tmp; + } + vp->dp = d; + return path_metric; +} diff --git a/viterbi615_mmx.c b/viterbi615_mmx.c new file mode 100644 index 0000000..89a56f7 --- /dev/null +++ b/viterbi615_mmx.c @@ -0,0 +1,183 @@ +/* K=15 r=1/6 Viterbi decoder for x86 MMX + * Mar 2004, Phil Karn, KA9Q + * May be used under the terms of the GNU Lesser General Public License (LGPL) + */ +#include <mmintrin.h> +#include <stdio.h> +#include <stdlib.h> +#include <memory.h> +#include "fec.h" + +typedef union { unsigned char c[16384]; __m64 v[2048];} decision_t; +typedef union { unsigned short s[16384]; __m64 v[4096];} metric_t; + +static union branchtab615 { unsigned short s[8192]; __m64 v[2048];} Branchtab615[6]; +static int Init = 0; + +/* State info for instance of Viterbi 
decoder */ +struct v615 { + metric_t metrics1; /* path metric buffer 1 */ + metric_t metrics2; /* path metric buffer 2 */ + void *dp; /* Pointer to current decision */ + metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */ + void *decisions; /* Beginning of decisions for block */ +}; + +/* Initialize Viterbi decoder for start of new frame; returns 0, or -1 if p is NULL */ +int init_viterbi615_mmx(void *p,int starting_state){ + struct v615 *vp = p; + int i; + + if(p == NULL) + return -1; + for(i=0;i<16384;i++) + vp->metrics1.s[i] = 5000; + + vp->old_metrics = &vp->metrics1; + vp->new_metrics = &vp->metrics2; + vp->dp = vp->decisions; + vp->old_metrics->s[starting_state & 16383] = 0; /* Bias known start state */ + return 0; +} + +/* Create a new instance of a Viterbi decoder with room for len+14 decision records; returns NULL if either allocation fails */ +void *create_viterbi615_mmx(int len){ + struct v615 *vp; + + if(!Init){ + int polys[6] = { V615POLYA,V615POLYB,V615POLYC,V615POLYD,V615POLYE,V615POLYF }; + set_viterbi615_polynomial_mmx(polys); + } + + if((vp = (struct v615 *)malloc(sizeof(struct v615))) == NULL) + return NULL; + if((vp->decisions = malloc((len+14)*sizeof(decision_t))) == NULL){ + free(vp); + return NULL; + } + init_viterbi615_mmx(vp,0); + return vp; +} + +void set_viterbi615_polynomial_mmx(int polys[6]){ + int state; + int i; + + for(state=0;state < 8192;state++){ + for(i=0;i<6;i++) + Branchtab615[i].s[state] = (polys[i] < 0) ^ parity((2*state) & abs(polys[i])) ? 255 : 0; + } + Init++; +} + +/* Viterbi chainback */ +int chainback_viterbi615_mmx( + void *p, + unsigned char *data, /* Decoded output data */ + unsigned int nbits, /* Number of data bits */ + unsigned int endstate){ /* Terminal encoder state */ + struct v615 *vp = p; + decision_t *d; + + if(p == NULL) + return -1; + + d = (decision_t *)vp->decisions; + + endstate %= 16384; + + /* The store into data[] only needs to be done every 8 bits. 
+ * But this avoids a conditional branch, and the writes will + * combine in the cache anyway + */ + d += 14; /* Look past tail */ + while(nbits-- != 0){ + int k; + + k = d[nbits].c[endstate] & 1; + endstate = (k << 13) | (endstate >> 1); + data[nbits>>3] = endstate >> 6; + } + return 0; +} + +/* Delete instance of a Viterbi decoder */ +void delete_viterbi615_mmx(void *p){ + struct v615 *vp = p; + + if(vp != NULL){ + free(vp->decisions); + free(vp); + } +} + + +int update_viterbi615_blk_mmx(void *p,unsigned char *syms,int nbits){ + struct v615 *vp = p; + decision_t *d; + + if(p == NULL) + return -1; + + d = (decision_t *)vp->dp; + + while(nbits--){ + __m64 sym0v,sym1v,sym2v,sym3v,sym4v,sym5v; + void *tmp; + int i; + + /* Splat the 0th symbol across sym0v, the 1st symbol across sym1v, etc */ + sym0v = _mm_set1_pi16(syms[0]); + sym1v = _mm_set1_pi16(syms[1]); + sym2v = _mm_set1_pi16(syms[2]); + sym3v = _mm_set1_pi16(syms[3]); + sym4v = _mm_set1_pi16(syms[4]); + sym5v = _mm_set1_pi16(syms[5]); + syms += 6; + + for(i=0;i<2048;i++){ + __m64 decision0,decision1,metric,m_metric,m0,m1,m2,m3,survivor0,survivor1; + + /* Form branch metrics + * Because Branchtab takes on values 0 and 255, and the values of sym?v are offset binary in the range 0-255, + * the XOR operations constitute conditional negation. 
+ * metric and m_metric (-metric) are in the range 0-1530 + */ + m0 = _mm_add_pi16(_mm_xor_si64(Branchtab615[0].v[i],sym0v),_mm_xor_si64(Branchtab615[1].v[i],sym1v)); + m1 = _mm_add_pi16(_mm_xor_si64(Branchtab615[2].v[i],sym2v),_mm_xor_si64(Branchtab615[3].v[i],sym3v)); + m2 = _mm_add_pi16(_mm_xor_si64(Branchtab615[4].v[i],sym4v),_mm_xor_si64(Branchtab615[5].v[i],sym5v)); + metric = _mm_add_pi16(m0,_mm_add_pi16(m1,m2)); + m_metric = _mm_sub_pi16(_mm_set1_pi16(1530),metric); + + /* Add branch metrics to path metrics */ + m0 = _mm_add_pi16(vp->old_metrics->v[i],metric); + m3 = _mm_add_pi16(vp->old_metrics->v[2048+i],metric); + m1 = _mm_add_pi16(vp->old_metrics->v[2048+i],m_metric); + m2 = _mm_add_pi16(vp->old_metrics->v[i],m_metric); + + /* Compare and select + * There's no packed min instruction in MMX, so we use modulo arithmetic + * to form the decisions and then do the select the hard way + */ + decision0 = _mm_cmpgt_pi16(_mm_sub_pi16(m0,m1),_mm_setzero_si64()); + decision1 = _mm_cmpgt_pi16(_mm_sub_pi16(m2,m3),_mm_setzero_si64()); + survivor0 = _mm_or_si64(_mm_and_si64(decision0,m1),_mm_andnot_si64(decision0,m0)); + survivor1 = _mm_or_si64(_mm_and_si64(decision1,m3),_mm_andnot_si64(decision1,m2)); + + /* Merge decisions and store as bytes: one byte per new state, which the chainback reads as bit 0 of c[endstate] */ + d->v[i] = _mm_unpacklo_pi8(_mm_packs_pi16(decision0,_mm_setzero_si64()),_mm_packs_pi16(decision1,_mm_setzero_si64())); + + /* Store surviving metrics */ + vp->new_metrics->v[2*i] = _mm_unpacklo_pi16(survivor0,survivor1); + vp->new_metrics->v[2*i+1] = _mm_unpackhi_pi16(survivor0,survivor1); + } + d++; + /* Swap pointers to old and new metrics */ + tmp = vp->old_metrics; + vp->old_metrics = vp->new_metrics; + vp->new_metrics = tmp; + } + vp->dp = d; + _mm_empty(); + return 0; +} diff --git a/viterbi615_port.c b/viterbi615_port.c new file mode 100644 index 0000000..89bdd80 --- /dev/null +++ b/viterbi615_port.c @@ -0,0 +1,156 @@ +/* K=15 r=1/6 Viterbi decoder in portable C + * Copyright Mar 2004, Phil Karn, KA9Q + * May be used 
under the terms of the GNU Lesser General Public License (LGPL) + */ +#include <stdio.h> +#include <stdlib.h> +#include <memory.h> +#include <limits.h> +#include "fec.h" + +typedef union { unsigned long w[512]; unsigned char c[2048];} decision_t; +typedef union { unsigned long w[16384]; } metric_t; + +static union branchtab615 { unsigned long w[8192]; } Branchtab615[6] __attribute__ ((aligned(16))); +static int Init = 0; + +/* State info for instance of Viterbi decoder */ +struct v615 { + metric_t metrics1; /* path metric buffer 1 */ + metric_t metrics2; /* path metric buffer 2 */ + decision_t *dp; /* Pointer to current decision */ + metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */ + decision_t *decisions; /* Beginning of decisions for block */ +}; + +/* Create a new instance of a Viterbi decoder */ +void *create_viterbi615_port(int len){ + struct v615 *vp; + + if(!Init){ + int polys[6] = { V615POLYA,V615POLYB,V615POLYC,V615POLYD,V615POLYE,V615POLYF }; + set_viterbi615_polynomial_port(polys); + } + if((vp = (struct v615 *)malloc(sizeof(struct v615))) == NULL) + return NULL; + if((vp->decisions = malloc((len+14)*sizeof(decision_t))) == NULL){ + free(vp); + return NULL; + } + init_viterbi615(vp,0); + return vp; +} + +void set_viterbi615_polynomial_port(int polys[6]){ + int state; + int i; + + for(state=0;state < 8192;state++){ + for(i=0;i<6;i++) + Branchtab615[i].w[state] = (polys[i] < 0) ^ parity((2*state) & abs(polys[i])) ? 
255 : 0; + } + Init++; +} + +/* Initialize Viterbi decoder for start of new frame */ +int init_viterbi615_port(void *p,int starting_state){ + struct v615 *vp = p; + int i; + + if(p == NULL) + return -1; + for(i=0;i<16384;i++) + vp->metrics1.w[i] = 1000; + + vp->old_metrics = &vp->metrics1; + vp->new_metrics = &vp->metrics2; + vp->dp = vp->decisions; + vp->old_metrics->w[starting_state & 16383] = 0; /* Bias known start state */ + return 0; +} + +/* Viterbi chainback */ +int chainback_viterbi615_port( + void *p, + unsigned char *data, /* Decoded output data */ + unsigned int nbits, /* Number of data bits */ + unsigned int endstate){ /* Terminal encoder state */ + struct v615 *vp = p; + decision_t *d; + + if(p == NULL) + return -1; + d = (decision_t *)vp->decisions; + endstate %= 16384; + + /* The store into data[] only needs to be done every 8 bits. + * But this avoids a conditional branch, and the writes will + * combine in the cache anyway + */ + d += 14; /* Look past tail */ + while(nbits-- != 0){ + int k; + + k = (d[nbits].c[endstate/8] >> (endstate%8)) & 1; + endstate = (k << 13) | (endstate >> 1); + data[nbits>>3] = endstate >> 6; + } + return 0; +} + +/* Delete instance of a Viterbi decoder */ +void delete_viterbi615_port(void *p){ + struct v615 *vp = p; + + if(vp != NULL){ + free(vp->decisions); + free(vp); + } +} + +/* C-language butterfly */ +#define BFLY(i) {\ +unsigned long metric,m0,m1,m2,m3,decision0,decision1;\ + metric = ((Branchtab615[0].w[i] ^ syms[0]) + (Branchtab615[1].w[i] ^ syms[1])\ + +(Branchtab615[2].w[i] ^ syms[2]) + (Branchtab615[3].w[i] ^ syms[3])\ + +(Branchtab615[4].w[i] ^ syms[4]) + (Branchtab615[5].w[i] ^ syms[5]));\ + m0 = vp->old_metrics->w[i] + metric;\ + m1 = vp->old_metrics->w[i+8192] + (1530 - metric);\ + m2 = vp->old_metrics->w[i] + (1530-metric);\ + m3 = vp->old_metrics->w[i+8192] + metric;\ + decision0 = (signed long)(m0-m1) >= 0;\ + decision1 = (signed long)(m2-m3) >= 0;\ + vp->new_metrics->w[2*i] = decision0 ? 
m1 : m0;\ + vp->new_metrics->w[2*i+1] = decision1 ? m3 : m2;\ + d->c[i/4] |= ((decision0|(decision1<<1)) << ((2*i)&7));\ +} +/* Update decoder with a block of demodulated symbols + * Note that nbits is the number of decoded data bits, not the number + * of symbols! + */ + +int update_viterbi615_blk_port(void *p,unsigned char *syms,int nbits){ + struct v615 *vp = p; + void *tmp; + decision_t *d; + int i; + + if(p == NULL) + return -1; + d = (decision_t *)vp->dp; + while(nbits--){ + memset(d,0,sizeof(decision_t)); + for(i=0;i<8192;i++) + BFLY(i); + + syms += 6; + d++; + /* Swap pointers to old and new metrics */ + tmp = vp->old_metrics; + vp->old_metrics = vp->new_metrics; + vp->new_metrics = tmp; + } + vp->dp = d; + return 0; +} + diff --git a/viterbi615_sse.c b/viterbi615_sse.c new file mode 100644 index 0000000..de0f8af --- /dev/null +++ b/viterbi615_sse.c @@ -0,0 +1,201 @@ +/* K=15 r=1/6 Viterbi decoder for x86 SSE + * Copyright Mar 2004, Phil Karn, KA9Q + * May be used under the terms of the GNU Lesser General Public License (LGPL) + */ +#include <xmmintrin.h> +#include <stdio.h> +#include <stdlib.h> +#include <memory.h> +#include <limits.h> +#include "fec.h" + +typedef union { unsigned long w[512]; unsigned char c[2048];} decision_t; +typedef union { signed short s[16384]; __m64 v[4096];} metric_t; + +static union branchtab615 { unsigned short s[8192]; __m64 v[2048];} Branchtab615[6]; +static int Init = 0; + +/* State info for instance of Viterbi decoder */ +struct v615 { + metric_t metrics1; /* path metric buffer 1 */ + metric_t metrics2; /* path metric buffer 2 */ + void *dp; /* Pointer to current decision */ + metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */ + void *decisions; /* Beginning of decisions for block */ +}; + +/* Initialize Viterbi decoder for start of new frame */ +int init_viterbi615_sse(void *p,int starting_state){ + struct v615 *vp = p; + int i; + + if(p == NULL) + return -1; + for(i=0;i<16384;i++) + 
vp->metrics1.s[i] = (SHRT_MIN+5000); + + vp->old_metrics = &vp->metrics1; + vp->new_metrics = &vp->metrics2; + vp->dp = vp->decisions; + vp->old_metrics->s[starting_state & 16383] = SHRT_MIN; /* Bias known start state */ + return 0; +} + +/* Create a new instance of a Viterbi decoder */ +void *create_viterbi615_sse(int len){ + struct v615 *vp; + + if(!Init){ + int polys[6] = { V615POLYA,V615POLYB,V615POLYC,V615POLYD,V615POLYE,V615POLYF }; + set_viterbi615_polynomial_sse(polys); + } + + if((vp = (struct v615 *)malloc(sizeof(struct v615))) == NULL){ + return NULL; + } + if((vp->decisions = malloc((len+14)*sizeof(decision_t))) == NULL){ + free(vp); + return NULL; + } + init_viterbi615_sse(vp,0); + return vp; +} + +void set_viterbi615_polynomial_sse(int polys[6]){ + int state; + int i; + + for(state=0;state < 8192;state++){ + for(i=0;i<6;i++) + Branchtab615[i].s[state] = (polys[i] < 0) ^ parity((2*state) & abs(polys[i])) ? 255 : 0; + } + Init++; +} + +/* Viterbi chainback */ +int chainback_viterbi615_sse( + void *p, + unsigned char *data, /* Decoded output data */ + unsigned int nbits, /* Number of data bits */ + unsigned int endstate){ /* Terminal encoder state */ + struct v615 *vp = p; + decision_t *d; + + if(p == NULL) + return -1; + d = (decision_t *)vp->decisions; + endstate %= 16384; + + /* The store into data[] only needs to be done every 8 bits. 
+ * But this avoids a conditional branch, and the writes will + * combine in the cache anyway + */ + d += 14; /* Look past tail */ + while(nbits-- != 0){ + int k; + + /* k = (d[nbits].w[endstate/32] >> (endstate%32)) & 1;*/ + k = (d[nbits].c[endstate/8] >> (endstate%8)) & 1; + endstate = (k << 13) | (endstate >> 1); + data[nbits>>3] = endstate >> 6; + } + return 0; +} + +/* Delete instance of a Viterbi decoder */ +void delete_viterbi615_sse(void *p){ + struct v615 *vp = p; + + if(vp != NULL){ + free(vp->decisions); + free(vp); + } +} + + +int update_viterbi615_blk_sse(void *p,unsigned char *syms,int nbits){ + struct v615 *vp = p; + decision_t *d; + + if(p == NULL) + return -1; + d = (decision_t *)vp->dp; + while(nbits--){ + __m64 sym0v,sym1v,sym2v,sym3v,sym4v,sym5v; + void *tmp; + int i; + + /* Splat the 0th symbol across sym0v, the 1st symbol across sym1v, etc */ + sym0v = _mm_set1_pi16(syms[0]); + sym1v = _mm_set1_pi16(syms[1]); + sym2v = _mm_set1_pi16(syms[2]); + sym3v = _mm_set1_pi16(syms[3]); + sym4v = _mm_set1_pi16(syms[4]); + sym5v = _mm_set1_pi16(syms[5]); + syms += 6; + + for(i=0;i<2048;i++){ + __m64 decision0,decision1,metric,m_metric,m0,m1,m2,m3,survivor0,survivor1; + + /* Form branch metrics + * Because Branchtab takes on values 0 and 255, and the values of sym?v are offset binary in the range 0-255, + * the XOR operations constitute conditional negation. 
+ * metric and m_metric (-metric) are in the range 0-1530 + */ + m0 = _mm_add_pi16(_mm_xor_si64(Branchtab615[0].v[i],sym0v),_mm_xor_si64(Branchtab615[1].v[i],sym1v)); + m1 = _mm_add_pi16(_mm_xor_si64(Branchtab615[2].v[i],sym2v),_mm_xor_si64(Branchtab615[3].v[i],sym3v)); + m2 = _mm_add_pi16(_mm_xor_si64(Branchtab615[4].v[i],sym4v),_mm_xor_si64(Branchtab615[5].v[i],sym5v)); + metric = _mm_add_pi16(m0,_mm_add_pi16(m1,m2)); + m_metric = _mm_sub_pi16(_mm_set1_pi16(1530),metric); + + /* Add branch metrics to path metrics */ + m0 = _mm_adds_pi16(vp->old_metrics->v[i],metric); + m3 = _mm_adds_pi16(vp->old_metrics->v[2048+i],metric); + m1 = _mm_adds_pi16(vp->old_metrics->v[2048+i],m_metric); + m2 = _mm_adds_pi16(vp->old_metrics->v[i],m_metric); + + /* Compare and select */ + survivor0 = _mm_min_pi16(m0,m1); + survivor1 = _mm_min_pi16(m2,m3); + decision0 = _mm_cmpeq_pi16(survivor0,m1); + decision1 = _mm_cmpeq_pi16(survivor1,m3); + + /* Pack decisions into 8 bits and store */ + d->c[i] = _mm_movemask_pi8(_mm_unpacklo_pi8(_mm_packs_pi16(decision0,_mm_setzero_si64()),_mm_packs_pi16(decision1,_mm_setzero_si64()))); + + /* Store surviving metrics */ + vp->new_metrics->v[2*i] = _mm_unpacklo_pi16(survivor0,survivor1); + vp->new_metrics->v[2*i+1] = _mm_unpackhi_pi16(survivor0,survivor1); + } + /* See if we need to renormalize + * Max metric spread for this code with 0-255 branch metrics is 12750 + */ + if(vp->new_metrics->s[0] >= SHRT_MAX-12750){ + int i,adjust; + __m64 adjustv; + union { __m64 v; signed short w[4]; } t; + + /* Find smallest metric and set adjustv to bring it down to SHRT_MIN */ + adjustv = vp->new_metrics->v[0]; + for(i=1;i<4096;i++) + adjustv = _mm_min_pi16(adjustv,vp->new_metrics->v[i]); + + adjustv = _mm_min_pi16(adjustv,_mm_srli_si64(adjustv,32)); + adjustv = _mm_min_pi16(adjustv,_mm_srli_si64(adjustv,16)); + t.v = adjustv; + adjust = t.w[0] - SHRT_MIN; + adjustv = _mm_set1_pi16(adjust); + + for(i=0;i<4096;i++) + vp->new_metrics->v[i] = 
_mm_sub_pi16(vp->new_metrics->v[i],adjustv); + } + d++; + /* Swap pointers to old and new metrics */ + tmp = vp->old_metrics; + vp->old_metrics = vp->new_metrics; + vp->new_metrics = tmp; + } + vp->dp = d; + _mm_empty(); + return 0; +} diff --git a/viterbi615_sse2.c b/viterbi615_sse2.c new file mode 100644 index 0000000..7f711e5 --- /dev/null +++ b/viterbi615_sse2.c @@ -0,0 +1,204 @@ +/* K=15 r=1/6 Viterbi decoder for x86 SSE2 + * Copyright Mar 2004, Phil Karn, KA9Q + * May be used under the terms of the GNU Lesser General Public License (LGPL) + */ +#include <emmintrin.h> +#include <stdio.h> +#include <stdlib.h> +#include <memory.h> +#include <limits.h> +#include "fec.h" + +typedef union { unsigned long w[512]; unsigned short s[1024];} decision_t; +typedef union { signed short s[16384]; __m128i v[2048];} metric_t; + +static union branchtab615 { unsigned short s[8192]; __m128i v[1024];} Branchtab615[6]; +static int Init = 0; + +/* State info for instance of Viterbi decoder */ +struct v615 { + metric_t metrics1; /* path metric buffer 1 */ + metric_t metrics2; /* path metric buffer 2 */ + void *dp; /* Pointer to current decision */ + metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */ + void *decisions; /* Beginning of decisions for block */ +}; + +/* Initialize Viterbi decoder for start of new frame */ +int init_viterbi615_sse2(void *p,int starting_state){ + struct v615 *vp = p; + int i; + + if(p == NULL) + return -1; + for(i=0;i<16384;i++) + vp->metrics1.s[i] = (SHRT_MIN+5000); + + vp->old_metrics = &vp->metrics1; + vp->new_metrics = &vp->metrics2; + vp->dp = vp->decisions; + vp->old_metrics->s[starting_state & 16383] = SHRT_MIN; /* Bias known start state */ + return 0; +} + +/* Create a new instance of a Viterbi decoder */ +void *create_viterbi615_sse2(int len){ + void *p; + struct v615 *vp; + + if(!Init){ + int polys[6] = { V615POLYA,V615POLYB,V615POLYC,V615POLYD,V615POLYE,V615POLYF }; + set_viterbi615_polynomial_sse2(polys); 
+ } + + /* Ordinary malloc() only returns 8-byte alignment, we need 16 */ + if(posix_memalign(&p, sizeof(__m128i),sizeof(struct v615))) + return NULL; + + vp = (struct v615 *)p; + if((p = malloc((len+14)*sizeof(decision_t))) == NULL){ + free(vp); + return NULL; + } + vp->decisions = (decision_t *)p; + init_viterbi615_sse2(vp,0); + return vp; +} + +void set_viterbi615_polynomial_sse2(int polys[6]){ + int state; + int i; + + for(state=0;state < 8192;state++){ + for(i=0;i<6;i++) + Branchtab615[i].s[state] = (polys[i] < 0) ^ parity((2*state) & abs(polys[i])) ? 255 : 0; + } + Init++; +} + +/* Viterbi chainback */ +int chainback_viterbi615_sse2( + void *p, + unsigned char *data, /* Decoded output data */ + unsigned int nbits, /* Number of data bits */ + unsigned int endstate){ /* Terminal encoder state */ + struct v615 *vp = p; + decision_t *d = (decision_t *)vp->decisions; + + endstate %= 16384; + + /* The store into data[] only needs to be done every 8 bits. + * But this avoids a conditional branch, and the writes will + * combine in the cache anyway + */ + d += 14; /* Look past tail */ + while(nbits-- != 0){ + int k; + + k = (d[nbits].w[endstate/32] >> (endstate%32)) & 1; + endstate = (k << 13) | (endstate >> 1); + data[nbits>>3] = endstate >> 6; + } + return 0; +} + +/* Delete instance of a Viterbi decoder */ +void delete_viterbi615_sse2(void *p){ + struct v615 *vp = p; + + if(vp != NULL){ + free(vp->decisions); + free(vp); + } +} + + +int update_viterbi615_blk_sse2(void *p,unsigned char *syms,int nbits){ + struct v615 *vp = p; + decision_t *d = (decision_t *)vp->dp; + + while(nbits--){ + __m128i sym0v,sym1v,sym2v,sym3v,sym4v,sym5v; + void *tmp; + int i; + + /* Splat the 0th symbol across sym0v, the 1st symbol across sym1v, etc */ + sym0v = _mm_set1_epi16(syms[0]); + sym1v = _mm_set1_epi16(syms[1]); + sym2v = _mm_set1_epi16(syms[2]); + sym3v = _mm_set1_epi16(syms[3]); + sym4v = _mm_set1_epi16(syms[4]); + sym5v = _mm_set1_epi16(syms[5]); + syms += 6; + + /* SSE2 
doesn't support saturated adds on unsigned shorts, so we have to use signed shorts */ + for(i=0;i<1024;i++){ + __m128i decision0,decision1,metric,m_metric,m0,m1,m2,m3,survivor0,survivor1; + + /* Form branch metrics + * Because Branchtab takes on values 0 and 255, and the values of sym?v are offset binary in the range 0-255, + * the XOR operations constitute conditional negation. + * metric and m_metric (-metric) are in the range 0-1530 + */ + m0 = _mm_add_epi16(_mm_xor_si128(Branchtab615[0].v[i],sym0v),_mm_xor_si128(Branchtab615[1].v[i],sym1v)); + m1 = _mm_add_epi16(_mm_xor_si128(Branchtab615[2].v[i],sym2v),_mm_xor_si128(Branchtab615[3].v[i],sym3v)); + m2 = _mm_add_epi16(_mm_xor_si128(Branchtab615[4].v[i],sym4v),_mm_xor_si128(Branchtab615[5].v[i],sym5v)); + metric = _mm_add_epi16(m0,_mm_add_epi16(m1,m2)); + m_metric = _mm_sub_epi16(_mm_set1_epi16(1530),metric); + + /* Add branch metrics to path metrics */ + m0 = _mm_adds_epi16(vp->old_metrics->v[i],metric); + m3 = _mm_adds_epi16(vp->old_metrics->v[1024+i],metric); + m1 = _mm_adds_epi16(vp->old_metrics->v[1024+i],m_metric); + m2 = _mm_adds_epi16(vp->old_metrics->v[i],m_metric); + + /* Compare and select */ + survivor0 = _mm_min_epi16(m0,m1); + survivor1 = _mm_min_epi16(m2,m3); + decision0 = _mm_cmpeq_epi16(survivor0,m1); + decision1 = _mm_cmpeq_epi16(survivor1,m3); + + /* Pack each set of decisions into 8 8-bit bytes, then interleave them and compress into 16 bits */ + d->s[i] = _mm_movemask_epi8(_mm_unpacklo_epi8(_mm_packs_epi16(decision0,_mm_setzero_si128()),_mm_packs_epi16(decision1,_mm_setzero_si128()))); + + /* Store surviving metrics */ + vp->new_metrics->v[2*i] = _mm_unpacklo_epi16(survivor0,survivor1); + vp->new_metrics->v[2*i+1] = _mm_unpackhi_epi16(survivor0,survivor1); + } + /* See if we need to renormalize + * Max metric spread for this code with 0-255 branch metrics is 12750 + */ + if(vp->new_metrics->s[0] >= SHRT_MAX-12750){ + int i,adjust; + __m128i adjustv; + union { __m128i v; signed short w[8]; } t; +
+ /* Find smallest metric and set adjustv to bring it down to SHRT_MIN */ + adjustv = vp->new_metrics->v[0]; + for(i=1;i<2048;i++) + adjustv = _mm_min_epi16(adjustv,vp->new_metrics->v[i]); + + adjustv = _mm_min_epi16(adjustv,_mm_srli_si128(adjustv,8)); + adjustv = _mm_min_epi16(adjustv,_mm_srli_si128(adjustv,4)); + adjustv = _mm_min_epi16(adjustv,_mm_srli_si128(adjustv,2)); + t.v = adjustv; + adjust = t.w[0] - SHRT_MIN; + adjustv = _mm_set1_epi16(adjust); + + /* We cannot use a saturated subtract, because we often have to adjust by more than SHRT_MAX + * This is okay since it can't overflow anyway + */ + for(i=0;i<2048;i++) + vp->new_metrics->v[i] = _mm_sub_epi16(vp->new_metrics->v[i],adjustv); + } + d++; + /* Swap pointers to old and new metrics */ + tmp = vp->old_metrics; + vp->old_metrics = vp->new_metrics; + vp->new_metrics = tmp; + } + vp->dp = d; + return 0; +} + + diff --git a/vtest27.c b/vtest27.c new file mode 100644 index 0000000..7256483 --- /dev/null +++ b/vtest27.c @@ -0,0 +1,184 @@ +/* Test viterbi decoder speeds */ +#include "config.h" +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <time.h> +#include <math.h> +#include <memory.h> +#include <sys/time.h> +#include <sys/resource.h> +#ifdef HAVE_GETOPT_H +#include <getopt.h> +#endif +#include "fec.h" + +#if HAVE_GETOPT_LONG +struct option Options[] = { + {"frame-length",1,NULL,'l'}, + {"frame-count",1,NULL,'n'}, + {"ebn0",1,NULL,'e'}, + {"gain",1,NULL,'g'}, + {"verbose",0,NULL,'v'}, + {"force-altivec",0,NULL,'a'}, + {"force-port",0,NULL,'p'}, + {"force-mmx",0,NULL,'m'}, + {"force-sse",0,NULL,'s'}, + {"force-sse2",0,NULL,'t'}, + {NULL}, +}; +#endif + +#define RATE (1./2.) 
+#define MAXBYTES 10000 + +double Gain = 32.0; +int Verbose = 0; + +int main(int argc,char *argv[]){ + int i,d,tr; + int sr=0,trials = 10000,errcnt,framebits=2048; + long long int tot_errs=0; + unsigned char bits[MAXBYTES]; + unsigned char data[MAXBYTES]; + unsigned char xordata[MAXBYTES]; + unsigned char symbols[8*2*(MAXBYTES+6)]; + void *vp; + extern char *optarg; + struct rusage start,finish; + double extime; + double gain,esn0,ebn0; + time_t t; + int badframes=0; + + time(&t); + srandom(t); + ebn0 = -100; +#if HAVE_GETOPT_LONG + while((d = getopt_long(argc,argv,"l:n:te:g:vapmst",Options,NULL)) != EOF){ +#else + while((d = getopt(argc,argv,"l:n:te:g:vapmst")) != EOF){ +#endif + switch(d){ + case 'a': + Cpu_mode = ALTIVEC; + break; + case 'p': + Cpu_mode = PORT; + break; + case 'm': + Cpu_mode = MMX; + break; + case 's': + Cpu_mode = SSE; + break; + case 't': + Cpu_mode = SSE2; + break; + case 'l': + framebits = atoi(optarg); + break; + case 'n': + trials = atoi(optarg); + break; + case 'e': + ebn0 = atof(optarg); + break; + case 'g': + Gain = atof(optarg); + break; + case 'v': + Verbose++; + break; + } + } + if(framebits > 8*MAXBYTES){ + fprintf(stderr,"Frame limited to %d bits\n",MAXBYTES*8); + framebits = MAXBYTES*8; + } + if((vp = create_viterbi27(framebits)) == NULL){ + printf("create_viterbi27 failed\n"); + exit(1); + } + if(ebn0 != -100){ + esn0 = ebn0 + 10*log10((double)RATE); /* Es/No in dB */ + /* Compute noise voltage. The 0.5 factor accounts for BPSK seeing + * only half the noise power, and the sqrt() converts power to + * voltage. + */ + gain = 1./sqrt(0.5/pow(10.,esn0/10.)); + + printf("nframes = %d framesize = %d ebn0 = %.2f dB gain = %g\n",trials,framebits,ebn0,Gain); + + for(tr=0;tr<trials;tr++){ + /* Encode a frame of random data */ + for(i=0;i<framebits+6;i++){ + int bit = (i < framebits) ? 
(random() & 1) : 0; + + sr = (sr << 1) | bit; + bits[i/8] = sr & 0xff; + symbols[2*i+0] = addnoise(parity(sr & V27POLYA),gain,Gain,127.5,255); + symbols[2*i+1] = addnoise(parity(sr & V27POLYB),gain,Gain,127.5,255); + } + /* Decode it and make sure we get the right answer */ + /* Initialize Viterbi decoder */ + init_viterbi27(vp,0); + + /* Decode block */ + update_viterbi27_blk(vp,symbols,framebits+6); + + /* Do Viterbi chainback */ + chainback_viterbi27(vp,data,framebits,0); + errcnt = 0; + for(i=0;i<framebits/8;i++){ + int e = Bitcnt[xordata[i] = data[i] ^ bits[i]]; + errcnt += e; + tot_errs += e; + } + if(errcnt != 0) + badframes++; + if(Verbose > 1 && errcnt != 0){ + printf("frame %d, %d errors: ",tr,errcnt); + for(i=0;i<framebits/8;i++){ + printf("%02x",xordata[i]); + } + printf("\n"); + } + if(Verbose) + printf("BER %lld/%lld (%10.3g) FER %d/%d (%10.3g)\r", + tot_errs,(long long)framebits*(tr+1),tot_errs/((double)framebits*(tr+1)), + badframes,tr+1,(double)badframes/(tr+1)); + fflush(stdout); + } + if(Verbose > 1) + printf("nframes = %d framesize = %d ebn0 = %.2f dB gain = %g\n",trials,framebits,ebn0,Gain); + else if(Verbose == 0) + printf("BER %lld/%lld (%.3g) FER %d/%d (%.3g)\n", + tot_errs,(long long)framebits*trials,tot_errs/((double)framebits*trials), + badframes,tr+1,(double)badframes/(tr+1)); + else + printf("\n"); + + } else { + /* Do time trials */ + memset(symbols,127,sizeof(symbols)); + printf("Starting time trials\n"); + getrusage(RUSAGE_SELF,&start); + for(tr=0;tr < trials;tr++){ + /* Initialize Viterbi decoder */ + init_viterbi27(vp,0); + + /* Decode block */ + update_viterbi27_blk(vp,symbols,framebits); + + /* Do Viterbi chainback */ + chainback_viterbi27(vp,data,framebits,0); + } + getrusage(RUSAGE_SELF,&finish); + extime = finish.ru_utime.tv_sec - start.ru_utime.tv_sec + 1e-6*(finish.ru_utime.tv_usec - start.ru_utime.tv_usec); + printf("Execution time for %d %d-bit frames: %.2f sec\n",trials, + framebits,extime); + printf("decoder speed: %g 
bits/s\n",trials*framebits/extime); + } + exit(0); +} diff --git a/vtest29.c b/vtest29.c new file mode 100644 index 0000000..8471b54 --- /dev/null +++ b/vtest29.c @@ -0,0 +1,185 @@ +/* Test viterbi decoder speeds */ +#include "config.h" +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <time.h> +#include <math.h> +#include <memory.h> +#include <sys/time.h> +#include <sys/resource.h> +#ifdef HAVE_GETOPT_H +#include <getopt.h> +#endif +#include "fec.h" + +#if HAVE_GETOPT_LONG +struct option Options[] = { + {"frame-length",1,NULL,'l'}, + {"frame-count",1,NULL,'n'}, + {"ebn0",1,NULL,'e'}, + {"gain",1,NULL,'g'}, + {"verbose",0,NULL,'v'}, + {"force-altivec",0,NULL,'a'}, + {"force-port",0,NULL,'p'}, + {"force-mmx",0,NULL,'m'}, + {"force-sse",0,NULL,'s'}, + {"force-sse2",0,NULL,'t'}, + {NULL}, +}; +#endif + +#define RATE (1./2.) +#define MAXBYTES 10000 + +double Gain = 32.0; +int Verbose = 0; + +int main(int argc,char *argv[]){ + int i,d,tr; + int sr=0,trials = 10000,errcnt,framebits=2048; + long long tot_errs=0; + unsigned char bits[MAXBYTES]; + unsigned char data[MAXBYTES]; + unsigned char xordata[MAXBYTES]; + unsigned char symbols[8*2*(MAXBYTES+8)]; + void *vp; + extern char *optarg; + struct rusage start,finish; + double extime; + double gain,esn0,ebn0; + time_t t; + int badframes=0; + + time(&t); + srandom(t); + ebn0 = -100; +#if HAVE_GETOPT_LONG + while((d = getopt_long(argc,argv,"l:n:te:g:vapmst",Options,NULL)) != EOF){ +#else + while((d = getopt(argc,argv,"l:n:te:g:vapmst")) != EOF){ +#endif + switch(d){ + case 'a': + Cpu_mode = ALTIVEC; + break; + case 'p': + Cpu_mode = PORT; + break; + case 'm': + Cpu_mode = MMX; + break; + case 's': + Cpu_mode = SSE; + break; + case 't': + Cpu_mode = SSE2; + break; + case 'l': + framebits = atoi(optarg); + break; + case 'n': + trials = atoi(optarg); + break; + case 'e': + ebn0 = atof(optarg); + break; + case 'g': + Gain = atof(optarg); + break; + case 'v': + Verbose++; + break; + } + } + if(framebits > 
8*MAXBYTES){ + fprintf(stderr,"Frame limited to %d bits\n",MAXBYTES*8); + framebits = MAXBYTES*8; + } + if((vp = create_viterbi29(framebits)) == NULL){ + printf("create_viterbi29 failed\n"); + exit(1); + } + if(ebn0 != -100){ + esn0 = ebn0 + 10*log10((double)RATE); /* Es/No in dB */ + /* Compute noise voltage. The 0.5 factor accounts for BPSK seeing + * only half the noise power, and the sqrt() converts power to + * voltage. + */ + gain = 1./sqrt(0.5/pow(10.,esn0/10.)); + + printf("nframes = %d framesize = %d ebn0 = %.2f dB gain = %g\n",trials,framebits,ebn0,Gain); + + for(tr=0;tr<trials;tr++){ + /* Encode a frame of random data */ + for(i=0;i<framebits+8;i++){ + int bit = (i < framebits) ? (random() & 1) : 0; + + sr = (sr << 1) | bit; + bits[i/8] = sr & 0xff; + symbols[2*i+0] = addnoise(parity(sr & V29POLYA),gain,Gain,127.5,255); + symbols[2*i+1] = addnoise(parity(sr & V29POLYB),gain,Gain,127.5,255); + } + /* Decode it and make sure we get the right answer */ + /* Initialize Viterbi decoder */ + init_viterbi29(vp,0); + + /* Decode block */ + update_viterbi29_blk(vp,symbols,framebits+8); + + /* Do Viterbi chainback */ + chainback_viterbi29(vp,data,framebits,0); + errcnt = 0; + for(i=0;i<framebits/8;i++){ + int e = Bitcnt[xordata[i] = data[i] ^ bits[i]]; + errcnt += e; + tot_errs += e; + } + if(errcnt != 0) + badframes++; + if(Verbose > 1 && errcnt != 0){ + printf("frame %d, %d errors: ",tr,errcnt); + for(i=0;i<framebits/8;i++){ + printf("%02x",xordata[i]); + } + printf("\n"); + } + if(Verbose) + printf("BER %lld/%lld (%10.3g) FER %d/%d (%10.3g)\r", + tot_errs,(long long)framebits*(tr+1),tot_errs/((double)framebits*(tr+1)), + badframes,tr+1,(double)badframes/(tr+1)); + fflush(stdout); + } + if(Verbose > 1) + printf("nframes = %d framesize = %d ebn0 = %.2f dB gain = %g\n",trials,framebits,ebn0,Gain); + else if(Verbose == 0) + printf("BER %lld/%lld (%.3g) FER %d/%d (%.3g)\n", + tot_errs,(long long)framebits*trials,tot_errs/((double)framebits*trials), + 
badframes,tr+1,(double)badframes/(tr+1)); + else + printf("\n"); + } else { + /* Do time trials */ + memset(symbols,127,sizeof(symbols)); + printf("Starting time trials\n"); + getrusage(RUSAGE_SELF,&start); + for(tr=0;tr < trials;tr++){ + /* Initialize Viterbi decoder */ + init_viterbi29(vp,0); + + /* Decode block */ + update_viterbi29_blk(vp,symbols,framebits); + + /* Do Viterbi chainback */ + chainback_viterbi29(vp,data,framebits,0); + } + getrusage(RUSAGE_SELF,&finish); + extime = finish.ru_utime.tv_sec - start.ru_utime.tv_sec + 1e-6*(finish.ru_utime.tv_usec - start.ru_utime.tv_usec); + printf("Execution time for %d %d-bit frames: %.2f sec\n",trials, + framebits,extime); + printf("decoder speed: %g bits/s\n",trials*framebits/extime); + } + exit(0); +} + + diff --git a/vtest39.c b/vtest39.c new file mode 100644 index 0000000..76723b2 --- /dev/null +++ b/vtest39.c @@ -0,0 +1,186 @@ +/* Test viterbi decoder speeds */ +#include "config.h" +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <time.h> +#include <math.h> +#include <memory.h> +#include <sys/time.h> +#include <sys/resource.h> +#ifdef HAVE_GETOPT_H +#include <getopt.h> +#endif +#include "fec.h" + +#if HAVE_GETOPT_LONG +struct option Options[] = { + {"frame-length",1,NULL,'l'}, + {"frame-count",1,NULL,'n'}, + {"ebn0",1,NULL,'e'}, + {"gain",1,NULL,'g'}, + {"verbose",0,NULL,'v'}, + {"force-altivec",0,NULL,'a'}, + {"force-port",0,NULL,'p'}, + {"force-mmx",0,NULL,'m'}, + {"force-sse",0,NULL,'s'}, + {"force-sse2",0,NULL,'t'}, + {NULL}, +}; +#endif + +#define RATE (1./3.) 
+#define MAXBYTES 10000
+
+double Gain = 32.0; /* Passed to addnoise() with every symbol; settable with -g */
+int Verbose = 0;    /* Incremented once per -v on the command line */
+
+int main(int argc,char *argv[]){ /* With -e: measure BER/FER at that Eb/No; without: time the decoder */
+  int i,d,tr;
+  int sr=0,trials = 10000,errcnt,framebits=2048;
+  long long tot_errs=0;
+  unsigned char bits[MAXBYTES+1]; /* +1: at the frame limit the 8 tail bits are stored in bits[MAXBYTES] */
+  unsigned char data[MAXBYTES];
+  unsigned char xordata[MAXBYTES];
+  unsigned char symbols[8*3*(MAXBYTES+8)];
+  void *vp;
+  extern char *optarg;
+  struct rusage start,finish;
+  double extime;
+  double gain,esn0,ebn0;
+  time_t t;
+  int badframes=0;
+
+  time(&t);
+  srandom(t);
+  ebn0 = -100; /* Sentinel: unchanged means no -e was given, so run time trials */
+#if HAVE_GETOPT_LONG
+  while((d = getopt_long(argc,argv,"l:n:e:g:vapmst",Options,NULL)) != EOF){
+#else
+  while((d = getopt(argc,argv,"l:n:e:g:vapmst")) != EOF){
+#endif
+    switch(d){
+    case 'a':
+      Cpu_mode = ALTIVEC;
+      break;
+    case 'p':
+      Cpu_mode = PORT;
+      break;
+    case 'm':
+      Cpu_mode = MMX;
+      break;
+    case 's':
+      Cpu_mode = SSE;
+      break;
+    case 't':
+      Cpu_mode = SSE2;
+      break;
+    case 'l':
+      framebits = atoi(optarg);
+      break;
+    case 'n':
+      trials = atoi(optarg);
+      break;
+    case 'e':
+      ebn0 = atof(optarg);
+      break;
+    case 'g':
+      Gain = atof(optarg);
+      break;
+    case 'v':
+      Verbose++;
+      break;
+    }
+  }
+  if(framebits > 8*MAXBYTES){
+    fprintf(stderr,"Frame limited to %d bits\n",MAXBYTES*8);
+    framebits = MAXBYTES*8;
+  }
+  if((vp = create_viterbi39(framebits)) == NULL){
+    printf("create_viterbi39 failed\n");
+    exit(1);
+  }
+  if(ebn0 != -100){
+    esn0 = ebn0 + 10*log10((double)RATE); /* Es/No in dB */
+    /* Compute noise voltage. The 0.5 factor accounts for BPSK seeing
+     * only half the noise power, and the sqrt() converts power to
+     * voltage.
+     */
+    gain = 1./sqrt(0.5/pow(10.,esn0/10.));
+
+    printf("nframes = %d framesize = %d ebn0 = %.2f dB gain = %g\n",trials,framebits,ebn0,Gain);
+
+    for(tr=0;tr<trials;tr++){
+      /* Encode a frame of random data */
+      for(i=0;i<framebits+8;i++){
+        int bit = (i < framebits) ? (random() & 1) : 0;
+
+        sr = (sr << 1) | bit;
+        bits[i/8] = sr & 0xff; /* Each byte is rewritten until its 8th bit arrives */
+        symbols[3*i+0] = addnoise(parity(sr & V39POLYA),gain,Gain,127.5,255);
+        symbols[3*i+1] = addnoise(parity(sr & V39POLYB),gain,Gain,127.5,255);
+        symbols[3*i+2] = addnoise(parity(sr & V39POLYC),gain,Gain,127.5,255);
+      }
+      /* Decode it and make sure we get the right answer */
+      /* Initialize Viterbi decoder */
+      init_viterbi39(vp,0);
+
+      /* Decode block */
+      update_viterbi39_blk(vp,symbols,framebits+8);
+
+      /* Do Viterbi chainback */
+      chainback_viterbi39(vp,data,framebits,0);
+      errcnt = 0;
+      for(i=0;i<framebits/8;i++){
+        int e = Bitcnt[xordata[i] = data[i] ^ bits[i]]; /* Set bits in the XOR are bit errors */
+        errcnt += e;
+        tot_errs += e;
+      }
+      if(errcnt != 0)
+        badframes++;
+      if(Verbose > 1 && errcnt != 0){
+        printf("frame %d, %d errors: ",tr,errcnt);
+        for(i=0;i<framebits/8;i++){
+          printf("%02x",xordata[i]);
+        }
+        printf("\n");
+      }
+      if(Verbose)
+        printf("BER %lld/%lld (%10.3g) FER %d/%d (%10.3g)\r",
+               tot_errs,(long long)framebits*(tr+1),tot_errs/((double)framebits*(tr+1)),
+               badframes,tr+1,(double)badframes/(tr+1));
+      fflush(stdout);
+    }
+    if(Verbose > 1)
+      printf("nframes = %d framesize = %d ebn0 = %.2f dB gain = %g\n",trials,framebits,ebn0,Gain);
+    else if(Verbose == 0)
+      printf("BER %lld/%lld (%.3g) FER %d/%d (%.3g)\n",
+             tot_errs,(long long)framebits*trials,tot_errs/((double)framebits*trials),
+             badframes,trials,(double)badframes/trials); /* tr == trials after the loop; tr+1 overcounted */
+    else
+      printf("\n");
+  } else {
+    /* Do time trials */
+    memset(symbols,127,sizeof(symbols));
+    printf("Starting time trials\n");
+    getrusage(RUSAGE_SELF,&start);
+    for(tr=0;tr < trials;tr++){
+      /* Initialize Viterbi decoder */
+      init_viterbi39(vp,0);
+
+      /* Decode block */
+      update_viterbi39_blk(vp,symbols,framebits+8); /* Include the tail, as the BER path and vtest615 do */
+
+      /* Do Viterbi chainback */
+      chainback_viterbi39(vp,data,framebits,0);
+    }
+    getrusage(RUSAGE_SELF,&finish);
+    extime = finish.ru_utime.tv_sec - start.ru_utime.tv_sec + 1e-6*(finish.ru_utime.tv_usec - start.ru_utime.tv_usec);
+    printf("Execution time for %d %d-bit frames: %.2f sec\n",trials,
+           framebits,extime);
+    printf("decoder speed: %g bits/s\n",(double)trials*framebits/extime); /* double first: int product can overflow */
+  }
+  exit(0);
+}
+
+
diff --git a/vtest615.c b/vtest615.c
new file mode 100644
index 0000000..4bd8c4f
--- /dev/null
+++ b/vtest615.c
@@ -0,0 +1,191 @@
+/* Test viterbi decoder speeds and error rates (rate 1/6 code, viterbi615 API) */
+#include "config.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <time.h>
+#include <math.h>
+#include <memory.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#ifdef HAVE_GETOPT_H
+#include <getopt.h>
+#endif
+#include "fec.h"
+
+#if HAVE_GETOPT_LONG
+struct option Options[] = { /* Long names for the short options parsed in main() */
+  {"frame-length",1,NULL,'l'},
+  {"frame-count",1,NULL,'n'},
+  {"ebn0",1,NULL,'e'},
+  {"gain",1,NULL,'g'},
+  {"verbose",0,NULL,'v'},
+  {"force-altivec",0,NULL,'a'},
+  {"force-port",0,NULL,'p'},
+  {"force-mmx",0,NULL,'m'},
+  {"force-sse",0,NULL,'s'},
+  {"force-sse2",0,NULL,'t'},
+  {NULL},
+};
+#endif
+
+#define RATE (1./6.)   /* Code rate: six symbols are emitted per data bit */
+#define MAXBYTES 10000 /* Largest frame, in bytes, the fixed buffers can hold */
+#define OFFSET (127.5) /* Fourth argument to addnoise() */
+#define CLIP 255       /* Fifth argument to addnoise() */
+
+double Gain = 24.0; /* Passed to addnoise() with every symbol; settable with -g */
+int Verbose = 0;    /* Incremented once per -v on the command line */
+
+int main(int argc,char *argv[]){ /* With -e: measure BER/FER at that Eb/No; without: time the decoder */
+  int i,d,tr;
+  int sr=0,trials = 10,errcnt,framebits=2048;
+  long long tot_errs=0; /* long long, as in vtest39: long runs overflow int */
+  unsigned char bits[MAXBYTES+2]; /* +2: at the frame limit the 14 tail bits reach bits[MAXBYTES+1] */
+  unsigned char data[MAXBYTES];
+  unsigned char xordata[MAXBYTES];
+  unsigned char symbols[8*6*(MAXBYTES+14)];
+  void *vp;
+  extern char *optarg;
+  struct rusage start,finish;
+  double extime;
+  double gain,esn0,ebn0;
+  time_t t;
+  int badframes=0;
+
+  time(&t);
+  srandom(t);
+  ebn0 = -100; /* Sentinel: unchanged means no -e was given, so run time trials */
+#if HAVE_GETOPT_LONG
+  while((d = getopt_long(argc,argv,"l:n:e:g:vapmst",Options,NULL)) != EOF){
+#else
+  while((d = getopt(argc,argv,"l:n:e:g:vapmst")) != EOF){
+#endif
+    switch(d){
+    case 'a':
+      Cpu_mode = ALTIVEC;
+      break;
+    case 'p':
+      Cpu_mode = PORT;
+      break;
+    case 'm':
+      Cpu_mode = MMX;
+      break;
+    case 's':
+      Cpu_mode = SSE;
+      break;
+    case 't':
+      Cpu_mode = SSE2;
+      break;
+    case 'l':
+      framebits = atoi(optarg);
+      break;
+    case 'n':
+      trials = atoi(optarg);
+      break;
+    case 'e':
+      ebn0 = atof(optarg);
+      break;
+    case 'g':
+      Gain = atof(optarg);
+      break;
+    case 'v':
+      Verbose++;
+      break;
+    }
+  }
+  if(framebits > 8*MAXBYTES){
+    fprintf(stderr,"Frame limited to %d bits\n",MAXBYTES*8);
+    framebits = MAXBYTES*8;
+  }
+  if((vp = create_viterbi615(framebits)) == NULL){
+    printf("create_viterbi615 failed\n");
+    exit(1);
+  }
+  if(ebn0 != -100){
+    esn0 = ebn0 + 10*log10((double)RATE); /* Es/No in dB */
+    /* Compute noise voltage. The 0.5 factor accounts for BPSK seeing
+     * only half the noise power, and the sqrt() converts power to
+     * voltage.
+     */
+    gain = 1./sqrt(0.5/pow(10.,esn0/10.));
+
+    printf("nframes = %d framesize = %d ebn0 = %.2f dB gain = %g\n",trials,framebits,ebn0,Gain);
+
+    for(tr=0;tr<trials;tr++){
+      /* Encode a frame of random data */
+      for(i=0;i<framebits+14;i++){
+        int bit = (i < framebits) ? (random() & 1) : 0;
+
+        sr = (sr << 1) | bit;
+        bits[i/8] = sr & 0xff; /* Each byte is rewritten until its 8th bit arrives */
+        symbols[6*i+0] = addnoise(parity(sr & V615POLYA),gain,Gain,OFFSET,CLIP);
+        symbols[6*i+1] = addnoise(parity(sr & V615POLYB),gain,Gain,OFFSET,CLIP);
+        symbols[6*i+2] = addnoise(parity(sr & V615POLYC),gain,Gain,OFFSET,CLIP);
+        symbols[6*i+3] = addnoise(parity(sr & V615POLYD),gain,Gain,OFFSET,CLIP);
+        symbols[6*i+4] = addnoise(parity(sr & V615POLYE),gain,Gain,OFFSET,CLIP);
+        symbols[6*i+5] = addnoise(parity(sr & V615POLYF),gain,Gain,OFFSET,CLIP);
+      }
+      /* Decode it and make sure we get the right answer */
+      /* Initialize Viterbi decoder */
+      init_viterbi615(vp,0);
+
+      /* Decode block */
+      update_viterbi615_blk(vp,symbols,framebits+14);
+
+      /* Do Viterbi chainback */
+      chainback_viterbi615(vp,data,framebits,0);
+      errcnt = 0;
+      for(i=0;i<framebits/8;i++){
+        int e = Bitcnt[xordata[i] = data[i] ^ bits[i]]; /* Set bits in the XOR are bit errors */
+        errcnt += e;
+        tot_errs += e;
+      }
+      if(errcnt != 0)
+        badframes++;
+      if(Verbose > 1 && errcnt != 0){
+        printf("frame %d, %d errors: ",tr,errcnt);
+        for(i=0;i<framebits/8;i++){
+          printf("%02x",xordata[i]);
+        }
+        printf("\n");
+      }
+      if(Verbose)
+        printf("BER %lld/%lld (%10.3g) FER %d/%d (%10.3g)\r",
+               tot_errs,(long long)framebits*(tr+1),tot_errs/((double)framebits*(tr+1)),
+               badframes,(tr+1),(double)badframes/(tr+1));
+      fflush(stdout);
+
+    }
+
+    if(Verbose > 1)
+      printf("nframes = %d framesize = %d ebn0 = %.2f dB gain = %g\n",trials,framebits,ebn0,Gain);
+    else if(Verbose == 0)
+      printf("BER %lld/%lld (%.3g) FER %d/%d (%.3g)\n",
+             tot_errs,(long long)framebits*trials,tot_errs/((double)framebits*trials),
+             badframes,trials,(double)badframes/trials); /* tr == trials after the loop; tr+1 overcounted */
+    else
+      printf("\n");
+  } else {
+    /* Do time trials */
+    memset(symbols,127,sizeof(symbols));
+    printf("Starting time trials\n");
+    getrusage(RUSAGE_SELF,&start);
+    for(tr=0;tr < trials;tr++){
+      /* Initialize Viterbi decoder */
+      init_viterbi615(vp,0);
+
+      /* Decode block */
+      update_viterbi615_blk(vp,symbols,framebits+14);
+
+      /* Do Viterbi chainback */
+      chainback_viterbi615(vp,data,framebits,0);
+    }
+    getrusage(RUSAGE_SELF,&finish);
+    extime = finish.ru_utime.tv_sec - start.ru_utime.tv_sec + 1e-6*(finish.ru_utime.tv_usec - start.ru_utime.tv_usec);
+    printf("Execution time for %d %d-bit frames: %.2f sec\n",trials,
+           framebits,extime);
+    printf("decoder speed: %g bits/s\n",(double)trials*framebits/extime); /* double first: int product can overflow */
+  }
+  exit(0);
+}
|